From 687e921994f5eca185acb93f14f29f805c8996bf Mon Sep 17 00:00:00 2001 From: Jonathan Kimmitt Date: Tue, 23 Apr 2024 10:42:46 +0100 Subject: [PATCH] Test correct and incorrect type parameters from CVA6 --- .../common/local/util/instr_tracer.sv | 223 ++ .../common/local/util/instr_tracer_if.sv | 67 + test/type_param/common/local/util/sram.sv | 107 + .../common/local/util/tc_sram_wrapper.sv | 60 + test/type_param/core/acc_dispatcher.sv | 423 ++ .../core/acc_dispatcher_corrected.sv | 423 ++ test/type_param/core/alu.sv | 359 ++ test/type_param/core/amo_buffer.sv | 82 + test/type_param/core/ariane_regfile_ff.sv | 83 + test/type_param/core/ariane_regfile_fpga.sv | 125 + test/type_param/core/axi_shim.sv | 310 ++ test/type_param/core/branch_unit.sv | 106 + .../core/cache_subsystem/axi_adapter.sv | 520 +++ .../core/cache_subsystem/cache_ctrl.sv | 475 +++ .../cva6_hpdcache_if_adapter.sv | 200 + .../cva6_hpdcache_subsystem.sv | 609 +++ .../cva6_hpdcache_subsystem_axi_arbiter.sv | 586 +++ .../core/cache_subsystem/cva6_icache.sv | 584 +++ .../cva6_icache_axi_wrapper.sv | 202 + .../hpdcache/rtl/include/hpdcache_typedef.svh | 62 + .../rtl/src/common/hpdcache_data_downsize.sv | 181 + .../rtl/src/common/hpdcache_data_upsize.sv | 181 + .../hpdcache/rtl/src/common/hpdcache_demux.sv | 69 + .../rtl/src/common/hpdcache_fifo_reg.sv | 167 + .../hpdcache/rtl/src/common/hpdcache_fxarb.sv | 85 + .../hpdcache/rtl/src/common/hpdcache_mux.sv | 79 + .../src/common/hpdcache_prio_1hot_encoder.sv | 43 + .../hpdcache_regbank_wbyteenable_1rw.sv | 63 + .../src/common/hpdcache_regbank_wmask_1rw.sv | 61 + .../hpdcache/rtl/src/common/hpdcache_rrarb.sv | 121 + .../hpdcache/rtl/src/common/hpdcache_sram.sv | 56 + .../src/common/hpdcache_sram_wbyteenable.sv | 58 + .../rtl/src/common/hpdcache_sram_wmask.sv | 58 + .../rtl/src/common/hpdcache_sync_buffer.sv | 89 + .../common/macros/behav/hpdcache_sram_1rw.sv | 60 + .../behav/hpdcache_sram_wbyteenable_1rw.sv | 63 + .../macros/behav/hpdcache_sram_wmask_1rw.sv | 
61 + .../hpdcache/rtl/src/hpdcache.sv | 658 ++++ .../hpdcache/rtl/src/hpdcache_amo.sv | 67 + .../hpdcache/rtl/src/hpdcache_cmo.sv | 250 ++ .../hpdcache/rtl/src/hpdcache_core_arbiter.sv | 171 + .../hpdcache/rtl/src/hpdcache_ctrl.sv | 760 ++++ .../hpdcache/rtl/src/hpdcache_ctrl_pe.sv | 620 +++ .../hpdcache/rtl/src/hpdcache_memarray.sv | 120 + .../hpdcache/rtl/src/hpdcache_memctrl.sv | 656 ++++ .../hpdcache/rtl/src/hpdcache_miss_handler.sv | 659 ++++ .../hpdcache/rtl/src/hpdcache_mshr.sv | 385 ++ .../rtl/src/hpdcache_mshr_to_cache_set.sv | 105 + .../hpdcache/rtl/src/hpdcache_pkg.sv | 623 +++ .../hpdcache/rtl/src/hpdcache_plru.sv | 138 + .../hpdcache/rtl/src/hpdcache_rtab.sv | 666 ++++ .../hpdcache/rtl/src/hpdcache_uncached.sv | 965 +++++ .../hpdcache/rtl/src/hpdcache_wbuf.sv | 678 ++++ .../hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv | 228 ++ .../rtl/src/hwpf_stride/hwpf_stride.sv | 374 ++ .../rtl/src/hwpf_stride/hwpf_stride_arb.sv | 117 + .../rtl/src/hwpf_stride/hwpf_stride_pkg.sv | 68 + .../src/hwpf_stride/hwpf_stride_snooper.sv | 38 + .../src/hwpf_stride/hwpf_stride_wrapper.sv | 265 ++ .../utils/hpdcache_mem_req_read_arbiter.sv | 103 + .../utils/hpdcache_mem_req_write_arbiter.sv | 193 + .../rtl/src/utils/hpdcache_mem_resp_demux.sv | 108 + .../rtl/src/utils/hpdcache_mem_to_axi_read.sv | 95 + .../src/utils/hpdcache_mem_to_axi_write.sv | 148 + .../core/cache_subsystem/miss_handler.sv | 826 ++++ .../cache_subsystem/std_cache_subsystem.sv | 315 ++ .../core/cache_subsystem/std_nbdcache.sv | 279 ++ .../core/cache_subsystem/tag_cmp.sv | 106 + .../core/cache_subsystem/wt_axi_adapter.sv | 712 ++++ .../cache_subsystem/wt_cache_subsystem.sv | 233 ++ .../core/cache_subsystem/wt_dcache.sv | 360 ++ .../core/cache_subsystem/wt_dcache_ctrl.sv | 299 ++ .../core/cache_subsystem/wt_dcache_mem.sv | 428 +++ .../cache_subsystem/wt_dcache_missunit.sv | 645 ++++ .../core/cache_subsystem/wt_dcache_wbuffer.sv | 635 +++ test/type_param/core/commit_stage.sv | 298 ++ 
test/type_param/core/compressed_decoder.sv | 935 +++++ test/type_param/core/controller.sv | 194 + test/type_param/core/csr_buffer.sv | 76 + test/type_param/core/csr_regfile.sv | 1646 ++++++++ test/type_param/core/cva6.sv | 1401 +++++++ .../cva6_accel_first_pass_decoder_stub.sv | 31 + test/type_param/core/cva6_rvfi.sv | 294 ++ test/type_param/core/cva6_rvfi_probes.sv | 81 + .../cvxif_example_coprocessor.sv | 155 + .../cvxif_example/include/cvxif_instr_pkg.sv | 47 + .../core/cvxif_example/instr_decoder.sv | 49 + test/type_param/core/cvxif_fu.sv | 112 + test/type_param/core/decoder.sv | 1397 +++++++ test/type_param/core/ex_stage.sv | 413 ++ test/type_param/core/fpu_wrap.sv | 568 +++ test/type_param/core/frontend/bht.sv | 215 ++ test/type_param/core/frontend/btb.sv | 185 + test/type_param/core/frontend/frontend.sv | 516 +++ test/type_param/core/frontend/instr_queue.sv | 459 +++ test/type_param/core/frontend/instr_scan.sv | 83 + test/type_param/core/frontend/ras.sv | 71 + test/type_param/core/id_stage.sv | 143 + test/type_param/core/include/acc_pkg.sv | 47 + test/type_param/core/include/ariane_pkg.sv | 994 +++++ test/type_param/core/include/config_pkg.sv | 181 + .../include/cv64a6_imafdc_sv39_config_pkg.sv | 150 + .../cva6_hpdcache_default_config_pkg.sv | 123 + test/type_param/core/include/cvxif_pkg.sv | 110 + .../core/include/instr_tracer_pkg.sv | 202 + test/type_param/core/include/riscv_pkg.sv | 851 ++++ test/type_param/core/include/std_cache_pkg.sv | 98 + test/type_param/core/include/wt_cache_pkg.sv | 344 ++ test/type_param/core/instr_realign.sv | 361 ++ test/type_param/core/issue_read_operands.sv | 604 +++ test/type_param/core/issue_stage.sv | 199 + test/type_param/core/load_store_unit.sv | 493 +++ test/type_param/core/load_unit.sv | 534 +++ test/type_param/core/lsu_bypass.sv | 122 + .../type_param/core/mmu_sv32/cva6_mmu_sv32.sv | 565 +++ .../type_param/core/mmu_sv32/cva6_ptw_sv32.sv | 400 ++ .../core/mmu_sv32/cva6_shared_tlb_sv32.sv | 367 ++ 
.../type_param/core/mmu_sv32/cva6_tlb_sv32.sv | 281 ++ test/type_param/core/mmu_sv39/mmu.sv | 519 +++ test/type_param/core/mmu_sv39/ptw.sv | 409 ++ test/type_param/core/mmu_sv39/tlb.sv | 290 ++ test/type_param/core/mult.sv | 149 + test/type_param/core/multiplier.sv | 158 + test/type_param/core/perf_counters.sv | 226 ++ test/type_param/core/pmp/src/pmp.sv | 94 + test/type_param/core/pmp/src/pmp_entry.sv | 125 + test/type_param/core/scoreboard.sv | 452 +++ test/type_param/core/serdiv.sv | 269 ++ test/type_param/core/store_buffer.sv | 291 ++ test/type_param/core/store_unit.sv | 300 ++ .../corev_apu/axi_mem_if/src/axi2mem.sv | 301 ++ test/type_param/corev_apu/bootrom/bootrom.sv | 225 ++ .../corev_apu/clint/axi_lite_interface.sv | 170 + test/type_param/corev_apu/clint/clint.sv | 294 ++ .../corev_apu/fpga/src/apb_timer/apb_timer.sv | 88 + .../corev_apu/fpga/src/apb_timer/timer.sv | 145 + .../corev_apu/fpga/src/axi2apb/src/axi2apb.sv | 449 +++ .../fpga/src/axi2apb/src/axi2apb_64_32.sv | 745 ++++ .../fpga/src/axi_slice/src/axi_ar_buffer.sv | 74 + .../fpga/src/axi_slice/src/axi_aw_buffer.sv | 74 + .../fpga/src/axi_slice/src/axi_b_buffer.sv | 54 + .../fpga/src/axi_slice/src/axi_r_buffer.sv | 60 + .../src/axi_slice/src/axi_single_slice.sv | 51 + .../fpga/src/axi_slice/src/axi_slice.sv | 311 ++ .../fpga/src/axi_slice/src/axi_slice_wrap.sv | 115 + .../fpga/src/axi_slice/src/axi_w_buffer.sv | 55 + .../include/register_interface/assign.svh | 46 + .../include/register_interface/typedef.svh | 38 + .../register_interface/src/apb_to_reg.sv | 39 + .../register_interface/src/reg_intf.sv | 43 + .../riscv-dbg/debug_rom/debug_rom.sv | 66 + .../corev_apu/riscv-dbg/src/dm_csrs.sv | 634 +++ .../corev_apu/riscv-dbg/src/dm_mem.sv | 523 +++ .../corev_apu/riscv-dbg/src/dm_pkg.sv | 436 +++ .../corev_apu/riscv-dbg/src/dm_sba.sv | 170 + .../corev_apu/riscv-dbg/src/dm_top.sv | 218 ++ .../corev_apu/riscv-dbg/src/dmi_cdc.sv | 73 + .../corev_apu/riscv-dbg/src/dmi_jtag.sv | 271 ++ 
.../corev_apu/riscv-dbg/src/dmi_jtag_tap.sv | 349 ++ .../corev_apu/rv_plic/rtl/plic_regmap.sv | 357 ++ .../corev_apu/rv_plic/rtl/plic_top.sv | 157 + .../corev_apu/rv_plic/rtl/rv_plic_gateway.sv | 60 + .../corev_apu/rv_plic/rtl/rv_plic_target.sv | 125 + test/type_param/corev_apu/src/ariane.sv | 86 + .../src/axi_riscv_atomics/src/axi_res_tbl.sv | 93 + .../axi_riscv_atomics/src/axi_riscv_amos.sv | 1004 +++++ .../src/axi_riscv_amos_alu.sv | 78 + .../src/axi_riscv_atomics.sv | 400 ++ .../src/axi_riscv_atomics_wrap.sv | 151 + .../axi_riscv_atomics/src/axi_riscv_lrsc.sv | 509 +++ .../src/axi_riscv_lrsc_wrap.sv | 148 + .../type_param/corev_apu/tb/ariane_axi_pkg.sv | 109 + .../corev_apu/tb/ariane_axi_soc_pkg.sv | 102 + .../corev_apu/tb/ariane_peripherals.sv | 619 +++ .../type_param/corev_apu/tb/ariane_soc_pkg.sv | 68 + .../corev_apu/tb/ariane_testharness.sv | 807 ++++ test/type_param/corev_apu/tb/axi_intf.sv | 311 ++ .../corev_apu/tb/common/mock_uart.sv | 120 + test/type_param/corev_apu/tb/common/uart.sv | 104 + test/type_param/corev_apu/tb/rvfi_tracer.sv | 134 + test/type_param/sv2v.sh | 249 ++ test/type_param/sv2v_corrected.sh | 249 ++ .../openhwgroup/cvfpu/src/fpnew_cast_multi.sv | 794 ++++ .../openhwgroup/cvfpu/src/fpnew_classifier.sv | 74 + .../cvfpu/src/fpnew_divsqrt_multi.sv | 366 ++ .../vendor/openhwgroup/cvfpu/src/fpnew_fma.sv | 690 ++++ .../openhwgroup/cvfpu/src/fpnew_fma_multi.sv | 839 ++++ .../openhwgroup/cvfpu/src/fpnew_noncomp.sv | 415 ++ .../cvfpu/src/fpnew_opgroup_block.sv | 244 ++ .../cvfpu/src/fpnew_opgroup_fmt_slice.sv | 292 ++ .../cvfpu/src/fpnew_opgroup_multifmt_slice.sv | 449 +++ .../vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv | 495 +++ .../openhwgroup/cvfpu/src/fpnew_rounding.sv | 76 + .../vendor/openhwgroup/cvfpu/src/fpnew_top.sv | 185 + .../src/fpu_div_sqrt_mvp/hdl/control_mvp.sv | 3413 +++++++++++++++++ .../fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv | 83 + .../fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv | 180 + .../hdl/iteration_div_sqrt_mvp.sv | 61 + 
.../fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv | 470 +++ .../src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv | 104 + .../fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv | 425 ++ .../pulp-platform/axi/include/axi/assign.svh | 541 +++ .../pulp-platform/axi/include/axi/typedef.svh | 211 + .../pulp-platform/axi/src/axi_atop_filter.sv | 444 +++ .../vendor/pulp-platform/axi/src/axi_cut.sv | 265 ++ .../pulp-platform/axi/src/axi_delayer.sv | 198 + .../vendor/pulp-platform/axi/src/axi_demux.sv | 786 ++++ .../pulp-platform/axi/src/axi_err_slv.sv | 261 ++ .../pulp-platform/axi/src/axi_id_prepend.sv | 161 + .../vendor/pulp-platform/axi/src/axi_join.sv | 37 + .../pulp-platform/axi/src/axi_multicut.sv | 237 ++ .../vendor/pulp-platform/axi/src/axi_mux.sv | 522 +++ .../vendor/pulp-platform/axi/src/axi_pkg.sv | 423 ++ .../pulp-platform/axi/src/axi_to_axi_lite.sv | 323 ++ .../vendor/pulp-platform/axi/src/axi_xbar.sv | 324 ++ .../include/common_cells/registers.svh | 221 ++ .../common_cells/src/addr_decode.sv | 161 + .../common_cells/src/cdc_2phase.sv | 175 + .../common_cells/src/cf_math_pkg.sv | 61 + .../pulp-platform/common_cells/src/counter.sv | 43 + .../common_cells/src/delta_counter.sv | 74 + .../common_cells/src/deprecated/fifo_v1.sv | 57 + .../common_cells/src/deprecated/fifo_v2.sv | 79 + .../common_cells/src/exp_backoff.sv | 98 + .../pulp-platform/common_cells/src/fifo_v3.sv | 191 + .../pulp-platform/common_cells/src/lfsr.sv | 315 ++ .../common_cells/src/lfsr_16bit.sv | 68 + .../common_cells/src/lfsr_8bit.sv | 61 + .../pulp-platform/common_cells/src/lzc.sv | 112 + .../common_cells/src/popcount.sv | 60 + .../common_cells/src/rr_arb_tree.sv | 348 ++ .../pulp-platform/common_cells/src/rstgen.sv | 30 + .../common_cells/src/rstgen_bypass.sv | 57 + .../common_cells/src/shift_reg.sv | 53 + .../common_cells/src/spill_register.sv | 46 + .../src/spill_register_flushable.sv | 105 + .../common_cells/src/stream_arbiter.sv | 49 + .../src/stream_arbiter_flushable.sv | 82 + 
.../common_cells/src/stream_delay.sv | 132 + .../common_cells/src/stream_demux.sv | 36 + .../common_cells/src/stream_mux.sv | 46 + .../common_cells/src/stream_register.sv | 57 + .../pulp-platform/common_cells/src/unread.sv | 21 + .../fpga-support/rtl/AsyncDpRam.sv | 62 + .../fpga-support/rtl/AsyncThreePortRam.sv | 66 + .../fpga-support/rtl/SyncDpRam.sv | 182 + .../src/deprecated/cluster_clk_cells.sv | 94 + .../src/deprecated/pulp_clk_cells.sv | 107 + .../tech_cells_generic/src/rtl/tc_clk.sv | 120 + .../tech_cells_generic/src/rtl/tc_sram.sv | 245 ++ 250 files changed, 71133 insertions(+) create mode 100644 test/type_param/common/local/util/instr_tracer.sv create mode 100644 test/type_param/common/local/util/instr_tracer_if.sv create mode 100644 test/type_param/common/local/util/sram.sv create mode 100644 test/type_param/common/local/util/tc_sram_wrapper.sv create mode 100644 test/type_param/core/acc_dispatcher.sv create mode 100644 test/type_param/core/acc_dispatcher_corrected.sv create mode 100644 test/type_param/core/alu.sv create mode 100644 test/type_param/core/amo_buffer.sv create mode 100644 test/type_param/core/ariane_regfile_ff.sv create mode 100644 test/type_param/core/ariane_regfile_fpga.sv create mode 100644 test/type_param/core/axi_shim.sv create mode 100644 test/type_param/core/branch_unit.sv create mode 100644 test/type_param/core/cache_subsystem/axi_adapter.sv create mode 100644 test/type_param/core/cache_subsystem/cache_ctrl.sv create mode 100644 test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv create mode 100644 test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv create mode 100644 test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv create mode 100644 test/type_param/core/cache_subsystem/cva6_icache.sv create mode 100644 test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh create mode 100644 
test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv create mode 100644 
test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv create mode 100755 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv create mode 100755 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv create mode 100755 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv create mode 100755 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv create mode 100644 
test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv create mode 100644 test/type_param/core/cache_subsystem/miss_handler.sv create mode 100644 test/type_param/core/cache_subsystem/std_cache_subsystem.sv create mode 100644 test/type_param/core/cache_subsystem/std_nbdcache.sv create mode 100644 test/type_param/core/cache_subsystem/tag_cmp.sv create mode 100644 test/type_param/core/cache_subsystem/wt_axi_adapter.sv create mode 100644 test/type_param/core/cache_subsystem/wt_cache_subsystem.sv create mode 100644 test/type_param/core/cache_subsystem/wt_dcache.sv create mode 100644 test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv create mode 100644 test/type_param/core/cache_subsystem/wt_dcache_mem.sv create mode 100644 test/type_param/core/cache_subsystem/wt_dcache_missunit.sv create mode 100644 test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv create mode 100644 test/type_param/core/commit_stage.sv create mode 100644 test/type_param/core/compressed_decoder.sv create mode 100644 test/type_param/core/controller.sv create mode 100644 test/type_param/core/csr_buffer.sv create mode 100644 test/type_param/core/csr_regfile.sv create mode 100644 test/type_param/core/cva6.sv create mode 100644 test/type_param/core/cva6_accel_first_pass_decoder_stub.sv create mode 100644 test/type_param/core/cva6_rvfi.sv create mode 100644 test/type_param/core/cva6_rvfi_probes.sv create mode 100644 test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv create mode 100644 
test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv create mode 100644 test/type_param/core/cvxif_example/instr_decoder.sv create mode 100644 test/type_param/core/cvxif_fu.sv create mode 100644 test/type_param/core/decoder.sv create mode 100644 test/type_param/core/ex_stage.sv create mode 100644 test/type_param/core/fpu_wrap.sv create mode 100644 test/type_param/core/frontend/bht.sv create mode 100644 test/type_param/core/frontend/btb.sv create mode 100644 test/type_param/core/frontend/frontend.sv create mode 100644 test/type_param/core/frontend/instr_queue.sv create mode 100644 test/type_param/core/frontend/instr_scan.sv create mode 100644 test/type_param/core/frontend/ras.sv create mode 100644 test/type_param/core/id_stage.sv create mode 100644 test/type_param/core/include/acc_pkg.sv create mode 100644 test/type_param/core/include/ariane_pkg.sv create mode 100644 test/type_param/core/include/config_pkg.sv create mode 100644 test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv create mode 100644 test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv create mode 100644 test/type_param/core/include/cvxif_pkg.sv create mode 100644 test/type_param/core/include/instr_tracer_pkg.sv create mode 100644 test/type_param/core/include/riscv_pkg.sv create mode 100644 test/type_param/core/include/std_cache_pkg.sv create mode 100644 test/type_param/core/include/wt_cache_pkg.sv create mode 100644 test/type_param/core/instr_realign.sv create mode 100644 test/type_param/core/issue_read_operands.sv create mode 100644 test/type_param/core/issue_stage.sv create mode 100644 test/type_param/core/load_store_unit.sv create mode 100644 test/type_param/core/load_unit.sv create mode 100644 test/type_param/core/lsu_bypass.sv create mode 100644 test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv create mode 100644 test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv create mode 100644 test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv create mode 100644 
test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv create mode 100644 test/type_param/core/mmu_sv39/mmu.sv create mode 100644 test/type_param/core/mmu_sv39/ptw.sv create mode 100644 test/type_param/core/mmu_sv39/tlb.sv create mode 100644 test/type_param/core/mult.sv create mode 100644 test/type_param/core/multiplier.sv create mode 100644 test/type_param/core/perf_counters.sv create mode 100644 test/type_param/core/pmp/src/pmp.sv create mode 100644 test/type_param/core/pmp/src/pmp_entry.sv create mode 100644 test/type_param/core/scoreboard.sv create mode 100644 test/type_param/core/serdiv.sv create mode 100644 test/type_param/core/store_buffer.sv create mode 100644 test/type_param/core/store_unit.sv create mode 100644 test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv create mode 100644 test/type_param/corev_apu/bootrom/bootrom.sv create mode 100644 test/type_param/corev_apu/clint/axi_lite_interface.sv create mode 100644 test/type_param/corev_apu/clint/clint.sv create mode 100644 test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv create mode 100644 test/type_param/corev_apu/fpga/src/apb_timer/timer.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv create mode 100644 
test/type_param/corev_apu/register_interface/include/register_interface/assign.svh create mode 100644 test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh create mode 100644 test/type_param/corev_apu/register_interface/src/apb_to_reg.sv create mode 100644 test/type_param/corev_apu/register_interface/src/reg_intf.sv create mode 100644 test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv create mode 100755 test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dm_top.sv create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv create mode 100644 test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv create mode 100644 test/type_param/corev_apu/rv_plic/rtl/plic_top.sv create mode 100644 test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv create mode 100644 test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv create mode 100644 test/type_param/corev_apu/src/ariane.sv create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv create mode 100644 
test/type_param/corev_apu/tb/ariane_axi_pkg.sv create mode 100644 test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv create mode 100644 test/type_param/corev_apu/tb/ariane_peripherals.sv create mode 100644 test/type_param/corev_apu/tb/ariane_soc_pkg.sv create mode 100644 test/type_param/corev_apu/tb/ariane_testharness.sv create mode 100644 test/type_param/corev_apu/tb/axi_intf.sv create mode 100644 test/type_param/corev_apu/tb/common/mock_uart.sv create mode 100644 test/type_param/corev_apu/tb/common/uart.sv create mode 100644 test/type_param/corev_apu/tb/rvfi_tracer.sv create mode 100755 test/type_param/sv2v.sh create mode 100755 test/type_param/sv2v_corrected.sh create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv create 
mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh create mode 100644 test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_join.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/counter.sv create mode 100644 
test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv create mode 100644 
test/type_param/vendor/pulp-platform/common_cells/src/unread.sv create mode 100644 test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv create mode 100644 test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv create mode 100644 test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv create mode 100644 test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv create mode 100644 test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv create mode 100644 test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv create mode 100644 test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv diff --git a/test/type_param/common/local/util/instr_tracer.sv b/test/type_param/common/local/util/instr_tracer.sv new file mode 100644 index 0000000..17c11e5 --- /dev/null +++ b/test/type_param/common/local/util/instr_tracer.sv @@ -0,0 +1,223 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Main Class (samples tracer_if.pck every clock and logs committed instructions and exceptions to a per-hart trace file)
+
+`ifndef VERILATOR
+//pragma translate_off
+`include "ex_trace_item.svh"
+`include "instr_trace_item.svh"
+
+module instr_tracer (
+  instr_tracer_if tracer_if, // passive tracer interface, only read through its pck clocking block
+  input logic[riscv::XLEN-1:0] hart_id_i // hart id, only used to build the log file names
+);
+
+  // keep the decoded instructions in a queue
+  logic [31:0] decode_queue [$];
+  // keep the issued instructions in a queue
+  logic [31:0] issue_queue [$];
+  // issue scoreboard entries
+  ariane_pkg::scoreboard_entry_t issue_sbe_queue [$];
+  ariane_pkg::scoreboard_entry_t issue_sbe;
+  // store resolved branches, get (mis-)predictions
+  ariane_pkg::bp_resolve_t bp [$];
+  // shadow copy of the register files
+  logic [63:0] gp_reg_file [32];
+  logic [63:0] fp_reg_file [32];
+  // 64 bit clock tick count
+  longint unsigned clk_ticks;
+  int f, commit_log; // file descriptors: trace log and (only if ENABLE_SPIKE_COMMIT_LOG) commit log
+  // address mapping
+  // queues of physical addresses: pushed on st_valid / ld_valid, popped when the store / load commits
+  logic [63:0] store_mapping[$], load_mapping[$], address_mapping;
+
+  // static uvm_cmdline_processor uvcl = uvm_cmdline_processor::get_inst();
+
+  function void create_file(logic [63:0] hart_id); // opens the log file(s) for this hart and stores the descriptors in f / commit_log
+    string fn, fn_commit_log;
+    $sformat(fn, "trace_hart_%0.0f.log", hart_id);
+    $sformat(fn_commit_log, "trace_hart_%0.0f_commit.log", hart_id);
+    $display("[TRACER] Output filename is: %s", fn);
+
+    f = $fopen(fn,"w");
+    if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) commit_log = $fopen(fn_commit_log, "w");
+  endfunction : create_file
+
+  task trace(); // never returns: one iteration of the forever loop per tracer_if.pck clocking event
+    automatic logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction;
+    automatic ariane_pkg::scoreboard_entry_t commit_instruction;
+    // initialize both shadow register files to all zeros
+    gp_reg_file = '{default:0};
+    fp_reg_file = '{default:0};
+
+    forever begin
+      automatic ariane_pkg::bp_resolve_t bp_instruction = '0;
+      // new cycle, we are only interested if reset is de-asserted
+      @(tracer_if.pck) if (tracer_if.pck.rstn !== 1'b1) begin
+        flush();
+        continue;
+      end
+
+      // increment clock tick (only reached while reset is de-asserted)
+      clk_ticks++;
+
+      // -------------------
+      // Instruction Decode
+      // -------------------
+      // we are decoding an instruction
+      if (tracer_if.pck.fetch_valid && tracer_if.pck.fetch_ack) begin
+        decode_instruction = tracer_if.pck.instruction;
+        decode_queue.push_back(decode_instruction);
+      end
+      // -------------------
+      // Instruction Issue
+      // -------------------
+      // we got a new issue ack, so put the element from the decode queue to
+      // the issue queue
+      if (tracer_if.pck.issue_ack && !tracer_if.pck.flush_unissued) begin
+        issue_instruction = decode_queue.pop_front();
+        issue_queue.push_back(issue_instruction);
+        // also save the scoreboard entry to a separate issue queue
+        issue_sbe_queue.push_back(ariane_pkg::scoreboard_entry_t'(tracer_if.pck.issue_sbe));
+      end
+
+      // --------------------
+      // Address Translation
+      // --------------------
+      if (tracer_if.pck.st_valid) begin
+        store_mapping.push_back(tracer_if.pck.st_paddr);
+      end
+
+      if (tracer_if.pck.ld_valid && !tracer_if.pck.ld_kill) begin
+        load_mapping.push_back(tracer_if.pck.ld_paddr);
+      end
+      // ----------------------
+      // Store predictions
+      // ----------------------
+      if (tracer_if.pck.resolve_branch.valid) begin
+        bp.push_back(tracer_if.pck.resolve_branch);
+      end
+      // --------------
+      //  Commit
+      // --------------
+      // we are committing an instruction
+      for (int i = 0; i < 2; i++) begin
+        if (tracer_if.pck.commit_ack[i]) begin
+          commit_instruction = ariane_pkg::scoreboard_entry_t'(tracer_if.pck.commit_instr[i]);
+          issue_commit_instruction = issue_queue.pop_front();
+          issue_sbe = issue_sbe_queue.pop_front();
+          // check if the instruction retiring is a load or store, get the physical address accordingly
+          if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::LOAD)
+            address_mapping = load_mapping.pop_front();
+          else if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::STORE)
+            address_mapping = store_mapping.pop_front();
+
+          if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::CTRL_FLOW)
+            bp_instruction = bp.pop_front();
+          // the scoreboards issue entry still contains the immediate value as a result
+          // check if the write back is valid, if not we need to source the result from the register file
+          // as the most recent version of this register will be there.
+          if (tracer_if.pck.we_gpr[i] || tracer_if.pck.we_fpr[i]) begin
+            printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
+          end else if (ariane_pkg::is_rd_fpr(commit_instruction.op)) begin
+            printInstr(issue_sbe, issue_commit_instruction, fp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
+          end else begin
+            printInstr(issue_sbe, issue_commit_instruction, gp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
+          end
+        end
+      end
+      // --------------
+      // Exceptions
+      // --------------
+      if (tracer_if.pck.exception.valid && !(tracer_if.pck.debug_mode && tracer_if.pck.exception.cause == riscv::BREAKPOINT)) begin
+        // print exception (breakpoints taken while in debug mode are skipped by the condition above)
+        printException(tracer_if.pck.commit_instr[0].pc, tracer_if.pck.exception.cause, tracer_if.pck.exception.tval);
+      end
+      // ----------------------
+      // Commit Registers
+      // ----------------------
+      // update shadow reg files here (after the commit section above, which reads the old values)
+      for (int i = 0; i < 2; i++) begin
+        if (tracer_if.pck.we_gpr[i] && tracer_if.pck.waddr[i] != 5'b0) begin
+          gp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
+        end else if (tracer_if.pck.we_fpr[i]) begin
+          fp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
+        end
+      end
+      // --------------
+      // Flush Signals
+      // --------------
+      // flush un-issued instructions
+      if (tracer_if.pck.flush_unissued) begin
+        flushDecode();
+      end
+      // flush whole pipeline
+      if (tracer_if.pck.flush) begin
+        flush();
+      end
+    end
+
+  endtask
+
+  // flush all decoded instructions
+  function void flushDecode ();
+    decode_queue = {};
+  endfunction
+
+  // flush everything, we took an exception/interrupt
+  function void flush ();
+    flushDecode();
+    // clear all elements in the queue
+    issue_queue = {};
+    issue_sbe_queue = {};
+    // also clear mappings
+    store_mapping = {};
+    load_mapping = {};
+    bp = {};
+  endfunction
+
+  function void printInstr(ariane_pkg::scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [riscv::PLEN-1:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, ariane_pkg::bp_resolve_t bp);
+    automatic instr_trace_item iti = new ($time, clk_ticks, sbe, instr, gp_reg_file, fp_reg_file, result, paddr, priv_lvl, debug_mode, bp);
+    // format the trace line as a string; it is appended to the trace file f below
+    automatic string print_instr = iti.printInstr();
+    if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin
+      $fwrite(commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result, ariane_pkg::is_rd_fpr(sbe.op)));
+    end
+    $fwrite(f, {print_instr, "\n"});
+  endfunction
+
+  function void printException(logic [riscv::VLEN-1:0] pc, logic [63:0] cause, logic [63:0] tval); // appends one exception line to the trace file f
+    automatic ex_trace_item eti = new (pc, cause, tval);
+    automatic string print_ex = eti.printException();
+    $fwrite(f, {print_ex, "\n"});
+  endfunction
+
+  function void close(); // closes whichever log files were opened by create_file
+    if (f) $fclose(f);
+    if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG && commit_log) $fclose(commit_log);
+  endfunction
+
+
+  initial begin
+    #15ns;
+    create_file(hart_id_i);
+    trace();
+  end
+
+  final begin
+    close();
+  end
+
+endmodule : instr_tracer
+//pragma translate_on
+`endif
diff --git a/test/type_param/common/local/util/instr_tracer_if.sv b/test/type_param/common/local/util/instr_tracer_if.sv
new file mode 100644
index 0000000..5015cfd
--- /dev/null
+++ b/test/type_param/common/local/util/instr_tracer_if.sv
@@ -0,0 +1,67 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Interface (bundle of pipeline signals plus an input-only clocking block for sampling them)
+
+`ifndef VERILATOR
+`ifndef INSTR_TRACER_IF_SV
+`define INSTR_TRACER_IF_SV
+interface instr_tracer_if (
+    input clk
+  );
+
+  logic rstn;
+  logic flush_unissued;
+  logic flush;
+  // Decode
+  logic [31:0] instruction;
+  logic fetch_valid;
+  logic fetch_ack;
+  // Issue stage
+  logic issue_ack; // issue acknowledged
+  ariane_pkg::scoreboard_entry_t issue_sbe; // issue scoreboard entry
+  // WB stage (every signal below carries two entries, index 0 and 1)
+  logic [1:0][4:0] waddr;
+  logic [1:0][63:0] wdata;
+  logic [1:0] we_gpr;
+  logic [1:0] we_fpr;
+  // commit stage (two entries, index 0 and 1)
+  ariane_pkg::scoreboard_entry_t [1:0] commit_instr; // commit instruction
+  logic [1:0] commit_ack;
+  // address translation
+  // stores
+  logic st_valid;
+  logic [riscv::PLEN-1:0] st_paddr;
+  // loads
+  logic ld_valid;
+  logic ld_kill;
+  logic [riscv::PLEN-1:0] ld_paddr;
+  // misprediction (branch resolution record)
+  ariane_pkg::bp_resolve_t resolve_branch;
+  // exceptions
+  ariane_pkg::exception_t exception;
+  // current privilege level
+  riscv::priv_lvl_t priv_lvl;
+  logic debug_mode;
+  // the tracer interface is passive: the clocking block below declares inputs only, nothing is driven through it
+
+  //pragma translate_off
+  clocking pck @(posedge clk);
+    input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr,
+      st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, resolve_branch,
+      wdata, we_gpr, we_fpr, commit_instr, commit_ack, exception, priv_lvl, debug_mode;
+  endclocking
+  //pragma translate_on
+
+endinterface
+`endif
+`endif
diff --git a/test/type_param/common/local/util/sram.sv b/test/type_param/common/local/util/sram.sv
new file mode 100644
index 0000000..4c0f2d2
--- /dev/null
+++ b/test/type_param/common/local/util/sram.sv
@@ -0,0 +1,107 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba , ETH Zurich
+// Michael Schaffner , ETH Zurich
+// Date: 15.08.2018
+// Description: SRAM wrapper for FPGA (requires the fpga-support submodule)
+//
+// Note: the wrapped module contains two different implementations for
+// ALTERA and XILINX tools, since these follow different coding styles for
+// inferrable RAMS with byte enable.
define `FPGA_TARGET_XILINX or
+// `FPGA_TARGET_ALTERA in your build environment (default is ALTERA)
+
+module sram #(
+    parameter DATA_WIDTH = 64, // width of wdata_i / rdata_o in bits
+    parameter USER_WIDTH = 1, // width of wuser_i / ruser_o in bits
+    parameter USER_EN    = 0, // > 0 instantiates a second memory per cut for the user bits
+    parameter NUM_WORDS  = 1024, // number of words; addr_i is $clog2(NUM_WORDS) bits wide
+    parameter SIM_INIT   = "none", // forwarded unchanged as SimInit to tc_sram_wrapper
+    parameter OUT_REGS   = 0 // enables output registers in FPGA macro (read lat = 2); NOTE(review): not referenced in this module, Latency is fixed to 1 below
+)(
+   input  logic                          clk_i,
+   input  logic                          rst_ni,
+   input  logic                          req_i,
+   input  logic                          we_i,
+   input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
+   input  logic [USER_WIDTH-1:0]         wuser_i,
+   input  logic [DATA_WIDTH-1:0]         wdata_i,
+   input  logic [(DATA_WIDTH+7)/8-1:0]   be_i,
+   output logic [USER_WIDTH-1:0]         ruser_o,
+   output logic [DATA_WIDTH-1:0]         rdata_o
+);
+
+localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64; // DATA_WIDTH rounded up to a multiple of 64
+localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED; // To be fine tuned to reduce memory size
+localparam BE_WIDTH_ALIGNED   = (((DATA_WIDTH+7)/8+7)/8)*8; // byte-enable count rounded up to a multiple of 8
+
+logic [DATA_WIDTH_ALIGNED-1:0]  wdata_aligned;
+logic [USER_WIDTH_ALIGNED-1:0]  wuser_aligned;
+logic [BE_WIDTH_ALIGNED-1:0]    be_aligned;
+logic [DATA_WIDTH_ALIGNED-1:0]  rdata_aligned;
+logic [USER_WIDTH_ALIGNED-1:0]  ruser_aligned;
+
+// align to 64 bits for inferrable macro below (upper padding bits are driven to zero)
+always_comb begin : p_align
+    wdata_aligned ='0;
+    wuser_aligned ='0;
+    be_aligned ='0;
+    wdata_aligned[DATA_WIDTH-1:0] = wdata_i;
+    wuser_aligned[USER_WIDTH-1:0] = wuser_i;
+    be_aligned[BE_WIDTH_ALIGNED-1:0] = be_i;
+
+    rdata_o = rdata_aligned[DATA_WIDTH-1:0];
+    ruser_o = ruser_aligned[USER_WIDTH-1:0];
+end
+
+  for (genvar k = 0; k<(DATA_WIDTH+63)/64; k++) begin : gen_cut
+      // unused byte-enable segments (8bits) are culled by the tool
+      tc_sram_wrapper #(
+        .NumWords(NUM_WORDS),           // Number of Words in data array
+        .DataWidth(64),                 // Data signal width
+        .ByteWidth(32'd8),              // Width of a data byte
+        .NumPorts(32'd1),               // Number of read and write ports
+        .Latency(32'd1),                // Latency when the read data is available
+        .SimInit(SIM_INIT),             // Simulation initialization
+        .PrintSimCfg(1'b0)              // Print configuration
+      ) i_tc_sram_wrapper (
+          .clk_i    ( clk_i                     ),
+          .rst_ni   ( rst_ni                    ),
+          .req_i    ( req_i                     ),
+          .we_i     ( we_i                      ),
+          .be_i     ( be_aligned[k*8 +: 8]      ),
+          .wdata_i  ( wdata_aligned[k*64 +: 64] ),
+          .addr_i   ( addr_i                    ),
+          .rdata_o  ( rdata_aligned[k*64 +: 64] )
+      );
+      if (USER_EN > 0) begin : gen_mem_user
+        tc_sram_wrapper #(
+          .NumWords(NUM_WORDS),           // Number of Words in data array
+          .DataWidth(64),                 // Data signal width
+          .ByteWidth(32'd8),              // Width of a data byte
+          .NumPorts(32'd1),               // Number of read and write ports
+          .Latency(32'd1),                // Latency when the read data is available
+          .SimInit(SIM_INIT),             // Simulation initialization
+          .PrintSimCfg(1'b0)              // Print configuration
+        ) i_tc_sram_wrapper_user (
+            .clk_i    ( clk_i                     ),
+            .rst_ni   ( rst_ni                    ),
+            .req_i    ( req_i                     ),
+            .we_i     ( we_i                      ),
+            .be_i     ( be_aligned[k*8 +: 8]      ), // user memory reuses the data byte enables
+            .wdata_i  ( wuser_aligned[k*64 +: 64] ),
+            .addr_i   ( addr_i                    ),
+            .rdata_o  ( ruser_aligned[k*64 +: 64] )
+        );
+      end else begin
+        assign ruser_aligned[k*64 +: 64] = '0; // no user memory: user read data is constant zero
+      end
+  end
+endmodule : sram
diff --git a/test/type_param/common/local/util/tc_sram_wrapper.sv b/test/type_param/common/local/util/tc_sram_wrapper.sv
new file mode 100644
index 0000000..ae3287d
--- /dev/null
+++ b/test/type_param/common/local/util/tc_sram_wrapper.sv
@@ -0,0 +1,60 @@
+// Copyright 2022 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Jean-Roch COULON - Thales
+
+module tc_sram_wrapper #( // forwards every parameter and port unchanged to a single tc_sram instance
+  parameter int unsigned NumWords     = 32'd1024, // Number of Words in data array
+  parameter int unsigned DataWidth    = 32'd128,  // Data signal width
+  parameter int unsigned ByteWidth    = 32'd8,    // Width of a data byte
+  parameter int unsigned NumPorts     = 32'd2,    // Number of read and write ports
+  parameter int unsigned Latency      = 32'd1,    // Latency when the read data is available
+  parameter              SimInit      = "none",   // Simulation initialization
+  parameter bit          PrintSimCfg  = 1'b0,     // Print configuration
+  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
+  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
+  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
+  parameter type         addr_t    = logic [AddrWidth-1:0],
+  parameter type         data_t    = logic [DataWidth-1:0],
+  parameter type         be_t      = logic [BeWidth-1:0]
+) (
+  input  logic                 clk_i,      // Clock
+  input  logic                 rst_ni,     // Asynchronous reset active low
+  // input ports (one entry per port, NumPorts entries each)
+  input  logic  [NumPorts-1:0] req_i,      // request
+  input  logic  [NumPorts-1:0] we_i,       // write enable
+  input  addr_t [NumPorts-1:0] addr_i,     // request address
+  input  data_t [NumPorts-1:0] wdata_i,    // write data
+  input  be_t   [NumPorts-1:0] be_i,       // write byte enable
+  // output ports
+  output data_t [NumPorts-1:0] rdata_o     // read data
+);
+
+// synthesis translate_off
+
+  tc_sram #( // NOTE: this instance sits between the translate_off / translate_on pragmas
+    .NumWords(NumWords),
+    .DataWidth(DataWidth),
+    .ByteWidth(ByteWidth),
+    .NumPorts(NumPorts),
+    .Latency(Latency),
+    .SimInit(SimInit),
+    .PrintSimCfg(PrintSimCfg)
+  ) i_tc_sram (
+      .clk_i    ( clk_i   ),
+      .rst_ni   ( rst_ni  ),
+      .req_i    ( req_i   ),
+      .we_i     ( we_i    ),
+      .be_i     ( be_i    ),
+      .wdata_i  ( wdata_i ),
+      .addr_i   ( addr_i  ),
+      .rdata_o  ( rdata_o )
+  );
+
+// synthesis translate_on
+
+endmodule
diff --git
a/test/type_param/core/acc_dispatcher.sv b/test/type_param/core/acc_dispatcher.sv new file mode 100644 index 0000000..8b5998a --- /dev/null +++ b/test/type_param/core/acc_dispatcher.sv @@ -0,0 +1,423 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: Matheus Cavalcante, ETH Zurich +// Nils Wistoff, ETH Zurich +// Date: 20.11.2020 +// Description: Functional unit that dispatches CVA6 instructions to accelerators. 
+
+module acc_dispatcher // queues ACCEL instructions from the issue stage and forwards them to the accelerator once they are non-speculative
+  import ariane_pkg::*;
+  import riscv::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg    = config_pkg::cva6_cfg_empty,
+    parameter type                   acc_req_t  = acc_pkg::accelerator_req_t,
+    parameter type                   acc_resp_t = acc_pkg::accelerator_resp_t,
+    parameter type                   acc_cfg_t  = logic,
+    parameter acc_cfg_t              AccCfg     = '0 // NOTE(review): not referenced in this module body
+) (
+    input  logic                                  clk_i,
+    input  logic                                  rst_ni,
+    // Interface with the CSR regfile
+    input  logic                                  acc_cons_en_i,  // Accelerator memory consistent mode
+    output logic                                  acc_fflags_valid_o,
+    output logic              [              4:0] acc_fflags_o,
+    // Interface with the CSRs
+    input  priv_lvl_t                             ld_st_priv_lvl_i,
+    input  logic                                  sum_i,
+    input  pmpcfg_t           [             15:0] pmpcfg_i,
+    input  logic              [15:0][   PLEN-3:0] pmpaddr_i,
+    input  logic              [              2:0] fcsr_frm_i,
+    output logic                                  dirty_v_state_o,
+    // Interface with the issue stage
+    input  scoreboard_entry_t                     issue_instr_i,
+    input  logic                                  issue_instr_hs_i,
+    output logic                                  issue_stall_o,
+    input  fu_data_t                              fu_data_i,
+    input  scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
+    output logic              [TRANS_ID_BITS-1:0] acc_trans_id_o,
+    output xlen_t                                 acc_result_o,
+    output logic                                  acc_valid_o,
+    output exception_t                            acc_exception_o,
+    // Interface with the execute stage
+    output logic                                  acc_valid_ex_o,  // FU executed
+    // Interface with the commit stage
+    input  logic              [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
+    input  logic                                  commit_st_barrier_i,  // A store barrier was committed
+    // Interface with the load/store unit
+    output logic                                  acc_stall_st_pending_o,
+    input  logic                                  acc_no_st_pending_i,
+    input  dcache_req_i_t     [              2:0] dcache_req_ports_i,
+    // Interface with the controller
+    output logic                                  ctrl_halt_o,
+    input  logic                                  flush_unissued_instr_i,
+    input  logic                                  flush_ex_i,
+    output logic                                  flush_pipeline_o,
+    // Interface with cache subsystem
+    output dcache_req_i_t     [              1:0] acc_dcache_req_ports_o,
+    input  dcache_req_o_t     [              1:0] acc_dcache_req_ports_i,
+    input  logic                                  inval_ready_i,
+    output logic                                  inval_valid_o,
+    output logic              [             63:0] inval_addr_o,
+    // Accelerator interface (these two ports use the acc_req_t / acc_resp_t type parameters)
+    output acc_req_t                              acc_req_o,
+    input  acc_resp_t                             acc_resp_i
+);
+
+  `include "common_cells/registers.svh"
+
+  import cf_math_pkg::idx_width;
+
+  /***********************
+   *  Common signals     *
+   ***********************/
+
+  logic acc_ready;
+  logic acc_valid_d, acc_valid_q;
+
+  /**************************
+   *  Accelerator issue     *
+   **************************/
+
+  // Issue accelerator instructions
+  `FF(acc_valid_q, acc_valid_d, '0)
+
+  assign acc_valid_ex_o = acc_valid_q;
+  assign acc_valid_d = ~issue_instr_i.ex.valid &
+                        issue_instr_hs_i &
+                       (issue_instr_i.fu == ACCEL) &
+                       ~flush_unissued_instr_i;
+
+  // Accelerator load/store pending signals
+  logic acc_no_ld_pending;
+  logic acc_no_st_pending;
+
+  // Stall issue stage in three cases:
+  always_comb begin : stall_issue
+    unique case (issue_instr_i.fu)
+      ACCEL:
+      // 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
+      issue_stall_o = ~acc_ready;
+      LOAD:
+      // 2. We're issuing a scalar load but there is an inflight accelerator store.
+      issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
+      STORE:
+      // 3. We're issuing a scalar store but there is an inflight accelerator load or store.
+      issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
+      default: issue_stall_o = 1'b0;
+    endcase
+  end
+
+  /***********************
+   *  Instruction queue  *
+   ***********************/
+
+  localparam InstructionQueueDepth = 3;
+
+  fu_data_t                                          acc_data;
+  fu_data_t                                          acc_insn_queue_o;
+  logic                                              acc_insn_queue_pop;
+  logic                                              acc_insn_queue_empty;
+  logic     [idx_width(InstructionQueueDepth)-1:0]   acc_insn_queue_usage;
+  logic                                              acc_commit;
+  logic     [                   TRANS_ID_BITS-1:0]   acc_commit_trans_id;
+
+  assign acc_data = acc_valid_ex_o ? fu_data_i : '0;
+
+  fifo_v3 #(
+      .DEPTH       (InstructionQueueDepth),
+      .FALL_THROUGH(1'b1),
+      .dtype       (fu_data_t)
+  ) i_acc_insn_queue (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (flush_ex_i),
+      .testmode_i(1'b0),
+      .data_i    (fu_data_i),
+      .push_i    (acc_valid_q),
+      .full_o    ( /* Unused */),
+      .data_o    (acc_insn_queue_o),
+      .pop_i     (acc_insn_queue_pop),
+      .empty_o   (acc_insn_queue_empty),
+      .usage_o   (acc_insn_queue_usage)
+  );
+
+  // We are ready if the instruction queue is able to accept at least one more entry.
+  assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
+
+  /**********************************
+   *  Non-speculative instructions  *
+   **********************************/
+
+  // Keep track of the instructions that were received by the dispatcher.
+  logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q;
+  `FF(insn_pending_q, insn_pending_d, '0)
+
+  // Only non-speculative instructions can be issued to the accelerators.
+  // The following block keeps track of which transaction IDs reached the
+  // top of the scoreboard, and are therefore no longer speculative.
+  logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
+  `FF(insn_ready_q, insn_ready_d, '0)
+
+  always_comb begin : p_non_speculative_ff
+    // Maintain state
+    insn_pending_d = insn_pending_q;
+    insn_ready_d   = insn_ready_q;
+
+    // We received a new instruction
+    if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
+    // Flush all received instructions
+    if (flush_ex_i) insn_pending_d = '0;
+
+    // An accelerator instruction is no longer speculative.
+    if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
+      insn_ready_d[acc_commit_trans_id]   = 1'b1;
+      insn_pending_d[acc_commit_trans_id] = 1'b0;
+    end
+
+    // An accelerator instruction was issued.
+    if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
+  end : p_non_speculative_ff
+
+  /*************************
+   *  Accelerator request  *
+   *************************/
+
+  acc_pkg::accelerator_req_t acc_req; // NOTE: fixed package type here, whereas the acc_req_o port uses the acc_req_t type parameter
+  logic                      acc_req_valid;
+  logic                      acc_req_ready;
+
+  acc_pkg::accelerator_req_t acc_req_int;
+  fall_through_register #(
+      .T(acc_pkg::accelerator_req_t)
+  ) i_accelerator_req_register (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clr_i     (1'b0),
+      .testmode_i(1'b0),
+      .data_i    (acc_req),
+      .valid_i   (acc_req_valid),
+      .ready_o   (acc_req_ready),
+      .data_o    (acc_req_int),
+      .valid_o   (acc_req_o.req_valid),
+      .ready_i   (acc_resp_i.req_ready)
+  );
+
+  assign acc_req_o.insn          = acc_req_int.insn;
+  assign acc_req_o.rs1           = acc_req_int.rs1;
+  assign acc_req_o.rs2           = acc_req_int.rs2;
+  assign acc_req_o.frm           = acc_req_int.frm;
+  assign acc_req_o.trans_id      = acc_req_int.trans_id;
+  assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
+  assign acc_req_o.acc_cons_en   = acc_cons_en_i;
+  assign acc_req_o.inval_ready   = inval_ready_i;
+
+  always_comb begin : accelerator_req_dispatcher
+    // Do not fetch from the instruction queue
+    acc_insn_queue_pop = 1'b0;
+
+    // Default values
+    acc_req            = '0;
+    acc_req_valid      = 1'b0;
+
+    // Unpack fu_data_t into accelerator_req_t
+    if (!acc_insn_queue_empty) begin
+      acc_req = '{
+          // Instruction is forwarded from the decoder as an immediate
+          // -
+          // frm rounding information is up to date during a valid request to the accelerator
+          // The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
+          // do not take place until the accelerator answers (Ariane commits in-order)
+          insn    :
+          acc_insn_queue_o.imm[
+          31
+          :
+          0
+          ],
+          rs1     : acc_insn_queue_o.operand_a,
+          rs2     : acc_insn_queue_o.operand_b,
+          frm     : fpnew_pkg::roundmode_e'(fcsr_frm_i),
+          trans_id: acc_insn_queue_o.trans_id,
+          default: '0
+      };
+      // Wait until the instruction is no longer speculative.
+      acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] ||
+                      (acc_commit && insn_pending_q[acc_commit_trans_id]);
+      acc_insn_queue_pop = acc_req_valid && acc_req_ready;
+    end
+  end
+
+  /**************************
+   *  Accelerator response  *
+   **************************/
+
+  logic acc_ld_disp;
+  logic acc_st_disp;
+
+  // Unpack the accelerator response
+  assign acc_trans_id_o      = acc_resp_i.trans_id;
+  assign acc_result_o        = acc_resp_i.result;
+  assign acc_valid_o         = acc_resp_i.resp_valid;
+  assign acc_exception_o     = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
+  assign acc_fflags_valid_o  = acc_resp_i.fflags_valid;
+  assign acc_fflags_o        = acc_resp_i.fflags;
+  // Always ready to receive responses
+  assign acc_req_o.resp_ready = 1'b1;
+
+  // Signal dispatched load/store to issue stage
+  assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
+  assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
+
+  // Cache invalidation
+  assign inval_valid_o = acc_resp_i.inval_valid;
+  assign inval_addr_o  = acc_resp_i.inval_addr;
+
+  /**************************
+   *  Accelerator commit    *
+   **************************/
+
+  // Instruction can be issued to the (in-order) back-end if
+  // it reached the top of the scoreboard and it hasn't been
+  // issued yet
+  always_comb begin : accelerator_commit
+    acc_commit = 1'b0;
+    if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1;
+    if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL) // NOTE(review): index 1 is hard-coded, assumes CVA6Cfg.NrCommitPorts >= 2 - confirm
+      acc_commit = 1'b1;
+  end
+
+  // Dirty the V state if we are committing anything related to the vector accelerator
+  always_comb begin : dirty_v_state
+    dirty_v_state_o = 1'b0;
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL);
+    end
+  end
+
+  assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id
+                                                        : commit_instr_i[1].trans_id;
+
+  /**************************
+   *  Accelerator barriers  *
+   **************************/
+
+  // On a store barrier (i.e. any barrier that requires preceding stores to complete
+  // before continuing execution), halt execution while there are pending stores in
+  // the accelerator pipeline.
+  logic wait_acc_store_d, wait_acc_store_q;
+  `FF(wait_acc_store_q, wait_acc_store_d, '0)
+
+  // Set on store barrier. Clear when no store is pending.
+  assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
+  assign ctrl_halt_o      = wait_acc_store_q;
+
+  /**************************
+   *  Load/Store tracking   *
+   **************************/
+
+  // Loads
+  logic       acc_spec_loads_overflow;
+  logic [2:0] acc_spec_loads_pending;
+  logic       acc_disp_loads_overflow;
+  logic [2:0] acc_disp_loads_pending;
+
+  assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0);
+
+  // Count speculative loads. These can still be flushed.
+  counter #(
+      .WIDTH           (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_spec_loads (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (flush_ex_i),
+      .en_i      ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
+      .load_i    (1'b0),
+      .down_i    (acc_ld_disp),
+      .d_i       ('0),
+      .q_o       (acc_spec_loads_pending),
+      .overflow_o(acc_spec_loads_overflow)
+  );
+
+  // Count dispatched loads. These cannot be flushed anymore.
+  counter #(
+      .WIDTH           (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_disp_loads (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (1'b0),
+      .en_i      (acc_ld_disp ^ acc_resp_i.load_complete),
+      .load_i    (1'b0),
+      .down_i    (acc_resp_i.load_complete),
+      .d_i       ('0),
+      .q_o       (acc_disp_loads_pending),
+      .overflow_o(acc_disp_loads_overflow)
+  );
+
+  acc_dispatcher_no_load_overflow :
+  assert property (
+    @(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
+  else $error("[acc_dispatcher] Too many pending loads.");
+
+  // Stores
+  logic       acc_spec_stores_overflow;
+  logic [2:0] acc_spec_stores_pending;
+  logic       acc_disp_stores_overflow;
+  logic [2:0] acc_disp_stores_pending;
+
+  assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0);
+
+  // Count speculative stores. These can still be flushed.
+  counter #(
+      .WIDTH           (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_spec_stores (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (flush_ex_i),
+      .en_i      ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
+      .load_i    (1'b0),
+      .down_i    (acc_st_disp),
+      .d_i       ('0),
+      .q_o       (acc_spec_stores_pending),
+      .overflow_o(acc_spec_stores_overflow)
+  );
+
+  // Count dispatched stores. These cannot be flushed anymore.
+  counter #(
+      .WIDTH           (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_disp_stores (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (1'b0),
+      .en_i      (acc_st_disp ^ acc_resp_i.store_complete),
+      .load_i    (1'b0),
+      .down_i    (acc_resp_i.store_complete),
+      .d_i       ('0),
+      .q_o       (acc_disp_stores_pending),
+      .overflow_o(acc_disp_stores_overflow)
+  );
+
+  acc_dispatcher_no_store_overflow :
+  assert property (
+    @(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
+  else $error("[acc_dispatcher] Too many pending stores.");
+
+  /**************************
+   *  Tie Off Unused Signals *
+   **************************/
+
+  assign acc_stall_st_pending_o = 1'b0;
+  assign flush_pipeline_o       = 1'b0;
+  assign acc_dcache_req_ports_o = '0;
+
+endmodule : acc_dispatcher
diff --git a/test/type_param/core/acc_dispatcher_corrected.sv b/test/type_param/core/acc_dispatcher_corrected.sv
new file mode 100644
index 0000000..6f9c8fc
--- /dev/null
+++ b/test/type_param/core/acc_dispatcher_corrected.sv
@@ -0,0 +1,423 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors: Matheus Cavalcante, ETH Zurich
+//          Nils Wistoff, ETH Zurich
+// Date: 20.11.2020
+// Description: Functional unit that dispatches CVA6 instructions to accelerators.
+// Queues ACCEL instructions from the issue stage and forwards them to the accelerator once they are non-speculative.
+module acc_dispatcher
+  import ariane_pkg::*;
+  import riscv::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter type acc_req_t = acc_pkg::accelerator_req_t,  // NOTE(review): not referenced below; the ports use acc_pkg::accelerator_req_t directly
+    parameter type acc_resp_t = acc_pkg::accelerator_resp_t,  // NOTE(review): not referenced below; the ports use acc_pkg::accelerator_resp_t directly
+    parameter type acc_cfg_t = logic,  // type of the AccCfg value parameter
+    parameter acc_cfg_t AccCfg = '0  // NOTE(review): not referenced inside this module
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    // Interface with the CSR regfile
+    input logic acc_cons_en_i,  // Accelerator memory consistent mode
+    output logic acc_fflags_valid_o,
+    output logic [4:0] acc_fflags_o,
+    // Interface with the CSRs
+    input priv_lvl_t ld_st_priv_lvl_i,
+    input logic sum_i,
+    input pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][PLEN-3:0] pmpaddr_i,
+    input logic [2:0] fcsr_frm_i,
+    output logic dirty_v_state_o,
+    // Interface with the issue stage
+    input scoreboard_entry_t issue_instr_i,
+    input logic issue_instr_hs_i,
+    output logic issue_stall_o,
+    input fu_data_t fu_data_i,
+    input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
+    output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
+    output xlen_t acc_result_o,
+    output logic acc_valid_o,
+    output exception_t acc_exception_o,
+    // Interface with the execute stage
+    output logic acc_valid_ex_o,  // FU executed
+    // Interface with the commit stage
+    input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
+    input logic commit_st_barrier_i,  // A store barrier was committed
+    // Interface with the load/store unit
+    output logic acc_stall_st_pending_o,
+    input logic acc_no_st_pending_i,
+    input dcache_req_i_t [2:0] dcache_req_ports_i,
+    // Interface with the controller
+    output logic ctrl_halt_o,
+    input logic flush_unissued_instr_i,
+    input logic flush_ex_i,
+    output logic flush_pipeline_o,
+    // Interface with cache subsystem
+    output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
+    input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
+    input logic inval_ready_i,
+    output logic inval_valid_o,
+    output logic [63:0] inval_addr_o,
+    // Accelerator interface
+    output acc_pkg::accelerator_req_t acc_req_o,
+    input acc_pkg::accelerator_resp_t acc_resp_i
+);
+
+  `include "common_cells/registers.svh"
+
+  import cf_math_pkg::idx_width;
+
+  /***********************
+   *    Common signals   *
+   ***********************/
+
+  logic acc_ready;
+  logic acc_valid_d, acc_valid_q;
+
+  /**************************
+   *    Accelerator issue   *
+   **************************/
+
+  // Issue accelerator instructions
+  `FF(acc_valid_q, acc_valid_d, '0)
+
+  assign acc_valid_ex_o = acc_valid_q;
+  assign acc_valid_d = ~issue_instr_i.ex.valid &
+      issue_instr_hs_i &
+      (issue_instr_i.fu == ACCEL) &
+      ~flush_unissued_instr_i;
+
+  // Accelerator load/store pending signals
+  logic acc_no_ld_pending;
+  logic acc_no_st_pending;
+
+  // Stall issue stage in three cases:
+  always_comb begin : stall_issue
+    unique case (issue_instr_i.fu)
+      ACCEL:
+      // 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
+      issue_stall_o = ~acc_ready;
+      LOAD:
+      // 2. We're issuing a scalar load but there is an inflight accelerator store.
+      issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
+      STORE:
+      // 3. We're issuing a scalar store but there is an inflight accelerator load or store.
+      issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
+      default: issue_stall_o = 1'b0;
+    endcase
+  end
+
+  /***********************
+   *  Instruction queue  *
+   ***********************/
+
+  localparam InstructionQueueDepth = 3;
+
+  fu_data_t acc_data;
+  fu_data_t acc_insn_queue_o;
+  logic acc_insn_queue_pop;
+  logic acc_insn_queue_empty;
+  logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
+  logic acc_commit;
+  logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id;
+
+  assign acc_data = acc_valid_ex_o ? fu_data_i : '0;  // only acc_data.trans_id is read below
+
+  fifo_v3 #(
+      .DEPTH (InstructionQueueDepth),
+      .FALL_THROUGH(1'b1),
+      .dtype (fu_data_t)
+  ) i_acc_insn_queue (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i (flush_ex_i),
+      .testmode_i(1'b0),
+      .data_i (fu_data_i),
+      .push_i (acc_valid_q),
+      .full_o ( /* Unused */),
+      .data_o (acc_insn_queue_o),
+      .pop_i (acc_insn_queue_pop),
+      .empty_o (acc_insn_queue_empty),
+      .usage_o (acc_insn_queue_usage)
+  );
+
+  // We are ready if the instruction queue is able to accept at least one more entry.
+  assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
+
+  /**********************************
+   *  Non-speculative instructions  *
+   **********************************/
+
+  // Keep track of the instructions that were received by the dispatcher.
+  logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q;
+  `FF(insn_pending_q, insn_pending_d, '0)
+
+  // Only non-speculative instructions can be issued to the accelerators.
+  // The following block keeps track of which transaction IDs reached the
+  // top of the scoreboard, and are therefore no longer speculative.
+  logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
+  `FF(insn_ready_q, insn_ready_d, '0)
+
+  always_comb begin : p_non_speculative_ff
+    // Maintain state
+    insn_pending_d = insn_pending_q;
+    insn_ready_d = insn_ready_q;
+
+    // We received a new instruction
+    if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
+    // Flush all received instructions
+    if (flush_ex_i) insn_pending_d = '0;  // later assignment: overrides the set above in the same cycle
+
+    // An accelerator instruction is no longer speculative.
+    if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
+      insn_ready_d[acc_commit_trans_id] = 1'b1;
+      insn_pending_d[acc_commit_trans_id] = 1'b0;
+    end
+
+    // An accelerator instruction was issued.
+    if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
+  end : p_non_speculative_ff
+
+  /*************************
+   *  Accelerator request  *
+   *************************/
+
+  acc_pkg::accelerator_req_t acc_req;
+  logic acc_req_valid;
+  logic acc_req_ready;
+
+  acc_pkg::accelerator_req_t acc_req_int;
+  fall_through_register #(
+      .T(acc_pkg::accelerator_req_t)
+  ) i_accelerator_req_register (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .clr_i (1'b0),
+      .testmode_i(1'b0),
+      .data_i (acc_req),
+      .valid_i (acc_req_valid),
+      .ready_o (acc_req_ready),
+      .data_o (acc_req_int),
+      .valid_o (acc_req_o.req_valid),
+      .ready_i (acc_resp_i.req_ready)
+  );
+
+  assign acc_req_o.insn = acc_req_int.insn;
+  assign acc_req_o.rs1 = acc_req_int.rs1;
+  assign acc_req_o.rs2 = acc_req_int.rs2;
+  assign acc_req_o.frm = acc_req_int.frm;
+  assign acc_req_o.trans_id = acc_req_int.trans_id;
+  assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
+  assign acc_req_o.acc_cons_en = acc_cons_en_i;
+  assign acc_req_o.inval_ready = inval_ready_i;
+
+  always_comb begin : accelerator_req_dispatcher
+    // Do not fetch from the instruction queue
+    acc_insn_queue_pop = 1'b0;
+
+    // Default values
+    acc_req = '0;
+    acc_req_valid = 1'b0;
+
+    // Unpack fu_data_t into accelerator_req_t
+    if (!acc_insn_queue_empty) begin
+      acc_req = '{
+          // Instruction is forwarded from the decoder as an immediate
+          // -
+          // frm rounding information is up to date during a valid request to the accelerator
+          // The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
+          // do not take place until the accelerator answers (Ariane commits in-order)
+          insn :
+          acc_insn_queue_o.imm[
+          31
+          :
+          0
+          ],
+          rs1 : acc_insn_queue_o.operand_a,
+          rs2 : acc_insn_queue_o.operand_b,
+          frm : fpnew_pkg::roundmode_e'(fcsr_frm_i),
+          trans_id: acc_insn_queue_o.trans_id,
+          default: '0
+      };
+      // Wait until the instruction is no longer speculative.
+      acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] ||
+          (acc_commit && insn_pending_q[acc_commit_trans_id]);  // NOTE(review): this term is not compared against the queue head's trans_id - confirm intent
+      acc_insn_queue_pop = acc_req_valid && acc_req_ready;
+    end
+  end
+
+  /**************************
+   *  Accelerator response  *
+   **************************/
+
+  logic acc_ld_disp;
+  logic acc_st_disp;
+
+  // Unpack the accelerator response
+  assign acc_trans_id_o = acc_resp_i.trans_id;
+  assign acc_result_o = acc_resp_i.result;
+  assign acc_valid_o = acc_resp_i.resp_valid;
+  assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
+  assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
+  assign acc_fflags_o = acc_resp_i.fflags;
+  // Always ready to receive responses
+  assign acc_req_o.resp_ready = 1'b1;
+
+  // Signal dispatched load/store to issue stage
+  assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
+  assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
+
+  // Cache invalidation
+  assign inval_valid_o = acc_resp_i.inval_valid;
+  assign inval_addr_o = acc_resp_i.inval_addr;
+
+  /**************************
+   *  Accelerator commit    *
+   **************************/
+
+  // Instruction can be issued to the (in-order) back-end if
+  // it reached the top of the scoreboard and it hasn't been
+  // issued yet
+  always_comb begin : accelerator_commit
+    acc_commit = 1'b0;
+    if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1;
+    if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)  // NOTE(review): reads commit port 1 unconditionally; presumably requires NrCommitPorts >= 2
+      acc_commit = 1'b1;
+  end
+
+  // Dirty the V state if we are committing anything related to the vector accelerator
+  always_comb begin : dirty_v_state
+    dirty_v_state_o = 1'b0;
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL);
+    end
+  end
+
+  assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id
+                                                        : commit_instr_i[1].trans_id;
+
+  /**************************
+   *  Accelerator barriers  *
+   **************************/
+
+  // On a store barrier (i.e. any barrier that requires preceding stores to complete
+  // before continuing execution), halt execution while there are pending stores in
+  // the accelerator pipeline.
+  logic wait_acc_store_d, wait_acc_store_q;
+  `FF(wait_acc_store_q, wait_acc_store_d, '0)
+
+  // Set on store barrier. Clear when no store is pending.
+  assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
+  assign ctrl_halt_o = wait_acc_store_q;
+
+  /**************************
+   *  Load/Store tracking   *
+   **************************/
+
+  // Loads
+  logic acc_spec_loads_overflow;
+  logic [2:0] acc_spec_loads_pending;
+  logic acc_disp_loads_overflow;
+  logic [2:0] acc_disp_loads_pending;
+
+  assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0);
+
+  // Count speculative loads. These can still be flushed.
+  counter #(
+      .WIDTH (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_spec_loads (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .clear_i (flush_ex_i),
+      .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),  // XOR: enabled only when exactly one of issue/dispatch occurs
+      .load_i (1'b0),
+      .down_i (acc_ld_disp),  // presumably selects count-down on dispatch - confirm against the counter module
+      .d_i ('0),
+      .q_o (acc_spec_loads_pending),
+      .overflow_o(acc_spec_loads_overflow)
+  );
+
+  // Count dispatched loads. These cannot be flushed anymore.
+  counter #(
+      .WIDTH (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_disp_loads (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .clear_i (1'b0),
+      .en_i (acc_ld_disp ^ acc_resp_i.load_complete),
+      .load_i (1'b0),
+      .down_i (acc_resp_i.load_complete),
+      .d_i ('0),
+      .q_o (acc_disp_loads_pending),
+      .overflow_o(acc_disp_loads_overflow)
+  );
+
+  acc_dispatcher_no_load_overflow :
+  assert property (
+    @(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
+  else $error("[acc_dispatcher] Too many pending loads.");
+
+  // Stores
+  logic acc_spec_stores_overflow;
+  logic [2:0] acc_spec_stores_pending;
+  logic acc_disp_stores_overflow;
+  logic [2:0] acc_disp_stores_pending;
+
+  assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0);
+
+  // Count speculative stores. These can still be flushed.
+  counter #(
+      .WIDTH (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_spec_stores (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .clear_i (flush_ex_i),
+      .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
+      .load_i (1'b0),
+      .down_i (acc_st_disp),
+      .d_i ('0),
+      .q_o (acc_spec_stores_pending),
+      .overflow_o(acc_spec_stores_overflow)
+  );
+
+  // Count dispatched stores. These cannot be flushed anymore.
+  counter #(
+      .WIDTH (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_disp_stores (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .clear_i (1'b0),
+      .en_i (acc_st_disp ^ acc_resp_i.store_complete),
+      .load_i (1'b0),
+      .down_i (acc_resp_i.store_complete),
+      .d_i ('0),
+      .q_o (acc_disp_stores_pending),
+      .overflow_o(acc_disp_stores_overflow)
+  );
+
+  acc_dispatcher_no_store_overflow :
+  assert property (
+    @(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
+  else $error("[acc_dispatcher] Too many pending stores.");
+
+  /**************************
+   *  Tie Off Unused Signals *
+   **************************/
+
+  assign acc_stall_st_pending_o = 1'b0;
+  assign flush_pipeline_o = 1'b0;
+  assign acc_dcache_req_ports_o = '0;  // this module drives no D$ requests of its own
+
+endmodule : acc_dispatcher
diff --git a/test/type_param/core/alu.sv b/test/type_param/core/alu.sv
new file mode 100644
index 0000000..a928725
--- /dev/null
+++ b/test/type_param/core/alu.sv
@@ -0,0 +1,359 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Matthias Baer
+// Author: Igor Loi
+// Author: Andreas Traber
+// Author: Lukas Mueller
+// Author: Florian Zaruba
+//
+// Date: 19.03.2017
+// Description: Ariane ALU based on RI5CY's ALU
+
+// Combinational ALU: result_o and alu_branch_res_o are pure functions of fu_data_i.
+module alu
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // Clock (not referenced inside this module)
+    input logic rst_ni,  // Asynchronous reset active low (not referenced inside this module)
+    input fu_data_t fu_data_i,
+    output riscv::xlen_t result_o,
+    output logic alu_branch_res_o
+);
+
+  riscv::xlen_t operand_a_rev;
+  logic [ 31:0] operand_a_rev32;
+  logic [ riscv::XLEN:0] operand_b_neg;
+  logic [riscv::XLEN+1:0] adder_result_ext_o;
+  logic less;  // handles both signed and unsigned forms
+  logic [ 31:0] rolw;  // Rotate Left Word
+  logic [ 31:0] rorw;  // Rotate Right Word
+  logic [31:0] orcbw, rev8w;
+  logic [ $clog2(riscv::XLEN) : 0] cpop;  // Count Population
+  logic [$clog2(riscv::XLEN)-1 : 0] lz_tz_count;  // Count Leading Zeros
+  logic [ 4:0] lz_tz_wcount;  // Count Leading Zeros Word
+  logic lz_tz_empty, lz_tz_wempty;
+  riscv::xlen_t orcbw_result, rev8w_result;
+
+  // bit reverse operand_a for left shifts and bit counting
+  generate
+    genvar k;
+    for (k = 0; k < riscv::XLEN; k++)
+      assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k];
+
+    for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
+  endgenerate
+
+  // ------
+  // Adder
+  // ------
+  logic adder_op_b_negate;
+  logic adder_z_flag;
+  logic [riscv::XLEN:0] adder_in_a, adder_in_b;
+  riscv::xlen_t adder_result;
+  logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx;
+
+  always_comb begin
+    adder_op_b_negate = 1'b0;
+
+    unique case (fu_data_i.operation)
+      // ADDER OPS
+      EQ, NE, SUB, SUBW, ANDN, ORN, XNOR: adder_op_b_negate = 1'b1;
+      default: ;
+    endcase
+  end
+
+  always_comb begin
+    operand_a_bitmanip = fu_data_i.operand_a;
+
+    if (CVA6Cfg.RVB) begin
+      if (riscv::IS_XLEN64) begin
+        unique case (fu_data_i.operation)
+          SH1ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
+          SH2ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
+          SH3ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
+          CTZW: operand_a_bitmanip = operand_a_rev32;  // bit-reversed input; presumably lets the lzc instance count trailing zeros - confirm lzc MODE semantics
+          ADDUW, CPOPW, CLZW: operand_a_bitmanip = fu_data_i.operand_a[31:0];
+          default: ;
+        endcase
+      end
+      unique case (fu_data_i.operation)
+        SH1ADD: operand_a_bitmanip = fu_data_i.operand_a << 1;
+        SH2ADD: operand_a_bitmanip = fu_data_i.operand_a << 2;
+        SH3ADD: operand_a_bitmanip = fu_data_i.operand_a << 3;
+        CTZ: operand_a_bitmanip = operand_a_rev;
+        default: ;
+      endcase
+    end
+  end
+
+  // prepare operand a
+  assign adder_in_a = {operand_a_bitmanip, 1'b1};  // extra LSB = 1: with the LSB of operand b it supplies the +1 of a two's-complement subtraction
+
+  // prepare operand b
+  assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN + 1{adder_op_b_negate}};  // all bits (including the extra LSB) are inverted when negating
+  assign adder_in_b = operand_b_neg;
+
+  // actual adder
+  assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
+  assign adder_result = adder_result_ext_o[riscv::XLEN:1];  // drop the extra LSB
+  assign adder_z_flag = ~|adder_result;
+
+  // get the right branch comparison result
+  always_comb begin : branch_resolve
+    // set comparison by default
+    alu_branch_res_o = 1'b1;
+    case (fu_data_i.operation)
+      EQ: alu_branch_res_o = adder_z_flag;
+      NE: alu_branch_res_o = ~adder_z_flag;
+      LTS, LTU: alu_branch_res_o = less;
+      GES, GEU: alu_branch_res_o = ~less;
+      default: alu_branch_res_o = 1'b1;
+    endcase
+  end
+
+  // ---------
+  // Shifts
+  // ---------
+
+  // TODO: this can probably be optimized significantly
+  logic shift_left;  // should we shift left
+  logic shift_arithmetic;
+
+  riscv::xlen_t shift_amt;  // amount of shift, to the right
+  riscv::xlen_t shift_op_a;  // input of the shifter
+  logic [ 31:0] shift_op_a32;  // input to the 32 bit shift operation
+
+  riscv::xlen_t shift_result;
+  logic [ 31:0] shift_result32;
+
+  logic [riscv::XLEN:0] shift_right_result;
+  logic [ 32:0] shift_right_result32;
+
+  riscv::xlen_t shift_left_result;
+  logic [ 31:0] shift_left_result32;
+
+  assign shift_amt = fu_data_i.operand_b;
+
+  assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW);
+
+  assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW);
+
+  // right shifts, we let the synthesizer optimize this
+  logic [riscv::XLEN:0] shift_op_a_64;
+  logic [32:0] shift_op_a_32;
+
+  // choose the bit reversed or the normal input for shift operand a
+  assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
+  assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
+
+  assign shift_op_a_64 = {shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a};  // extra MSB carries the sign for arithmetic shifts, 0 otherwise
+  assign shift_op_a_32 = {shift_arithmetic & shift_op_a[31], shift_op_a32};
+
+  assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);  // NOTE(review): shift amount is taken as [5:0] regardless of XLEN
+
+  assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
+  // bit reverse the shift_right_result for left shifts
+  genvar j;
+  generate
+    for (j = 0; j < riscv::XLEN; j++)
+      assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j];
+
+    for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j];
+
+  endgenerate
+
+  assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0];
+  assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0];
+
+  // ------------
+  // Comparisons
+  // ------------
+
+  always_comb begin
+    logic sgn;
+    sgn = 1'b0;
+
+    if ((fu_data_i.operation == SLTS) ||
+        (fu_data_i.operation == LTS) ||
+        (fu_data_i.operation == GES) ||
+        (fu_data_i.operation == MAX) ||
+        (fu_data_i.operation == MIN))
+      sgn = 1'b1;
+
+    less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) <
+            $signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b}));  // operands widened by one bit: sign bit when sgn is set, 0 otherwise
+  end
+
+  if (CVA6Cfg.RVB) begin : gen_bitmanip
+    // Count Population + Count population Word
+
+    popcount #(
+        .INPUT_WIDTH(riscv::XLEN)
+    ) i_cpop_count (
+        .data_i (operand_a_bitmanip),
+        .popcount_o(cpop)
+    );
+
+    // Count Leading/Trailing Zeros
+    // 64b
+    lzc #(
+        .WIDTH(riscv::XLEN),
+        .MODE (1)
+    ) i_clz_64b (
+        .in_i(operand_a_bitmanip),
+        .cnt_o(lz_tz_count),
+        .empty_o(lz_tz_empty)
+    );
+    if (riscv::IS_XLEN64) begin
+      //32b
+      lzc #(
+          .WIDTH(32),
+          .MODE (1)
+      ) i_clz_32b (
+          .in_i(operand_a_bitmanip[31:0]),
+          .cnt_o(lz_tz_wcount),
+          .empty_o(lz_tz_wempty)
+      );
+    end
+  end
+
+  if (CVA6Cfg.RVB) begin : gen_orcbw_rev8w_results
+    assign orcbw = {
+      {8{|fu_data_i.operand_a[31:24]}},
+      {8{|fu_data_i.operand_a[23:16]}},
+      {8{|fu_data_i.operand_a[15:8]}},
+      {8{|fu_data_i.operand_a[7:0]}}
+    };
+    assign rev8w = {
+      {fu_data_i.operand_a[7:0]},
+      {fu_data_i.operand_a[15:8]},
+      {fu_data_i.operand_a[23:16]},
+      {fu_data_i.operand_a[31:24]}
+    };
+    if (riscv::IS_XLEN64) begin : gen_64b
+      assign orcbw_result = {
+        {8{|fu_data_i.operand_a[63:56]}},
+        {8{|fu_data_i.operand_a[55:48]}},
+        {8{|fu_data_i.operand_a[47:40]}},
+        {8{|fu_data_i.operand_a[39:32]}},
+        orcbw
+      };
+      assign rev8w_result = {
+        rev8w,
+        {fu_data_i.operand_a[39:32]},
+        {fu_data_i.operand_a[47:40]},
+        {fu_data_i.operand_a[55:48]},
+        {fu_data_i.operand_a[63:56]}
+      };
+    end else begin : gen_32b
+      assign orcbw_result = orcbw;
+      assign rev8w_result = rev8w;
+    end
+  end
+
+  // -----------
+  // Result MUX
+  // -----------
+  always_comb begin
+    result_o = '0;  // default when no case below selects a result; later case statements override earlier ones
+    if (riscv::IS_XLEN64) begin
+      unique case (fu_data_i.operation)
+        // Add word: Ignore the upper bits and sign extend to 64 bit
+        ADDW, SUBW: result_o = {{riscv::XLEN - 32{adder_result[31]}}, adder_result[31:0]};
+        SH1ADDUW, SH2ADDUW, SH3ADDUW: result_o = adder_result;
+        // Shifts 32 bit
+        SLLW, SRLW, SRAW: result_o = {{riscv::XLEN - 32{shift_result32[31]}}, shift_result32[31:0]};
+        default: ;
+      endcase
+    end
+    unique case (fu_data_i.operation)
+      // Standard Operations
+      ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1];
+      ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1];
+      XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1];
+      // Adder Operations
+      ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD: result_o = adder_result;
+      // Shift Operations
+      SLL, SRL, SRA: result_o = (riscv::IS_XLEN64) ? shift_result : shift_result32;
+      // Comparison Operations
+      SLTS, SLTU: result_o = {{riscv::XLEN - 1{1'b0}}, less};
+      default: ;  // default case to suppress unique warning
+    endcase
+
+    if (CVA6Cfg.RVB) begin
+      // Index for Bitwise Rotation
+      bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN - 1));
+      // rolw, roriw, rorw
+      rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
+      rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
+      if (riscv::IS_XLEN64) begin
+        unique case (fu_data_i.operation)
+          CLZW, CTZW:
+          result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN - 5{1'b0}}, lz_tz_wcount};  // change
+          ROLW: result_o = {{riscv::XLEN - 32{rolw[31]}}, rolw};
+          RORW, RORIW: result_o = {{riscv::XLEN - 32{rorw[31]}}, rorw};
+          default: ;
+        endcase
+      end
+      unique case (fu_data_i.operation)
+        // Integer minimum/maximum
+        MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
+        MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
+        MIN: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
+        MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
+
+        // Single bit instructions operations
+        BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
+        BEXT, BEXTI: result_o = {{riscv::XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
+        BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
+        BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;
+
+        // Count Leading/Trailing Zeros
+        CLZ, CTZ:
+        result_o = (lz_tz_empty) ? ({{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count} + 1) :
+            {{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count};  // the count is incremented by one when lz_tz_empty is set
+
+        // Count population
+        CPOP, CPOPW: result_o = {{(riscv::XLEN - ($clog2(riscv::XLEN) + 1)) {1'b0}}, cpop};
+
+        // Sign and Zero Extend
+        SEXTB: result_o = {{riscv::XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
+        SEXTH: result_o = {{riscv::XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
+        ZEXTH: result_o = {{riscv::XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]};
+
+        // Bitwise Rotation
+        ROL:
+        result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0])));
+
+        ROR, RORI:
+        result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0])));
+
+        ORCB: result_o = orcbw_result;
+        REV8: result_o = rev8w_result;
+
+        default:
+        if (fu_data_i.operation == SLLIUW && riscv::IS_XLEN64)
+          result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0];  // Left Shift 32 bit unsigned
+      endcase
+    end
+    if (CVA6Cfg.ZiCondExtEn) begin
+      unique case (fu_data_i.operation)
+        CZERO_EQZ:
+        result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0;  // move zero to rd if rs2 is equal to zero else rs1
+        CZERO_NEZ:
+        result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a;  // move zero to rd if rs2 is nonzero else rs1
+        default: ;  // default case to suppress unique warning
+      endcase
+    end
+  end
+endmodule
diff --git a/test/type_param/core/amo_buffer.sv b/test/type_param/core/amo_buffer.sv
new file mode 100644
index 0000000..24a98dd
--- /dev/null
+++ b/test/type_param/core/amo_buffer.sv
@@ -0,0 +1,82 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 20.09.2018
+// Description: Buffers AMO requests
+// This unit buffers atomic memory operations for the cache subsystem.
+// Furthermore it handles interfacing with the commit stage
+
+module amo_buffer #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // clock
+    input logic rst_ni,  // asynchronous reset, active low
+    input logic flush_i,  // pipeline flush request
+
+    input logic valid_i,  // an AMO is being offered
+    output logic ready_o,  // the buffer can take an AMO
+    input ariane_pkg::amo_t amo_op_i,  // which atomic operation
+    input logic [riscv::PLEN-1:0] paddr_i,  // physical address to buffer
+    input riscv::xlen_t data_i,  // operand data to buffer
+    input logic [1:0] data_size_i,  // access size of the request
+    // D$
+    output ariane_pkg::amo_req_t amo_req_o,  // request towards the cache subsystem
+    input ariane_pkg::amo_resp_t amo_resp_i,  // response from the cache subsystem
+    // Auxiliary signals
+    input logic amo_valid_commit_i,  // a valid AMO sits in the commit stage
+    input logic no_st_pending_i  // no store is pending any more
+);
+  // One buffered AMO: operation, address, data and size travel together.
+  typedef struct packed {
+    ariane_pkg::amo_t op;
+    logic [riscv::PLEN-1:0] paddr;
+    riscv::xlen_t data;
+    logic [1:0] size;
+  } amo_op_t;
+
+  amo_op_t entry_d, entry_q;  // FIFO input / FIFO output
+  logic entry_valid;  // driven by the FIFO's full flag
+  logic drop_entry;  // flush request forwarded to the FIFO
+
+  // Capture the incoming operation as a single struct value.
+  assign entry_d = '{op: amo_op_i, paddr: paddr_i, data: data_i, size: data_size_i};
+
+  // A flush only discards the entry while the AMO is not being committed,
+  // i.e. while it is still speculative.
+  assign drop_entry = flush_i & !amo_valid_commit_i;
+
+  // Depth-1 FIFO: "full" doubles as the valid flag, "empty" as ready.
+  fifo_v3 #(
+      .dtype(amo_op_t),
+      .DEPTH(1)
+  ) i_amo_fifo (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .testmode_i(1'b0),
+      .flush_i   (drop_entry),
+      .push_i    (valid_i),
+      .data_i    (entry_d),
+      .pop_i     (amo_resp_i.ack),
+      .data_o    (entry_q),
+      .full_o    (entry_valid),
+      .empty_o   (ready_o),
+      .usage_o   ()  // left open
+  );
+
+  // Raise the request once all stores have drained and the AMO is committing.
+  assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & entry_valid;
+  assign amo_req_o.amo_op = entry_q.op;
+  assign amo_req_o.size = entry_q.size;
+  // Address and data are zero-extended to 64 bits.
+  assign amo_req_o.operand_a = {{64 - riscv::PLEN{1'b0}}, entry_q.paddr};
+  assign amo_req_o.operand_b = {{64 - riscv::XLEN{1'b0}}, entry_q.data};
+
+endmodule
diff --git a/test/type_param/core/ariane_regfile_ff.sv b/test/type_param/core/ariane_regfile_ff.sv
new file mode 100644
index 0000000..ae5cbeb
--- /dev/null
+++ b/test/type_param/core/ariane_regfile_ff.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Engineer: Francesco Conti - f.conti@unibo.it
+//
+// Additional contributions by:
+// Markus Wegmann - markus.wegmann@technokrat.ch
+//
+// Design Name: RISC-V register file
+// Project Name: zero-riscy
+// Language: SystemVerilog
+//
+// Description: Register file with 31 or 15x 32 bit wide registers.
+// Register 0 is fixed to 0. This register file is based on
+// flip flops.
+//
+
+module ariane_regfile #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned DATA_WIDTH = 32,
+    parameter int unsigned NR_READ_PORTS = 2,
+    parameter bit ZERO_REG_ZERO = 0
+) (
+    // clock and reset
+    input logic clk_i,
+    input logic rst_ni,
+    // disable clock gates for testing
+    input logic test_en_i,
+    // read port
+    input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
+    output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
+    // write port
+    input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
+);
+
+  localparam ADDR_WIDTH = 5;
+  localparam NUM_WORDS = 2 ** ADDR_WIDTH;
+
+  logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;  // register storage
+  logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec;  // per-port, per-register write enable
+
+  // Address decode: clear each enable bit, then copy we_i into the bit
+  // whose index matches the write address of that port.
+  always_comb begin : we_decoder
+    for (int unsigned port = 0; port < CVA6Cfg.NrCommitPorts; port++) begin
+      for (int unsigned word = 0; word < NUM_WORDS; word++) begin
+        we_dec[port][word] = 1'b0;
+        if (waddr_i[port] == word) we_dec[port][word] = we_i[port];
+      end
+    end
+  end
+
+  // Clocked storage with asynchronous reset. Ports are visited in ascending
+  // order, so the highest-numbered port wins on a same-address write.
+  always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
+    if (!rst_ni) begin
+      mem <= '{default: '0};
+    end else begin
+      for (int unsigned port = 0; port < CVA6Cfg.NrCommitPorts; port++) begin
+        for (int unsigned word = 0; word < NUM_WORDS; word++) begin
+          if (we_dec[port][word]) mem[word] <= wdata_i[port];
+        end
+        // Register 0 stays zero: this later assignment overrides a write above.
+        if (ZERO_REG_ZERO) mem[0] <= '0;
+      end
+    end
+  end
+
+  // Combinational read ports.
+  for (genvar rd = 0; rd < NR_READ_PORTS; rd++) begin
+    assign rdata_o[rd] = mem[raddr_i[rd]];
+  end
+
+endmodule
diff --git a/test/type_param/core/ariane_regfile_fpga.sv b/test/type_param/core/ariane_regfile_fpga.sv
new file mode 100644
index 0000000..22d5aaa
--- /dev/null
+++ b/test/type_param/core/ariane_regfile_fpga.sv
@@ -0,0 +1,125 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Engineer: Francesco Conti - f.conti@unibo.it
+//
+// Additional contributions by:
+// Markus Wegmann - markus.wegmann@technokrat.ch
+// Noam Gallmann - gnoam@live.com
+// Felipe Lisboa Malaquias
+// Henry Suzukawa
+//
+//
+// Description: This register file is optimized for implementation on
+// FPGAs. The register file features one distributed RAM block per implemented
+// sync-write port, each with a parametrized number of async-read ports.
+// Read-accesses are multiplexed from the relevant block depending on which block
+// was last written to. For that purpose an additional array of registers is
+// maintained keeping track of write accesses.
+//
+
+module ariane_regfile_fpga #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned DATA_WIDTH = 32,
+    parameter int unsigned NR_READ_PORTS = 2,
+    parameter bit ZERO_REG_ZERO = 0
+) (
+    // clock and reset
+    input logic clk_i,
+    input logic rst_ni,
+    // disable clock gates for testing
+    input logic test_en_i,
+    // read port
+    input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
+    output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
+    // write port
+    input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
+);
+
+  localparam ADDR_WIDTH = 5;
+  localparam NUM_WORDS = 2 ** ADDR_WIDTH;
+  localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts);
+
+  // Distributed RAM usually supports one write port per block - duplicate for each write port.
+  logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem[CVA6Cfg.NrCommitPorts];
+
+  logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec;
+  logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel;
+  logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q;
+
+  // write address decoder (for block selector)
+  always_comb begin
+    for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
+      for (int unsigned i = 0; i < NUM_WORDS; i++) begin
+        if (waddr_i[j] == i) we_dec[j][i] = we_i[j];
+        else we_dec[j][i] = 1'b0;
+      end
+    end
+  end
+
+  // update block selector:
+  // signal mem_block_sel records where the current valid value is stored.
+  // if multiple ports try to write to the same address simultaneously, the port with the highest
+  // index has priority.
+  always_comb begin
+    mem_block_sel = mem_block_sel_q;
+    for (int i = 0; i < NUM_WORDS; i++) begin
+      for (int j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
+        if (we_dec[j][i] == 1'b1) begin
+          mem_block_sel[i] = LOG_NR_WRITE_PORTS'(j);
+        end
+      end
+    end
+  end
+
+  // block selector flops
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      mem_block_sel_q <= '0;
+    end else begin
+      mem_block_sel_q <= mem_block_sel;
+    end
+  end
+
+  // distributed RAM blocks
+  // Every enabled write is stored, address 0 included: the output MUX below forces reads of
+  // address 0 to zero when ZERO_REG_ZERO is set. The former extra term `~waddr_i[j] != 0` was
+  // always true (the address is zero-extended to the literal's width before inversion).
+  logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts];
+  for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block
+    always_ff @(posedge clk_i) begin
+      if (we_i[j]) begin
+        mem[j][waddr_i[j]] <= wdata_i[j];
+      end
+    end
+    for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read
+      assign mem_read[j][k] = mem[j][raddr_i[k]];
+    end
+  end
+
+  // output MUX
+  logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr;
+  for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port
+    assign block_addr[k] = mem_block_sel_q[raddr_i[k]];  // block that holds the latest value of this register
+    assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k];
+  end
+
+  // random initialization of the memory to suppress assert warnings on Questa.
+  initial begin
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      for (int j = 0; j < NUM_WORDS; j++) begin
+        mem[i][j] = $random();
+      end
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/axi_shim.sv b/test/type_param/core/axi_shim.sv
new file mode 100644
index 0000000..8e1cfa8
--- /dev/null
+++ b/test/type_param/core/axi_shim.sv
@@ -0,0 +1,310 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: axi_shim.sv + * Author: Michael Schaffner + * Florian Zaruba + * Date: 1.8.2018 + * + * Description: Manages communication with the AXI Bus. Note that this unit does not + * buffer requests and register the signals. + * + */ + + +module axi_shim #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2 (NOTE(review): the initial-block assertion in this module only enforces >= 1 - confirm) + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // read channel + // request + input logic rd_req_i, + output logic rd_gnt_o, + input logic [CVA6Cfg.AxiAddrWidth-1:0] rd_addr_i, + input logic [$clog2(AxiNumWords)-1:0] rd_blen_i, // axi convention: LEN-1 + input logic [2:0] rd_size_i, + input logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_i, // use same ID for reads, or make sure you only have one outstanding read tx + input logic rd_lock_i, + // read response (we have to unconditionally sink the response) + input logic rd_rdy_i, + output logic rd_last_o, + output logic rd_valid_o, + output logic [CVA6Cfg.AxiDataWidth-1:0] rd_data_o, + output logic [CVA6Cfg.AxiUserWidth-1:0] rd_user_o, + output logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_o, + output logic rd_exokay_o, // indicates whether exclusive tx succeeded + // write channel + input logic wr_req_i, + output logic wr_gnt_o, + input logic [CVA6Cfg.AxiAddrWidth-1:0] wr_addr_i, + input logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] wr_data_i, + input logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] wr_user_i, + input logic
[AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] wr_be_i, + input logic [$clog2(AxiNumWords)-1:0] wr_blen_i, // axi convention: LEN-1 + input logic [2:0] wr_size_i, + input logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_i, + input logic wr_lock_i, + input logic [5:0] wr_atop_i, + // write response + input logic wr_rdy_i, + output logic wr_valid_o, + output logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_o, + output logic wr_exokay_o, // indicates whether exclusive tx succeeded + // AXI port + output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i +); + localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1; + + /////////////////////////////////////////////////////// + // write channel + /////////////////////////////////////////////////////// + + enum logic [3:0] { + IDLE, + WAIT_AW_READY, + WAIT_LAST_W_READY, + WAIT_LAST_W_READY_AW_READY, + WAIT_AW_READY_BURST + } + wr_state_q, wr_state_d; + + // AXI tx counter + logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q; + logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en; + + assign wr_single_req = (wr_blen_i == 0); + + // address + assign axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + assign axi_req_o.aw.addr = wr_addr_i[CVA6Cfg.AxiAddrWidth-1:0]; + assign axi_req_o.aw.size = wr_size_i; + assign axi_req_o.aw.len = wr_blen_i; + assign axi_req_o.aw.id = wr_id_i; + assign axi_req_o.aw.prot = 3'b0; + assign axi_req_o.aw.region = 4'b0; + assign axi_req_o.aw.lock = wr_lock_i; + assign axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE; + assign axi_req_o.aw.qos = 4'b0; + assign axi_req_o.aw.atop = wr_atop_i; + assign axi_req_o.aw.user = '0; + + // data + assign axi_req_o.w.data = wr_data_i[wr_cnt_q]; + assign axi_req_o.w.user = wr_user_i[wr_cnt_q]; + assign axi_req_o.w.strb = wr_be_i[wr_cnt_q]; + assign axi_req_o.w.last = wr_cnt_done; + + // write response + assign wr_exokay_o = (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY); + assign axi_req_o.b_ready = wr_rdy_i; + assign wr_valid_o =
axi_resp_i.b_valid; + assign wr_id_o = axi_resp_i.b.id; + + // tx counter + assign wr_cnt_done = (wr_cnt_q == wr_blen_i); + assign wr_cnt_d = (wr_cnt_clr) ? '0 : (wr_cnt_en && CVA6Cfg.AxiBurstWriteEn) ? wr_cnt_q + 1 : wr_cnt_q; + + always_comb begin : p_axi_write_fsm + // default + wr_state_d = wr_state_q; + + axi_req_o.aw_valid = 1'b0; + axi_req_o.w_valid = 1'b0; + wr_gnt_o = 1'b0; + + wr_cnt_en = 1'b0; + wr_cnt_clr = 1'b0; + + case (wr_state_q) + /////////////////////////////////// + IDLE: begin + // we have an incoming request + if (wr_req_i) begin + // is this a read or write? + axi_req_o.aw_valid = 1'b1; + axi_req_o.w_valid = 1'b1; + + if (CVA6Cfg.AxiBurstWriteEn && !wr_single_req) begin + wr_cnt_en = axi_resp_i.w_ready; + + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + 2'b11: wr_state_d = WAIT_LAST_W_READY; + 2'b01: wr_state_d = WAIT_LAST_W_READY_AW_READY; + 2'b10: wr_state_d = WAIT_LAST_W_READY; + default: ; + endcase + end else if (wr_single_req) begin // its a single write + wr_cnt_clr = 1'b1; + // single req can be granted here + wr_gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready; + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + 2'b01: wr_state_d = WAIT_AW_READY; + 2'b10: wr_state_d = WAIT_LAST_W_READY; + default: wr_state_d = IDLE; + endcase + // its a request for the whole cache line + end + end + end + /////////////////////////////////// + // ~> from single write + WAIT_AW_READY: begin + axi_req_o.aw_valid = 1'b1; + + if (axi_resp_i.aw_ready) begin + wr_state_d = IDLE; + wr_gnt_o = 1'b1; + end + end + /////////////////////////////////// + // ~> from write, there is an outstanding write + WAIT_LAST_W_READY: begin + axi_req_o.w_valid = 1'b1; + + if (CVA6Cfg.AxiBurstWriteEn && axi_resp_i.w_ready && !wr_cnt_done) begin + wr_cnt_en = 1'b1; + end else if (wr_cnt_done) begin // this is the last write + if (axi_resp_i.w_ready) begin + wr_state_d = IDLE; + wr_cnt_clr = 1'b1; + wr_gnt_o = 1'b1; + end + end + end +
/////////////////////////////////// + default: begin + /////////////////////////////////// + // ~> we need to wait for an aw_ready and there is at least one outstanding write + if (CVA6Cfg.AxiBurstWriteEn) begin + if (wr_state_q == WAIT_LAST_W_READY_AW_READY) begin + axi_req_o.w_valid = 1'b1; + axi_req_o.aw_valid = 1'b1; + // we got an aw_ready + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + // we got an aw ready + 2'b01: begin + // are there any outstanding transactions? + if (wr_cnt_done) begin + wr_state_d = WAIT_AW_READY_BURST; + wr_cnt_clr = 1'b1; + end else begin + // yes, so reduce the count and stay here + wr_cnt_en = 1'b1; + end + end + 2'b10: wr_state_d = WAIT_LAST_W_READY; + 2'b11: begin + // we are finished + if (wr_cnt_done) begin + wr_state_d = IDLE; + wr_gnt_o = 1'b1; + wr_cnt_clr = 1'b1; + // there are outstanding transactions + end else begin + wr_state_d = WAIT_LAST_W_READY; + wr_cnt_en = 1'b1; + end + end + default: ; + endcase + end /////////////////////////////////// + // ~> all data has already been sent, we are only waiting for the aw_ready + else if (wr_state_q == WAIT_AW_READY_BURST) begin + axi_req_o.aw_valid = 1'b1; + + if (axi_resp_i.aw_ready) begin + wr_state_d = IDLE; + wr_gnt_o = 1'b1; + end + end + end else begin + wr_state_d = IDLE; + end + end + endcase + end + + + /////////////////////////////////////////////////////// + // read channel + /////////////////////////////////////////////////////// + + // address + // in case of a wrapping transfer we can simply begin at the address, if we want to request a cache-line + // with an incremental transfer we need to output the corresponding base address of the cache line + assign axi_req_o.ar.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + assign axi_req_o.ar.addr = rd_addr_i[CVA6Cfg.AxiAddrWidth-1:0]; + assign axi_req_o.ar.size = rd_size_i; + assign axi_req_o.ar.len = rd_blen_i; + assign axi_req_o.ar.id = rd_id_i; + assign axi_req_o.ar.prot = 3'b0;
+ assign axi_req_o.ar.region = 4'b0; + assign axi_req_o.ar.lock = rd_lock_i; + assign axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE; + assign axi_req_o.ar.qos = 4'b0; + assign axi_req_o.ar.user = '0; + + // make the read request + assign axi_req_o.ar_valid = rd_req_i; + assign rd_gnt_o = rd_req_i & axi_resp_i.ar_ready; + + // return path + assign axi_req_o.r_ready = rd_rdy_i; + assign rd_data_o = axi_resp_i.r.data; + if (ariane_pkg::AXI_USER_EN) begin + assign rd_user_o = axi_resp_i.r.user; + end else begin + assign rd_user_o = '0; + end + assign rd_last_o = axi_resp_i.r.last; + assign rd_valid_o = axi_resp_i.r_valid; + assign rd_id_o = axi_resp_i.r.id; + assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY); + + + // ---------------- + // Registers + // ---------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + // reset the write FSM to IDLE and clear the tx counter + wr_state_q <= IDLE; + wr_cnt_q <= '0; + end else begin + wr_state_q <= wr_state_d; + wr_cnt_q <= wr_cnt_d; + end + end + + // ---------------- + // Assertions + // ---------------- + + //pragma translate_off + initial begin + assert (AxiNumWords >= 1) + else $fatal(1, "[axi adapter] AxiNumWords must be >= 1"); + assert (CVA6Cfg.AxiIdWidth >= 2) + else $fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide"); + end + //pragma translate_on + +endmodule // axi_shim diff --git a/test/type_param/core/branch_unit.sv b/test/type_param/core/branch_unit.sv new file mode 100644 index 0000000..dfcb1c7 --- /dev/null +++ b/test/type_param/core/branch_unit.sv @@ -0,0 +1,106 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 09.05.2017 +// Description: Branch target calculation and comparison + +module branch_unit #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic debug_mode_i, + input ariane_pkg::fu_data_t fu_data_i, + input logic [riscv::VLEN-1:0] pc_i, // PC of instruction + input logic is_compressed_instr_i, + input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict + input logic branch_valid_i, + input logic branch_comp_res_i, // branch comparison result from ALU + output logic [riscv::VLEN-1:0] branch_result_o, + + input ariane_pkg::branchpredict_sbe_t branch_predict_i, // this is the address we predicted + output ariane_pkg::bp_resolve_t resolved_branch_o, // this is the actual address we are targeting + output logic resolve_branch_o, // to ID to clear that we resolved the branch and we can + // accept new entries to the scoreboard + output ariane_pkg::exception_t branch_exception_o // branch exception out +); + logic [riscv::VLEN-1:0] target_address; + logic [riscv::VLEN-1:0] next_pc; + + // here we handle the various possibilities of mis-predicts + always_comb begin : mispredict_handler + // set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC + automatic logic [riscv::VLEN-1:0] jump_base; + // TODO(zarubaf): The ALU can be used to calculate the branch target + jump_base = (fu_data_i.operation == ariane_pkg::JALR) ?
fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i; + + target_address = {riscv::VLEN{1'b0}}; + resolve_branch_o = 1'b0; + resolved_branch_o.target_address = {riscv::VLEN{1'b0}}; + resolved_branch_o.is_taken = 1'b0; + resolved_branch_o.valid = branch_valid_i; + resolved_branch_o.is_mispredict = 1'b0; + resolved_branch_o.cf_type = branch_predict_i.cf; + // calculate next PC, depending on whether the instruction is compressed or not this may be different + // TODO(zarubaf): We already calculate this a couple of times, maybe re-use? + next_pc = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4}); + // calculate target address: simple addition of VLEN-wide operands + target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0])); + // on a JALR we are supposed to reset the LSB to 0 (according to the specification) + if (fu_data_i.operation == ariane_pkg::JALR) target_address[0] = 1'b0; + // the result of this unit is next_pc (the PC of the following instruction), not the branch target - presumably the link value destined for rd, confirm + branch_result_o = next_pc; + resolved_branch_o.pc = pc_i; + // There are only two sources of mispredicts: + // 1. Branches + // 2. Jumps to register addresses + if (branch_valid_i) begin + // write target address which goes to PC Gen + resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc; + resolved_branch_o.is_taken = branch_comp_res_i; + // check the outcome of the branch speculation + if (ariane_pkg::op_is_branch(fu_data_i.operation)) begin + // Set the `cf_type` of the output as `branch`, this will update the BHT. + resolved_branch_o.cf_type = ariane_pkg::Branch; + // If the ALU comparison does not agree with the BHT prediction set the resolution as mispredicted.
+ resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch); + end + if (fu_data_i.operation == ariane_pkg::JALR + // check if the address of the jump register is correct and that we actually predicted + && (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin + resolved_branch_o.is_mispredict = 1'b1; + // update BTB only if this wasn't a return + if (branch_predict_i.cf != ariane_pkg::Return) + resolved_branch_o.cf_type = ariane_pkg::JumpR; + end + // to resolve the branch in ID + resolve_branch_o = 1'b1; + end + end + // use ALU exception signal for storing instruction fetch exceptions if + // the target address is not aligned to a 2 byte boundary + // + logic jump_taken; + always_comb begin : exception_handling + + // Do a jump if it is either unconditional jump (JAL | JALR) or `taken` conditional jump + jump_taken = !(ariane_pkg::op_is_branch(fu_data_i.operation)) || + ((ariane_pkg::op_is_branch(fu_data_i.operation)) && branch_comp_res_i); + branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED; + branch_exception_o.valid = 1'b0; + branch_exception_o.tval = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + // Only throw instruction address misaligned exception if this is indeed a `taken` conditional branch or + // an unconditional jump + if (branch_valid_i && (target_address[0] || (!CVA6Cfg.RVC && target_address[1])) && jump_taken) begin + branch_exception_o.valid = 1'b1; + end + end +endmodule diff --git a/test/type_param/core/cache_subsystem/axi_adapter.sv b/test/type_param/core/cache_subsystem/axi_adapter.sv new file mode 100644 index 0000000..0b8f9eb --- /dev/null +++ b/test/type_param/core/cache_subsystem/axi_adapter.sv @@ -0,0 +1,520 @@ +/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: axi_adapter.sv + * Author: Florian Zaruba + * Date: 1.8.2018 + * + * Description: Manages communication with the AXI Bus + */ +//import std_cache_pkg::*; + +module axi_adapter #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned DATA_WIDTH = 256, + parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature + parameter int unsigned CACHELINE_BYTE_OFFSET = 8, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + input logic req_i, + input ariane_pkg::ad_req_t type_i, + input ariane_pkg::amo_t amo_i, + output logic gnt_o, + input logic [riscv::XLEN-1:0] addr_i, + input logic we_i, + input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i, + input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i, + input logic [1:0] size_i, + input logic [CVA6Cfg.AxiIdWidth-1:0] id_i, + // read port + output logic valid_o, + output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o, + output logic [CVA6Cfg.AxiIdWidth-1:0] id_o, + // critical word - read port + output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o, + output logic critical_word_valid_o, + // AXI port +
output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i +); + localparam BURST_SIZE = (DATA_WIDTH / CVA6Cfg.AxiDataWidth) - 1; + localparam ADDR_INDEX = ($clog2( + DATA_WIDTH / CVA6Cfg.AxiDataWidth + ) > 0) ? $clog2( + DATA_WIDTH / CVA6Cfg.AxiDataWidth + ) : 1; + localparam MAX_OUTSTANDING_AW = CVA6Cfg.MaxOutstandingStores; + localparam MAX_OUTSTANDING_AW_CNT_WIDTH = $clog2( + MAX_OUTSTANDING_AW + 1 + ) > 0 ? $clog2( + MAX_OUTSTANDING_AW + 1 + ) : 1; + + typedef logic [MAX_OUTSTANDING_AW_CNT_WIDTH-1:0] outstanding_aw_cnt_t; + + enum logic [3:0] { + IDLE, + WAIT_B_VALID, + WAIT_AW_READY, + WAIT_LAST_W_READY, + WAIT_LAST_W_READY_AW_READY, + WAIT_AW_READY_BURST, + WAIT_R_VALID, + WAIT_R_VALID_MULTIPLE, + COMPLETE_READ, + WAIT_AMO_R_VALID + } + state_q, state_d; + + // counter for AXI transfers + logic [ADDR_INDEX-1:0] cnt_d, cnt_q; + logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] + cache_line_d, cache_line_q; + // save the address for a read, as we allow for non-cacheline aligned accesses + logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0] addr_offset_d, addr_offset_q; + logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q; + logic [ADDR_INDEX-1:0] index; + // save the atomic operation and size + ariane_pkg::amo_t amo_d, amo_q; + logic [1:0] size_d, size_q; + // outstanding write transactions counter + outstanding_aw_cnt_t outstanding_aw_cnt_q, outstanding_aw_cnt_d; + logic any_outstanding_aw; + + assign any_outstanding_aw = outstanding_aw_cnt_q != '0; + + always_comb begin : axi_fsm + // Default assignments + axi_req_o.aw_valid = 1'b0; + // Cast to AXI address width + axi_req_o.aw.addr = addr_i; + axi_req_o.aw.prot = 3'b0; + axi_req_o.aw.region = 4'b0; + axi_req_o.aw.len = 8'b0; + axi_req_o.aw.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes + axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + axi_req_o.aw.lock = 1'b0; + axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE; + axi_req_o.aw.qos = 4'b0; + axi_req_o.aw.id = id_i; +
axi_req_o.aw.atop = atop_from_amo(amo_i); + axi_req_o.aw.user = '0; + + axi_req_o.ar_valid = 1'b0; + // Cast to AXI address width + axi_req_o.ar.addr = addr_i; + // in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line + // with an incremental transfer we need to output the corresponding base address of the cache line + if (!CRITICAL_WORD_FIRST && type_i != ariane_pkg::SINGLE_REQ) begin + axi_req_o.ar.addr[CACHELINE_BYTE_OFFSET-1:0] = '0; + end + axi_req_o.ar.prot = 3'b0; + axi_req_o.ar.region = 4'b0; + axi_req_o.ar.len = 8'b0; + axi_req_o.ar.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes + axi_req_o.ar.burst = (CRITICAL_WORD_FIRST ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR); // wrapping transfer in case of a critical word first strategy + axi_req_o.ar.lock = 1'b0; + axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE; + axi_req_o.ar.qos = 4'b0; + axi_req_o.ar.id = id_i; + axi_req_o.ar.user = '0; + + axi_req_o.w_valid = 1'b0; + axi_req_o.w.data = wdata_i[0]; + axi_req_o.w.strb = be_i[0]; + axi_req_o.w.last = 1'b0; + axi_req_o.w.user = '0; + + axi_req_o.b_ready = 1'b0; + axi_req_o.r_ready = 1'b0; + + gnt_o = 1'b0; + valid_o = 1'b0; + id_o = axi_resp_i.r.id; + + critical_word_o = axi_resp_i.r.data; + critical_word_valid_o = 1'b0; + rdata_o = cache_line_q; + + state_d = state_q; + cnt_d = cnt_q; + cache_line_d = cache_line_q; + addr_offset_d = addr_offset_q; + id_d = id_q; + amo_d = amo_q; + size_d = size_q; + index = '0; + + outstanding_aw_cnt_d = outstanding_aw_cnt_q; + + case (state_q) + + IDLE: begin + cnt_d = '0; + // we have an incoming request + if (req_i) begin + // is this a read or write? + // write + if (we_i) begin + // multiple outstanding write transactions are only + // allowed if they are guaranteed not to be reordered + // i.e.
same ID + if (!any_outstanding_aw || ((id_i == id_q) && (amo_i == ariane_pkg::AMO_NONE))) begin + // the data is valid + axi_req_o.aw_valid = 1'b1; + axi_req_o.w_valid = 1'b1; + // store-conditional requires exclusive access + axi_req_o.aw.lock = amo_i == ariane_pkg::AMO_SC; + // its a single write + if (type_i == ariane_pkg::SINGLE_REQ) begin + // only a single write so the data is already the last one + axi_req_o.w.last = 1'b1; + // single req can be granted here + gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready; + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + 2'b11: state_d = WAIT_B_VALID; + 2'b01: state_d = WAIT_AW_READY; + 2'b10: state_d = WAIT_LAST_W_READY; + default: state_d = IDLE; + endcase + + if (axi_resp_i.aw_ready) begin + id_d = id_i; + amo_d = amo_i; + size_d = size_i; + end + + // its a request for the whole cache line + end else begin + // bursts of AMOs unsupported + assert (amo_i == ariane_pkg::AMO_NONE) + else $fatal("Bursts of atomic operations are not supported"); + + axi_req_o.aw.len = BURST_SIZE[7:0]; // number of bursts to do + axi_req_o.w.data = wdata_i[0]; + axi_req_o.w.strb = be_i[0]; + + if (axi_resp_i.w_ready) cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1; + else cnt_d = BURST_SIZE[ADDR_INDEX-1:0]; + + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + 2'b11: state_d = WAIT_LAST_W_READY; + 2'b01: state_d = WAIT_LAST_W_READY_AW_READY; + 2'b10: state_d = WAIT_LAST_W_READY; + default: ; + endcase + end + end + // read + end else begin + // only multiple outstanding write transactions are allowed + if (!any_outstanding_aw) begin + + axi_req_o.ar_valid = 1'b1; + // load-reserved requires exclusive access + axi_req_o.ar.lock = amo_i == ariane_pkg::AMO_LR; + + gnt_o = axi_resp_i.ar_ready; + if (type_i != ariane_pkg::SINGLE_REQ) begin + assert (amo_i == ariane_pkg::AMO_NONE) + else $fatal("Bursts of atomic operations are not supported"); + + axi_req_o.ar.len = BURST_SIZE[7:0]; + cnt_d = BURST_SIZE[ADDR_INDEX-1:0]; + end + + if
(axi_resp_i.ar_ready) begin + state_d = (type_i == ariane_pkg::SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE; + addr_offset_d = addr_i[ADDR_INDEX-1+3:3]; + end + end + end + end + end + + // ~> from single write + WAIT_AW_READY: begin + axi_req_o.aw_valid = 1'b1; + + if (axi_resp_i.aw_ready) begin + gnt_o = 1'b1; + state_d = WAIT_B_VALID; + id_d = id_i; + amo_d = amo_i; + size_d = size_i; + end + end + + // ~> we need to wait for an aw_ready and there is at least one outstanding write + WAIT_LAST_W_READY_AW_READY: begin + axi_req_o.w_valid = 1'b1; + axi_req_o.w.last = (cnt_q == '0); + if (type_i == ariane_pkg::SINGLE_REQ) begin + axi_req_o.w.data = wdata_i[0]; + axi_req_o.w.strb = be_i[0]; + end else begin + axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q]; + axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q]; + end + axi_req_o.aw_valid = 1'b1; + // we are here because we want to write a cache line + axi_req_o.aw.len = BURST_SIZE[7:0]; + // we got an aw_ready + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + // we got an aw ready + 2'b01: begin + // are there any outstanding transactions?
+ if (cnt_q == 0) state_d = WAIT_AW_READY_BURST; + else // yes, so reduce the count and stay here + cnt_d = cnt_q - 1; + end + 2'b10: state_d = WAIT_LAST_W_READY; + 2'b11: begin + // we are finished + if (cnt_q == 0) begin + state_d = WAIT_B_VALID; + gnt_o = 1'b1; + // there are outstanding transactions + end else begin + state_d = WAIT_LAST_W_READY; + cnt_d = cnt_q - 1; + end + end + default: ; + endcase + + end + + // ~> all data has already been sent, we are only waiting for the aw_ready + WAIT_AW_READY_BURST: begin + axi_req_o.aw_valid = 1'b1; + axi_req_o.aw.len = BURST_SIZE[7:0]; + + if (axi_resp_i.aw_ready) begin + state_d = WAIT_B_VALID; + gnt_o = 1'b1; + end + end + + // ~> from write, there is an outstanding write + WAIT_LAST_W_READY: begin + axi_req_o.w_valid = 1'b1; + + if (type_i != ariane_pkg::SINGLE_REQ) begin + axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q]; + axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q]; + end + + // this is the last write + if (cnt_q == '0) begin + axi_req_o.w.last = 1'b1; + if (axi_resp_i.w_ready) begin + state_d = WAIT_B_VALID; + gnt_o = 1'b1; + end + end else if (axi_resp_i.w_ready) begin + cnt_d = cnt_q - 1; + end + end + + // ~> finish write transaction + WAIT_B_VALID: begin + id_o = axi_resp_i.b.id; + + // Write is valid + if (axi_resp_i.b_valid && !any_outstanding_aw) begin + axi_req_o.b_ready = 1'b1; + + // some atomics must wait for read data + // we only accept it after accepting bvalid + if (amo_returns_data(amo_q)) begin + if (axi_resp_i.r_valid) begin + // return read data if valid + valid_o = 1'b1; + axi_req_o.r_ready = 1'b1; + state_d = IDLE; + rdata_o = axi_resp_i.r.data; + end else begin + // wait otherwise + state_d = WAIT_AMO_R_VALID; + end + end else begin + valid_o = 1'b1; + state_d = IDLE; + + // store-conditional response + if (amo_q == ariane_pkg::AMO_SC) begin + if (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY) begin + // success -> return 0 + rdata_o = 'b0; + end else begin + //
failure -> when request is 64-bit, return 1; + // when request is 32-bit place a 1 in both upper + // and lower half words. The right word will be + // realigned/masked externally + rdata_o = size_q == 2'b10 ? (1'b1 << 32) | 64'b1 : 64'b1; + end + end + end + // if the request was not an atomic we can possibly issue + // other requests while waiting for the response + end else begin + if ((amo_q == ariane_pkg::AMO_NONE) && (outstanding_aw_cnt_q != MAX_OUTSTANDING_AW)) begin + state_d = IDLE; + outstanding_aw_cnt_d = outstanding_aw_cnt_q + 1; + end + end + end + + // ~> some atomics wait for read data + WAIT_AMO_R_VALID: begin + // acknowledge data and terminate atomic + if (axi_resp_i.r_valid) begin + axi_req_o.r_ready = 1'b1; + state_d = IDLE; + valid_o = 1'b1; + rdata_o = axi_resp_i.r.data; + end + end + + // ~> cacheline read, single read + WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin + if (CRITICAL_WORD_FIRST) index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0] - cnt_q); + else index = BURST_SIZE[ADDR_INDEX-1:0] - cnt_q; + + // reads are always wrapping here (NOTE(review): ar.burst is BURST_INCR when CRITICAL_WORD_FIRST is 0 - confirm) + axi_req_o.r_ready = 1'b1; + // this is the first read a.k.a the critical word + if (axi_resp_i.r_valid) begin + if (CRITICAL_WORD_FIRST) begin + // this is the first word of a cacheline read, e.g.: the word which was causing the miss + if (state_q == WAIT_R_VALID_MULTIPLE && cnt_q == BURST_SIZE) begin + critical_word_valid_o = 1'b1; + critical_word_o = axi_resp_i.r.data; + end + end else begin + // check if the address offset matches - then we are getting the critical word + if (index == addr_offset_q) begin + critical_word_valid_o = 1'b1; + critical_word_o = axi_resp_i.r.data; + end + end + + // this is the last read + if (axi_resp_i.r.last) begin + id_d = axi_resp_i.r.id; + state_d = COMPLETE_READ; + end + + // save the word + if (state_q == WAIT_R_VALID_MULTIPLE) begin + cache_line_d[index] = axi_resp_i.r.data; + + end else cache_line_d[0] = axi_resp_i.r.data; + + // Decrease the counter + cnt_d = cnt_q
- 1; + end + end + // ~> read is complete + COMPLETE_READ: begin + valid_o = 1'b1; + state_d = IDLE; + id_o = id_q; + end + + default: state_d = IDLE; + endcase + + // This process handles B responses when accepting + // multiple outstanding write transactions + if (any_outstanding_aw && axi_resp_i.b_valid) begin + axi_req_o.b_ready = 1'b1; + valid_o = 1'b1; + // Right hand side contains non-registered signal as we want + // to preserve a possible increment from the WAIT_B_VALID state + outstanding_aw_cnt_d = outstanding_aw_cnt_d - 1; + end + end + + // ---------------- + // Registers + // ---------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + // reset the FSM to IDLE and clear all registered state + state_q <= IDLE; + cnt_q <= '0; + cache_line_q <= '0; + addr_offset_q <= '0; + id_q <= '0; + amo_q <= ariane_pkg::AMO_NONE; + size_q <= '0; + outstanding_aw_cnt_q <= '0; + end else begin + state_q <= state_d; + cnt_q <= cnt_d; + cache_line_q <= cache_line_d; + addr_offset_q <= addr_offset_d; + id_q <= id_d; + amo_q <= amo_d; + size_q <= size_d; + outstanding_aw_cnt_q <= outstanding_aw_cnt_d; + end + end + + function automatic axi_pkg::atop_t atop_from_amo(ariane_pkg::amo_t amo); + axi_pkg::atop_t result = 6'b000000; + + unique case (amo) + ariane_pkg::AMO_NONE: result = {axi_pkg::ATOP_NONE, 4'b0000}; + ariane_pkg::AMO_SWAP: result = {axi_pkg::ATOP_ATOMICSWAP}; + ariane_pkg::AMO_ADD: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD}; + ariane_pkg::AMO_AND: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR}; + ariane_pkg::AMO_OR: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET}; + ariane_pkg::AMO_XOR: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR}; + ariane_pkg::AMO_MAX: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX}; + ariane_pkg::AMO_MAXU: + result =
{axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX}; + ariane_pkg::AMO_MIN: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN}; + ariane_pkg::AMO_MINU: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN}; + ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported + ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported + default: result = 6'b000000; + endcase + + return result; + endfunction + + function automatic logic amo_returns_data(ariane_pkg::amo_t amo); + axi_pkg::atop_t atop = atop_from_amo(amo); + logic is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD; + logic is_swap_or_cmp = atop[5:4] == axi_pkg::ATOP_ATOMICSWAP[5:4]; + return is_load || is_swap_or_cmp; + endfunction + +endmodule diff --git a/test/type_param/core/cache_subsystem/cache_ctrl.sv b/test/type_param/core/cache_subsystem/cache_ctrl.sv new file mode 100644 index 0000000..e8770d2 --- /dev/null +++ b/test/type_param/core/cache_subsystem/cache_ctrl.sv @@ -0,0 +1,475 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// File: cache_ctrl.svh +// Author: Florian Zaruba +// Date: 14.10.2017 +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved.
+// +// Description: Cache controller - FSM serving one core request port: looks up the cache arrays, returns hit data and hands misses/bypassed accesses to the miss unit + + +module cache_ctrl + import ariane_pkg::*; + import std_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + input logic bypass_i, // bypass mode: when high every request is handled as a miss (cache not used) + output logic busy_o, // high whenever the FSM is not IDLE + // Core request ports + input dcache_req_i_t req_port_i, + output dcache_req_o_t req_port_o, + // SRAM interface + output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid + output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array + input logic gnt_i, + output cache_line_t data_o, + output cl_be_t be_o, + output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later + input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, + output logic we_o, + input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, + // Miss handling + output miss_req_t miss_req_o, + // return + input logic miss_gnt_i, + input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss + input logic [63:0] critical_word_i, + input logic critical_word_valid_i, + // bypass ports + input logic bypass_gnt_i, + input logic bypass_valid_i, + input logic [63:0] bypass_data_i, + // check MSHR for aliasing + output logic [55:0] mshr_addr_o, + input logic mshr_addr_matches_i, + input logic mshr_index_matches_i +); + + enum logic [3:0] { + IDLE, // 0 + WAIT_TAG, // 1 + WAIT_TAG_BYPASSED, // 2 + WAIT_GNT, // 3 + WAIT_GNT_SAVED, // 4 + STORE_REQ, // 5 + WAIT_REFILL_VALID, // 6 + WAIT_REFILL_GNT, // 7 + WAIT_TAG_SAVED, // 8 + WAIT_MSHR, // 9 + WAIT_CRITICAL_WORD // 10 + } + state_d, state_q; + + typedef struct packed { + logic [DCACHE_INDEX_WIDTH-1:0] index; + logic [DCACHE_TAG_WIDTH-1:0] tag; + logic [DCACHE_TID_WIDTH-1:0] id; + logic [7:0] be; + logic [1:0] size; + logic we; + logic [63:0] wdata; + logic bypass; + logic killed; + } mem_req_t; + + logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q; + +
mem_req_t mem_req_d, mem_req_q; + + assign busy_o = (state_q != IDLE); + assign tag_o = mem_req_d.tag; + + logic [DCACHE_LINE_WIDTH-1:0] cl_i; + + always_comb begin : way_select // pick the data of the hitting way (all zeros when no way hits) + cl_i = '0; + for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) if (hit_way_i[i]) cl_i = data_i[i].data; + + // cl_i = data_i[one_hot_to_bin(hit_way_i)].data; + end + + // -------------- + // Cache FSM + // -------------- + always_comb begin : cache_ctrl_fsm + automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; + // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array + // cache-line offset -> multiple of 64 + cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left + // default assignments + state_d = state_q; + mem_req_d = mem_req_q; + hit_way_d = hit_way_q; + // output assignments + req_port_o.data_gnt = 1'b0; + req_port_o.data_rvalid = 1'b0; + req_port_o.data_rdata = '0; + req_port_o.data_rid = mem_req_q.id; + miss_req_o = '0; + mshr_addr_o = '0; + // Memory array communication + req_o = '0; + addr_o = req_port_i.address_index; + data_o = '0; + be_o = '0; + we_o = '0; + + mem_req_d.killed |= req_port_i.kill_req; // sticky: remember that the saved request was killed + + case (state_q) + + IDLE: begin + // a new request arrived + if (req_port_i.data_req && !flush_i) begin + // request the cache line - we can do this speculatively + req_o = '1; + + // save index, be and we + mem_req_d.index = req_port_i.address_index; + mem_req_d.id = req_port_i.data_id; + mem_req_d.be = req_port_i.data_be; + mem_req_d.size = req_port_i.data_size; + mem_req_d.we = req_port_i.data_we; + mem_req_d.wdata = req_port_i.data_wdata; + mem_req_d.killed = req_port_i.kill_req; + + // Bypass mode, check for uncacheable address here as well + if (bypass_i) begin + state_d = WAIT_TAG_BYPASSED; + // grant this access only if it was a load + req_port_o.data_gnt = (req_port_i.data_we) ?
1'b0 : 1'b1; + mem_req_d.bypass = 1'b1; + // ------------------ + // Cache is enabled + // ------------------ + end else begin + // Wait until we have access to the memory array + if (gnt_i) begin + state_d = WAIT_TAG; + mem_req_d.bypass = 1'b0; + // only for a read + if (!req_port_i.data_we) req_port_o.data_gnt = 1'b1; + end + end + end + end + + // cache enabled and waiting for tag + WAIT_TAG, WAIT_TAG_SAVED: begin + // check that the client really wants to do the request and that we have a valid tag + if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin + // save tag if we didn't already save it + if (state_q != WAIT_TAG_SAVED) begin + mem_req_d.tag = req_port_i.address_tag; + end + // we speculatively request another transfer + if (req_port_i.data_req && !flush_i) begin + req_o = '1; + end + // ------------ + // HIT CASE + // ------------ + if (|hit_way_i) begin + // we can request another cache-line if this was a load + if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin + state_d = WAIT_TAG; // switch back to WAIT_TAG + mem_req_d.index = req_port_i.address_index; + mem_req_d.id = req_port_i.data_id; + mem_req_d.be = req_port_i.data_be; + mem_req_d.size = req_port_i.data_size; + mem_req_d.we = req_port_i.data_we; + mem_req_d.wdata = req_port_i.data_wdata; + mem_req_d.killed = req_port_i.kill_req; + mem_req_d.bypass = 1'b0; + + req_port_o.data_gnt = gnt_i; + + if (!gnt_i) begin + state_d = IDLE; + end + end else begin + state_d = IDLE; + end + + // this is timing critical + req_port_o.data_rdata = cl_i[cl_offset+:64]; + + // report data for a read + if (!mem_req_q.we) begin + req_port_o.data_rvalid = ~mem_req_q.killed; + // else this was a store so we need an extra step to handle it + end else begin + state_d = STORE_REQ; + hit_way_d = hit_way_i; + end + // ------------ + // MISS CASE + // ------------ + end else begin + // make a miss request + state_d = WAIT_REFILL_GNT; + end + //
---------------------------------------------- + // Check MSHR - Miss Status Handling Register + // ---------------------------------------------- + mshr_addr_o = {tag_o, mem_req_q.index}; + // 1. We've got a match on MSHR while we are going down the + // store path. This means that the miss controller is + // currently evicting our cache-line. As the store is + // non-atomic we need to constantly check whether we are + // matching the address the miss handler is serving. + // Furthermore we need to check for the whole index + // because a completely different memory line could alias + // with the cache-line we are evicting. + // 2. The second case is where we are currently loading and + // the address matches the exact CL the miss controller + // is currently serving. That means we need to wait for + // the miss controller to finish its request before we + // can continue to serve this CL. Otherwise we will fetch + // the cache-line again and potentially lose any + // content we've written so far. This as a consequence + // means we can't have a hit on the CL, which means the + // req_port_o.data_rvalid will be de-asserted.
+ if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin + state_d = WAIT_MSHR; + end + + // ------------------------- + // Check for cache-ability + // ------------------------- + if (!config_pkg::is_inside_cacheable_regions( + CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}} + )) begin + mem_req_d.bypass = 1'b1; + state_d = WAIT_REFILL_GNT; + end + + // we are still waiting for a valid tag + end else begin + // request cache line for saved index + addr_o = mem_req_q.index; + req_o = '1; + + // check that we still have a memory grant + if (!gnt_i) begin + state_d = WAIT_GNT; + end + end + end + + // ~> we already granted the request but lost the memory grant while waiting for the tag + WAIT_GNT, WAIT_GNT_SAVED: begin + // request cache line for saved index + addr_o = mem_req_q.index; + req_o = '1; + + // if we get a valid tag while waiting for the memory grant, save it + if (req_port_i.tag_valid) begin + mem_req_d.tag = req_port_i.address_tag; + state_d = WAIT_GNT_SAVED; + end + + // we have a memory grant again ~> go back to WAIT_TAG (or WAIT_TAG_SAVED if the tag was already captured) + if (gnt_i) begin + state_d = (state_d == WAIT_GNT) ?
WAIT_TAG : WAIT_TAG_SAVED; + end + end + + // ~> we are here as we need a second round of memory access for a store + STORE_REQ: begin + // check if the MSHR still doesn't match + mshr_addr_o = {mem_req_q.tag, mem_req_q.index}; + + // We need to re-check for MSHR aliasing here as the store requires at least + // two memory look-ups on a single-ported SRAM and therefore is non-atomic + if (!mshr_index_matches_i) begin + // store data, write dirty bit + req_o = hit_way_q; + addr_o = mem_req_q.index; + we_o = 1'b1; + + be_o.vldrty = hit_way_q; + + // set the correct byte enable + be_o.data[cl_offset>>3+:8] = mem_req_q.be; + data_o.data[cl_offset+:64] = mem_req_q.wdata; + // ~> change the state + data_o.dirty = 1'b1; + data_o.valid = 1'b1; + + // got a grant ~> this is finished now + if (gnt_i) begin + req_port_o.data_gnt = 1'b1; + state_d = IDLE; + end + end else begin + state_d = WAIT_MSHR; + end + end // case: STORE_REQ + + // we've got a match on MSHR ~> miss unit is currently serving a request + WAIT_MSHR: begin + mshr_addr_o = {mem_req_q.tag, mem_req_q.index}; + // we can start a new request + if (!mshr_index_matches_i) begin + req_o = '1; + + addr_o = mem_req_q.index; + + if (gnt_i) state_d = WAIT_TAG_SAVED; + end + end + + // bypass mode: it's for sure a miss + WAIT_TAG_BYPASSED: begin + // check that the client really wants to do the request and that we have a valid tag + if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin + // save tag + mem_req_d.tag = req_port_i.address_tag; + state_d = WAIT_REFILL_GNT; + end + end + + // ~> wait for grant from miss unit + WAIT_REFILL_GNT: begin + + mshr_addr_o = {mem_req_q.tag, mem_req_q.index}; + + miss_req_o.valid = 1'b1; + miss_req_o.bypass = mem_req_q.bypass; + miss_req_o.addr = {mem_req_q.tag, mem_req_q.index}; + miss_req_o.be = mem_req_q.be; + miss_req_o.size = mem_req_q.size; + miss_req_o.we = mem_req_q.we; + miss_req_o.wdata = mem_req_q.wdata; + + // got a grant so go to valid + if (bypass_gnt_i) begin +
state_d = WAIT_REFILL_VALID; + // if this was a write we still need to give a grant to the store unit. + // We can also avoid waiting for the response valid, this signal is + // currently not used by the store unit + if (mem_req_q.we) begin + req_port_o.data_gnt = 1'b1; + state_d = IDLE; + end + end + + if (miss_gnt_i && !mem_req_q.we) state_d = WAIT_CRITICAL_WORD; + else if (miss_gnt_i) begin + state_d = IDLE; + req_port_o.data_gnt = 1'b1; + end + + // it can be the case that the miss unit is currently serving a + // request which matches ours + // so we need to check the MSHR for matching continuously + // if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i + if (mshr_addr_matches_i && !active_serving_i) begin + state_d = WAIT_MSHR; + end + end + + // ~> wait for critical word to arrive + WAIT_CRITICAL_WORD: begin + // speculatively request another word + if (req_port_i.data_req) begin + // request the cache line + req_o = '1; + end + + if (critical_word_valid_i) begin + req_port_o.data_rvalid = ~mem_req_q.killed; + req_port_o.data_rdata = critical_word_i; + // we can make another request + if (req_port_i.data_req && !flush_i) begin + // save index, be and we + mem_req_d.index = req_port_i.address_index; + mem_req_d.id = req_port_i.data_id; + mem_req_d.be = req_port_i.data_be; + mem_req_d.size = req_port_i.data_size; + mem_req_d.we = req_port_i.data_we; + mem_req_d.wdata = req_port_i.data_wdata; + mem_req_d.killed = req_port_i.kill_req; + + state_d = IDLE; + + // Wait until we have access on the memory array + if (gnt_i) begin + state_d = WAIT_TAG; + mem_req_d.bypass = 1'b0; + req_port_o.data_gnt = 1'b1; + end + end else begin + state_d = IDLE; + end + end + end + // ~> wait until the bypass response is valid + WAIT_REFILL_VALID: begin + // got a valid answer + if (bypass_valid_i) begin + req_port_o.data_rdata = bypass_data_i; + req_port_o.data_rvalid = ~mem_req_q.killed; + state_d = IDLE; + end + end
+ endcase + + if (req_port_i.kill_req) begin // request killed: answer with rvalid and return to IDLE unless we are waiting on the miss unit + req_port_o.data_rvalid = 1'b1; + if (!(state_q inside {WAIT_REFILL_GNT, WAIT_CRITICAL_WORD})) begin + state_d = IDLE; + end + end + end + + // -------------- + // Registers + // -------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + mem_req_q <= '0; + hit_way_q <= '0; + end else begin + state_q <= state_d; + mem_req_q <= mem_req_d; + hit_way_q <= hit_way_d; + end + end + + //pragma translate_off +`ifndef VERILATOR + initial begin + assert (DCACHE_LINE_WIDTH == 128) + else + $error( + "Cacheline width has to be 128 for the moment. But only small changes required in data select logic" + ); + end + // if the full MSHR address matches, the partial (index) one has to match as well + partial_full_mshr_match : + assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i) + else $fatal(1, "partial mshr index doesn't match"); + // there should never be a valid answer when the MSHR matches and we are not being served + no_valid_on_mshr_match : + assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req) + else $fatal(1, "rvalid_o should not be set on MSHR match"); +`endif + //pragma translate_on +endmodule diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv new file mode 100644 index 0000000..3115403 --- /dev/null +++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv @@ -0,0 +1,200 @@ +// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies +// Alternatives (CEA) +// +// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”); +// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Authors: Cesar Fuguet +// Date: February, 2023 +// Description: Interface adapter for the CVA6 core: translates a CVA6 dcache load port, or the store port plus AMO requests, into HPDcache requests and maps the responses back +module cva6_hpdcache_if_adapter + import hpdcache_pkg::*; + +// Parameters +// {{{ +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter bit is_load_port = 1'b1 // 1: generate the load adapter, 0: generate the store/AMO adapter +) +// }}} + +// Ports +// {{{ +( + // Clock and active-low reset pins + input logic clk_i, + input logic rst_ni, + + // Port ID + input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i, + + // Request/response ports from/to the CVA6 core + input ariane_pkg::dcache_req_i_t cva6_req_i, + output ariane_pkg::dcache_req_o_t cva6_req_o, + input ariane_pkg::amo_req_t cva6_amo_req_i, + output ariane_pkg::amo_resp_t cva6_amo_resp_o, + + // Request port to the L1 Dcache + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_pkg::hpdcache_req_t hpdcache_req_o, + output logic hpdcache_req_abort_o, + output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o, + output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o, + + // Response port from the L1 Dcache + input logic hpdcache_rsp_valid_i, + input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i +); + // }}} + + // Internal nets and registers + // {{{ + logic forward_store, forward_amo; // only driven in the store/AMO variant (store_amo_gen); also read by the assertion at the end of the module + logic hpdcache_req_is_uncacheable; + // }}} + + // Request forwarding + // {{{ + generate + // LOAD request + // {{{ + if (is_load_port == 1'b1) begin : load_port_gen + assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions( + CVA6Cfg, + { + {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} // NOTE(review): assuming address_tag is DCACHE_TAG_WIDTH bits, this concat is 64 + DCACHE_INDEX_WIDTH bits wide - confirm the intended padding + , cva6_req_i.address_tag + , {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}} + } + ); + + // Request forwarding + assign hpdcache_req_valid_o = cva6_req_i.data_req, + hpdcache_req_o.addr_offset = cva6_req_i.address_index, + hpdcache_req_o.wdata = '0, + hpdcache_req_o.op =
hpdcache_pkg::HPDCACHE_REQ_LOAD, + hpdcache_req_o.be = cva6_req_i.data_be, + hpdcache_req_o.size = cva6_req_i.data_size, + hpdcache_req_o.sid = hpdcache_req_sid_i, + hpdcache_req_o.tid = cva6_req_i.data_id, + hpdcache_req_o.need_rsp = 1'b1, + hpdcache_req_o.phys_indexed = 1'b0, + hpdcache_req_o.addr_tag = '0, // unused on virtually indexed request + hpdcache_req_o.pma = '0; // unused on virtually indexed request + + assign hpdcache_req_abort_o = cva6_req_i.kill_req, + hpdcache_req_tag_o = cva6_req_i.address_tag, + hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable, + hpdcache_req_pma_o.io = 1'b0; + + // Response forwarding + assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i, + cva6_req_o.data_rdata = hpdcache_rsp_i.rdata, + cva6_req_o.data_rid = hpdcache_rsp_i.tid, + cva6_req_o.data_gnt = hpdcache_req_ready_i; + end // }}} + + // {{{ + else begin : store_amo_gen + // STORE/AMO request + hpdcache_req_addr_t amo_addr; + hpdcache_req_offset_t amo_addr_offset; + hpdcache_tag_t amo_tag; + logic amo_is_word, amo_is_word_hi; + hpdcache_req_data_t amo_data; + hpdcache_req_be_t amo_data_be; + hpdcache_req_op_t amo_op; + logic [31:0] amo_resp_word; + + // AMO logic + // {{{ + always_comb begin : amo_op_comb + amo_addr = cva6_amo_req_i.operand_a; + amo_addr_offset = amo_addr[0+:HPDCACHE_REQ_OFFSET_WIDTH]; + amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH+:HPDCACHE_TAG_WIDTH]; + amo_is_word = (cva6_amo_req_i.size == 2'b10); // 32-bit AMO + amo_is_word_hi = cva6_amo_req_i.operand_a[2]; // address bit 2 selects the upper 32-bit half + + amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0+:32]}} : cva6_amo_req_i.operand_b; // word operand is replicated into both halves + + amo_data_be = amo_is_word_hi ? 8'hf0 : amo_is_word ?
8'h0f : 8'hff; + + unique case (cva6_amo_req_i.amo_op) + ariane_pkg::AMO_LR: amo_op = HPDCACHE_REQ_AMO_LR; + ariane_pkg::AMO_SC: amo_op = HPDCACHE_REQ_AMO_SC; + ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP; + ariane_pkg::AMO_ADD: amo_op = HPDCACHE_REQ_AMO_ADD; + ariane_pkg::AMO_AND: amo_op = HPDCACHE_REQ_AMO_AND; + ariane_pkg::AMO_OR: amo_op = HPDCACHE_REQ_AMO_OR; + ariane_pkg::AMO_XOR: amo_op = HPDCACHE_REQ_AMO_XOR; + ariane_pkg::AMO_MAX: amo_op = HPDCACHE_REQ_AMO_MAX; + ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU; + ariane_pkg::AMO_MIN: amo_op = HPDCACHE_REQ_AMO_MIN; + ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU; + default: amo_op = HPDCACHE_REQ_LOAD; // any other AMO opcode falls back to a plain load + endcase + end + + assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32] + : hpdcache_rsp_i.rdata[0][0 +: 32]; + // }}} + + // Request forwarding + // {{{ + assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions( + CVA6Cfg, + { + {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} + , hpdcache_req_o.addr_tag, + {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}} + } + ); + + assign forward_store = cva6_req_i.data_req, forward_amo = cva6_amo_req_i.req; + + assign hpdcache_req_valid_o = forward_store | forward_amo, + hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset : cva6_req_i.address_index, + hpdcache_req_o.wdata = forward_amo ? amo_data : cva6_req_i.data_wdata, + hpdcache_req_o.op = forward_amo ? amo_op : hpdcache_pkg::HPDCACHE_REQ_STORE, + hpdcache_req_o.be = forward_amo ? amo_data_be : cva6_req_i.data_be, + hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size : cva6_req_i.data_size, + hpdcache_req_o.sid = hpdcache_req_sid_i, + hpdcache_req_o.tid = forward_amo ? '1 : '0, // all-ones tid marks AMO requests so their responses can be told apart from store responses + hpdcache_req_o.need_rsp = forward_amo, + hpdcache_req_o.phys_indexed = 1'b1, + hpdcache_req_o.addr_tag = forward_amo ?
amo_tag : cva6_req_i.address_tag, + hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable, + hpdcache_req_o.pma.io = 1'b0, + hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests + hpdcache_req_tag_o = '0, // unused on physically indexed requests + hpdcache_req_pma_o = '0; // unused on physically indexed requests + // }}} + + // Response forwarding + // {{{ + assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1), + cva6_req_o.data_rdata = hpdcache_rsp_i.rdata, + cva6_req_o.data_rid = hpdcache_rsp_i.tid, + cva6_req_o.data_gnt = hpdcache_req_ready_i; + + assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1), + cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word} // word results are sign-extended to 64 bits + : hpdcache_rsp_i.rdata[0][63:0]; + // }}} + end + // }}} + endgenerate + // }}} + + // Assertions + // {{{ + // pragma translate_off + forward_one_request_assert : + assert property (@(posedge clk_i) ($onehot0({forward_store, forward_amo}))) + else $error("Only one request shall be forwarded"); + // pragma translate_on + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv new file mode 100644 index 0000000..7e90b91 --- /dev/null +++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv @@ -0,0 +1,609 @@ +// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies +// Alternatives (CEA) +// +// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Authors: Cesar Fuguet +// Date: February, 2023 +// Description: CVA6 cache subsystem integrating standard CVA6's +// instruction cache and the Core-V High-Performance L1 +// data cache (CV-HPDcache).
+ +module cva6_hpdcache_subsystem +// Parameters +// {{{ +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int NumPorts = 4, + parameter int NrHwPrefetchers = 4, + parameter type noc_req_t = logic, + parameter type noc_resp_t = logic, + parameter type cmo_req_t = logic, + parameter type cmo_rsp_t = logic +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // I$ + // {{{ + input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) + input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together + output logic icache_miss_o, // to performance counter + // address translation requests + input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend + output ariane_pkg::icache_arsp_t icache_areq_o, + // data requests + input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend + output ariane_pkg::icache_drsp_t icache_dreq_o, + // }}} + + // D$ + // {{{ + // Cache management + input logic dcache_enable_i, // from CSR + input logic dcache_flush_i, // high until acknowledged + output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic dcache_miss_o, // we missed on a ld/st + + // AMO interface + input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU + output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU + // CMO interface + input cmo_req_t dcache_cmo_req_i, // from CMO FU + output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU + // Request ports + input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU + output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU + // Write Buffer status + output logic wbuffer_empty_o, + output logic wbuffer_not_ni_o, + + // Hardware memory prefetcher configuration + input logic [NrHwPrefetchers-1:0] hwpf_base_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i, + output logic [NrHwPrefetchers-1:0][63:0] 
hwpf_base_o, + input logic [NrHwPrefetchers-1:0] hwpf_param_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i, + output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o, + input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i, + output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o, + output logic [ 63:0] hwpf_status_o, + // }}} + + // AXI port to upstream memory/peripherals + // {{{ + output noc_req_t noc_req_o, + input noc_resp_t noc_resp_i + // }}} +); + // }}} + + `include "axi/typedef.svh" + + // I$ instantiation + // {{{ + logic icache_miss_valid, icache_miss_ready; + wt_cache_pkg::icache_req_t icache_miss; + + logic icache_miss_resp_valid; + wt_cache_pkg::icache_rtrn_t icache_miss_resp; + + localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1); + + cva6_icache #( + .CVA6Cfg(CVA6Cfg), + .RdTxId (ICACHE_RDTXID) + ) i_cva6_icache ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (icache_flush_i), + .en_i (icache_en_i), + .miss_o (icache_miss_o), + .areq_i (icache_areq_i), + .areq_o (icache_areq_o), + .dreq_i (icache_dreq_i), + .dreq_o (icache_dreq_o), + .mem_rtrn_vld_i(icache_miss_resp_valid), + .mem_rtrn_i (icache_miss_resp), + .mem_data_req_o(icache_miss_valid), + .mem_data_ack_i(icache_miss_ready), + .mem_data_o (icache_miss) + ); + // }}} + + // D$ instantiation + // {{{ + `include "hpdcache_typedef.svh" + + // 0: Page-Table Walk (PTW) + // 1: Load unit + // 2: Accelerator load + // 3: Store/AMO + // . + // . + // . 
+ // NumPorts: CMO + // NumPorts + 1: Hardware Memory Prefetcher (hwpf) + localparam int HPDCACHE_NREQUESTERS = NumPorts + 2; + + typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t; + typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t; + typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t; + typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t; + `HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t); + `HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t); + `HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t); + `HPDCACHE_TYPEDEF_MEM_RESP_W_T(hpdcache_mem_resp_w_t, hpdcache_mem_id_t); + + typedef logic [63:0] hwpf_stride_param_t; + + logic dcache_req_valid[HPDCACHE_NREQUESTERS-1:0]; + logic dcache_req_ready[HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_req_abort[HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_rsp_valid[HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_read_miss, dcache_write_miss; + + logic [ 2:0] snoop_valid; + logic [ 2:0] snoop_abort; + hpdcache_pkg::hpdcache_req_offset_t [ 2:0] snoop_addr_offset; + hpdcache_pkg::hpdcache_tag_t [ 2:0] snoop_addr_tag; + logic [ 2:0] snoop_phys_indexed; + + logic dcache_cmo_req_is_prefetch; + + logic dcache_miss_ready; + logic dcache_miss_valid; + hpdcache_mem_req_t dcache_miss; + + logic dcache_miss_resp_ready; + logic dcache_miss_resp_valid; + hpdcache_mem_resp_r_t dcache_miss_resp; + + logic dcache_wbuf_ready; + logic dcache_wbuf_valid; + hpdcache_mem_req_t dcache_wbuf; + + logic dcache_wbuf_data_ready; + logic dcache_wbuf_data_valid; + hpdcache_mem_req_w_t dcache_wbuf_data; + + logic 
dcache_wbuf_resp_ready; + logic dcache_wbuf_resp_valid; + hpdcache_mem_resp_w_t dcache_wbuf_resp; + + logic dcache_uc_read_ready; + logic dcache_uc_read_valid; + hpdcache_mem_req_t dcache_uc_read; + + logic dcache_uc_read_resp_ready; + logic dcache_uc_read_resp_valid; + hpdcache_mem_resp_r_t dcache_uc_read_resp; + + logic dcache_uc_write_ready; + logic dcache_uc_write_valid; + hpdcache_mem_req_t dcache_uc_write; + + logic dcache_uc_write_data_ready; + logic dcache_uc_write_data_valid; + hpdcache_mem_req_w_t dcache_uc_write_data; + + logic dcache_uc_write_resp_ready; + logic dcache_uc_write_resp_valid; + hpdcache_mem_resp_w_t dcache_uc_write_resp; + + hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in; + hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out; + + generate + ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0]; + + for (genvar r = 0; r < (NumPorts - 1); r++) begin : cva6_hpdcache_load_if_adapter_gen + assign dcache_req_ports[r] = dcache_req_ports_i[r]; + + cva6_hpdcache_if_adapter #( + .CVA6Cfg (CVA6Cfg), + .is_load_port(1'b1) + ) i_cva6_hpdcache_load_if_adapter ( + .clk_i, + .rst_ni, + + .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(r)), + + .cva6_req_i (dcache_req_ports[r]), + .cva6_req_o (dcache_req_ports_o[r]), + .cva6_amo_req_i ('0), + .cva6_amo_resp_o( /* unused */), + + .hpdcache_req_valid_o(dcache_req_valid[r]), + .hpdcache_req_ready_i(dcache_req_ready[r]), + .hpdcache_req_o (dcache_req[r]), + .hpdcache_req_abort_o(dcache_req_abort[r]), + .hpdcache_req_tag_o (dcache_req_tag[r]), + .hpdcache_req_pma_o (dcache_req_pma[r]), + + .hpdcache_rsp_valid_i(dcache_rsp_valid[r]), + .hpdcache_rsp_i (dcache_rsp[r]) + ); + end + + cva6_hpdcache_if_adapter #( + .CVA6Cfg (CVA6Cfg), + .is_load_port(1'b0) + ) i_cva6_hpdcache_store_if_adapter ( + .clk_i, + .rst_ni, + + .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts - 1)), + + .cva6_req_i 
(dcache_req_ports_i[NumPorts-1]), + .cva6_req_o (dcache_req_ports_o[NumPorts-1]), + .cva6_amo_req_i (dcache_amo_req_i), + .cva6_amo_resp_o(dcache_amo_resp_o), + + .hpdcache_req_valid_o(dcache_req_valid[NumPorts-1]), + .hpdcache_req_ready_i(dcache_req_ready[NumPorts-1]), + .hpdcache_req_o (dcache_req[NumPorts-1]), + .hpdcache_req_abort_o(dcache_req_abort[NumPorts-1]), + .hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]), + .hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]), + + .hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts-1]), + .hpdcache_rsp_i (dcache_rsp[NumPorts-1]) + ); + +`ifdef HPDCACHE_ENABLE_CMO + cva6_hpdcache_cmo_if_adapter #( + .cmo_req_t(cmo_req_t), + .cmo_rsp_t(cmo_rsp_t) + ) i_cva6_hpdcache_cmo_if_adapter ( + .clk_i, + .rst_ni, + + .dcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)), + + .cva6_cmo_req_i (dcache_cmo_req_i), + .cva6_cmo_resp_o(dcache_cmo_resp_o), + + .dcache_req_valid_o(dcache_req_valid[NumPorts]), + .dcache_req_ready_i(dcache_req_ready[NumPorts]), + .dcache_req_o (dcache_req[NumPorts]), + .dcache_req_abort_o(dcache_req_abort[NumPorts]), + .dcache_req_tag_o (dcache_req_tag[NumPorts]), + .dcache_req_pma_o (dcache_req_pma[NumPorts]), + + .dcache_rsp_valid_i(dcache_rsp_valid[NumPorts]), + .dcache_rsp_i (dcache_rsp[NumPorts]) + ); +`else + assign dcache_req_valid[NumPorts] = 1'b0, + dcache_req[NumPorts] = '0, + dcache_req_abort[NumPorts] = 1'b0, + dcache_req_tag[NumPorts] = '0, + dcache_req_pma[NumPorts] = '0; +`endif + endgenerate + + // Snoop load port + assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1], + snoop_abort[0] = dcache_req_abort[1], + snoop_addr_offset[0] = dcache_req[1].addr_offset, + snoop_addr_tag[0] = dcache_req_tag[1], + snoop_phys_indexed[0] = dcache_req[1].phys_indexed; + + // Snoop Store/AMO port + assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1], + snoop_abort[1] = dcache_req_abort[NumPorts-1], + snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset, + 
snoop_addr_tag[1] = dcache_req_tag[NumPorts-1], + snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed; + +`ifdef HPDCACHE_ENABLE_CMO + // Snoop CMO port (in case of read prefetch accesses) + assign dcache_cmo_req_is_prefetch = hpdcache_pkg::is_cmo_prefetch( + dcache_req[NumPorts].op, dcache_req[NumPorts].size + ); + assign snoop_valid[2] = dcache_req_valid[NumPorts] + & dcache_req_ready[NumPorts] + & dcache_cmo_req_is_prefetch, + snoop_abort[2] = dcache_req_abort[NumPorts], + snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset, + snoop_addr_tag[2] = dcache_req_tag[NumPorts], + snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed; +`else + assign snoop_valid[2] = 1'b0, + snoop_abort[2] = 1'b0, + snoop_addr_offset[2] = '0, + snoop_addr_tag[2] = '0, + snoop_phys_indexed[2] = 1'b0; +`endif + + generate + for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen + assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]), + hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]); + end + endgenerate + + hwpf_stride_wrapper #( + .NUM_HW_PREFETCH(NrHwPrefetchers), + .NUM_SNOOP_PORTS(3) + ) i_hwpf_stride_wrapper ( + .clk_i, + .rst_ni, + + .hwpf_stride_base_set_i (hwpf_base_set_i), + .hwpf_stride_base_i (hwpf_base_i), + .hwpf_stride_base_o (hwpf_base_o), + .hwpf_stride_param_set_i (hwpf_param_set_i), + .hwpf_stride_param_i (hwpf_param_i), + .hwpf_stride_param_o (hwpf_param_o), + .hwpf_stride_throttle_set_i(hwpf_throttle_set_i), + .hwpf_stride_throttle_i (hwpf_throttle_in), + .hwpf_stride_throttle_o (hwpf_throttle_out), + .hwpf_stride_status_o (hwpf_status_o), + + .snoop_valid_i (snoop_valid), + .snoop_abort_i (snoop_abort), + .snoop_addr_offset_i (snoop_addr_offset), + .snoop_addr_tag_i (snoop_addr_tag), + .snoop_phys_indexed_i(snoop_phys_indexed), + + .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts + 1)), + + .hpdcache_req_valid_o(dcache_req_valid[NumPorts+1]), + 
.hpdcache_req_ready_i(dcache_req_ready[NumPorts+1]), + .hpdcache_req_o (dcache_req[NumPorts+1]), + .hpdcache_req_abort_o(dcache_req_abort[NumPorts+1]), + .hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]), + .hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]), + .hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts+1]), + .hpdcache_rsp_i (dcache_rsp[NumPorts+1]) + ); + + hpdcache #( + .NREQUESTERS (HPDCACHE_NREQUESTERS), + .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), + .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t) + ) i_hpdcache ( + .clk_i, + .rst_ni, + + .wbuf_flush_i(dcache_flush_i), + + .core_req_valid_i(dcache_req_valid), + .core_req_ready_o(dcache_req_ready), + .core_req_i (dcache_req), + .core_req_abort_i(dcache_req_abort), + .core_req_tag_i (dcache_req_tag), + .core_req_pma_i (dcache_req_pma), + + .core_rsp_valid_o(dcache_rsp_valid), + .core_rsp_o (dcache_rsp), + + .mem_req_miss_read_ready_i(dcache_miss_ready), + .mem_req_miss_read_valid_o(dcache_miss_valid), + .mem_req_miss_read_o (dcache_miss), + + .mem_resp_miss_read_ready_o(dcache_miss_resp_ready), + .mem_resp_miss_read_valid_i(dcache_miss_resp_valid), + .mem_resp_miss_read_i (dcache_miss_resp), + + .mem_req_wbuf_write_ready_i(dcache_wbuf_ready), + .mem_req_wbuf_write_valid_o(dcache_wbuf_valid), + .mem_req_wbuf_write_o (dcache_wbuf), + + .mem_req_wbuf_write_data_ready_i(dcache_wbuf_data_ready), + .mem_req_wbuf_write_data_valid_o(dcache_wbuf_data_valid), + .mem_req_wbuf_write_data_o (dcache_wbuf_data), + + .mem_resp_wbuf_write_ready_o(dcache_wbuf_resp_ready), + .mem_resp_wbuf_write_valid_i(dcache_wbuf_resp_valid), + .mem_resp_wbuf_write_i (dcache_wbuf_resp), + + .mem_req_uc_read_ready_i(dcache_uc_read_ready), + .mem_req_uc_read_valid_o(dcache_uc_read_valid), + .mem_req_uc_read_o (dcache_uc_read), + + 
.mem_resp_uc_read_ready_o(dcache_uc_read_resp_ready), + .mem_resp_uc_read_valid_i(dcache_uc_read_resp_valid), + .mem_resp_uc_read_i (dcache_uc_read_resp), + + .mem_req_uc_write_ready_i(dcache_uc_write_ready), + .mem_req_uc_write_valid_o(dcache_uc_write_valid), + .mem_req_uc_write_o (dcache_uc_write), + + .mem_req_uc_write_data_ready_i(dcache_uc_write_data_ready), + .mem_req_uc_write_data_valid_o(dcache_uc_write_data_valid), + .mem_req_uc_write_data_o (dcache_uc_write_data), + + .mem_resp_uc_write_ready_o(dcache_uc_write_resp_ready), + .mem_resp_uc_write_valid_i(dcache_uc_write_resp_valid), + .mem_resp_uc_write_i (dcache_uc_write_resp), + + .evt_cache_write_miss_o(dcache_write_miss), + .evt_cache_read_miss_o (dcache_read_miss), + .evt_uncached_req_o ( /* unused */), + .evt_cmo_req_o ( /* unused */), + .evt_write_req_o ( /* unused */), + .evt_read_req_o ( /* unused */), + .evt_prefetch_req_o ( /* unused */), + .evt_req_on_hold_o ( /* unused */), + .evt_rtab_rollback_o ( /* unused */), + .evt_stall_refill_o ( /* unused */), + .evt_stall_o ( /* unused */), + + .wbuf_empty_o(wbuffer_empty_o), + + .cfg_enable_i (dcache_enable_i), + .cfg_wbuf_threshold_i (4'd2), + .cfg_wbuf_reset_timecnt_on_write_i (1'b1), + .cfg_wbuf_sequential_waw_i (1'b0), + .cfg_wbuf_inhibit_write_coalescing_i(1'b0), + .cfg_prefetch_updt_plru_i (1'b1), + .cfg_error_on_cacheable_amo_i (1'b0), + .cfg_rtab_single_entry_i (1'b0) + ); + + assign dcache_miss_o = dcache_read_miss, wbuffer_not_ni_o = wbuffer_empty_o; + + always_ff @(posedge clk_i or negedge rst_ni) begin : dcache_flush_ff + if (!rst_ni) dcache_flush_ack_o <= 1'b0; + else dcache_flush_ack_o <= ~dcache_flush_ack_o & dcache_flush_i; + end + + // }}} + + // AXI arbiter instantiation + // {{{ + typedef logic [CVA6Cfg.AxiAddrWidth-1:0] axi_addr_t; + typedef logic [CVA6Cfg.AxiDataWidth-1:0] axi_data_t; + typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] axi_strb_t; + typedef logic [CVA6Cfg.AxiIdWidth-1:0] axi_id_t; + typedef logic 
[CVA6Cfg.AxiUserWidth-1:0] axi_user_t; + `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, axi_addr_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, axi_data_t, axi_strb_t, axi_user_t) + `AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, axi_addr_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t) + + cva6_hpdcache_subsystem_axi_arbiter #( + .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), + .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t), + + .AxiAddrWidth (CVA6Cfg.AxiAddrWidth), + .AxiDataWidth (CVA6Cfg.AxiDataWidth), + .AxiIdWidth (CVA6Cfg.AxiIdWidth), + .AxiUserWidth (CVA6Cfg.AxiUserWidth), + .axi_ar_chan_t(axi_ar_chan_t), + .axi_aw_chan_t(axi_aw_chan_t), + .axi_w_chan_t (axi_w_chan_t), + .axi_req_t (noc_req_t), + .axi_rsp_t (noc_resp_t) + ) i_axi_arbiter ( + .clk_i, + .rst_ni, + + .icache_miss_valid_i(icache_miss_valid), + .icache_miss_ready_o(icache_miss_ready), + .icache_miss_i (icache_miss), + .icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)), + + .icache_miss_resp_valid_o(icache_miss_resp_valid), + .icache_miss_resp_o (icache_miss_resp), + + .dcache_miss_ready_o(dcache_miss_ready), + .dcache_miss_valid_i(dcache_miss_valid), + .dcache_miss_i (dcache_miss), + + .dcache_miss_resp_ready_i(dcache_miss_resp_ready), + .dcache_miss_resp_valid_o(dcache_miss_resp_valid), + .dcache_miss_resp_o (dcache_miss_resp), + + .dcache_wbuf_ready_o(dcache_wbuf_ready), + .dcache_wbuf_valid_i(dcache_wbuf_valid), + .dcache_wbuf_i (dcache_wbuf), + + .dcache_wbuf_data_ready_o(dcache_wbuf_data_ready), + .dcache_wbuf_data_valid_i(dcache_wbuf_data_valid), + .dcache_wbuf_data_i (dcache_wbuf_data), + + .dcache_wbuf_resp_ready_i(dcache_wbuf_resp_ready), + .dcache_wbuf_resp_valid_o(dcache_wbuf_resp_valid), + 
.dcache_wbuf_resp_o (dcache_wbuf_resp), + + .dcache_uc_read_ready_o(dcache_uc_read_ready), + .dcache_uc_read_valid_i(dcache_uc_read_valid), + .dcache_uc_read_i (dcache_uc_read), + .dcache_uc_read_id_i ('1), + + .dcache_uc_read_resp_ready_i(dcache_uc_read_resp_ready), + .dcache_uc_read_resp_valid_o(dcache_uc_read_resp_valid), + .dcache_uc_read_resp_o (dcache_uc_read_resp), + + .dcache_uc_write_ready_o(dcache_uc_write_ready), + .dcache_uc_write_valid_i(dcache_uc_write_valid), + .dcache_uc_write_i (dcache_uc_write), + .dcache_uc_write_id_i ('1), + + .dcache_uc_write_data_ready_o(dcache_uc_write_data_ready), + .dcache_uc_write_data_valid_i(dcache_uc_write_data_valid), + .dcache_uc_write_data_i (dcache_uc_write_data), + + .dcache_uc_write_resp_ready_i(dcache_uc_write_resp_ready), + .dcache_uc_write_resp_valid_o(dcache_uc_write_resp_valid), + .dcache_uc_write_resp_o (dcache_uc_write_resp), + + .axi_req_o (noc_req_o), + .axi_resp_i(noc_resp_i) + ); + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial + assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS)) + else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough"); + + a_invalid_instruction_fetch : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X", + icache_dreq_o.vaddr, + icache_dreq_o.data + ); + + a_invalid_write_data : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X", + { + dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index + }, + dcache_req_ports_i[2].data_be, + dcache_req_ports_i[2].data_wdata + ); + + for (genvar j = 0; j < 2; j++) begin : gen_assertion + 
a_invalid_read_data : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] reading invalid data on port %01d: data=%016X", + j, + dcache_req_ports_o[j].data_rdata + ); + end + // pragma translate_on + // }}} + +endmodule : cva6_hpdcache_subsystem diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv new file mode 100644 index 0000000..9eb0a8b --- /dev/null +++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv @@ -0,0 +1,586 @@ +// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies +// Alternatives (CEA) +// +// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Authors: Cesar Fuguet +// Date: February, 2023 +// Description: AXI arbiter for the CVA6 cache subsystem integrating standard +// CVA6's instruction cache and the Core-V High-Performance +// L1 Dcache (CV-HPDcache). 
+
+// Module: cva6_hpdcache_subsystem_axi_arbiter
+//
+// Funnels the memory traffic of the instruction cache and of the HPDcache
+// onto one AXI master port (axi_req_o / axi_resp_i):
+//   - read requests : I$ miss (index 0), D$ miss (index 1) and D$ uncached
+//                     read (index 2) feed a 3-input read arbiter;
+//   - write requests: D$ write buffer (index 0) and D$ uncached write
+//                     (index 1) feed a 2-input write arbiter.
+// Each response demultiplexer receives a routing table, indexed by
+// transaction ID, that is rebuilt combinationally from the *_id_i inputs.
+module cva6_hpdcache_subsystem_axi_arbiter
+// Parameters
+// {{{
+#(
+    // Width of hpdcache_mem_id_t; the routing tables hold
+    // 2**HPDcacheMemIdWidth entries.
+    parameter int HPDcacheMemIdWidth = 8,
+    // Data width of the HPDcache memory interface; compared against the I$
+    // line width to select the refill path in the generate block below.
+    parameter int HPDcacheMemDataWidth = 512,
+    // Request/response types of the HPDcache memory interface.
+    parameter type hpdcache_mem_req_t = logic,
+    parameter type hpdcache_mem_req_w_t = logic,
+    parameter type hpdcache_mem_resp_r_t = logic,
+    parameter type hpdcache_mem_resp_w_t = logic,
+
+    // AXI geometry. The ID, data and user widths size the R and B channel
+    // structs declared locally in this module.
+    parameter int unsigned AxiAddrWidth = 1,
+    parameter int unsigned AxiDataWidth = 1,
+    parameter int unsigned AxiIdWidth = 1,
+    parameter int unsigned AxiUserWidth = 1,
+    // AXI channel and request/response types supplied by the parent.
+    parameter type axi_ar_chan_t = logic,
+    parameter type axi_aw_chan_t = logic,
+    parameter type axi_w_chan_t = logic,
+    parameter type axi_req_t = logic,
+    parameter type axi_rsp_t = logic,
+
+    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
+)
+// }}}
+
+// Ports
+// {{{
+(
+    input logic clk_i,
+    input logic rst_ni,
+
+    // Interfaces from/to I$
+    // {{{
+    input logic icache_miss_valid_i,
+    output logic icache_miss_ready_o,
+    input wt_cache_pkg::icache_req_t icache_miss_i,
+    // ID whose read responses are routed to the I$ (routing table entry 0)
+    input hpdcache_mem_id_t icache_miss_id_i,
+
+    output logic icache_miss_resp_valid_o,
+    output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o,
+    // }}}
+
+    // Interfaces from/to D$
+    // {{{
+    output logic dcache_miss_ready_o,
+    input logic dcache_miss_valid_i,
+    input hpdcache_mem_req_t dcache_miss_i,
+
+    input logic dcache_miss_resp_ready_i,
+    output logic dcache_miss_resp_valid_o,
+    output hpdcache_mem_resp_r_t dcache_miss_resp_o,
+
+    // Write-buffer write interface
+    output logic dcache_wbuf_ready_o,
+    input logic dcache_wbuf_valid_i,
+    input hpdcache_mem_req_t dcache_wbuf_i,
+
+    output logic dcache_wbuf_data_ready_o,
+    input logic dcache_wbuf_data_valid_i,
+    input hpdcache_mem_req_w_t dcache_wbuf_data_i,
+
+    input logic dcache_wbuf_resp_ready_i,
+    output logic dcache_wbuf_resp_valid_o,
+    output hpdcache_mem_resp_w_t dcache_wbuf_resp_o,
+
+    // Uncached read interface
+    output logic dcache_uc_read_ready_o,
+    input logic dcache_uc_read_valid_i,
+    input hpdcache_mem_req_t dcache_uc_read_i,
+    // ID whose read responses are routed to the uncached read port
+    input hpdcache_mem_id_t dcache_uc_read_id_i,
+
+    input logic dcache_uc_read_resp_ready_i,
+    output logic dcache_uc_read_resp_valid_o,
+    output hpdcache_mem_resp_r_t dcache_uc_read_resp_o,
+
+    // Uncached write interface
+    output logic dcache_uc_write_ready_o,
+    input logic dcache_uc_write_valid_i,
+    input hpdcache_mem_req_t dcache_uc_write_i,
+    // ID whose write responses are routed to the uncached write port
+    input hpdcache_mem_id_t dcache_uc_write_id_i,
+
+    output logic dcache_uc_write_data_ready_o,
+    input logic dcache_uc_write_data_valid_i,
+    input hpdcache_mem_req_w_t dcache_uc_write_data_i,
+
+    input logic dcache_uc_write_resp_ready_i,
+    output logic dcache_uc_write_resp_valid_o,
+    output hpdcache_mem_resp_w_t dcache_uc_write_resp_o,
+    // }}}
+
+    // AXI port to upstream memory/peripherals
+    // {{{
+    output axi_req_t axi_req_o,
+    input axi_rsp_t axi_resp_i
+    // }}}
+);
+  // }}}
+
+  // Internal type definitions
+  // {{{
+  // AXI R channel layout handed to the read adapter as r_chan_t.
+  typedef struct packed {
+    logic [AxiIdWidth-1:0] id;
+    logic [AxiDataWidth-1:0] data;
+    axi_pkg::resp_t resp;
+    logic last;
+    logic [AxiUserWidth-1:0] user;
+  } axi_r_chan_t;
+
+  // AXI B channel layout handed to the write adapter as b_chan_t.
+  typedef struct packed {
+    logic [AxiIdWidth-1:0] id;
+    axi_pkg::resp_t resp;
+    logic [AxiUserWidth-1:0] user;
+  } axi_b_chan_t;
+
+  // One routing-table entry per possible transaction ID.
+  localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth);
+  typedef hpdcache_mem_id_t [MEM_RESP_RT_DEPTH-1:0] mem_resp_rt_t;
+  typedef logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] icache_resp_data_t;
+  // }}}
+
+  // Adapt the I$ interface to the HPDcache memory interface
+  // {{{
+  localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH / 64;
+  localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS);
+  localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);
+  localparam int ICACHE_WORD_SIZE = 3;
+  // Number of memory-interface beats needed for one I$ line (rounded up).
+  localparam int ICACHE_MEM_REQ_CL_LEN =
+      (ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth;
+  localparam int ICACHE_MEM_REQ_CL_SIZE =
+      (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ?
+      $clog2(
+      HPDcacheMemDataWidth / 8
+  ) : ICACHE_CL_SIZE;
+
+  // I$ request
+  hpdcache_mem_req_t icache_miss_req_wdata;
+  logic icache_miss_req_w, icache_miss_req_wok;
+
+  hpdcache_mem_req_t icache_miss_req_rdata;
+  logic icache_miss_req_r, icache_miss_req_rok;
+
+  logic icache_miss_pending_q;
+
+  // This FIFO has two functionalities:
+  // - Stabilize the ready-valid protocol. The ICACHE can abort a valid
+  //   transaction without receiving the corresponding ready signal. This
+  //   behavior is not supported by AXI.
+  // - Cut a possible long timing path.
+  hpdcache_fifo_reg #(
+      .FIFO_DEPTH(1),
+      .fifo_data_t(hpdcache_mem_req_t)
+  ) i_icache_miss_req_fifo (
+      .clk_i,
+      .rst_ni,
+
+      .w_i(icache_miss_req_w),
+      .wok_o(icache_miss_req_wok),
+      .wdata_i(icache_miss_req_wdata),
+
+      .r_i(icache_miss_req_r),
+      .rok_o(icache_miss_req_rok),
+      .rdata_o(icache_miss_req_rdata)
+  );
+
+  assign icache_miss_req_w = icache_miss_valid_i, icache_miss_ready_o = icache_miss_req_wok;
+
+  // Translate the I$ request into a memory read request. Non-cacheable
+  // fetches use len 0 and size ICACHE_WORD_SIZE; cacheable fetches use the
+  // line-sized len/size computed above.
+  assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr,
+      icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1,
+      icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE,
+      icache_miss_req_wdata.mem_req_id = icache_miss_i.tid,
+      icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ,
+      icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0),
+      icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc;
+
+
+  // I$ response
+  logic icache_miss_resp_w, icache_miss_resp_wok;
+  hpdcache_mem_resp_r_t icache_miss_resp_wdata;
+
+  logic icache_miss_resp_data_w, icache_miss_resp_data_wok;
+  logic icache_miss_resp_data_r, icache_miss_resp_data_rok;
+  icache_resp_data_t icache_miss_resp_data_rdata;
+
+  logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok;
+  logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok;
+  hpdcache_mem_id_t icache_miss_resp_meta_id;
+
+  icache_resp_data_t icache_miss_rdata;
+
+  generate
+    if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin
+      // Memory interface narrower than an I$ line: the response ID is
+      // stored once (on the last beat) and the data beats go through
+      // hpdcache_data_upsize (WR_WIDTH -> RD_WIDTH), which presumably packs
+      // them into one line-wide word -- confirm against that module.
+      hpdcache_fifo_reg #(
+          .FIFO_DEPTH(1),
+          .fifo_data_t(hpdcache_mem_id_t)
+      ) i_icache_refill_meta_fifo (
+          .clk_i,
+          .rst_ni,
+
+          .w_i(icache_miss_resp_meta_w),
+          .wok_o(icache_miss_resp_meta_wok),
+          .wdata_i(icache_miss_resp_wdata.mem_resp_r_id),
+
+          .r_i(icache_miss_resp_meta_r),
+          .rok_o(icache_miss_resp_meta_rok),
+          .rdata_o(icache_miss_resp_meta_id)
+      );
+
+      hpdcache_data_upsize #(
+          .WR_WIDTH(HPDcacheMemDataWidth),
+          .RD_WIDTH(ariane_pkg::ICACHE_LINE_WIDTH),
+          .DEPTH(1)
+      ) i_icache_hpdcache_data_upsize (
+          .clk_i,
+          .rst_ni,
+
+          .w_i(icache_miss_resp_data_w),
+          .wlast_i(icache_miss_resp_wdata.mem_resp_r_last),
+          .wok_o(icache_miss_resp_data_wok),
+          .wdata_i(icache_miss_resp_wdata.mem_resp_r_data),
+
+          .r_i(icache_miss_resp_data_r),
+          .rok_o(icache_miss_resp_data_rok),
+          .rdata_o(icache_miss_resp_data_rdata)
+      );
+
+      // Both buffers are always drained.
+      assign icache_miss_resp_meta_r = 1'b1, icache_miss_resp_data_r = 1'b1;
+
+      assign icache_miss_resp_meta_w = icache_miss_resp_w & icache_miss_resp_wdata.mem_resp_r_last;
+
+      assign icache_miss_resp_data_w = icache_miss_resp_w;
+
+      // A beat is accepted when the data buffer has room and, for the last
+      // beat only, the ID buffer has room as well.
+      assign icache_miss_resp_wok = icache_miss_resp_data_wok & (
+          icache_miss_resp_meta_wok | ~icache_miss_resp_wdata.mem_resp_r_last);
+
+      assign icache_miss_rdata = icache_miss_resp_data_rdata;
+
+    end else begin
+      // Memory interface at least as wide as an I$ line: the response is
+      // passed straight through and is always accepted.
+      assign icache_miss_resp_data_rok = icache_miss_resp_w;
+      assign icache_miss_resp_meta_rok = icache_miss_resp_w;
+      assign icache_miss_resp_wok = 1'b1;
+      assign icache_miss_resp_meta_id = icache_miss_resp_wdata.mem_resp_r_id;
+      assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data;
+
+      // In the case of uncacheable accesses, the Icache expects the data to be right-aligned
+      always_comb begin : icache_miss_resp_data_comb
+        if (!icache_miss_req_rdata.mem_req_cacheable) begin
+          automatic logic [ICACHE_CL_WORD_INDEX - 1:0] icache_miss_word_index;
+          automatic logic [63:0] icache_miss_word;
+          // Select the addressed 64-bit word and zero-extend it to a line.
+          icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3+:ICACHE_CL_WORD_INDEX];
+          icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64+:64];
+          icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word};
+        end else begin
+          icache_miss_rdata = icache_miss_resp_data_rdata;
+        end
+      end
+    end
+  endgenerate
+
+  assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok,
+      icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK,
+      icache_miss_resp_o.user = '0,
+      icache_miss_resp_o.inv = '0,
+      icache_miss_resp_o.tid = icache_miss_resp_meta_id,
+      icache_miss_resp_o.data = icache_miss_rdata;
+
+  // consume the Icache miss on the arrival of the response. The request
+  // metadata is decoded to forward the correct word in case of uncacheable
+  // Icache access
+  assign icache_miss_req_r = icache_miss_resp_meta_rok;
+  // }}}
+
+  // Read request arbiter
+  // {{{
+  logic mem_req_read_ready [2:0];
+  logic mem_req_read_valid [2:0];
+  hpdcache_mem_req_t mem_req_read [2:0];
+
+  logic mem_req_read_ready_arb;
+  logic mem_req_read_valid_arb;
+  hpdcache_mem_req_t mem_req_read_arb;
+
+  // Index 0: I$ miss. The request stays at the FIFO head until its response
+  // arrives, so it is masked with the pending flag once it has been issued.
+  assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q,
+      mem_req_read[0] = icache_miss_req_rdata;
+
+  // Index 1: D$ miss.
+  assign dcache_miss_ready_o = mem_req_read_ready[1],
+      mem_req_read_valid[1] = dcache_miss_valid_i,
+      mem_req_read[1] = dcache_miss_i;
+
+  // Index 2: D$ uncached read.
+  assign dcache_uc_read_ready_o = mem_req_read_ready[2],
+      mem_req_read_valid[2] = dcache_uc_read_valid_i,
+      mem_req_read[2] = dcache_uc_read_i;
+
+  hpdcache_mem_req_read_arbiter #(
+      .N(3),
+      .hpdcache_mem_req_t(hpdcache_mem_req_t)
+  ) i_mem_req_read_arbiter (
+      .clk_i,
+      .rst_ni,
+
+      .mem_req_read_ready_o(mem_req_read_ready),
+      .mem_req_read_valid_i(mem_req_read_valid),
+      .mem_req_read_i(mem_req_read),
+
+      .mem_req_read_ready_i(mem_req_read_ready_arb),
+      .mem_req_read_valid_o(mem_req_read_valid_arb),
+      .mem_req_read_o(mem_req_read_arb)
+  );
+  // }}}
+
+  // Read response demultiplexor
+  // {{{
+  logic mem_resp_read_ready;
+  logic mem_resp_read_valid;
+  hpdcache_mem_resp_r_t mem_resp_read;
+
+  logic mem_resp_read_ready_arb[2:0];
+  logic mem_resp_read_valid_arb[2:0];
+  hpdcache_mem_resp_r_t mem_resp_read_arb [2:0];
+
+  mem_resp_rt_t mem_resp_read_rt;
+
+  // Read routing table: the I$ ID maps to index 0, the uncached-read ID to
+  // index 2, and every other ID to index 1 (D$ miss).
+  always_comb begin
+    for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
+      mem_resp_read_rt[i] = (i == int'(    icache_miss_id_i)) ? 0 :
+                            (i == int'(dcache_uc_read_id_i)) ? 2 : 1;
+    end
+  end
+
+  hpdcache_mem_resp_demux #(
+      .N(3),
+      .resp_t(hpdcache_mem_resp_r_t),
+      .resp_id_t(hpdcache_mem_id_t)
+  ) i_mem_resp_read_demux (
+      .clk_i,
+      .rst_ni,
+
+      .mem_resp_ready_o(mem_resp_read_ready),
+      .mem_resp_valid_i(mem_resp_read_valid),
+      .mem_resp_id_i(mem_resp_read.mem_resp_r_id),
+      .mem_resp_i(mem_resp_read),
+
+      .mem_resp_ready_i(mem_resp_read_ready_arb),
+      .mem_resp_valid_o(mem_resp_read_valid_arb),
+      .mem_resp_o(mem_resp_read_arb),
+
+      .mem_resp_rt_i(mem_resp_read_rt)
+  );
+
+  assign icache_miss_resp_w = mem_resp_read_valid_arb[0],
+      icache_miss_resp_wdata = mem_resp_read_arb[0],
+      mem_resp_read_ready_arb[0] = icache_miss_resp_wok;
+
+  assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1],
+      dcache_miss_resp_o = mem_resp_read_arb[1],
+      mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i;
+
+  assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2],
+      dcache_uc_read_resp_o = mem_resp_read_arb[2],
+      mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i;
+  // }}}
+
+  // Write request arbiter
+  // {{{
+  logic mem_req_write_ready [1:0];
+  logic mem_req_write_valid [1:0];
+  hpdcache_mem_req_t mem_req_write [1:0];
+
+  logic mem_req_write_data_ready [1:0];
+  logic mem_req_write_data_valid [1:0];
+  hpdcache_mem_req_w_t mem_req_write_data [1:0];
+
+  logic mem_req_write_ready_arb;
+  logic mem_req_write_valid_arb;
+  hpdcache_mem_req_t mem_req_write_arb;
+
+  logic mem_req_write_data_ready_arb;
+  logic mem_req_write_data_valid_arb;
+  hpdcache_mem_req_w_t mem_req_write_data_arb;
+
+  // Index 0: D$ write buffer (request and data channels).
+  assign dcache_wbuf_ready_o = mem_req_write_ready[0],
+      mem_req_write_valid[0] = dcache_wbuf_valid_i,
+      mem_req_write[0] = dcache_wbuf_i;
+
+  assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0],
+      mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i,
+      mem_req_write_data[0] = dcache_wbuf_data_i;
+
+  // Index 1: D$ uncached write (request and data channels).
+  assign dcache_uc_write_ready_o = mem_req_write_ready[1],
+      mem_req_write_valid[1] = dcache_uc_write_valid_i,
+      mem_req_write[1] = dcache_uc_write_i;
+
+  assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1],
+      mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i,
+      mem_req_write_data[1] = dcache_uc_write_data_i;
+
+  hpdcache_mem_req_write_arbiter #(
+      .N(2),
+      .hpdcache_mem_req_t(hpdcache_mem_req_t),
+      .hpdcache_mem_req_w_t(hpdcache_mem_req_w_t)
+  ) i_mem_req_write_arbiter (
+      .clk_i,
+      .rst_ni,
+
+      .mem_req_write_ready_o(mem_req_write_ready),
+      .mem_req_write_valid_i(mem_req_write_valid),
+      .mem_req_write_i(mem_req_write),
+
+      .mem_req_write_data_ready_o(mem_req_write_data_ready),
+      .mem_req_write_data_valid_i(mem_req_write_data_valid),
+      .mem_req_write_data_i(mem_req_write_data),
+
+      .mem_req_write_ready_i(mem_req_write_ready_arb),
+      .mem_req_write_valid_o(mem_req_write_valid_arb),
+      .mem_req_write_o(mem_req_write_arb),
+
+      .mem_req_write_data_ready_i(mem_req_write_data_ready_arb),
+      .mem_req_write_data_valid_o(mem_req_write_data_valid_arb),
+      .mem_req_write_data_o(mem_req_write_data_arb)
+  );
+  // }}}
+
+  // Write response demultiplexor
+  // {{{
+  logic mem_resp_write_ready;
+  logic mem_resp_write_valid;
+  hpdcache_mem_resp_w_t mem_resp_write;
+
+  logic mem_resp_write_ready_arb[1:0];
+  logic mem_resp_write_valid_arb[1:0];
+  hpdcache_mem_resp_w_t mem_resp_write_arb [1:0];
+
+  mem_resp_rt_t mem_resp_write_rt;
+
+  // Write routing table: the uncached-write ID maps to index 1, every other
+  // ID to index 0 (write buffer).
+  always_comb begin
+    for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
+      mem_resp_write_rt[i] = (i == int'(dcache_uc_write_id_i)) ? 1 : 0;
+    end
+  end
+
+  hpdcache_mem_resp_demux #(
+      .N(2),
+      .resp_t(hpdcache_mem_resp_w_t),
+      .resp_id_t(hpdcache_mem_id_t)
+  ) i_hpdcache_mem_resp_write_demux (
+      .clk_i,
+      .rst_ni,
+
+      .mem_resp_ready_o(mem_resp_write_ready),
+      .mem_resp_valid_i(mem_resp_write_valid),
+      .mem_resp_id_i(mem_resp_write.mem_resp_w_id),
+      .mem_resp_i(mem_resp_write),
+
+      .mem_resp_ready_i(mem_resp_write_ready_arb),
+      .mem_resp_valid_o(mem_resp_write_valid_arb),
+      .mem_resp_o(mem_resp_write_arb),
+
+      .mem_resp_rt_i(mem_resp_write_rt)
+  );
+
+  assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0],
+      dcache_wbuf_resp_o = mem_resp_write_arb[0],
+      mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i;
+
+  assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1],
+      dcache_uc_write_resp_o = mem_resp_write_arb[1],
+      mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i;
+  // }}}
+
+  // I$ miss pending
+  // {{{
+  // Set in the cycle the I$ request at the FIFO head is accepted by the read
+  // arbiter; cleared when that request is popped (icache_miss_req_r together
+  // with icache_miss_req_rok), i.e. when its response is delivered.
+  always_ff @(posedge clk_i or negedge rst_ni) begin : icache_miss_pending_ff
+    if (!rst_ni) begin
+      icache_miss_pending_q <= 1'b0;
+    end else begin
+      icache_miss_pending_q <= ( (icache_miss_req_rok & mem_req_read_ready[0]) & ~icache_miss_pending_q) |
+          (~(icache_miss_req_r & icache_miss_req_rok) & icache_miss_pending_q);
+    end
+  end
+  // }}}
+
+  // AXI adapters
+  // {{{
+  // Local copies of the AXI request/response structs so that individual
+  // channel fields can be bound to the adapter ports.
+  axi_req_t axi_req;
+  axi_rsp_t axi_resp;
+
+  hpdcache_mem_to_axi_write #(
+      .hpdcache_mem_req_t(hpdcache_mem_req_t),
+      .hpdcache_mem_req_w_t(hpdcache_mem_req_w_t),
+      .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
+      .aw_chan_t(axi_aw_chan_t),
+      .w_chan_t(axi_w_chan_t),
+      .b_chan_t(axi_b_chan_t)
+  ) i_hpdcache_mem_to_axi_write (
+      .req_ready_o(mem_req_write_ready_arb),
+      .req_valid_i(mem_req_write_valid_arb),
+      .req_i(mem_req_write_arb),
+
+      .req_data_ready_o(mem_req_write_data_ready_arb),
+      .req_data_valid_i(mem_req_write_data_valid_arb),
+      .req_data_i(mem_req_write_data_arb),
+
+      .resp_ready_i(mem_resp_write_ready),
+      .resp_valid_o(mem_resp_write_valid),
+      .resp_o(mem_resp_write),
+
+      .axi_aw_valid_o(axi_req.aw_valid),
+      .axi_aw_o(axi_req.aw),
+      .axi_aw_ready_i(axi_resp.aw_ready),
+
+      .axi_w_valid_o(axi_req.w_valid),
+      .axi_w_o(axi_req.w),
+      .axi_w_ready_i(axi_resp.w_ready),
+
+      .axi_b_valid_i(axi_resp.b_valid),
+      .axi_b_i(axi_resp.b),
+      .axi_b_ready_o(axi_req.b_ready)
+  );
+
+  hpdcache_mem_to_axi_read #(
+      .hpdcache_mem_req_t(hpdcache_mem_req_t),
+      .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
+      .ar_chan_t(axi_ar_chan_t),
+      .r_chan_t(axi_r_chan_t)
+  ) i_hpdcache_mem_to_axi_read (
+      .req_ready_o(mem_req_read_ready_arb),
+      .req_valid_i(mem_req_read_valid_arb),
+      .req_i(mem_req_read_arb),
+
+      .resp_ready_i(mem_resp_read_ready),
+      .resp_valid_o(mem_resp_read_valid),
+      .resp_o(mem_resp_read),
+
+      .axi_ar_valid_o(axi_req.ar_valid),
+      .axi_ar_o(axi_req.ar),
+      .axi_ar_ready_i(axi_resp.ar_ready),
+
+      .axi_r_valid_i(axi_resp.r_valid),
+      .axi_r_i(axi_resp.r),
+      .axi_r_ready_o(axi_req.r_ready)
+  );
+
+  assign axi_req_o = axi_req;
+  assign axi_resp = axi_resp_i;
+  // }}}
+
+  // Assertions
+  // {{{
+  // Parameter sanity checks evaluated once at time zero. The first argument
+  // of $fatal is the finish number, so it is passed explicitly (1) ahead of
+  // the message string.
+  // pragma translate_off
+  initial
+    assert (HPDcacheMemIdWidth <= AxiIdWidth)
+    else $fatal(1, "HPDcacheMemIdWidth shall be less or equal to AxiIdWidth");
+  initial
+    assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1))
+    else
+      $fatal(1,
+          "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"
+      );
+  initial
+    assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1))
+    else
+      $fatal(1,
+          "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"
+      );
+  initial
+    assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH)
+    else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Icache line");
+  initial
+    assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH)
+    else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line");
+  // pragma translate_on
+  // }}}
+
+endmodule : cva6_hpdcache_subsystem_axi_arbiter
diff --git a/test/type_param/core/cache_subsystem/cva6_icache.sv b/test/type_param/core/cache_subsystem/cva6_icache.sv new file mode 100644 index 0000000..37dd8d1 --- /dev/null +++ b/test/type_param/core/cache_subsystem/cva6_icache.sv @@ -0,0 +1,584 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Instruction cache that is compatible with openpiton. +// +// Some notes: +// +// 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped +// to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width +// of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be +// consumed unconditionally. +// +// 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored) +// +// 3) NC accesses to I/O space are expected to return 32bit from memory.
+//
+
+
+module cva6_icache
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    /// ID to be used for read transactions
+    parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0
+) (
+    input logic clk_i,
+    input logic rst_ni,
+
+    /// flush the icache, flush and kill have to be asserted together
+    input logic flush_i,
+    /// enable icache
+    input logic en_i,
+    /// to performance counter
+    output logic miss_o,
+    // address translation requests
+    input icache_areq_t areq_i,
+    output icache_arsp_t areq_o,
+    // data requests
+    input icache_dreq_t dreq_i,
+    output icache_drsp_t dreq_o,
+    // refill port
+    input logic mem_rtrn_vld_i,
+    input icache_rtrn_t mem_rtrn_i,
+    output logic mem_data_req_o,
+    input logic mem_data_ack_i,
+    output icache_req_t mem_data_o
+);
+
+  // functions: icache_way_bin2oh converts a binary way index into a one-hot way mask
+  function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh(
+      input logic [L1I_WAY_WIDTH-1:0] in);
+    logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] out;
+    out = '0;
+    out[in] = 1'b1;
+    return out;
+  endfunction
+
+  // signals
+  logic cache_en_d, cache_en_q; // cache is enabled
+  logic [riscv::VLEN-1:0] vaddr_d, vaddr_q;
+  logic paddr_is_nc; // asserted if physical address is non-cacheable
+  logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare
+  logic cache_rden; // triggers cache lookup
+  logic cache_wren; // triggers write to cacheline
+  logic
+      cmp_en_d,
+      cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal.
+  logic flush_d, flush_q; // used to register and signal pending flushes
+
+  // replacement strategy
+  logic update_lfsr; // shift the LFSR
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace
+  logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
+  logic all_ways_valid; // we need to switch repl strategy since all are valid
+
+  // invalidations / flushing
+  logic inv_en; // incoming invalidations
+  logic inv_d, inv_q; // invalidation in progress
+  logic flush_en, flush_done; // used to flush cache entries
+  logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries
+
+  // mem arrays
+  logic cl_we; // write enable to memory array
+  logic [ ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array
+  logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array
+  logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line
+  logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
+  logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
+  logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
+  logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache
+  logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; // selected word from each cacheline
+  logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline
+  logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
+  logic vld_we; // valid bits write enable
+  logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
+  logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs
+  logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit address (drives addr_i of the tag/valid RAMs)
+
+  // controller FSM
+  typedef enum logic [2:0] {
+    FLUSH,
+    IDLE,
+    READ,
+    MISS,
+    KILL_ATRANS,
+    KILL_MISS
+  } state_e;
+  state_e state_d, state_q;
+
+  ///////////////////////////////////////////////////////
+  // address -> cl_index mapping, interface plumbing
+  ///////////////////////////////////////////////////////
+
+  // extract tag from physical address, check if NC
+  assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;
+
+  // noncacheable if request goes to I/O space, or if cache is disabled
+  assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}
+  ));
+
+  // pass exception through
+  assign dreq_o.ex = areq_i.fetch_exception;
+
+  // latch this in case we have to stall later on
+  // make sure this is 32bit aligned
+  assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q;
+  assign areq_o.fetch_vaddr = {vaddr_q[riscv::VLEN-1:2], 2'b0};
+
+  // split virtual address into index and offset to address cache arrays
+  assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];
+
+
+  if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
+    // if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory
+    assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr[ICACHE_OFFSET_WIDTH-1:2], 2'b0} :
+                         ( paddr_is_nc & mem_data_req_o ) ? {{ICACHE_OFFSET_WIDTH-1{1'b0}}, cl_offset_q[2]}<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case
+                         cl_offset_q;
+    // request word address instead of cl address in case of NC access
+    assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit
+                              {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
+  end else begin : gen_piton_offset
+    // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
+    // since the piton cache system replicates the data, we can always index the full CL
+    assign cl_offset_d = (dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr >> 2, 2'b0} : cl_offset_q;
+
+    // request word address instead of cl address in case of NC access
+    assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit
+                              {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
+  end
+
+
+  assign mem_data_o.tid = RdTxId;
+
+  assign mem_data_o.nc = paddr_is_nc;
+  // way that is being replaced
+  assign mem_data_o.way = repl_way;
+  assign dreq_o.vaddr = vaddr_q;
+
+  // invalidations take two cycles
+  assign inv_d = inv_en;
+
+  ///////////////////////////////////////////////////////
+  // main control logic
+  ///////////////////////////////////////////////////////
+  logic addr_ni;
+  assign addr_ni = config_pkg::is_inside_nonidempotent_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, areq_i.fetch_paddr}
+  );
+  always_comb begin : p_fsm
+    // default assignment
+    state_d = state_q;
+    cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush
+    flush_en = 1'b0;
+    cmp_en_d = 1'b0;
+    cache_rden = 1'b0;
+    cache_wren = 1'b0;
+    inv_en = 1'b0;
+    flush_d = flush_q | flush_i; // register incoming flush
+
+    // interfaces
+    dreq_o.ready = 1'b0;
+    areq_o.fetch_req = 1'b0;
+    dreq_o.valid = 1'b0;
+    mem_data_req_o = 1'b0;
+    // performance counter
+    miss_o = 1'b0;
+
+    // handle invalidations unconditionally
+    // note: invals are mutually exclusive with
+    // ifills, since both arrive over the same IF
+    // however, we need to make sure below that we
+    // do not trigger a cache readout at the same time...
+    if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin
+      inv_en = 1'b1;
+    end
+
+    unique case (state_q)
+      //////////////////////////////////
+      // this clears all valid bits
+      FLUSH: begin
+        flush_en = 1'b1;
+        if (flush_done) begin
+          state_d = IDLE;
+          flush_d = 1'b0;
+          // if the cache was not enabled set this
+          cache_en_d = en_i;
+        end
+      end
+      //////////////////////////////////
+      // wait for an incoming request
+      IDLE: begin
+        // only enable tag comparison if cache is enabled
+        cmp_en_d = cache_en_q;
+
+        // handle pending flushes, or perform cache clear upon enable
+        if (flush_d || (en_i && !cache_en_q)) begin
+          state_d = FLUSH;
+          // wait for incoming requests
+        end else begin
+          // mem requests are for sure invals here
+          if (!mem_rtrn_vld_i) begin
+            dreq_o.ready = 1'b1;
+            // we have a new request
+            if (dreq_i.req) begin
+              cache_rden = 1'b1;
+              state_d = READ;
+            end
+          end
+          if (dreq_i.kill_s1) begin
+            state_d = IDLE;
+          end
+        end
+      end
+      //////////////////////////////////
+      // check whether we have a hit
+      // in case the cache is disabled,
+      // or in case the address is NC, we
+      // reuse the miss mechanism to handle
+      // the request
+      READ: begin
+        areq_o.fetch_req = '1;
+        // only enable tag comparison if cache is enabled
+        cmp_en_d = cache_en_q;
+        // readout speculatively
+        cache_rden = cache_en_q;
+
+        if (areq_i.fetch_valid && (!dreq_i.spec || ((CVA6Cfg.NonIdemPotenceEn && !addr_ni) || (!CVA6Cfg.NonIdemPotenceEn)))) begin
+          // check if we have to flush
+          if (flush_d) begin
+            state_d = IDLE;
+            // we have a hit or an exception output valid result
+          end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin
+            dreq_o.valid = ~dreq_i.kill_s2; // just don't output in this case
+            state_d = IDLE;
+
+            // we can accept another request
+            // and stay here, but only if no inval is coming in
+            // note: we are not expecting ifill return packets here...
+            if (!mem_rtrn_vld_i) begin
+              dreq_o.ready = 1'b1;
+              if (dreq_i.req) begin
+                state_d = READ;
+              end
+            end
+            // if a request is being killed at this stage,
+            // we have to bail out and wait for the address translation to complete
+            if (dreq_i.kill_s1) begin
+              state_d = IDLE;
+            end
+            // we have a miss / NC transaction
+          end else if (dreq_i.kill_s2) begin
+            state_d = IDLE;
+          end else if (!inv_q) begin
+            cmp_en_d = 1'b0;
+            // only count this as a miss if the cache is enabled, and
+            // the address is cacheable
+            // send out ifill request
+            mem_data_req_o = 1'b1;
+            if (mem_data_ack_i) begin
+              miss_o = ~paddr_is_nc;
+              state_d = MISS;
+            end
+          end
+          // bail out if this request is being killed (and we missed on the TLB)
+        end else if (dreq_i.kill_s2 || flush_d) begin
+          state_d = KILL_ATRANS;
+        end
+      end
+      //////////////////////////////////
+      // wait until the memory transaction
+      // returns. do not write to memory
+      // if the nc bit is set.
+      MISS: begin
+        // note: this is mutually exclusive with ICACHE_INV_REQ,
+        // so we do not have to check for invals here
+        if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
+          state_d = IDLE;
+          // only return data if request is not being killed
+          if (!(dreq_i.kill_s2 || flush_d)) begin
+            dreq_o.valid = 1'b1;
+            // only write to cache if this address is cacheable
+            cache_wren = ~paddr_is_nc;
+          end
+          // bail out if this request is being killed
+        end else if (dreq_i.kill_s2 || flush_d) begin
+          state_d = KILL_MISS;
+        end
+      end
+      //////////////////////////////////
+      // killed address translation,
+      // wait until paddr is valid, and go
+      // back to idle
+      KILL_ATRANS: begin
+        areq_o.fetch_req = '1;
+        if (areq_i.fetch_valid) begin
+          state_d = IDLE;
+        end
+      end
+      //////////////////////////////////
+      // killed miss,
+      // wait until memory responds and
+      // go back to idle
+      KILL_MISS: begin
+        if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
+          state_d = IDLE;
+        end
+      end
+      default: begin
+        // we should never get here
+        state_d = FLUSH;
+      end
+    endcase // state_q
+  end
+
+  ///////////////////////////////////////////////////////
+  // valid bit invalidation and replacement strategy
+  ///////////////////////////////////////////////////////
+
+  // note: it cannot happen that we get an invalidation + a cl replacement
+  // in the same cycle as these requests arrive via the same interface
+  // flushes take precedence over invalidations (it is ok if we ignore
+  // the inval since the cache is cleared anyway)
+
+  assign flush_cnt_d = (flush_done) ? '0 : (flush_en) ? flush_cnt_q + 1 : flush_cnt_q;
+
+  assign flush_done = (flush_cnt_q == (ICACHE_NUM_WORDS - 1));
+
+  // invalidation/clearing address
+  // flushing takes precedence over invals
+  assign vld_addr = (flush_en) ? flush_cnt_q :
+                    (inv_en) ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] :
+                    cl_index;
+
+  assign vld_req = (flush_en || cache_rden) ? '1 :
+                   (mem_rtrn_i.inv.all && inv_en) ? '1 :
+                   (mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(
+      mem_rtrn_i.inv.way
+  ) : repl_way_oh_q;
+
+  assign vld_wdata = (cache_wren) ? '1 : '0;
+
+  assign vld_we = (cache_wren | inv_en | flush_en);
+  // assign vld_req = (vld_we | cache_rden);
+
+
+  // choose random replacement if all are valid
+  assign update_lfsr = cache_wren & all_ways_valid;
+  assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
+  assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;
+
+  // enable signals for memory arrays
+  assign cl_req = (cache_rden) ? '1 : (cache_wren) ? repl_way_oh_q : '0;
+  assign cl_we = cache_wren;
+
+
+  // find invalid cache line
+  lzc #(
+      .WIDTH(ICACHE_SET_ASSOC)
+  ) i_lzc (
+      .in_i (~vld_rdata),
+      .cnt_o (inv_way),
+      .empty_o(all_ways_valid)
+  );
+
+  // generate random cacheline index
+  lfsr #(
+      .LfsrWidth(8),
+      .OutWidth ($clog2(ariane_pkg::ICACHE_SET_ASSOC))
+  ) i_lfsr (
+      .clk_i (clk_i),
+      .rst_ni(rst_ni),
+      .en_i (update_lfsr),
+      .out_o (rnd_way)
+  );
+
+
+  ///////////////////////////////////////////////////////
+  // tag comparison, hit generation
+  ///////////////////////////////////////////////////////
+
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx;
+
+  for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
+    assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
+    assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:FETCH_WIDTH];
+    assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
+  end
+
+
+  lzc #(
+      .WIDTH(ICACHE_SET_ASSOC)
+  ) i_lzc_hit (
+      .in_i (cl_hit),
+      .cnt_o (hit_idx),
+      .empty_o()
+  );
+
+  always_comb begin
+    if (cmp_en_q) begin
+      dreq_o.data = cl_sel[hit_idx];
+      dreq_o.user = cl_user[hit_idx];
+    end else begin
+      dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:FETCH_WIDTH];
+      dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // memory arrays and regs
+  ///////////////////////////////////////////////////////
+
+
+  logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[ICACHE_SET_ASSOC-1:0];
+
+  for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram
+    // Tag RAM
+    sram #(
+        // tag + valid bit
+        .DATA_WIDTH(ICACHE_TAG_WIDTH + 1),
+        .NUM_WORDS (ICACHE_NUM_WORDS)
+    ) tag_sram (
+        .clk_i (clk_i),
+        .rst_ni (rst_ni),
+        .req_i (vld_req[i]),
+        .we_i (vld_we),
+        .addr_i (vld_addr),
+        // we can always use the saved tag here since it takes a
+        // couple of cycles until we write to the cache upon a miss
+        .wuser_i('0),
+        .wdata_i({vld_wdata[i], cl_tag_q}),
+        .be_i ('1),
+        .ruser_o(),
+        .rdata_o(cl_tag_valid_rdata[i])
+    );
+
+    assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0];
+    assign vld_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH];
+
+    // Data RAM
+    sram #(
+        .USER_WIDTH(ICACHE_USER_LINE_WIDTH),
+        .DATA_WIDTH(ICACHE_LINE_WIDTH),
+        .USER_EN (ariane_pkg::FETCH_USER_EN),
+        .NUM_WORDS (ICACHE_NUM_WORDS)
+    ) data_sram (
+        .clk_i (clk_i),
+        .rst_ni (rst_ni),
+        .req_i (cl_req[i]),
+        .we_i (cl_we),
+        .addr_i (cl_index),
+        .wuser_i(mem_rtrn_i.user),
+        .wdata_i(mem_rtrn_i.data),
+        .be_i ('1),
+        .ruser_o(cl_ruser[i]),
+        .rdata_o(cl_rdata[i])
+    );
+  end
+
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      cl_tag_q <= '0;
+      flush_cnt_q <= '0;
+      vaddr_q <= '0;
+      cmp_en_q <= '0;
+      cache_en_q <= '0;
+      flush_q <= '0;
+      state_q <= FLUSH;
+      cl_offset_q <= '0;
+      repl_way_oh_q <= '0;
+      inv_q <= '0;
+    end else begin
+      cl_tag_q <= cl_tag_d;
+      flush_cnt_q <= flush_cnt_d;
+      vaddr_q <= vaddr_d;
+      cmp_en_q <= cmp_en_d;
+      cache_en_q <= cache_en_d;
+      flush_q <= flush_d;
+      state_q <= state_d;
+      cl_offset_q <= cl_offset_d;
+      repl_way_oh_q <= repl_way_oh_d;
+      inv_q <= inv_d;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+  repl_inval0 :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
+  else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
+
+  repl_inval1 :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren)
+  else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
+
+  invalid_state :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS}))
+  else $fatal(1, "[l1 icache] fsm reached an invalid state");
+
+  hot1 :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(
+      cl_hit
+  ))
+  else $fatal(1, "[l1 icache] cl_hit signal must be hot1");
+
+  // this is only used for verification!
+  logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
+  logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
+  logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
+    if (!rst_ni) begin
+      vld_mirror <= '{default: '0};
+      tag_mirror <= '{default: '0};
+    end else begin
+      for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin
+        if (vld_req[i] & vld_we) begin
+          vld_mirror[vld_addr][i] <= vld_wdata[i];
+          tag_mirror[vld_addr][i] <= cl_tag_q;
+        end
+      end
+    end
+  end
+
+  for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_dupl
+    assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata);
+  end
+
+  tag_write_duplicate :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
+  else $fatal(1, "[l1 icache] cannot allocate a CL that is already present in the cache");
+
+
+  initial begin
+    // assert wrong parameterizations
+    assert (ICACHE_INDEX_WIDTH <= 12)
+    else $fatal(1, "[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
+  end
+`endif
+  //pragma translate_on
+
+endmodule // cva6_icache
diff --git a/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv b/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv
new file mode 100644
index 0000000..7579fe4
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv
@@ -0,0 +1,202 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Nils Wistoff , ETH Zurich
+// Date: 07.09.2020
+// Description: wrapper module to connect the L1I$ to a 64bit AXI bus.
+//
+
+module cva6_icache_axi_wrapper
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter type axi_req_t = logic,
+    parameter type axi_rsp_t = logic
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input riscv::priv_lvl_t priv_lvl_i,
+
+    input logic flush_i, // flush the icache, flush and kill have to be asserted together
+    input logic en_i, // enable icache
+    output logic miss_o, // to performance counter
+    // address translation requests
+    input icache_areq_t areq_i,
+    output icache_arsp_t areq_o,
+    // data requests
+    input icache_dreq_t dreq_i,
+    output icache_drsp_t dreq_o,
+    // AXI refill port
+    output axi_req_t axi_req_o,
+    input axi_rsp_t axi_resp_i
+);
+
+  localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH > DCACHE_LINE_WIDTH) +
+                           (DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ;
+
+  logic icache_mem_rtrn_vld;
+  icache_rtrn_t icache_mem_rtrn;
+  logic icache_mem_data_req;
+  logic icache_mem_data_ack;
+  icache_req_t icache_mem_data;
+
+  logic axi_rd_req;
+  logic axi_rd_gnt;
+  logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr;
+  logic [ $clog2(AxiNumWords)-1:0] axi_rd_blen;
+  logic [ 2:0] axi_rd_size;
+  logic [ CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in;
+  logic axi_rd_rdy;
+  logic axi_rd_lock;
+  logic axi_rd_last;
+  logic axi_rd_valid;
+  logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
+  logic [ CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out;
+  logic axi_rd_exokay;
+
+  logic req_valid_d, req_valid_q;
+  icache_req_t req_data_d, req_data_q;
+  logic first_d, first_q;
+  logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
+      rd_shift_d, rd_shift_q;
+
+  // Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but
+  // required by AXI).
+  assign req_valid_d = ~axi_rd_gnt & (icache_mem_data_req | req_valid_q);
+
+  // Update read request information on a new request
+  assign req_data_d = (icache_mem_data_req) ? icache_mem_data : req_data_q;
+
+  // We have a new or pending read request
+  assign axi_rd_req = icache_mem_data_req | req_valid_q;
+  assign axi_rd_addr = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr);
+
+  // Fetch a full cache line on a cache miss, or a single word on a bypassed access
+  assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH / 64 - 1;
+  assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth / 8); // Maximum: log2 of the AXI data width in bytes
+  assign axi_rd_id_in = req_data_d.tid;
+  assign axi_rd_rdy = 1'b1;
+  assign axi_rd_lock = 1'b0;
+
+  // Immediately acknowledge read request. This is an implicit requirement for the icache.
+  assign icache_mem_data_ack = icache_mem_data_req;
+
+  // Return data as soon as last word arrives
+  assign icache_mem_rtrn_vld = axi_rd_valid & axi_rd_last;
+  assign icache_mem_rtrn.data = rd_shift_d;
+  assign icache_mem_rtrn.tid = req_data_q.tid;
+  assign icache_mem_rtrn.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
+  assign icache_mem_rtrn.inv = '0;
+
+  // -------
+  // I-Cache
+  // -------
+  cva6_icache #(
+      // use ID 0 for icache reads
+      .CVA6Cfg(CVA6Cfg),
+      .RdTxId (0)
+  ) i_cva6_icache (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i (flush_i),
+      .en_i (en_i),
+      .miss_o (miss_o),
+      .areq_i (areq_i),
+      .areq_o (areq_o),
+      .dreq_i (dreq_i),
+      .dreq_o (dreq_o),
+      .mem_rtrn_vld_i(icache_mem_rtrn_vld),
+      .mem_rtrn_i (icache_mem_rtrn),
+      .mem_data_req_o(icache_mem_data_req),
+      .mem_data_ack_i(icache_mem_data_ack),
+      .mem_data_o (icache_mem_data)
+  );
+
+  // --------
+  // AXI shim
+  // --------
+  axi_shim #(
+      .CVA6Cfg (CVA6Cfg),
+      .AxiNumWords(AxiNumWords),
+      .axi_req_t (axi_req_t),
+      .axi_rsp_t (axi_rsp_t)
+  ) i_axi_shim (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .rd_req_i (axi_rd_req),
+      .rd_gnt_o (axi_rd_gnt),
+      .rd_addr_i (axi_rd_addr),
+      .rd_blen_i (axi_rd_blen),
+      .rd_size_i (axi_rd_size),
+      .rd_id_i (axi_rd_id_in),
+      .rd_rdy_i (axi_rd_rdy),
+      .rd_lock_i (axi_rd_lock),
+      .rd_last_o (axi_rd_last),
+      .rd_valid_o (axi_rd_valid),
+      .rd_data_o (axi_rd_data),
+      .rd_user_o (),
+      .rd_id_o (axi_rd_id_out),
+      .rd_exokay_o(axi_rd_exokay),
+      .wr_req_i ('0),
+      .wr_gnt_o (),
+      .wr_addr_i ('0),
+      .wr_data_i ('0),
+      .wr_user_i ('0),
+      .wr_be_i ('0),
+      .wr_blen_i ('0),
+      .wr_size_i ('0),
+      .wr_id_i ('0),
+      .wr_lock_i ('0),
+      .wr_atop_i ('0),
+      .wr_rdy_i ('0),
+      .wr_valid_o (),
+      .wr_id_o (),
+      .wr_exokay_o(),
+      .axi_req_o (axi_req_o),
+      .axi_resp_i (axi_resp_i)
+  );
+
+  // Buffer burst data in shift register
+  always_comb begin : p_axi_rtrn_shift
+    first_d = first_q;
+    rd_shift_d = rd_shift_q;
+
+    if (axi_rd_valid) begin
+      first_d = axi_rd_last; // the beat following a 'last' beat is the first beat of the next burst
+      if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
+        rd_shift_d = axi_rd_data;
+      end else begin
+        rd_shift_d = {axi_rd_data, rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]};
+      end
+
+      // If this is a single word transaction, we need to make sure that word is placed at offset 0
+      if (first_q) begin
+        rd_shift_d[0] = axi_rd_data;
+      end
+    end
+  end
+
+  // Registers
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
+    if (!rst_ni) begin
+      req_valid_q <= 1'b0;
+      req_data_q <= '0;
+      first_q <= 1'b1;
+      rd_shift_q <= '0;
+    end else begin
+      req_valid_q <= req_valid_d;
+      req_data_q <= req_data_d;
+      first_q <= first_d;
+      rd_shift_q <= rd_shift_d;
+    end
+  end
+
+endmodule // cva6_icache_axi_wrapper
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh b/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh
new file mode 100644
index 0000000..5e92a79
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : February, 2023
+ * Description : HPDcache Types' Definition (macros declaring the packed structs of the memory request/response channels)
+ * History :
+ */
+`ifndef __HPDCACHE_TYPEDEF_SVH__
+`define __HPDCACHE_TYPEDEF_SVH__
+
+`define HPDCACHE_TYPEDEF_MEM_REQ_T(__name__, addr_t, id_t) \
+    typedef struct packed { \
+        addr_t mem_req_addr; \
+        hpdcache_pkg::hpdcache_mem_len_t mem_req_len; \
+        hpdcache_pkg::hpdcache_mem_size_t mem_req_size; \
+        id_t mem_req_id; \
+        hpdcache_pkg::hpdcache_mem_command_e mem_req_command; \
+        hpdcache_pkg::hpdcache_mem_atomic_e mem_req_atomic; \
+        logic mem_req_cacheable; \
+    } __name__
+
+`define HPDCACHE_TYPEDEF_MEM_RESP_R_T(__name__, id_t, data_t) \
+    typedef struct packed { \
+        hpdcache_pkg::hpdcache_mem_error_e mem_resp_r_error; \
+        id_t mem_resp_r_id; \
+        data_t mem_resp_r_data; \
+        logic mem_resp_r_last; \
+    } __name__
+
+`define HPDCACHE_TYPEDEF_MEM_REQ_W_T(__name__, data_t, be_t) \
+    typedef struct packed { \
+        data_t mem_req_w_data; \
+        be_t mem_req_w_be; \
+        logic mem_req_w_last; \
+    } __name__
+
+`define HPDCACHE_TYPEDEF_MEM_RESP_W_T(__name__, id_t) \
+    typedef struct packed { \
+        logic mem_resp_w_is_atomic; \
+        hpdcache_pkg::hpdcache_mem_error_e mem_resp_w_error; \
+        id_t mem_resp_w_id; \
+    } __name__
+
+`endif
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv
new file mode 100644
index 0000000..d3e0a11
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0.
You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : November 22, 2022
+ * Description : Refill data downsize (buffer written with WR_WIDTH words, read back as narrower RD_WIDTH words)
+ * History :
+ */
+module hpdcache_data_downsize
+// {{{
+import hpdcache_pkg::*;
+// Parameters
+// {{{
+#(
+    parameter int WR_WIDTH = 0,
+    parameter int RD_WIDTH = 0,
+    parameter int DEPTH = 0,
+
+    localparam type wdata_t = logic [WR_WIDTH-1:0],
+    localparam type rdata_t = logic [RD_WIDTH-1:0]
+)
+// }}}
+// Ports
+// {{{
+(
+    input logic clk_i,
+    input logic rst_ni,
+
+    input logic w_i,
+    output logic wok_o,
+    input wdata_t wdata_i,
+
+    input logic r_i,
+    output logic rok_o,
+    output rdata_t rdata_o
+);
+// }}}
+// Architecture
+// {{{
+    // Local definitions
+    // {{{
+    localparam int RD_WORDS = WR_WIDTH/RD_WIDTH;
+    localparam int PTR_WIDTH = $clog2(DEPTH);
+    localparam int WORDCNT_WIDTH = $clog2(RD_WORDS);
+    typedef logic [PTR_WIDTH-1:0] bufptr_t;
+    typedef logic [WORDCNT_WIDTH-1:0] wordptr_t;
+    typedef logic [PTR_WIDTH:0] occupancy_t;
+    // }}}
+
+    // Internal registers and signals
+    // {{{
+    rdata_t [DEPTH-1:0][RD_WORDS-1:0] buf_q;
+    bufptr_t wrptr_q, wrptr_d;
+    bufptr_t rdptr_q, rdptr_d;
+    occupancy_t used_q, used_d;
+    wordptr_t [DEPTH-1:0] words_q, words_d;
+    logic words_set;
+    logic full, empty;
+    // }}}
+
+    // Control-Path
+    // {{{
+    assign full = (hpdcache_uint'(used_q) == DEPTH),
+           empty = (used_q == 0),
+           wok_o = ~full,
+           rok_o = ~empty;
+
+    always_comb
+    begin : ctrl_comb
+        automatic logic used_inc, used_dec;
+        automatic logic words_dec;
+
+        rdptr_d = rdptr_q;
+        wrptr_d = wrptr_q;
+        used_dec = 1'b0;
+        used_inc = 1'b0;
+        words_dec = 1'b0;
+        words_set = 1'b0;
+
+        if (w_i && wok_o) begin
+            used_inc = 1'b1;
+            words_set = 1'b1;
+            if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin
+                wrptr_d = 0;
+            end else begin
+                wrptr_d = wrptr_q + 1;
+            end
+        end
+
+        if (r_i && rok_o) begin
+            words_dec = (words_q[rdptr_q] > 0);
+            if (words_q[rdptr_q] == 0) begin
+                used_dec = 1'b1;
+                if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin
+                    rdptr_d = 0;
+                end else begin
+                    rdptr_d = rdptr_q + 1;
+                end
+            end
+        end
+
+        case ({used_inc, used_dec})
+            2'b10 : used_d = used_q + 1;
+            2'b01 : used_d = used_q - 1;
+            default: used_d = used_q;
+        endcase
+
+        words_d = words_q;
+        if (words_set) begin
+            words_d[wrptr_q] = wordptr_t'(RD_WORDS - 1); // per-entry down-counter: reads decrement it, the entry is released when it is 0
+        end
+        if (words_dec) begin
+            words_d[rdptr_q] = words_q[rdptr_q] - 1;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : ctrl_ff
+        if (!rst_ni) begin
+            rdptr_q <= 0;
+            wrptr_q <= 0;
+            used_q <= 0;
+            words_q <= 0;
+        end else begin
+            rdptr_q <= rdptr_d;
+            wrptr_q <= wrptr_d;
+            used_q <= used_d;
+            words_q <= words_d;
+        end
+    end
+    // }}}
+
+    // Data-Path
+    // {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : buf_ff
+        if (!rst_ni) begin
+            buf_q <= '0;
+        end else begin
+            if (words_set) begin
+                buf_q[wrptr_q] <= wdata_i;
+            end
+        end
+    end
+
+    assign rdata_o = buf_q[rdptr_q][RD_WORDS - hpdcache_uint'(words_q[rdptr_q]) - 1]; // index rises as the counter falls, so word 0 is read first
+    // }}}
+
+    // Assertions
+    // {{{
+    // pragma translate_off
+    initial
+    begin : initial_assertions
+        assert (DEPTH > 0) else $error("DEPTH must be greater than 0");
+        assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0");
+        assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0");
+        assert (RD_WIDTH < WR_WIDTH) else $error("RD_WIDTH must be less to WR_WIDTH");
+        assert ((WR_WIDTH % RD_WIDTH) == 0) else $error("WR_WIDTH must be a multiple RD_WIDTH");
+    end
+    // pragma translate_on
+    // }}}
+// }}}
+endmodule
+// }}}
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv
b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv
new file mode 100644
index 0000000..c4af81c
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : November 22, 2022
+ * Description : Refill data upsize (buffer written with WR_WIDTH words, read back as wider RD_WIDTH words)
+ * History :
+ */
+module hpdcache_data_upsize
+// {{{
+import hpdcache_pkg::*;
+// Parameters
+// {{{
+#(
+    parameter int WR_WIDTH = 0,
+    parameter int RD_WIDTH = 0,
+    parameter int DEPTH = 0,
+
+    localparam type wdata_t = logic [WR_WIDTH-1:0],
+    localparam type rdata_t = logic [RD_WIDTH-1:0]
+)
+// }}}
+// Ports
+// {{{
+(
+    input logic clk_i,
+    input logic rst_ni,
+
+    input logic w_i,
+    input logic wlast_i,
+    output logic wok_o,
+    input wdata_t wdata_i,
+
+    input logic r_i,
+    output logic rok_o,
+    output rdata_t rdata_o
+);
+// }}}
+// Architecture
+// {{{
+    // Local definitions
+    // {{{
+    localparam int WR_WORDS = RD_WIDTH/WR_WIDTH;
+    localparam int PTR_WIDTH = $clog2(DEPTH);
+    localparam int WORDCNT_WIDTH = $clog2(WR_WORDS);
+    typedef logic [PTR_WIDTH-1:0] bufptr_t;
+    typedef logic [WORDCNT_WIDTH-1:0] wordptr_t;
+    typedef logic [PTR_WIDTH:0] occupancy_t;
+    // }}}
+
+    // Internal registers and signals
+    // {{{
+    wdata_t [DEPTH-1:0][WR_WORDS-1:0] buf_q;
+    bufptr_t wrptr_q, wrptr_d;
+    bufptr_t rdptr_q, rdptr_d;
+    occupancy_t used_q, used_d;
+    wordptr_t [DEPTH-1:0] words_q, words_d;
+    logic full, empty;
+    logic shift;
+    // }}}
+
+    // Control-Path
+    // {{{
+    assign full = (hpdcache_uint'(used_q) == DEPTH),
+           empty = (used_q == 0),
+           wok_o = ~full,
+           rok_o = ~empty;
+
+    always_comb
+    begin : ctrl_comb
+        automatic logic used_inc, used_dec;
+        automatic logic words_inc, words_reset;
+
+        wrptr_d = wrptr_q;
+        rdptr_d = rdptr_q;
+        words_d = words_q;
+        used_dec = 1'b0;
+        used_inc = 1'b0;
+        words_reset = 1'b0;
+        words_inc = 1'b0;
+        shift = 1'b0;
+
+        if (w_i && wok_o) begin
+            shift = 1'b1;
+            words_inc = (hpdcache_uint'(words_q[wrptr_q]) < (WR_WORDS-1));
+            if (hpdcache_uint'(words_q[wrptr_q]) == (WR_WORDS-1) || wlast_i) begin // entry complete: last word slot written, or wlast_i asserted
+                used_inc = 1'b1;
+                if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin
+                    wrptr_d = 0;
+                end else begin
+                    wrptr_d = wrptr_q + 1;
+                end
+            end
+        end
+
+        if (r_i && rok_o) begin
+            used_dec = 1'b1;
+            words_reset = 1'b1;
+            if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin
+                rdptr_d = 0;
+            end else begin
+                rdptr_d = rdptr_q + 1;
+            end
+        end
+
+        case ({used_inc, used_dec})
+            2'b10 : used_d = used_q + 1;
+            2'b01 : used_d = used_q - 1;
+            default: used_d = used_q;
+        endcase
+
+        if (words_inc) words_d[wrptr_q] = words_q[wrptr_q] + 1;
+        if (words_reset) words_d[rdptr_q] = 0;
+    end
+
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : ctrl_ff
+        if (!rst_ni) begin
+            rdptr_q <= 0;
+            wrptr_q <= 0;
+            used_q <= 0;
+            words_q <= '0;
+        end else begin
+            rdptr_q <= rdptr_d;
+            wrptr_q <= wrptr_d;
+            used_q <= used_d;
+            words_q <= words_d;
+        end
+    end
+    // }}}
+
+    // Data-Path
+    // {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : buf_ff
+        if (!rst_ni) begin
+            buf_q <= '0;
+        end else begin
+            if (shift) begin
+                buf_q[wrptr_q][words_q[wrptr_q]] <= wdata_i; // store the narrow word in the slot selected by the entry's word counter
+            end
+        end
+    end
+
+    assign rdata_o = buf_q[rdptr_q];
+    // }}}
+
+    // Assertions
+    // {{{
+    // pragma translate_off
+    initial
+    begin : initial_assertions
+        assert (DEPTH > 0) else $error("DEPTH must be greater than 0");
+        assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0");
+        assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0");
+        assert (WR_WIDTH < RD_WIDTH) else $error("WR_WIDTH must be less to RD_WIDTH");
+        assert ((RD_WIDTH % WR_WIDTH) == 0) else $error("RD_WIDTH must be a multiple WR_WIDTH");
+    end
+    // pragma translate_on
+    // }}}
+// }}}
+endmodule
+// }}}
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv
new file mode 100644
index 0000000..3be21e0
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier:
Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Simple demultiplexer (data_i is driven on the selected output, all other outputs are zero) + * History : + */ +module hpdcache_demux +// Parameters +// {{{ +#( + // Number of outputs + parameter int unsigned NOUTPUT = 0, + + // Width in bits of the input and of each output + parameter int unsigned DATA_WIDTH = 0, + + // Selector signal is one-hot encoded + parameter bit ONE_HOT_SEL = 0, + + // Compute the width of the selection signal + localparam int unsigned NOUTPUT_LOG2 = $clog2(NOUTPUT), + localparam int unsigned SEL_WIDTH = ONE_HOT_SEL ? NOUTPUT : NOUTPUT_LOG2, + + localparam type data_t = logic [DATA_WIDTH-1:0], + localparam type sel_t = logic [SEL_WIDTH-1:0] +) +// }}} + +// Ports +// {{{ +( + input data_t data_i, + input sel_t sel_i, + output data_t [NOUTPUT-1:0] data_o +); +// }}} + + generate + always_comb + begin : demux_comb + for (int unsigned i = 0; i < NOUTPUT; i++) begin + if (!ONE_HOT_SEL) begin + data_o[i] = (sel_t'(i) == sel_i) ? data_i : '0; + end else begin + data_o[i] = sel_i[i] ?
data_i : '0; + end + end + end + endgenerate +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv new file mode 100644 index 0000000..ba3be5f --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv @@ -0,0 +1,167 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : FIFO buffer (using registers); FIFO_DEPTH == 1 instantiates hpdcache_sync_buffer instead + * History : + */ +module hpdcache_fifo_reg + // Parameters + // {{{ +#( + parameter int unsigned FIFO_DEPTH = 0, + parameter bit FEEDTHROUGH = 1'b0, + parameter type fifo_data_t = logic +) + // }}} + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + input logic w_i, + output logic wok_o, + input fifo_data_t wdata_i, + input logic r_i, + output logic rok_o, + output fifo_data_t rdata_o +); + // }}} + + /* + * Single-entry FIFO buffer -> synchronization buffer + */ + if (FIFO_DEPTH == 1) begin : gen_sync_buffer + hpdcache_sync_buffer #( + .FEEDTHROUGH (FEEDTHROUGH), + .data_t (fifo_data_t) + ) i_sync_buffer ( + .clk_i, + .rst_ni, + .w_i, + .wok_o, + .wdata_i, + .r_i, + .rok_o, + .rdata_o + ); + + /* + * Multi-entry FIFO buffer + */ + end else if (FIFO_DEPTH > 0) begin : gen_fifo + // Declaration of constants, types and functions + // {{{ + typedef logic unsigned [$clog2(FIFO_DEPTH)-1:0] fifo_addr_t; + // }}} + + // Declaration of internal wires and registers + // {{{ + fifo_data_t [FIFO_DEPTH-1:0] fifo_mem_q; + fifo_addr_t rptr_q, rptr_d; // read pointer + fifo_addr_t wptr_q, wptr_d; // write pointer + logic crossover_q, crossover_d; // write pointer has wrapped (set on write wrap, cleared on read wrap) + logic rexec, wexec; + logic rptr_max, wptr_max; + logic match_ptr; + logic empty, full; + // }}} + + // Global control signals + // {{{ + assign match_ptr = (wptr_q == rptr_q); + + assign empty = match_ptr & ~crossover_q, + full = match_ptr & crossover_q; + + assign rok_o = ~empty | (FEEDTHROUGH & w_i), + wok_o = ~full | (FEEDTHROUGH & r_i); + + assign rexec = r_i & ~empty, + wexec = w_i & (( FEEDTHROUGH & ((empty & ~r_i) | (full & r_i) | (~full & ~empty))) | + (~FEEDTHROUGH & ~full)); + + // }}} + + // Control of read and write pointers + // {{{ + assign rptr_max = (rptr_q == fifo_addr_t'(FIFO_DEPTH-1)); + assign wptr_max = (wptr_q == fifo_addr_t'(FIFO_DEPTH-1)); + +
always_comb + begin : fifo_ctrl_comb + rptr_d = rptr_q; + wptr_d = wptr_q; + crossover_d = crossover_q; + + if (rexec) begin + rptr_d = rptr_max ? 0 : rptr_q + 1; + end + + if (wexec) begin + wptr_d = wptr_max ? 0 : wptr_q + 1; + end + + if (wexec && wptr_max) begin + crossover_d = 1'b1; + end else if (rexec && rptr_max) begin + crossover_d = 1'b0; + end + end + // }}} + + // FIFO buffer memory management + // {{{ + always_ff @(posedge clk_i) + begin + if (wexec) fifo_mem_q[wptr_q] <= wdata_i; + end + + assign rdata_o = FEEDTHROUGH && empty ? wdata_i : fifo_mem_q[rptr_q]; + // }}} + + // Setting of internal state + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + rptr_q <= 0; + wptr_q <= 0; + crossover_q <= 1'b0; + end else begin + rptr_q <= rptr_d; + wptr_q <= wptr_d; + crossover_q <= crossover_d; + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + rptr_ahead_wptr_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + ((rptr_q <= wptr_q) && !crossover_q) || + ((rptr_q >= wptr_q) && crossover_q)) else + $error("fifo: read pointer is ahead of the write pointer"); + // pragma translate_on + // }}} + end +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv new file mode 100644 index 0000000..bbd8d90 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv @@ -0,0 +1,85 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Fixed-Priority Arbiter (the grant is registered and held while a request waits for ready_i) + * History : + */ +module hpdcache_fxarb + // Parameters + // {{{ +#( + // Number of requesters + parameter int unsigned N = 0 +) + // }}} + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + input logic [N-1:0] req_i, + output logic [N-1:0] gnt_o, + input logic ready_i +); + // }}} + + // Declaration of internal wires and registers + // {{{ + logic [N-1:0] gnt_q, gnt; + logic wait_q; + // }}} + + // Compute the grant vector + // {{{ + hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i (.val_i(req_i), .val_o(gnt)); + // }}} + + // Compute the output grant vector + // {{{ + assign gnt_o = wait_q ?
gnt_q : gnt; + // }}} + + // Setting of internal state + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + wait_q <= 1'b0; + gnt_q <= '0; + end else begin + wait_q <= ~ready_i & (wait_q | (|req_i)); + if (!ready_i && !wait_q && (|req_i)) begin + gnt_q <= gnt; + end + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni) + $onehot0(gnt_o)) else $error("arbiter: granting more than one requester"); + // pragma translate_on + // }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv new file mode 100644 index 0000000..d78e1eb --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv @@ -0,0 +1,79 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Author(s) : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Simple multiplexor (selector is binary or one-hot encoded, see ONE_HOT_SEL) + * History : + */ +module hpdcache_mux + // Parameters + // {{{ +#( + // Number of inputs + parameter int unsigned NINPUT = 0, + + // Width in bits of each input + parameter int unsigned DATA_WIDTH = 0, + + // Selector signal is one-hot encoded + parameter bit ONE_HOT_SEL = 0, + + // Compute the width of the selection signal + localparam int unsigned NINPUT_LOG2 = $clog2(NINPUT), + localparam int unsigned SEL_WIDTH = ONE_HOT_SEL ? NINPUT : NINPUT_LOG2, + + localparam type data_t = logic [DATA_WIDTH-1:0], + localparam type sel_t = logic [SEL_WIDTH-1:0] +) + // }}} + + // Ports + // {{{ +( + input data_t [NINPUT-1:0] data_i, + input sel_t sel_i, + output data_t data_o +); + // }}} + + generate + // Selector is one-hot encoded + if (ONE_HOT_SEL == 1) begin + always_comb + begin : data_out_mux_comb + data_o = '0; + for (int unsigned i = 0; i < NINPUT; i++) begin + data_o |= sel_i[i] ? data_i[i] : '0; + end + end + + // Selector is binary encoded + end else begin + always_comb + begin : data_out_mux_comb + data_o = '0; + for (int unsigned i = 0; i < NINPUT; i++) begin + data_o |= (i == int'(sel_i)) ?
data_i[i] : '0; + end + end + end + endgenerate +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv new file mode 100644 index 0000000..36fe5bc --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv @@ -0,0 +1,43 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Priority One-hot Encoder (val_o keeps only the lowest-index set bit of val_i) + * History : + */ +module hpdcache_prio_1hot_encoder + // Parameters +#( + parameter int unsigned N = 0 +) + // Ports +( + input logic [N-1:0] val_i, + output logic [N-1:0] val_o +); + + generate + assign val_o[0] = val_i[0]; + for (genvar i = 1; i < int'(N); i++) begin : prio_gen + assign val_o[i] = val_i[i] & ~(|val_i[i-1:0]); + end + endgenerate +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv new file mode 100644 index 0000000..184e6fb --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv @@ -0,0 +1,63 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : 1RW register bank with write byte enable (registered read data; rst_n is not used) + * History : + */ +module hpdcache_regbank_wbyteenable_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE/8-1:0] wbyteenable, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + for (int i = 0; i < DATA_SIZE/8; i++) begin + if (wbyteenable[i]) mem[addr][i*8 +: 8] <= wdata[i*8 +: 8]; + end + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_regbank_wbyteenable_1rw diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv new file mode 100644 index 0000000..e185bc4 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv @@ -0,0 +1,61 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : 1RW register bank with write bit mask (registered read data; rst_n is not used) + * History : + */ +module hpdcache_regbank_wmask_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE-1:0] wmask, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + mem[addr] <= (mem[addr] & ~wmask) | (wdata & wmask); + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_regbank_wmask_1rw diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv new file mode 100644 index 0000000..9595725 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv @@ -0,0 +1,121 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in
compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/** + * Author(s) : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Round-Robin Arbiter (requesters at indexes after the last granted one are served first, otherwise the lowest-index requester) + * Based on design from + * http://www.rtlery.com/articles/how-design-round-robin-arbiter + * History : + */ +module hpdcache_rrarb + // Parameters + // {{{ +#( + // Number of requesters + parameter int unsigned N = 0 +) + // }}} + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + input logic [N-1:0] req_i, + output logic [N-1:0] gnt_o, + input logic ready_i +); + // }}} + + // Declaration of internal wires and registers + // {{{ + logic [N-1:0] gnt_q, gnt; + logic [N-1:0] nxt; + logic wait_q; + logic [N-1:0] mask, gnt_msk, gnt_nomsk; + logic pending; + genvar gen_i; + // }}} + + // Elaboration-time assertions + // {{{ + // pragma translate_off + generate + if (N == 0) $error("N must be greater than 0"); + endgenerate + // pragma translate_on + // }}} + + // Compute the thermometer mask vector + // {{{ + generate + if (N > 1) begin : gen_nxt_gt_1 + assign nxt = {gnt_q[N-2:0], gnt_q[N-1]}; + end else begin : gen_nxt_1 + assign nxt = gnt_q[0]; + end + + for (gen_i = 0; gen_i < int'(N); gen_i++) begin : gen_mask + assign mask[gen_i] = |nxt[gen_i:0]; + end + endgenerate + // }}} + + // Compute the grant vector + // {{{ + hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i (.val_i(req_i & mask), .val_o(gnt_msk)); + hpdcache_prio_1hot_encoder #(.N(N)) prio_nomsk_i (.val_i(req_i) , .val_o(gnt_nomsk)); + assign gnt = |gnt_msk ?
gnt_msk : gnt_nomsk; + // }}} + + // Compute the output grant vector + // {{{ + assign gnt_o = wait_q ? gnt_q : gnt; + // }}} + + // Setting of internal state + // {{{ + assign pending = |req_i; + + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + wait_q <= 1'b0; + gnt_q <= {1'b1, {N-1{1'b0}}}; + end else begin + wait_q <= ~ready_i & (wait_q | pending); + if (!wait_q && pending) begin + gnt_q <= gnt; + end + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni) + $onehot0(gnt)) else $error("arbiter: granting more than one requester"); + gnt_q_exactly_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni) + $onehot(gnt_q)) else $error("arbiter: grant state is not one-hot"); + // pragma translate_on + // }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv new file mode 100644 index 0000000..d4cab7d --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv @@ -0,0 +1,56 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Wrapper for Behavioral SRAM macros (instantiates hpdcache_sram_1rw) + * History : + */ +module hpdcache_sram +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + output logic [DATA_SIZE-1:0] rdata +); + + hpdcache_sram_1rw #( + .ADDR_SIZE(ADDR_SIZE), + .DATA_SIZE(DATA_SIZE), + .DEPTH(DEPTH) + ) ram_i ( + .clk, + .rst_n, + .cs, + .we, + .addr, + .wdata, + .rdata + ); + +endmodule : hpdcache_sram diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv new file mode 100644 index 0000000..43bdb45 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv @@ -0,0 +1,58 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Wrapper for 1RW SRAM macros implementing a write byte enable (instantiates hpdcache_sram_wbyteenable_1rw) + * History : + */ +module hpdcache_sram_wbyteenable +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE/8-1:0] wbyteenable, + output logic [DATA_SIZE-1:0] rdata +); + + hpdcache_sram_wbyteenable_1rw #( + .ADDR_SIZE(ADDR_SIZE), + .DATA_SIZE(DATA_SIZE), + .DEPTH(DEPTH) + ) ram_i ( + .clk, + .rst_n, + .cs, + .we, + .addr, + .wdata, + .wbyteenable, + .rdata + ); + +endmodule : hpdcache_sram_wbyteenable diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv new file mode 100644 index 0000000..a4771e3 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv @@ -0,0 +1,58 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Wrapper for 1RW SRAM macros implementing write bit mask (instantiates hpdcache_sram_wmask_1rw) + * History : + */ +module hpdcache_sram_wmask +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE-1:0] wmask, + output logic [DATA_SIZE-1:0] rdata +); + + hpdcache_sram_wmask_1rw #( + .ADDR_SIZE(ADDR_SIZE), + .DATA_SIZE(DATA_SIZE), + .DEPTH(DEPTH) + ) ram_i ( + .clk, + .rst_n, + .cs, + .we, + .addr, + .wdata, + .wmask, + .rdata + ); + +endmodule : hpdcache_sram_wmask diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv new file mode 100644 index 0000000..863c588 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv @@ -0,0 +1,89 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : October, 2023 + * Description : Synchronization buffer (one data register plus a valid flag; FEEDTHROUGH forwards wdata_i while the buffer is empty) + * History : + */ +module hpdcache_sync_buffer + // Parameters + // {{{ +#( + parameter bit FEEDTHROUGH = 1'b0, + parameter type data_t = logic +) + // }}} + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + input logic w_i, + output logic wok_o, + input data_t wdata_i, + input logic r_i, + output logic rok_o, + output data_t rdata_o +); + // }}} + + // Declaration of internal wires and registers + // {{{ + data_t buf_q; + logic buf_we; + logic valid_q, valid_d; + // }}} + + // Global control signals + // {{{ + assign rok_o = valid_q | (FEEDTHROUGH & w_i), + wok_o = ~valid_q | (FEEDTHROUGH & r_i); + + assign buf_we = w_i & ((FEEDTHROUGH & ~(valid_q ^ r_i)) | (~FEEDTHROUGH & ~valid_q)); + // }}} + + // Control of buffer + // {{{ + assign valid_d = buf_we | (valid_q & ~r_i); + // }}} + + // FIFO buffer memory management + // {{{ + always_ff @(posedge clk_i) + begin + if (buf_we) buf_q <= wdata_i; + end + + assign rdata_o = FEEDTHROUGH && !valid_q ?
wdata_i : buf_q; + // }}} + + // Setting of internal state + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + valid_q <= 1'b0; + end else begin + valid_q <= valid_d; + end + end + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv new file mode 100644 index 0000000..7288c73 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv @@ -0,0 +1,60 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : SRAM behavioral model (registered read data; rst_n is not used) + * History : + */ +module hpdcache_sram_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + mem[addr] <= wdata; + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_sram_1rw diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv new file mode 100644 index 0000000..0e5e225 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv @@ -0,0 +1,63 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Behavioral model of a 1RW SRAM with write byte enable (each wbyteenable bit gates the write of one 8-bit lane of wdata) + * History : + */ +module hpdcache_sram_wbyteenable_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, /* not referenced anywhere in this model */ + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE/8-1:0] wbyteenable, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration (DEPTH words of DATA_SIZE bits each) + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array. Nothing changes while cs is low. When cs is high, rdata is loaded on every cycle; the byte writes are non-blocking, so on a write cycle rdata captures the word stored before the write. + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + for (int i = 0; i < DATA_SIZE/8; i++) begin /* byte lanes 0 .. DATA_SIZE/8-1; lanes whose enable bit is 0 keep their stored value */ + if (wbyteenable[i]) mem[addr][i*8 +: 8] <= wdata[i*8 +: 8]; + end + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_sram_wbyteenable_1rw diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv new file mode 100644 index 0000000..5058ba2 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv @@ -0,0 +1,61 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Behavioral model of a 1RW SRAM with write bit mask (a wmask bit at 1 takes the wdata bit, a wmask bit at 0 keeps the stored bit) + * History : + */ +module hpdcache_sram_wmask_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, /* not referenced anywhere in this model */ + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE-1:0] wmask, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration (DEPTH words of DATA_SIZE bits each) + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array. Nothing changes while cs is low. When cs is high, rdata is loaded on every cycle; the masked write is non-blocking, so on a write cycle rdata captures the word stored before the write. + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + mem[addr] <= (mem[addr] & ~wmask) | (wdata & wmask); + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_sram_wmask_1rw diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv new file mode 100644 index 0000000..861e60c --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv @@ -0,0 +1,658 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at
your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache top (structural only: instantiates and wires hpdcache_core_arbiter, hpdcache_ctrl, hpdcache_wbuf_wrapper, hpdcache_miss_handler, hpdcache_uncached and hpdcache_cmo, plus elaboration-time parameter checks) + * History : + */ +module hpdcache +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int NREQUESTERS = 1, + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + parameter type hpdcache_mem_resp_w_t = logic +) + // }}} + + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Force the write buffer to send all pending writes + input logic wbuf_flush_i, + + // Core request interface + // 1st cycle + input logic core_req_valid_i [NREQUESTERS-1:0], + output logic core_req_ready_o [NREQUESTERS-1:0], + input hpdcache_req_t core_req_i [NREQUESTERS-1:0], + // 2nd cycle + input logic core_req_abort_i [NREQUESTERS-1:0], + input hpdcache_tag_t core_req_tag_i [NREQUESTERS-1:0], + input hpdcache_pma_t core_req_pma_i [NREQUESTERS-1:0], + + // Core response interface + output logic core_rsp_valid_o [NREQUESTERS-1:0], + output hpdcache_rsp_t core_rsp_o [NREQUESTERS-1:0], + + // Miss read interface + input logic mem_req_miss_read_ready_i, + output logic mem_req_miss_read_valid_o, + output hpdcache_mem_req_t mem_req_miss_read_o, + + output logic mem_resp_miss_read_ready_o, + input logic mem_resp_miss_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_miss_read_i, + + // Write-buffer write
interface + input logic mem_req_wbuf_write_ready_i, + output logic mem_req_wbuf_write_valid_o, + output hpdcache_mem_req_t mem_req_wbuf_write_o, + + input logic mem_req_wbuf_write_data_ready_i, + output logic mem_req_wbuf_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_wbuf_write_data_o, + + output logic mem_resp_wbuf_write_ready_o, + input logic mem_resp_wbuf_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_wbuf_write_i, + + // Uncached read interface + input logic mem_req_uc_read_ready_i, + output logic mem_req_uc_read_valid_o, + output hpdcache_mem_req_t mem_req_uc_read_o, + + output logic mem_resp_uc_read_ready_o, + input logic mem_resp_uc_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_uc_read_i, + + // Uncached write interface + input logic mem_req_uc_write_ready_i, + output logic mem_req_uc_write_valid_o, + output hpdcache_mem_req_t mem_req_uc_write_o, + + input logic mem_req_uc_write_data_ready_i, + output logic mem_req_uc_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_uc_write_data_o, + + output logic mem_resp_uc_write_ready_o, + input logic mem_resp_uc_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_uc_write_i, + + // Performance events + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o, + output logic evt_stall_o, + + // Status interface + output logic wbuf_empty_o, + + // Configuration interface + input logic cfg_enable_i, + input wbuf_timecnt_t cfg_wbuf_threshold_i, + input logic cfg_wbuf_reset_timecnt_on_write_i, + input logic cfg_wbuf_sequential_waw_i, + input logic cfg_wbuf_inhibit_write_coalescing_i, + input logic cfg_prefetch_updt_plru_i, + input logic cfg_error_on_cacheable_amo_i, + input logic cfg_rtab_single_entry_i +);
+ + // }}} + + // Declaration of internal signals + // {{{ + logic refill_req_valid; + logic refill_req_ready; + logic refill_busy; + logic refill_updt_plru; + hpdcache_set_t refill_set; + hpdcache_dir_entry_t refill_dir_entry; + hpdcache_way_vector_t refill_read_victim_way; + hpdcache_way_vector_t refill_write_victim_way; + logic refill_write_dir; + logic refill_write_data; + hpdcache_word_t refill_word; + hpdcache_refill_data_t refill_data; + logic refill_core_rsp_valid; + hpdcache_rsp_t refill_core_rsp; + hpdcache_nline_t refill_nline; + logic refill_updt_rtab; + + logic miss_mshr_empty; + logic miss_mshr_check; + mshr_set_t miss_mshr_check_set; + mshr_tag_t miss_mshr_check_tag; + logic miss_mshr_hit; + logic miss_mshr_alloc_cs; + logic miss_mshr_alloc; + logic miss_mshr_alloc_ready; + logic miss_mshr_alloc_full; + hpdcache_nline_t miss_mshr_alloc_nline; + hpdcache_req_tid_t miss_mshr_alloc_tid; + hpdcache_req_sid_t miss_mshr_alloc_sid; + hpdcache_word_t miss_mshr_alloc_word; + logic miss_mshr_alloc_need_rsp; + logic miss_mshr_alloc_is_prefetch; + + logic wbuf_flush_all; + logic wbuf_write; + logic wbuf_write_ready; + wbuf_addr_t wbuf_write_addr; + wbuf_data_t wbuf_write_data; + wbuf_be_t wbuf_write_be; + logic wbuf_write_uncacheable; + logic wbuf_read_hit; + logic wbuf_read_flush_hit; + hpdcache_req_addr_t wbuf_rtab_addr; + logic wbuf_rtab_is_read; + logic wbuf_rtab_hit_open; + logic wbuf_rtab_hit_pend; + logic wbuf_rtab_hit_sent; + logic wbuf_rtab_not_ready; + + logic uc_ready; + logic uc_req_valid; + hpdcache_uc_op_t uc_req_op; + hpdcache_req_addr_t uc_req_addr; + hpdcache_req_size_t uc_req_size; + hpdcache_req_data_t uc_req_data; + hpdcache_req_be_t uc_req_be; + logic uc_req_uncacheable; + hpdcache_req_sid_t uc_req_sid; + hpdcache_req_tid_t uc_req_tid; + logic uc_req_need_rsp; + logic uc_wbuf_flush_all; + logic uc_dir_amo_match; + hpdcache_set_t uc_dir_amo_match_set; + hpdcache_tag_t uc_dir_amo_match_tag; + logic uc_dir_amo_update_plru; +
hpdcache_way_vector_t uc_dir_amo_hit_way; + logic uc_data_amo_write; + logic uc_data_amo_write_enable; + hpdcache_set_t uc_data_amo_write_set; + hpdcache_req_size_t uc_data_amo_write_size; + hpdcache_word_t uc_data_amo_write_word; + logic [63:0] uc_data_amo_write_data; + logic [7:0] uc_data_amo_write_be; + logic uc_lrsc_snoop; + hpdcache_req_addr_t uc_lrsc_snoop_addr; + hpdcache_req_size_t uc_lrsc_snoop_size; + logic uc_core_rsp_ready; + logic uc_core_rsp_valid; + hpdcache_rsp_t uc_core_rsp; + + logic cmo_req_valid; + logic cmo_ready; + hpdcache_cmoh_op_t cmo_req_op; + hpdcache_req_addr_t cmo_req_addr; + hpdcache_req_data_t cmo_req_wdata; + logic cmo_wbuf_flush_all; + logic cmo_dir_check; + hpdcache_set_t cmo_dir_check_set; + hpdcache_tag_t cmo_dir_check_tag; + hpdcache_way_vector_t cmo_dir_check_hit_way; + logic cmo_dir_inval; + hpdcache_set_t cmo_dir_inval_set; + hpdcache_way_vector_t cmo_dir_inval_way; + + logic rtab_empty; + logic ctrl_empty; + + logic core_rsp_valid; + hpdcache_rsp_t core_rsp; + + logic arb_req_valid; + logic arb_req_ready; + hpdcache_req_t arb_req; + logic arb_abort; + hpdcache_tag_t arb_tag; + hpdcache_pma_t arb_pma; + + localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_READ_ID = {HPDcacheMemIdWidth{1'b1}}; /* all-ones ID, passed to the uncached handler as mem_read_id_i */ + localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_WRITE_ID = {HPDcacheMemIdWidth{1'b1}}; /* same all-ones value, passed as mem_write_id_i */ + // }}} + + // Requesters arbiter + // {{{ + hpdcache_core_arbiter #( + .NREQUESTERS (NREQUESTERS) + ) core_req_arbiter_i ( + .clk_i, + .rst_ni, + + .core_req_valid_i, + .core_req_ready_o, + .core_req_i, + .core_req_abort_i, + .core_req_tag_i, + .core_req_pma_i, + + .core_rsp_valid_i (core_rsp_valid), + .core_rsp_i (core_rsp), + .core_rsp_valid_o, + .core_rsp_o, + + .arb_req_valid_o (arb_req_valid), + .arb_req_ready_i (arb_req_ready), + .arb_req_o (arb_req), + .arb_abort_o (arb_abort), + .arb_tag_o (arb_tag), + .arb_pma_o (arb_pma) + ); + // }}} + + // HPDcache controller + // {{{ + hpdcache_ctrl hpdcache_ctrl_i( + .clk_i, + .rst_ni, + +
.core_req_valid_i (arb_req_valid), + .core_req_ready_o (arb_req_ready), + .core_req_i (arb_req), + .core_req_abort_i (arb_abort), + .core_req_tag_i (arb_tag), + .core_req_pma_i (arb_pma), + + .core_rsp_valid_o (core_rsp_valid), + .core_rsp_o (core_rsp), + + .wbuf_flush_i, + + .cachedir_hit_o (/* unused */), + + .miss_mshr_check_o (miss_mshr_check), + .miss_mshr_check_set_o (miss_mshr_check_set), + .miss_mshr_check_tag_o (miss_mshr_check_tag), + .miss_mshr_alloc_o (miss_mshr_alloc), + .miss_mshr_alloc_cs_o (miss_mshr_alloc_cs), + .miss_mshr_alloc_ready_i (miss_mshr_alloc_ready), + .miss_mshr_alloc_full_i (miss_mshr_alloc_full), + .miss_mshr_alloc_nline_o (miss_mshr_alloc_nline), + .miss_mshr_alloc_tid_o (miss_mshr_alloc_tid), + .miss_mshr_alloc_sid_o (miss_mshr_alloc_sid), + .miss_mshr_alloc_word_o (miss_mshr_alloc_word), + .miss_mshr_alloc_need_rsp_o (miss_mshr_alloc_need_rsp), + .miss_mshr_alloc_is_prefetch_o (miss_mshr_alloc_is_prefetch), + .miss_mshr_hit_i (miss_mshr_hit), + + .refill_req_valid_i (refill_req_valid), + .refill_req_ready_o (refill_req_ready), + .refill_busy_i (refill_busy), + .refill_updt_plru_i (refill_updt_plru), + .refill_set_i (refill_set), + .refill_dir_entry_i (refill_dir_entry), + .refill_victim_way_o (refill_read_victim_way), + .refill_victim_way_i (refill_write_victim_way), + .refill_write_dir_i (refill_write_dir), + .refill_write_data_i (refill_write_data), + .refill_word_i (refill_word), + .refill_data_i (refill_data), + .refill_core_rsp_valid_i (refill_core_rsp_valid), + .refill_core_rsp_i (refill_core_rsp), + .refill_nline_i (refill_nline), + .refill_updt_rtab_i (refill_updt_rtab), + + .wbuf_empty_i (wbuf_empty_o), + .wbuf_flush_all_o (wbuf_flush_all), + .wbuf_write_o (wbuf_write), + .wbuf_write_ready_i (wbuf_write_ready), + .wbuf_write_addr_o (wbuf_write_addr), + .wbuf_write_data_o (wbuf_write_data), + .wbuf_write_be_o (wbuf_write_be), + .wbuf_write_uncacheable_o (wbuf_write_uncacheable), + .wbuf_read_hit_i (wbuf_read_hit), +
.wbuf_read_flush_hit_o (wbuf_read_flush_hit), + .wbuf_rtab_addr_o (wbuf_rtab_addr), + .wbuf_rtab_is_read_o (wbuf_rtab_is_read), + .wbuf_rtab_hit_open_i (wbuf_rtab_hit_open), + .wbuf_rtab_hit_pend_i (wbuf_rtab_hit_pend), + .wbuf_rtab_hit_sent_i (wbuf_rtab_hit_sent), + .wbuf_rtab_not_ready_i (wbuf_rtab_not_ready), + + .uc_busy_i (~uc_ready), + .uc_lrsc_snoop_o (uc_lrsc_snoop), + .uc_lrsc_snoop_addr_o (uc_lrsc_snoop_addr), + .uc_lrsc_snoop_size_o (uc_lrsc_snoop_size), + .uc_req_valid_o (uc_req_valid), + .uc_req_op_o (uc_req_op), + .uc_req_addr_o (uc_req_addr), + .uc_req_size_o (uc_req_size), + .uc_req_data_o (uc_req_data), + .uc_req_be_o (uc_req_be), + .uc_req_uc_o (uc_req_uncacheable), + .uc_req_sid_o (uc_req_sid), + .uc_req_tid_o (uc_req_tid), + .uc_req_need_rsp_o (uc_req_need_rsp), + .uc_wbuf_flush_all_i (uc_wbuf_flush_all), + .uc_dir_amo_match_i (uc_dir_amo_match), + .uc_dir_amo_match_set_i (uc_dir_amo_match_set), + .uc_dir_amo_match_tag_i (uc_dir_amo_match_tag), + .uc_dir_amo_update_plru_i (uc_dir_amo_update_plru), + .uc_dir_amo_hit_way_o (uc_dir_amo_hit_way), + .uc_data_amo_write_i (uc_data_amo_write), + .uc_data_amo_write_enable_i (uc_data_amo_write_enable), + .uc_data_amo_write_set_i (uc_data_amo_write_set), + .uc_data_amo_write_size_i (uc_data_amo_write_size), + .uc_data_amo_write_word_i (uc_data_amo_write_word), + .uc_data_amo_write_data_i (uc_data_amo_write_data), + .uc_data_amo_write_be_i (uc_data_amo_write_be), + .uc_core_rsp_ready_o (uc_core_rsp_ready), + .uc_core_rsp_valid_i (uc_core_rsp_valid), + .uc_core_rsp_i (uc_core_rsp), + + .cmo_busy_i (~cmo_ready), + .cmo_req_valid_o (cmo_req_valid), + .cmo_req_op_o (cmo_req_op), + .cmo_req_addr_o (cmo_req_addr), + .cmo_req_wdata_o (cmo_req_wdata), + .cmo_wbuf_flush_all_i (cmo_wbuf_flush_all), + .cmo_dir_check_i (cmo_dir_check), + .cmo_dir_check_set_i (cmo_dir_check_set), + .cmo_dir_check_tag_i (cmo_dir_check_tag), + .cmo_dir_check_hit_way_o (cmo_dir_check_hit_way), + .cmo_dir_inval_i (cmo_dir_inval), +
.cmo_dir_inval_set_i (cmo_dir_inval_set), + .cmo_dir_inval_way_i (cmo_dir_inval_way), + + .rtab_empty_o (rtab_empty), + .ctrl_empty_o (ctrl_empty), + + .cfg_enable_i, + .cfg_rtab_single_entry_i, + + .evt_cache_write_miss_o, + .evt_cache_read_miss_o, + .evt_uncached_req_o, + .evt_cmo_req_o, + .evt_write_req_o, + .evt_read_req_o, + .evt_prefetch_req_o, + .evt_req_on_hold_o, + .evt_rtab_rollback_o, + .evt_stall_refill_o, + .evt_stall_o + ); + // }}} + + // HPDcache write-buffer + // {{{ + hpdcache_wbuf_wrapper #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t) + ) hpdcache_wbuf_i( + .clk_i, + .rst_ni, + + .empty_o (wbuf_empty_o), + .full_o (/* unused */), + .flush_all_i (wbuf_flush_all), + + .cfg_threshold_i (cfg_wbuf_threshold_i), + .cfg_reset_timecnt_on_write_i (cfg_wbuf_reset_timecnt_on_write_i), + .cfg_sequential_waw_i (cfg_wbuf_sequential_waw_i), + .cfg_inhibit_write_coalescing_i (cfg_wbuf_inhibit_write_coalescing_i), + + .write_i (wbuf_write), + .write_ready_o (wbuf_write_ready), + .write_addr_i (wbuf_write_addr), + .write_data_i (wbuf_write_data), + .write_be_i (wbuf_write_be), + .write_uc_i (wbuf_write_uncacheable), + + .read_addr_i (wbuf_write_addr), /* the read lookup is driven by the same address signal as write_addr_i */ + .read_hit_o (wbuf_read_hit), + .read_flush_hit_i (wbuf_read_flush_hit), + + .replay_addr_i (wbuf_rtab_addr), + .replay_is_read_i (wbuf_rtab_is_read), + .replay_open_hit_o (wbuf_rtab_hit_open), + .replay_pend_hit_o (wbuf_rtab_hit_pend), + .replay_sent_hit_o (wbuf_rtab_hit_sent), + .replay_not_ready_o (wbuf_rtab_not_ready), + + .mem_req_write_ready_i (mem_req_wbuf_write_ready_i), + .mem_req_write_valid_o (mem_req_wbuf_write_valid_o), + .mem_req_write_o (mem_req_wbuf_write_o), + + .mem_req_write_data_ready_i (mem_req_wbuf_write_data_ready_i), + .mem_req_write_data_valid_o (mem_req_wbuf_write_data_valid_o), + .mem_req_write_data_o
(mem_req_wbuf_write_data_o), + + .mem_resp_write_ready_o (mem_resp_wbuf_write_ready_o), + .mem_resp_write_valid_i (mem_resp_wbuf_write_valid_i), + .mem_resp_write_i (mem_resp_wbuf_write_i) + ); + // }}} + + // Miss handler + // {{{ + hpdcache_miss_handler #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t) + ) hpdcache_miss_handler_i( + .clk_i, + .rst_ni, + + .mshr_empty_o (miss_mshr_empty), + .mshr_full_o (/* unused */), + + .cfg_prefetch_updt_plru_i, + + .mshr_check_i (miss_mshr_check), + .mshr_check_set_i (miss_mshr_check_set), + .mshr_check_tag_i (miss_mshr_check_tag), + .mshr_check_hit_o (miss_mshr_hit), + + .mshr_alloc_ready_o (miss_mshr_alloc_ready), + .mshr_alloc_i (miss_mshr_alloc), + .mshr_alloc_cs_i (miss_mshr_alloc_cs), + .mshr_alloc_full_o (miss_mshr_alloc_full), + .mshr_alloc_nline_i (miss_mshr_alloc_nline), + .mshr_alloc_tid_i (miss_mshr_alloc_tid), + .mshr_alloc_sid_i (miss_mshr_alloc_sid), + .mshr_alloc_word_i (miss_mshr_alloc_word), + .mshr_alloc_need_rsp_i (miss_mshr_alloc_need_rsp), + .mshr_alloc_is_prefetch_i (miss_mshr_alloc_is_prefetch), + + .refill_req_ready_i (refill_req_ready), + .refill_req_valid_o (refill_req_valid), + .refill_busy_o (refill_busy), + .refill_updt_plru_o (refill_updt_plru), + .refill_set_o (refill_set), + .refill_dir_entry_o (refill_dir_entry), + .refill_victim_way_i (refill_read_victim_way), + .refill_write_dir_o (refill_write_dir), + .refill_write_data_o (refill_write_data), + .refill_victim_way_o (refill_write_victim_way), + .refill_data_o (refill_data), + .refill_word_o (refill_word), + .refill_nline_o (refill_nline), + .refill_updt_rtab_o (refill_updt_rtab), + + .refill_core_rsp_valid_o (refill_core_rsp_valid), + .refill_core_rsp_o (refill_core_rsp), + + .mem_req_ready_i (mem_req_miss_read_ready_i), + .mem_req_valid_o (mem_req_miss_read_valid_o), + .mem_req_o (mem_req_miss_read_o), +
+ .mem_resp_ready_o (mem_resp_miss_read_ready_o), + .mem_resp_valid_i (mem_resp_miss_read_valid_i), + .mem_resp_i (mem_resp_miss_read_i) + ); + // }}} + + // Uncacheable request handler + // {{{ + hpdcache_uncached #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t) + ) hpdcache_uc_i( + .clk_i, + .rst_ni, + + .wbuf_empty_i (wbuf_empty_o), + .mshr_empty_i (miss_mshr_empty), + .rtab_empty_i (rtab_empty), + .ctrl_empty_i (ctrl_empty), + + .req_valid_i (uc_req_valid), + .req_ready_o (uc_ready), + .req_op_i (uc_req_op), + .req_addr_i (uc_req_addr), + .req_size_i (uc_req_size), + .req_data_i (uc_req_data), + .req_be_i (uc_req_be), + .req_uc_i (uc_req_uncacheable), + .req_sid_i (uc_req_sid), + .req_tid_i (uc_req_tid), + .req_need_rsp_i (uc_req_need_rsp), + + .wbuf_flush_all_o (uc_wbuf_flush_all), + + .dir_amo_match_o (uc_dir_amo_match), + .dir_amo_match_set_o (uc_dir_amo_match_set), + .dir_amo_match_tag_o (uc_dir_amo_match_tag), + .dir_amo_update_plru_o (uc_dir_amo_update_plru), + .dir_amo_hit_way_i (uc_dir_amo_hit_way), + + .data_amo_write_o (uc_data_amo_write), + .data_amo_write_enable_o (uc_data_amo_write_enable), + .data_amo_write_set_o (uc_data_amo_write_set), + .data_amo_write_size_o (uc_data_amo_write_size), + .data_amo_write_word_o (uc_data_amo_write_word), + .data_amo_write_data_o (uc_data_amo_write_data), + .data_amo_write_be_o (uc_data_amo_write_be), + + .lrsc_snoop_i (uc_lrsc_snoop), + .lrsc_snoop_addr_i (uc_lrsc_snoop_addr), + .lrsc_snoop_size_i (uc_lrsc_snoop_size), + + .core_rsp_ready_i (uc_core_rsp_ready), + .core_rsp_valid_o (uc_core_rsp_valid), + .core_rsp_o (uc_core_rsp), + + .mem_read_id_i (HPDCACHE_UC_READ_ID), + .mem_write_id_i (HPDCACHE_UC_WRITE_ID), + + .mem_req_read_ready_i (mem_req_uc_read_ready_i), +
.mem_req_read_valid_o (mem_req_uc_read_valid_o), + .mem_req_read_o (mem_req_uc_read_o), + + .mem_resp_read_ready_o (mem_resp_uc_read_ready_o), + .mem_resp_read_valid_i (mem_resp_uc_read_valid_i), + .mem_resp_read_i (mem_resp_uc_read_i), + + .mem_req_write_ready_i (mem_req_uc_write_ready_i), + .mem_req_write_valid_o (mem_req_uc_write_valid_o), + .mem_req_write_o (mem_req_uc_write_o), + + .mem_req_write_data_ready_i (mem_req_uc_write_data_ready_i), + .mem_req_write_data_valid_o (mem_req_uc_write_data_valid_o), + .mem_req_write_data_o (mem_req_uc_write_data_o), + + .mem_resp_write_ready_o (mem_resp_uc_write_ready_o), + .mem_resp_write_valid_i (mem_resp_uc_write_valid_i), + .mem_resp_write_i (mem_resp_uc_write_i), + + .cfg_error_on_cacheable_amo_i + ); /* }}} closes the fold opened at "Uncacheable request handler" */ + + // CMO Request Handler + // {{{ + hpdcache_cmo hpdcache_cmo_i( + .clk_i, + .rst_ni, + + .wbuf_empty_i (wbuf_empty_o), + .mshr_empty_i (miss_mshr_empty), + .rtab_empty_i (rtab_empty), + .ctrl_empty_i (ctrl_empty), + + .req_valid_i (cmo_req_valid), + .req_ready_o (cmo_ready), + .req_op_i (cmo_req_op), + .req_addr_i (cmo_req_addr), + .req_wdata_i (cmo_req_wdata), + + .wbuf_flush_all_o (cmo_wbuf_flush_all), + + .dir_check_o (cmo_dir_check), + .dir_check_set_o (cmo_dir_check_set), + .dir_check_tag_o (cmo_dir_check_tag), + .dir_check_hit_way_i (cmo_dir_check_hit_way), + + .dir_inval_o (cmo_dir_inval), + .dir_inval_set_o (cmo_dir_inval_set), + .dir_inval_way_o (cmo_dir_inval_way) + ); + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial begin /* one-shot checks of the package constants against the memory ID width and access widths */ + req_access_width_assert: + assert (HPDCACHE_REQ_WORDS <= HPDCACHE_ACCESS_WORDS) else + $error("req data width shall be l.e. to cache access width"); + refill_access_width_assert: + assert (HPDCACHE_CL_WORDS >= HPDCACHE_ACCESS_WORDS) else + $error("cache access width shall be l.e.
to cache-line width"); + miss_mem_id_width_assert: + assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_WAY_WIDTH + HPDCACHE_MSHR_SET_WIDTH)) else + $error("insufficient ID bits on the mem interface to transport misses"); + wbuf_mem_id_width_assert: + assert (HPDcacheMemIdWidth >= HPDCACHE_WBUF_DIR_PTR_WIDTH) else + $error("insufficient ID bits on the mem interface to transport writes"); + + end + // pragma translate_on + // }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv new file mode 100644 index 0000000..d233af1 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv @@ -0,0 +1,67 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : May, 2021 + * Description : HPDcache AMO computing unit (purely combinational: result_o is chosen from the 64-bit operands ld_data_i and st_data_i according to the flag set in op_i) + * History : + */ +module hpdcache_amo +import hpdcache_pkg::*; +// Ports +// {{{ +( + input logic [63:0] ld_data_i, + input logic [63:0] st_data_i, + input hpdcache_uc_op_t op_i, + output logic [63:0] result_o +); +// }}} + + logic signed [63:0] ld_data; /* signed view of ld_data_i */ + logic signed [63:0] st_data; /* signed view of st_data_i */ + logic signed [63:0] sum; + logic ugt, sgt; /* ugt: unsigned ld > st, sgt: signed ld > st */ + + assign ld_data = ld_data_i, + st_data = st_data_i; + + assign ugt = (ld_data_i > st_data_i), + sgt = (ld_data > st_data), + sum = ld_data + st_data; + + always_comb + begin : amo_compute_comb + unique case (1'b1) /* matches the op_i flag that equals 1'b1; result_o is zero when no listed flag is set */ + op_i.is_amo_lr : result_o = ld_data_i; + op_i.is_amo_sc : result_o = st_data_i; + op_i.is_amo_swap : result_o = st_data_i; + op_i.is_amo_add : result_o = sum; + op_i.is_amo_and : result_o = ld_data_i & st_data_i; + op_i.is_amo_or : result_o = ld_data_i | st_data_i; + op_i.is_amo_xor : result_o = ld_data_i ^ st_data_i; + op_i.is_amo_max : result_o = sgt ? ld_data_i : st_data_i; + op_i.is_amo_maxu : result_o = ugt ? ld_data_i : st_data_i; + op_i.is_amo_min : result_o = sgt ? st_data_i : ld_data_i; + op_i.is_amo_minu : result_o = ugt ? st_data_i : ld_data_i; + default : result_o = '0; + endcase + end +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv new file mode 100644 index 0000000..de09cd7 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv @@ -0,0 +1,250 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : July, 2021 + * Description : HPDcache Cache-Management-Operation Handler + * History : + */ +module hpdcache_cmo +import hpdcache_pkg::*; +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Global control signals + // {{{ + input logic wbuf_empty_i, + input logic mshr_empty_i, + input logic rtab_empty_i, + input logic ctrl_empty_i, + // }}} + + // Request interface + // {{{ + input logic req_valid_i, + output logic req_ready_o, + input hpdcache_cmoh_op_t req_op_i, + input hpdcache_req_addr_t req_addr_i, + input hpdcache_req_data_t req_wdata_i, + // }}} + + // Write Buffer Interface + // {{{ + output logic wbuf_flush_all_o, + // }}} + + // Cache Directory Interface + // {{{ + output logic dir_check_o, + output hpdcache_set_t dir_check_set_o, + output hpdcache_tag_t dir_check_tag_o, + input hpdcache_way_vector_t dir_check_hit_way_i, + + output logic dir_inval_o, + output hpdcache_set_t dir_inval_set_o, + output hpdcache_way_vector_t dir_inval_way_o + // }}} +); +// }}} + +// Definition of constants and types +// {{{ + typedef enum { + CMOH_IDLE, + CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY, + CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY, + CMOH_INVAL_CHECK_NLINE, + CMOH_INVAL_SET + } hpdcache_cmoh_fsm_t; +// }}} + +// Internal signals and registers +// {{{ + hpdcache_cmoh_fsm_t cmoh_fsm_q, cmoh_fsm_d; + hpdcache_cmoh_op_t cmoh_op_q, cmoh_op_d; + hpdcache_req_addr_t cmoh_addr_q, cmoh_addr_d; + hpdcache_way_vector_t cmoh_way_q, cmoh_way_d; + hpdcache_set_t cmoh_set_cnt_q, cmoh_set_cnt_d; + 
hpdcache_nline_t cmoh_nline_q; + hpdcache_tag_t cmoh_tag_q; + hpdcache_set_t cmoh_set_q; + hpdcache_data_word_t cmoh_wdata; +// }}} + +// CMO request handler FSM +// {{{ + assign cmoh_nline_q = cmoh_addr_q[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH], + cmoh_set_q = cmoh_nline_q[0 +: HPDCACHE_SET_WIDTH], + cmoh_tag_q = cmoh_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH]; + + assign dir_check_set_o = cmoh_set_q, + dir_check_tag_o = cmoh_tag_q; + + assign req_ready_o = (cmoh_fsm_q == CMOH_IDLE); + + // Only the least significant word of the write data contains parameters + // for the CMO handler + assign cmoh_wdata = req_wdata_i[0]; + + always_comb + begin : cmoh_fsm_comb + cmoh_op_d = cmoh_op_q; + cmoh_addr_d = cmoh_addr_q; + cmoh_way_d = cmoh_way_q; + cmoh_set_cnt_d = cmoh_set_cnt_q; + + dir_check_o = 1'b0; + + dir_inval_o = 1'b0; + dir_inval_set_o = cmoh_set_q; + dir_inval_way_o = '0; + + wbuf_flush_all_o = 1'b0; + + cmoh_fsm_d = cmoh_fsm_q; + + case (cmoh_fsm_q) + CMOH_IDLE: begin + cmoh_fsm_d = CMOH_IDLE; + + if (req_valid_i) begin + unique case (1'b1) + req_op_i.is_fence: begin + // request to the write buffer to send all open entries + wbuf_flush_all_o = rtab_empty_i; + + // then wait for the write buffer to be empty + if (!rtab_empty_i || !wbuf_empty_i) begin + cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY; + end + end + req_op_i.is_inval_by_nline, + req_op_i.is_inval_by_set, + req_op_i.is_inval_all: begin + cmoh_op_d = req_op_i; + cmoh_addr_d = req_addr_i; + cmoh_way_d = cmoh_wdata[0 +: HPDCACHE_WAYS]; + cmoh_set_cnt_d = 0; + if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + if (req_op_i.is_inval_by_nline) begin + cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE; + end else begin + cmoh_fsm_d = CMOH_INVAL_SET; + end + end else begin + cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY; + end + end + default: begin + // pragma translate_off + $error("cmo handler: unexpected operation"); + // pragma translate_on + end + endcase + end + end + 
CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY: begin + wbuf_flush_all_o = rtab_empty_i; + + if (wbuf_empty_i && rtab_empty_i) begin + cmoh_fsm_d = CMOH_IDLE; + end else begin + cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY; + end + end + CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY: begin + cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY; + if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + if (cmoh_op_q.is_inval_by_nline) begin + cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE; + end else begin + cmoh_fsm_d = CMOH_INVAL_SET; + end + end + end + CMOH_INVAL_CHECK_NLINE: begin + dir_check_o = 1'b1; + cmoh_fsm_d = CMOH_INVAL_SET; + end + CMOH_INVAL_SET: begin + cmoh_fsm_d = CMOH_INVAL_SET; + case (1'b1) + cmoh_op_q.is_inval_by_nline: begin + dir_inval_o = |dir_check_hit_way_i; + dir_inval_way_o = dir_check_hit_way_i; + cmoh_fsm_d = CMOH_IDLE; + end + cmoh_op_q.is_inval_all: begin + dir_inval_o = 1'b1; + dir_inval_way_o = {HPDCACHE_WAYS{1'b1}}; + dir_inval_set_o = cmoh_set_cnt_q; + cmoh_set_cnt_d = cmoh_set_cnt_q + 1; + if (cmoh_set_cnt_q == hpdcache_set_t'(HPDCACHE_SETS - 1)) begin + cmoh_fsm_d = CMOH_IDLE; + end + end + cmoh_op_q.is_inval_by_set: begin + dir_inval_o = 1'b1; + dir_inval_way_o = cmoh_way_q; + cmoh_fsm_d = CMOH_IDLE; + end + endcase + end + endcase + end +// }}} + +// CMO request handler set state +// {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + cmoh_fsm_q <= CMOH_IDLE; + end else begin + cmoh_fsm_q <= cmoh_fsm_d; + end + end + + always_ff @(posedge clk_i) + begin + cmoh_op_q <= cmoh_op_d; + cmoh_addr_q <= cmoh_addr_d; + cmoh_way_q <= cmoh_way_d; + cmoh_set_cnt_q <= cmoh_set_cnt_d; + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) disable iff (!rst_ni) + req_valid_i -> $onehot(req_op_i)) else + $error("cmo_handler: more than one operation type requested"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + req_valid_i -> (cmoh_fsm_q == CMOH_IDLE)) else + $error("cmo_handler: new request 
received while busy"); +// pragma translate_on +// }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv new file mode 100644 index 0000000..1f8f5a4 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv @@ -0,0 +1,171 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : September, 2023 + * Description : HPDcache request arbiter (arbitrates NREQUESTERS core request ports via hpdcache_fxarb and routes responses back by sid) + * History : + */ +module hpdcache_core_arbiter +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int NREQUESTERS = 1 +) + // }}} + + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Core request interface + // 1st cycle + input logic core_req_valid_i [NREQUESTERS-1:0], + output logic core_req_ready_o [NREQUESTERS-1:0], + input hpdcache_req_t core_req_i [NREQUESTERS-1:0], + // 2nd cycle + input logic core_req_abort_i [NREQUESTERS-1:0], + input hpdcache_tag_t core_req_tag_i [NREQUESTERS-1:0], + input hpdcache_pma_t core_req_pma_i [NREQUESTERS-1:0], + + // Core response interface + input logic core_rsp_valid_i, + input hpdcache_rsp_t core_rsp_i, + output logic core_rsp_valid_o [NREQUESTERS-1:0], + output hpdcache_rsp_t core_rsp_o [NREQUESTERS-1:0], + + // Granted request + output logic arb_req_valid_o, + input logic arb_req_ready_i, + output hpdcache_req_t arb_req_o, + output logic arb_abort_o, + output hpdcache_tag_t arb_tag_o, + output hpdcache_pma_t arb_pma_o +); + + // }}} + + // Declaration of internal signals + // {{{ + logic [NREQUESTERS-1:0] core_req_valid; + hpdcache_req_t [NREQUESTERS-1:0] core_req; + logic [NREQUESTERS-1:0] core_req_abort; + hpdcache_tag_t [NREQUESTERS-1:0] core_req_tag; + hpdcache_pma_t [NREQUESTERS-1:0] core_req_pma; + + logic [NREQUESTERS-1:0] arb_req_gnt_q, arb_req_gnt_d; + // }}} + + // Requesters arbiter + // {{{ + // Pack request ports + genvar gen_i; + + generate + for (gen_i = 0; gen_i < int'(NREQUESTERS); gen_i++) begin : gen_core_req + assign core_req_ready_o[gen_i] = arb_req_gnt_d[gen_i] & arb_req_ready_i, + core_req_valid[gen_i] = core_req_valid_i[gen_i], + core_req[gen_i] = core_req_i[gen_i]; + + assign core_req_abort[gen_i] = core_req_abort_i[gen_i], + 
core_req_tag[gen_i] = core_req_tag_i[gen_i], + core_req_pma[gen_i] = core_req_pma_i[gen_i]; + 
end + endgenerate + + // Arbiter + hpdcache_fxarb #(.N(NREQUESTERS)) req_arbiter_i + ( + .clk_i, + .rst_ni, + .req_i (core_req_valid), + .gnt_o (arb_req_gnt_d), + .ready_i (arb_req_ready_i) + ); + + // Request multiplexor (1st-cycle payload, selected by the current grant arb_req_gnt_d) + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_req_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_mux_i ( + .data_i (core_req), + .sel_i (arb_req_gnt_d), + .data_o (arb_req_o) + ); + + // Request abort multiplexor (2nd-cycle signal, selected by the registered grant arb_req_gnt_q) + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH (1), + .ONE_HOT_SEL (1'b1) + ) core_req_abort_mux_i ( + .data_i (core_req_abort), + .sel_i (arb_req_gnt_q), + .data_o (arb_abort_o) + ); + + // Tag Multiplexor (2nd-cycle signal, selected by the registered grant arb_req_gnt_q) + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_tag_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_tag_mux_i ( + .data_i (core_req_tag), + .sel_i (arb_req_gnt_q), + .data_o (arb_tag_o) + ); + + // PMA Multiplexor (2nd-cycle signal, selected by the registered grant arb_req_gnt_q) + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_pma_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_pma_mux_i ( + .data_i (core_req_pma), + .sel_i (arb_req_gnt_q), + .data_o (arb_pma_o) + ); + + // Save the grant signal for the abort, tag and PMA selection in the next cycle + always_ff @(posedge clk_i or negedge rst_ni) + begin : arb_req_gnt_ff + if (!rst_ni) arb_req_gnt_q <= '0; + else arb_req_gnt_q <= arb_req_gnt_d; + end + + assign arb_req_valid_o = |arb_req_gnt_d; + // }}} + + // Response demultiplexor: the response payload is copied to every requester, valid is raised only for the one whose index equals core_rsp_i.sid + // {{{ + always_comb + begin : resp_demux + for (int unsigned i = 0; i < NREQUESTERS; i++) begin + core_rsp_valid_o[i] = core_rsp_valid_i && (i == int'(core_rsp_i.sid)); + core_rsp_o[i] = 
core_rsp_i; + end + end + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv new file mode 100755 index 0000000..19369c5 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv @@ -0,0 +1,760 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a 
l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache controller (st0/st1/st2 request pipeline; instantiates the protocol engine, the replay table and the memory array controller) + * History : + */ +module hpdcache_ctrl + // Package imports + // {{{ +import hpdcache_pkg::*; + // }}} + + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + + // Core request interface + input logic core_req_valid_i, + output logic core_req_ready_o, + input hpdcache_req_t core_req_i, + input logic core_req_abort_i, + input hpdcache_tag_t core_req_tag_i, + input hpdcache_pma_t core_req_pma_i, + + // Core response interface + output logic core_rsp_valid_o, + output hpdcache_rsp_t core_rsp_o, + + // Force the write buffer to send all pending writes + input logic wbuf_flush_i, + + // Global control signals + output logic cachedir_hit_o, + + // Miss handler interface + output logic miss_mshr_check_o, + output mshr_set_t miss_mshr_check_set_o, + output mshr_tag_t miss_mshr_check_tag_o, + output logic miss_mshr_alloc_o, + output logic miss_mshr_alloc_cs_o, + input logic miss_mshr_alloc_ready_i, + input logic miss_mshr_alloc_full_i, + output hpdcache_nline_t miss_mshr_alloc_nline_o, + output hpdcache_req_tid_t 
miss_mshr_alloc_tid_o, + output hpdcache_req_sid_t miss_mshr_alloc_sid_o, + output hpdcache_word_t miss_mshr_alloc_word_o, 
+ output logic miss_mshr_alloc_need_rsp_o, + output logic miss_mshr_alloc_is_prefetch_o, + input logic miss_mshr_hit_i, + + // Refill interface + input logic refill_req_valid_i, + output logic refill_req_ready_o, + input logic refill_busy_i, + input logic refill_updt_plru_i, + input hpdcache_set_t refill_set_i, + input hpdcache_dir_entry_t refill_dir_entry_i, + output hpdcache_way_vector_t refill_victim_way_o, + input hpdcache_way_vector_t refill_victim_way_i, + input logic refill_write_dir_i, + input logic refill_write_data_i, + input hpdcache_word_t refill_word_i, + input hpdcache_refill_data_t refill_data_i, + input logic refill_core_rsp_valid_i, + input hpdcache_rsp_t refill_core_rsp_i, + input hpdcache_nline_t refill_nline_i, + input logic refill_updt_rtab_i, + + // Write buffer interface + input logic wbuf_empty_i, + output logic wbuf_flush_all_o, + output logic wbuf_write_o, + input logic wbuf_write_ready_i, + output wbuf_addr_t wbuf_write_addr_o, + output wbuf_data_t wbuf_write_data_o, + output wbuf_be_t wbuf_write_be_o, + output logic wbuf_write_uncacheable_o, + input logic wbuf_read_hit_i, + output logic wbuf_read_flush_hit_o, + output hpdcache_req_addr_t wbuf_rtab_addr_o, + output logic wbuf_rtab_is_read_o, + input logic wbuf_rtab_hit_open_i, + input logic wbuf_rtab_hit_pend_i, + input logic wbuf_rtab_hit_sent_i, + input logic wbuf_rtab_not_ready_i, + + // Uncacheable request handler + input logic uc_busy_i, + output logic uc_lrsc_snoop_o, + output hpdcache_req_addr_t uc_lrsc_snoop_addr_o, + output hpdcache_req_size_t uc_lrsc_snoop_size_o, + output logic uc_req_valid_o, + output hpdcache_uc_op_t uc_req_op_o, + output hpdcache_req_addr_t uc_req_addr_o, + output hpdcache_req_size_t uc_req_size_o, + output hpdcache_req_data_t uc_req_data_o, + output hpdcache_req_be_t uc_req_be_o, + output logic uc_req_uc_o, + output hpdcache_req_sid_t uc_req_sid_o, + output hpdcache_req_tid_t uc_req_tid_o, + output logic uc_req_need_rsp_o, + input logic uc_wbuf_flush_all_i, 
+ input logic uc_dir_amo_match_i, + input hpdcache_set_t uc_dir_amo_match_set_i, + input hpdcache_tag_t uc_dir_amo_match_tag_i, + input logic uc_dir_amo_update_plru_i, + output hpdcache_way_vector_t uc_dir_amo_hit_way_o, + input logic uc_data_amo_write_i, + input logic uc_data_amo_write_enable_i, + input hpdcache_set_t uc_data_amo_write_set_i, + input hpdcache_req_size_t uc_data_amo_write_size_i, + input hpdcache_word_t uc_data_amo_write_word_i, + input logic [63:0] uc_data_amo_write_data_i, + input logic [7:0] uc_data_amo_write_be_i, + output logic uc_core_rsp_ready_o, + input logic uc_core_rsp_valid_i, + input hpdcache_rsp_t uc_core_rsp_i, + + // Cache Management Operation (CMO) + input logic cmo_busy_i, + output logic cmo_req_valid_o, + output hpdcache_cmoh_op_t cmo_req_op_o, + output hpdcache_req_addr_t cmo_req_addr_o, + output hpdcache_req_data_t cmo_req_wdata_o, + input logic cmo_wbuf_flush_all_i, + input logic cmo_dir_check_i, + input hpdcache_set_t cmo_dir_check_set_i, + input hpdcache_tag_t cmo_dir_check_tag_i, + output hpdcache_way_vector_t cmo_dir_check_hit_way_o, + input logic cmo_dir_inval_i, + input hpdcache_set_t cmo_dir_inval_set_i, + input hpdcache_way_vector_t cmo_dir_inval_way_i, + + output logic rtab_empty_o, + output logic ctrl_empty_o, + + // Configuration signals + input logic cfg_enable_i, + input logic cfg_rtab_single_entry_i, + + // Performance events + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o, + output logic evt_stall_o +); + // }}} + + // Definition of internal registers + // {{{ + logic st1_req_valid_q, st1_req_valid_d; + hpdcache_req_t st1_req_q; + logic st1_req_rtab_q; + rtab_ptr_t st1_rtab_pop_try_ptr_q; + + logic st2_req_valid_q, 
st2_req_valid_d; + logic st2_req_is_prefetch_q, st2_req_is_prefetch_d; + logic st2_req_need_rsp_q; + hpdcache_req_addr_t st2_req_addr_q; + hpdcache_req_sid_t st2_req_sid_q; + hpdcache_req_tid_t st2_req_tid_q; + // }}} + + // Definition of internal signals + // {{{ + logic [1:0] st0_arb_req; + logic [1:0] st0_arb_req_grant; + logic st0_arb_ready; + + logic st0_req_ready; + + logic st0_req_valid; + hpdcache_req_t st0_req; + logic st0_req_is_uncacheable; + logic st0_req_is_load; + logic st0_req_is_store; + logic st0_req_is_amo; + logic st0_req_is_cmo_fence; + logic st0_req_is_cmo_inval; + logic st0_req_is_cmo_prefetch; + logic st0_req_cachedir_read; + logic st0_req_cachedata_read; + hpdcache_set_t st0_req_set; + hpdcache_word_t st0_req_word; + logic st0_rtab_pop_try_valid; + logic st0_rtab_pop_try_ready; + hpdcache_req_t st0_rtab_pop_try_req; + logic st0_rtab_pop_try_sel; + rtab_ptr_t st0_rtab_pop_try_ptr; + + logic st1_rsp_valid; + logic st1_rsp_aborted; + hpdcache_req_t st1_req; + logic st1_req_abort; + logic st1_req_cachedata_write; + logic st1_req_cachedata_write_enable; + hpdcache_pma_t st1_req_pma; + hpdcache_tag_t st1_req_tag; + hpdcache_set_t st1_req_set; + hpdcache_word_t st1_req_word; + hpdcache_nline_t st1_req_nline; + hpdcache_req_addr_t st1_req_addr; + logic st1_req_updt_lru; + logic st1_req_is_uncacheable; + logic st1_req_is_load; + logic st1_req_is_store; + logic st1_req_is_amo; + logic st1_req_is_amo_lr; + logic st1_req_is_amo_sc; + logic st1_req_is_amo_swap; + logic st1_req_is_amo_add; + logic st1_req_is_amo_and; + logic st1_req_is_amo_or; + logic st1_req_is_amo_xor; + logic st1_req_is_amo_max; + logic st1_req_is_amo_maxu; + logic st1_req_is_amo_min; + logic st1_req_is_amo_minu; + logic st1_req_is_cmo_inval; + logic st1_req_is_cmo_fence; + logic st1_req_is_cmo_prefetch; + hpdcache_way_vector_t st1_dir_hit; + hpdcache_req_data_t st1_read_data; + logic st1_rtab_alloc; + logic st1_rtab_alloc_and_link; + logic st1_rtab_pop_try_commit; + logic 
st1_rtab_pop_try_rback; + logic st1_rtab_mshr_hit; + logic st1_rtab_mshr_full; + logic st1_rtab_mshr_ready; + logic st1_rtab_wbuf_hit; + logic st1_rtab_wbuf_not_ready; + logic st1_rtab_check; + logic st1_rtab_check_hit; + + logic st2_req_we; + hpdcache_word_t st2_req_word; + + logic rtab_full; + + logic hpdcache_init_ready; + // }}} + + // Decoding of the request + // {{{ + // Select between a request from the replay table and a new core request + assign st0_req_valid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_valid + : core_req_valid_i, + st0_req.addr_offset = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_offset + : core_req_i.addr_offset, + st0_req.addr_tag = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_tag + : core_req_i.addr_tag, + st0_req.wdata = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.wdata + : core_req_i.wdata, + st0_req.op = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.op + : core_req_i.op, + st0_req.be = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.be + : core_req_i.be, + st0_req.size = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.size + : core_req_i.size, + st0_req.sid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.sid + : core_req_i.sid, + st0_req.tid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.tid + : core_req_i.tid, + st0_req.need_rsp = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.need_rsp + : core_req_i.need_rsp, + st0_req.phys_indexed = st0_rtab_pop_try_sel ? 1'b1 + : core_req_i.phys_indexed, + st0_req.pma = st0_rtab_pop_try_sel ? 
st0_rtab_pop_try_req.pma + : core_req_i.pma; + + // Decode operation in stage 0 + assign st0_req_is_uncacheable = ~cfg_enable_i | ( st0_req.phys_indexed + & st0_req.pma.uncacheable), + st0_req_is_load = is_load(st0_req.op), + st0_req_is_store = is_store(st0_req.op), + st0_req_is_amo = is_amo(st0_req.op), + st0_req_is_cmo_fence = is_cmo_fence(st0_req.op, st0_req.size), + st0_req_is_cmo_inval = is_cmo_inval(st0_req.op, st0_req.size), + st0_req_is_cmo_prefetch = is_cmo_prefetch(st0_req.op, st0_req.size); + + // Decode operation in stage 1 + + // In case of replay or physically-indexed cache, the tag and PMA come + // from stage 0. Otherwise, this information comes directly from the + // requester in stage 1 + assign st1_req_tag = st1_req_q.phys_indexed ? st1_req_q.addr_tag : core_req_tag_i, + st1_req_pma = st1_req_q.phys_indexed ? st1_req_q.pma : core_req_pma_i; + + assign st1_req.addr_offset = st1_req_q.addr_offset, + st1_req.addr_tag = st1_req_rtab_q ? st1_req_q.addr_tag : st1_req_tag, + st1_req.wdata = st1_req_q.wdata, + st1_req.op = st1_req_q.op, + st1_req.be = st1_req_q.be, + st1_req.size = st1_req_q.size, + st1_req.sid = st1_req_q.sid, + st1_req.tid = st1_req_q.tid, + st1_req.need_rsp = st1_req_q.need_rsp, + st1_req.phys_indexed = st1_req_q.phys_indexed, + st1_req.pma = st1_req_rtab_q ? st1_req_q.pma : st1_req_pma; + + // A requester can ask to abort a request it initiated on the + // previous cycle (stage 0). 
Useful in case of TLB miss for example + assign st1_req_abort = core_req_abort_i & ~st1_req.phys_indexed; + + assign st1_req_is_uncacheable = ~cfg_enable_i | st1_req.pma.uncacheable, + st1_req_is_load = is_load(st1_req.op), + st1_req_is_store = is_store(st1_req.op), + st1_req_is_amo = is_amo(st1_req.op), + st1_req_is_amo_lr = is_amo_lr(st1_req.op), + st1_req_is_amo_sc = is_amo_sc(st1_req.op), + st1_req_is_amo_swap = is_amo_swap(st1_req.op), + st1_req_is_amo_add = is_amo_add(st1_req.op), + st1_req_is_amo_and = is_amo_and(st1_req.op), + st1_req_is_amo_or = is_amo_or(st1_req.op), + st1_req_is_amo_xor = is_amo_xor(st1_req.op), + st1_req_is_amo_max = is_amo_max(st1_req.op), + st1_req_is_amo_maxu = is_amo_maxu(st1_req.op), + st1_req_is_amo_min = is_amo_min(st1_req.op), + st1_req_is_amo_minu = is_amo_minu(st1_req.op), + st1_req_is_cmo_inval = is_cmo_inval(st1_req.op, st1_req.size), + st1_req_is_cmo_fence = is_cmo_fence(st1_req.op, st1_req.size), + st1_req_is_cmo_prefetch = is_cmo_prefetch(st1_req.op, st1_req.size); + // }}} + + // Refill arbiter: it arbitrates between normal requests (from the core, + // coprocessor, prefetch) and refill requests (from the miss handler). + // + // TODO This arbiter could be replaced by a weighted-round-robin arbiter. + // This way we could distribute asymmetrically the bandwidth to the core + // and the refill interfaces. 
+ // {{{ + hpdcache_rrarb #(.N(2)) st0_arb_i + ( + .clk_i, + .rst_ni, + .req_i (st0_arb_req), + .gnt_o (st0_arb_req_grant), + .ready_i (st0_arb_ready) + ); + + // The arbiter can cycle the priority token when: + // - The granted request is consumed (req_grant & req_valid & req_ready) + // - The granted request is aborted (req_grant & ~req_valid) + assign st0_arb_ready = ((st0_arb_req_grant[0] & st0_req_valid & st0_req_ready ) | + (st0_arb_req_grant[1] & refill_req_valid_i & refill_req_ready_o) | + (st0_arb_req_grant[0] & ~st0_req_valid ) | + (st0_arb_req_grant[1] & ~refill_req_valid_i)); + + assign st0_arb_req[0] = st0_req_valid, + st0_arb_req[1] = refill_req_valid_i; + + assign core_req_ready_o = st0_req_ready & ~st0_rtab_pop_try_sel, + st0_rtab_pop_try_ready = st0_req_ready & st0_rtab_pop_try_sel; + + // Trigger an event signal when the pipeline is stalled (new request is not consumed) + assign evt_stall_o = core_req_valid_i & ~core_req_ready_o; + // }}} + + // Cache controller protocol engine + // {{{ + hpdcache_ctrl_pe hpdcache_ctrl_pe_i( + .arb_st0_req_valid_i (st0_req_valid & st0_arb_req_grant[0]), + .arb_st0_req_ready_o (st0_req_ready), + .arb_refill_valid_i (refill_req_valid_i & st0_arb_req_grant[1]), + .arb_refill_ready_o (refill_req_ready_o), + .st0_req_is_uncacheable_i (st0_req_is_uncacheable), + .st0_req_need_rsp_i (st0_req.need_rsp), + .st0_req_is_load_i (st0_req_is_load), + .st0_req_is_store_i (st0_req_is_store), + .st0_req_is_amo_i (st0_req_is_amo), + .st0_req_is_cmo_fence_i (st0_req_is_cmo_fence), + .st0_req_is_cmo_inval_i (st0_req_is_cmo_inval), + .st0_req_is_cmo_prefetch_i (st0_req_is_cmo_prefetch), + .st0_req_mshr_check_o (miss_mshr_check_o), + .st0_req_cachedir_read_o (st0_req_cachedir_read), + .st0_req_cachedata_read_o (st0_req_cachedata_read), + + .st1_req_valid_i (st1_req_valid_q), + .st1_req_abort_i (st1_req_abort), + .st1_req_rtab_i (st1_req_rtab_q), + .st1_req_is_uncacheable_i (st1_req_is_uncacheable), + .st1_req_need_rsp_i 
(st1_req.need_rsp), + .st1_req_is_load_i (st1_req_is_load), + .st1_req_is_store_i (st1_req_is_store), + .st1_req_is_amo_i (st1_req_is_amo), + .st1_req_is_cmo_inval_i (st1_req_is_cmo_inval), + .st1_req_is_cmo_fence_i (st1_req_is_cmo_fence), + .st1_req_is_cmo_prefetch_i (st1_req_is_cmo_prefetch), + .st1_req_valid_o (st1_req_valid_d), + .st1_rsp_valid_o (st1_rsp_valid), + .st1_rsp_aborted_o (st1_rsp_aborted), + .st1_req_cachedir_updt_lru_o (st1_req_updt_lru), + .st1_req_cachedata_write_o (st1_req_cachedata_write), + .st1_req_cachedata_write_enable_o (st1_req_cachedata_write_enable), + + .st2_req_valid_i (st2_req_valid_q), + .st2_req_is_prefetch_i (st2_req_is_prefetch_q), + .st2_req_valid_o (st2_req_valid_d), + .st2_req_we_o (st2_req_we), + .st2_req_is_prefetch_o (st2_req_is_prefetch_d), + .st2_req_mshr_alloc_o (miss_mshr_alloc_o), + .st2_req_mshr_alloc_cs_o (miss_mshr_alloc_cs_o), + + .rtab_full_i (rtab_full), + .rtab_req_valid_i (st0_rtab_pop_try_valid), + .rtab_sel_o (st0_rtab_pop_try_sel), + .rtab_check_o (st1_rtab_check), + .rtab_check_hit_i (st1_rtab_check_hit), + .st1_rtab_alloc_o (st1_rtab_alloc), + .st1_rtab_alloc_and_link_o (st1_rtab_alloc_and_link), + .st1_rtab_commit_o (st1_rtab_pop_try_commit), + .st1_rtab_rback_o (st1_rtab_pop_try_rback), + .st1_rtab_mshr_hit_o (st1_rtab_mshr_hit), + .st1_rtab_mshr_full_o (st1_rtab_mshr_full), + .st1_rtab_mshr_ready_o (st1_rtab_mshr_ready), + .st1_rtab_wbuf_hit_o (st1_rtab_wbuf_hit), + .st1_rtab_wbuf_not_ready_o (st1_rtab_wbuf_not_ready), + + .cachedir_hit_i (cachedir_hit_o), + .cachedir_init_ready_i (hpdcache_init_ready), + + .mshr_alloc_ready_i (miss_mshr_alloc_ready_i), + .mshr_hit_i (miss_mshr_hit_i), + .mshr_full_i (miss_mshr_alloc_full_i), + + .refill_busy_i, + .refill_core_rsp_valid_i, + + .wbuf_write_valid_o (wbuf_write_o), + .wbuf_write_ready_i, + .wbuf_read_hit_i, + .wbuf_write_uncacheable_o, + .wbuf_read_flush_hit_o, + + .uc_busy_i, + .uc_req_valid_o, + .uc_core_rsp_ready_o, + + .cmo_busy_i, + .cmo_req_valid_o, 
+ + .evt_cache_write_miss_o, + .evt_cache_read_miss_o, + .evt_uncached_req_o, + .evt_cmo_req_o, + .evt_write_req_o, + .evt_read_req_o, + .evt_prefetch_req_o, + .evt_req_on_hold_o, + .evt_rtab_rollback_o, + .evt_stall_refill_o + ); + + assign ctrl_empty_o = ~(st1_req_valid_q | st2_req_valid_q); + // }}} + + // Replay table + // {{{ + hpdcache_rtab #( + .rtab_entry_t (hpdcache_req_t) + ) hpdcache_rtab_i( + .clk_i, + .rst_ni, + + .empty_o (rtab_empty_o), + .full_o (rtab_full), + + .check_i (st1_rtab_check), + .check_nline_i (st1_req_nline), + .check_hit_o (st1_rtab_check_hit), + + .alloc_i (st1_rtab_alloc), + .alloc_and_link_i (st1_rtab_alloc_and_link), + .alloc_req_i (st1_req), + .alloc_mshr_hit_i (st1_rtab_mshr_hit), + .alloc_mshr_full_i (st1_rtab_mshr_full), + .alloc_mshr_ready_i (st1_rtab_mshr_ready), + .alloc_wbuf_hit_i (st1_rtab_wbuf_hit), + .alloc_wbuf_not_ready_i (st1_rtab_wbuf_not_ready), + + .pop_try_valid_o (st0_rtab_pop_try_valid), + .pop_try_i (st0_rtab_pop_try_ready), + .pop_try_req_o (st0_rtab_pop_try_req), + .pop_try_ptr_o (st0_rtab_pop_try_ptr), + + .pop_commit_i (st1_rtab_pop_try_commit), + .pop_commit_ptr_i (st1_rtab_pop_try_ptr_q), + + .pop_rback_i (st1_rtab_pop_try_rback), + .pop_rback_ptr_i (st1_rtab_pop_try_ptr_q), + .pop_rback_mshr_hit_i (st1_rtab_mshr_hit), + .pop_rback_mshr_full_i (st1_rtab_mshr_full), + .pop_rback_mshr_ready_i (st1_rtab_mshr_ready), + .pop_rback_wbuf_hit_i (st1_rtab_wbuf_hit), + .pop_rback_wbuf_not_ready_i (st1_rtab_wbuf_not_ready), + + .wbuf_addr_o (wbuf_rtab_addr_o), + .wbuf_is_read_o (wbuf_rtab_is_read_o), + .wbuf_hit_open_i (wbuf_rtab_hit_open_i), + .wbuf_hit_pend_i (wbuf_rtab_hit_pend_i), + .wbuf_hit_sent_i (wbuf_rtab_hit_sent_i), + .wbuf_not_ready_i (wbuf_rtab_not_ready_i), + + .miss_ready_i (miss_mshr_alloc_ready_i), + + .refill_i (refill_updt_rtab_i), + .refill_nline_i, + + .cfg_single_entry_i (cfg_rtab_single_entry_i) + ); + // }}} + + // Pipeline stage 1 registers + // {{{ + always_ff @(posedge clk_i) + begin : 
st1_req_payload_ff + if (st0_req_ready) begin + st1_req_q <= st0_req; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : st1_req_valid_ff + if (!rst_ni) begin + st1_req_valid_q <= 1'b0; + st1_req_rtab_q <= 1'b0; + st1_rtab_pop_try_ptr_q <= '0; + end else begin + st1_req_valid_q <= st1_req_valid_d; + if (st0_req_ready) begin + st1_req_rtab_q <= st0_rtab_pop_try_sel; + if (st0_rtab_pop_try_sel) begin + st1_rtab_pop_try_ptr_q <= st0_rtab_pop_try_ptr; + end + end + end + end + // }}} + + // Pipeline stage 2 registers + // {{{ + always_ff @(posedge clk_i) + begin : st2_req_payload_ff + if (st2_req_we) begin + st2_req_need_rsp_q <= st1_req.need_rsp; + st2_req_addr_q <= st1_req_addr; + st2_req_sid_q <= st1_req.sid; + st2_req_tid_q <= st1_req.tid; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : st2_req_valid_ff + if (!rst_ni) begin + st2_req_valid_q <= 1'b0; + st2_req_is_prefetch_q <= 1'b0; + end else begin + st2_req_valid_q <= st2_req_valid_d; + st2_req_is_prefetch_q <= st2_req_is_prefetch_d; + end + end + // }}} + + // Controller for the HPDcache directory and data memory arrays + // {{{ + assign st0_req_set = hpdcache_get_req_offset_set(st0_req.addr_offset), + st0_req_word = hpdcache_get_req_offset_word(st0_req.addr_offset), + st1_req_set = hpdcache_get_req_offset_set(st1_req.addr_offset), + st1_req_word = hpdcache_get_req_offset_word(st1_req.addr_offset), + st1_req_addr = {st1_req.addr_tag, st1_req.addr_offset}, + st1_req_nline = hpdcache_get_req_addr_nline(st1_req_addr), + st2_req_word = hpdcache_get_req_addr_word(st2_req_addr_q); + + hpdcache_memctrl hpdcache_memctrl_i ( + .clk_i, + .rst_ni, + + .ready_o (hpdcache_init_ready), + + .dir_match_i (st0_req_cachedir_read), + .dir_match_set_i (st0_req_set), + .dir_match_tag_i (st1_req.addr_tag), + .dir_update_lru_i (st1_req_updt_lru), + .dir_hit_way_o (st1_dir_hit), + + .dir_amo_match_i (uc_dir_amo_match_i), + .dir_amo_match_set_i (uc_dir_amo_match_set_i), + .dir_amo_match_tag_i 
(uc_dir_amo_match_tag_i), + .dir_amo_update_plru_i (uc_dir_amo_update_plru_i), + .dir_amo_hit_way_o (uc_dir_amo_hit_way_o), + + .dir_refill_i (refill_write_dir_i), + .dir_refill_set_i (refill_set_i), + .dir_refill_entry_i (refill_dir_entry_i), + .dir_refill_updt_plru_i (refill_updt_plru_i), + .dir_victim_way_o (refill_victim_way_o), + + .dir_cmo_check_i (cmo_dir_check_i), + .dir_cmo_check_set_i (cmo_dir_check_set_i), + .dir_cmo_check_tag_i (cmo_dir_check_tag_i), + .dir_cmo_check_hit_way_o (cmo_dir_check_hit_way_o), + + .dir_cmo_inval_i (cmo_dir_inval_i), + .dir_cmo_inval_set_i (cmo_dir_inval_set_i), + .dir_cmo_inval_way_i (cmo_dir_inval_way_i), + + .data_req_read_i (st0_req_cachedata_read), + .data_req_read_set_i (st0_req_set), + .data_req_read_size_i (st0_req.size), + .data_req_read_word_i (st0_req_word), + .data_req_read_data_o (st1_read_data), + + .data_req_write_i (st1_req_cachedata_write), + .data_req_write_enable_i (st1_req_cachedata_write_enable), + .data_req_write_set_i (st1_req_set), + .data_req_write_size_i (st1_req.size), + .data_req_write_word_i (st1_req_word), + .data_req_write_data_i (st1_req.wdata), + .data_req_write_be_i (st1_req.be), + + .data_amo_write_i (uc_data_amo_write_i), + .data_amo_write_enable_i (uc_data_amo_write_enable_i), + .data_amo_write_set_i (uc_data_amo_write_set_i), + .data_amo_write_size_i (uc_data_amo_write_size_i), + .data_amo_write_word_i (uc_data_amo_write_word_i), + .data_amo_write_data_i (uc_data_amo_write_data_i), + .data_amo_write_be_i (uc_data_amo_write_be_i), + + .data_refill_i (refill_write_data_i), + .data_refill_way_i (refill_victim_way_i), + .data_refill_set_i (refill_set_i), + .data_refill_word_i (refill_word_i), + .data_refill_data_i (refill_data_i) + ); + + assign cachedir_hit_o = |st1_dir_hit; + // }}} + + // Write buffer outputs + // {{{ + assign wbuf_write_addr_o = st1_req_addr, + wbuf_write_data_o = st1_req.wdata, + wbuf_write_be_o = st1_req.be, + wbuf_flush_all_o = cmo_wbuf_flush_all_i | uc_wbuf_flush_all_i 
| wbuf_flush_i; + // }}} + + // Miss handler outputs + // {{{ + assign miss_mshr_check_set_o = + st0_req.addr_offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_MSHR_SET_WIDTH]; + assign miss_mshr_check_tag_o = + st1_req_nline[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH]; + + assign miss_mshr_alloc_nline_o = hpdcache_get_req_addr_nline(st2_req_addr_q), + miss_mshr_alloc_tid_o = st2_req_tid_q, + miss_mshr_alloc_sid_o = st2_req_sid_q, + miss_mshr_alloc_word_o = st2_req_word, + miss_mshr_alloc_need_rsp_o = st2_req_need_rsp_q, + miss_mshr_alloc_is_prefetch_o = st2_req_is_prefetch_q; + // }}} + + // Uncacheable request handler outputs + // {{{ + assign uc_lrsc_snoop_o = st1_req_valid_q & st1_req_is_store, + uc_lrsc_snoop_addr_o = st1_req_addr, + uc_lrsc_snoop_size_o = st1_req.size, + uc_req_addr_o = st1_req_addr, + uc_req_size_o = st1_req.size, + uc_req_data_o = st1_req.wdata, + uc_req_be_o = st1_req.be, + uc_req_uc_o = st1_req_is_uncacheable, + uc_req_sid_o = st1_req.sid, + uc_req_tid_o = st1_req.tid, + uc_req_need_rsp_o = st1_req.need_rsp, + uc_req_op_o.is_ld = st1_req_is_load, + uc_req_op_o.is_st = st1_req_is_store, + uc_req_op_o.is_amo_lr = st1_req_is_amo_lr, + uc_req_op_o.is_amo_sc = st1_req_is_amo_sc, + uc_req_op_o.is_amo_swap = st1_req_is_amo_swap, + uc_req_op_o.is_amo_add = st1_req_is_amo_add, + uc_req_op_o.is_amo_and = st1_req_is_amo_and, + uc_req_op_o.is_amo_or = st1_req_is_amo_or, + uc_req_op_o.is_amo_xor = st1_req_is_amo_xor, + uc_req_op_o.is_amo_max = st1_req_is_amo_max, + uc_req_op_o.is_amo_maxu = st1_req_is_amo_maxu, + uc_req_op_o.is_amo_min = st1_req_is_amo_min, + uc_req_op_o.is_amo_minu = st1_req_is_amo_minu; + // }}} + + // CMO request handler outputs + // {{{ + assign cmo_req_addr_o = st1_req_addr, + cmo_req_wdata_o = st1_req.wdata, + cmo_req_op_o.is_fence = st1_req_is_cmo_fence, + cmo_req_op_o.is_inval_by_nline = st1_req_is_cmo_inval & + is_cmo_inval_by_nline(st1_req.size), + cmo_req_op_o.is_inval_by_set = st1_req_is_cmo_inval & + 
is_cmo_inval_by_set(st1_req.size), + cmo_req_op_o.is_inval_all = st1_req_is_cmo_inval & + is_cmo_inval_all(st1_req.size); + // }}} + + // Control of the response to the core + // {{{ + assign core_rsp_valid_o = refill_core_rsp_valid_i | + (uc_core_rsp_valid_i & uc_core_rsp_ready_o) | + st1_rsp_valid, + core_rsp_o.rdata = (refill_core_rsp_valid_i ? refill_core_rsp_i.rdata : + (uc_core_rsp_valid_i ? uc_core_rsp_i.rdata : + st1_read_data)), + core_rsp_o.sid = (refill_core_rsp_valid_i ? refill_core_rsp_i.sid : + (uc_core_rsp_valid_i ? uc_core_rsp_i.sid : + st1_req.sid)), + core_rsp_o.tid = (refill_core_rsp_valid_i ? refill_core_rsp_i.tid : + (uc_core_rsp_valid_i ? uc_core_rsp_i.tid : + st1_req.tid)), + core_rsp_o.error = (refill_core_rsp_valid_i ? refill_core_rsp_i.error : + (uc_core_rsp_valid_i ? uc_core_rsp_i.error : + /* FIXME: the st1 response path always reports error = 0 */1'b0)), + core_rsp_o.aborted = st1_rsp_aborted; + // }}} + + // Assertions + // pragma translate_off + // {{{ + assert property (@(posedge clk_i) disable iff (!rst_ni) + $onehot0({core_req_ready_o, st0_rtab_pop_try_ready, refill_req_ready_o})) else + $error("ctrl: only one request can be served per cycle"); + // }}} + // pragma translate_on +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv new file mode 100755 index 0000000..13b4f58 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv @@ -0,0 +1,620 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. 
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Control Protocol Engine + * History : + */ +module hpdcache_ctrl_pe + // Ports + // {{{ +( + // Refill arbiter + // {{{ + input logic arb_st0_req_valid_i, + output logic arb_st0_req_ready_o, + input logic arb_refill_valid_i, + output logic arb_refill_ready_o, + // }}} + + // Pipeline stage 0 + // {{{ + input logic st0_req_is_uncacheable_i, + input logic st0_req_need_rsp_i, + input logic st0_req_is_load_i, + input logic st0_req_is_store_i, + input logic st0_req_is_amo_i, + input logic st0_req_is_cmo_fence_i, + input logic st0_req_is_cmo_inval_i, + input logic st0_req_is_cmo_prefetch_i, + output logic st0_req_mshr_check_o, + output logic st0_req_cachedir_read_o, + output logic st0_req_cachedata_read_o, + // }}} + + // Pipeline stage 1 + // {{{ + input logic st1_req_valid_i, + input logic st1_req_abort_i, + input logic st1_req_rtab_i, + input logic st1_req_is_uncacheable_i, + input logic st1_req_need_rsp_i, + input logic st1_req_is_load_i, + input logic st1_req_is_store_i, + input logic st1_req_is_amo_i, + input logic st1_req_is_cmo_inval_i, + input logic st1_req_is_cmo_fence_i, + input logic st1_req_is_cmo_prefetch_i, + output logic st1_req_valid_o, + output logic st1_rsp_valid_o, + output logic st1_rsp_aborted_o, + output logic st1_req_cachedir_updt_lru_o, + output logic st1_req_cachedata_write_o, + output logic st1_req_cachedata_write_enable_o, + // }}} + + // Pipeline stage 2 + // {{{ + input logic st2_req_valid_i, + input logic st2_req_is_prefetch_i, +
output logic st2_req_valid_o, + output logic st2_req_we_o, + output logic st2_req_is_prefetch_o, + output logic st2_req_mshr_alloc_o, + output logic st2_req_mshr_alloc_cs_o, + // }}} + + // Replay + // {{{ + input logic rtab_full_i, + input logic rtab_req_valid_i, + output logic rtab_sel_o, + output logic rtab_check_o, + input logic rtab_check_hit_i, + output logic st1_rtab_alloc_o, + output logic st1_rtab_alloc_and_link_o, + output logic st1_rtab_commit_o, + output logic st1_rtab_rback_o, + output logic st1_rtab_mshr_hit_o, + output logic st1_rtab_mshr_full_o, + output logic st1_rtab_mshr_ready_o, + output logic st1_rtab_wbuf_hit_o, + output logic st1_rtab_wbuf_not_ready_o, + // }}} + + // Cache directory + // {{{ + input logic cachedir_hit_i, + input logic cachedir_init_ready_i, + // }}} + + // Miss Status Holding Register (MSHR) + // {{{ + input logic mshr_alloc_ready_i, + input logic mshr_hit_i, + input logic mshr_full_i, + // }}} + + // Refill interface + // {{{ + input logic refill_busy_i, + input logic refill_core_rsp_valid_i, + // }}} + + // Write buffer + // {{{ + input logic wbuf_write_ready_i, + input logic wbuf_read_hit_i, + output logic wbuf_write_valid_o, + output logic wbuf_write_uncacheable_o, + output logic wbuf_read_flush_hit_o, + // }}} + + // Uncacheable request handler + // {{{ + input logic uc_busy_i, + output logic uc_req_valid_o, + output logic uc_core_rsp_ready_o, + // }}} + + // Cache Management Operation (CMO) + // {{{ + input logic cmo_busy_i, + output logic cmo_req_valid_o, + // }}} + + // Performance events + // {{{ + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o + // }}} +); + // }}} + + // Definition of internal signals + // {{{ + logic
st1_fence; + logic st1_rtab_alloc, st1_rtab_alloc_and_link; + // }}} + + // Global control signals + // {{{ + + // Determine if the new request is a "fence". Here, fence instructions are + // considered those that need to be executed in program order + // (irrespectively of addresses). This means that all memory operations + // arrived before the "fence" instruction need to be finished, and only + // then the "fence" instruction is executed. In the same manner, all + // instructions following the "fence" need to wait for the completion of the + // fence before being executed. + assign st1_fence = st1_req_is_uncacheable_i | + st1_req_is_cmo_fence_i | + st1_req_is_cmo_inval_i | + st1_req_is_amo_i; + // }}} + + // Arbitration of responses to the core + // {{{ + assign uc_core_rsp_ready_o = ~refill_core_rsp_valid_i; + // }}} + + // Arbiter between core or replay request. + // {{{ + // Take the replay request when: + // - The replay table is full. + // - The replay table has a ready request (request with all dependencies solved) + // - There is an outstanding CMO or uncached/AMO request + // + // IMPORTANT: When the replay table is full, the cache cannot accept new core + // requests because this can introduce a dead-lock : If the core request needs to + // be put on hold, as there is no place in the replay table, the pipeline needs to + // stall. If the pipeline is stalled, dependencies of on-hold requests cannot be + // solved, and the system is locked.
+ assign rtab_sel_o = rtab_full_i | + rtab_req_valid_i | + (st1_req_valid_i & st1_fence) | + cmo_busy_i | + uc_busy_i; + // }}} + + // Replay logic + // {{{ + // Replay table allocation + assign st1_rtab_alloc_o = st1_rtab_alloc & ~st1_req_rtab_i, + st1_rtab_alloc_and_link_o = st1_rtab_alloc_and_link, + st1_rtab_rback_o = st1_rtab_alloc & st1_req_rtab_i; + + // Performance event + assign evt_req_on_hold_o = st1_rtab_alloc | st1_rtab_alloc_and_link, + evt_rtab_rollback_o = st1_rtab_rback_o; + // }}} + + // Data-cache control lines + // {{{ + always_comb + begin : hpdcache_ctrl_comb + automatic logic nop, st1_nop, st2_nop; + + uc_req_valid_o = 1'b0; + + cmo_req_valid_o = 1'b0; + + wbuf_write_valid_o = 1'b0; + wbuf_read_flush_hit_o = 1'b0; + wbuf_write_uncacheable_o = 1'b0; // unused + + arb_st0_req_ready_o = 1'b0; + arb_refill_ready_o = 1'b0; + + st0_req_mshr_check_o = 1'b0; + st0_req_cachedir_read_o = 1'b0; + st0_req_cachedata_read_o = 1'b0; + + st1_req_valid_o = st1_req_valid_i; + st1_nop = 1'b0; + st1_req_cachedata_write_o = 1'b0; + st1_req_cachedata_write_enable_o = 1'b0; + st1_req_cachedir_updt_lru_o = 1'b0; + st1_rsp_valid_o = 1'b0; + st1_rsp_aborted_o = 1'b0; + + st2_req_valid_o = st2_req_valid_i; + st2_req_we_o = 1'b0; + st2_req_is_prefetch_o = 1'b0; + st2_req_mshr_alloc_cs_o = 1'b0; + st2_req_mshr_alloc_o = 1'b0; + st2_nop = 1'b0; + + nop = 1'b0; + + rtab_check_o = 1'b0; + st1_rtab_alloc = 1'b0; + st1_rtab_alloc_and_link = 1'b0; + st1_rtab_commit_o = 1'b0; + st1_rtab_mshr_hit_o = 1'b0; + st1_rtab_mshr_full_o = 1'b0; + st1_rtab_mshr_ready_o = 1'b0; + st1_rtab_wbuf_hit_o = 1'b0; + st1_rtab_wbuf_not_ready_o = 1'b0; + + evt_cache_write_miss_o = 1'b0; + evt_cache_read_miss_o = 1'b0; + evt_uncached_req_o = 1'b0; + evt_cmo_req_o = 1'b0; + evt_write_req_o = 1'b0; + evt_read_req_o = 1'b0; + evt_prefetch_req_o = 1'b0; + evt_stall_refill_o = 1'b0; + + // Wait for the cache to be initialized + // {{{ + if (!cachedir_init_ready_i) begin + // initialization of the cache
RAMs + end + // }}} + + // Refilling the cache + // {{{ + else if (refill_busy_i) begin + // miss handler has the control of the cache + evt_stall_refill_o = arb_st0_req_valid_i; + end + // }}} + + // Normal pipeline operation + // {{{ + else begin + // Stage 2 request pending + // {{{ + if (st2_req_valid_i) begin + st2_req_valid_o = 1'b0; + + // Allocate an entry in the MSHR + st2_req_mshr_alloc_cs_o = 1'b1; + st2_req_mshr_alloc_o = 1'b1; + + // Introduce a NOP in the next cycle to prevent a hazard on the MSHR + st2_nop = 1'b1; + + // Performance event + evt_cache_read_miss_o = ~st2_req_is_prefetch_i; + evt_read_req_o = ~st2_req_is_prefetch_i; + evt_prefetch_req_o = st2_req_is_prefetch_i; + end + // }}} + + // Stage 1 request pending + // {{{ + if (st1_req_valid_i) begin + // Check if the request in stage 1 has a conflict with one of the + // requests in the replay table. + rtab_check_o = ~st1_req_rtab_i & ~st1_fence; + + // Check if the current request is aborted. If so, respond to the + // core (when need_rsp is set) and set the aborted flag + if (st1_req_abort_i && !st1_req_rtab_i) begin + st1_rsp_valid_o = st1_req_need_rsp_i; + st1_rsp_aborted_o = 1'b1; + end + + // Allocate a new entry in the replay table in case of conflict with + // an on-hold request + else if (rtab_check_o && rtab_check_hit_i) begin + st1_rtab_alloc_and_link = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // CMO fence or invalidate + // {{{ + else if (st1_req_is_cmo_fence_i || st1_req_is_cmo_inval_i) begin + cmo_req_valid_o = 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_cmo_req_o = 1'b1; + end + // }}} + + // Uncacheable load, store or AMO request + // {{{ + else if (st1_req_is_uncacheable_i) begin + uc_req_valid_o = 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_uncached_req_o = 1'b1; + end + // }}} + + // Cacheable request + // {{{ + else begin + // AMO cacheable request (forwarded to the uncacheable request handler) + // {{{ + if (st1_req_is_amo_i) begin + uc_req_valid_o
= 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_uncached_req_o = 1'b1; + end + // }}} + + // Load cacheable request + // {{{ + if (|{st1_req_is_load_i, + st1_req_is_cmo_prefetch_i}) + begin + // Cache miss + // {{{ + if (!cachedir_hit_i) begin + // If there is a match in the write buffer, lets send the + // entry right away + wbuf_read_flush_hit_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + + // Pending miss on the same line + if (mshr_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_hit_o = 1'b1; + end + + // No available slot in the MSHR + else if (mshr_full_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_full_o = 1'b1; + end + + // Hit on an open entry of the write buffer: + // wait for the entry to be acknowledged + else if (wbuf_read_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_hit_o = 1'b1; + end + + // Miss Handler is not ready to send + else if (!mshr_alloc_ready_i) begin + // Put the request on hold if the MISS HANDLER is not + // ready to send a new miss request. This is to prevent + // a deadlock between the read request channel and the + // read response channel. + // + // The request channel may be stalled by targets if they + // are not able to send a response (responses have + // priority). Therefore, we need to put the request on + // hold to allow a possible refill read response to be + // accomplished.
+ st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_ready_o = 1'b1; + end + + // Forward the request to the next stage to allocate the + // entry in the MSHR and send the refill request + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + st2_req_valid_o = 1'b1; + st2_req_we_o = 1'b1; + st2_req_is_prefetch_o = st1_req_is_cmo_prefetch_i; + end + end + // }}} + + // Cache hit + // {{{ + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Add a NOP when replaying a request, and there is no available + // request from the replay table. + st1_nop = st1_req_rtab_i & ~rtab_sel_o; + + // Update the PLRU bit for the accessed set + st1_req_cachedir_updt_lru_o = st1_req_is_load_i; + + // Respond to the core (if needed) + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Performance event + evt_read_req_o = ~st1_req_is_cmo_prefetch_i; + evt_prefetch_req_o = st1_req_is_cmo_prefetch_i; + end + // }}} + end + // }}} + + // Store cacheable request + // {{{ + if (st1_req_is_store_i) begin + // Write in the write buffer if there is no pending miss in the same line. + // + // We assume here that the NoC that transports read and write transactions does + // not guarantee the order between transactions on those channels. + // Therefore, the cache must hold a write if there is a pending read on the + // same address. + wbuf_write_valid_o = ~mshr_hit_i; + + // Add a NOP in the pipeline when: + // - Structural hazard on the cache data if the st0 request is a load + // operation. + // - Replaying a request, the cache cannot accept a request from the + // core the next cycle. It can however accept a new request from the + // replay table + // + // IMPORTANT: we could remove the NOP in the first scenario if the + // controller checks for the hit of this write. However, this adds + // a DIR_RAM -> DATA_RAM timing path.
+ st1_nop = (arb_st0_req_valid_i & st0_req_is_load_i) | + (st1_req_rtab_i & ~rtab_sel_o); + + // Enable the data RAM in case of write. However, the actual write + // depends on the hit signal from the cache directory. + // + // IMPORTANT: this produces unnecessary power consumption in case of + // write misses, but removes timing paths between the cache directory + // RAM and the data RAM chip-select. + st1_req_cachedata_write_o = 1'b1; + + // Cache miss + if (!cachedir_hit_i) begin + // Pending miss on the same line + if (mshr_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_hit_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // No available entry in the write buffer (or conflict on pending entry) + else if (!wbuf_write_ready_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_not_ready_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Respond to the core (if needed) + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Performance event + evt_cache_write_miss_o = 1'b1; + evt_write_req_o = 1'b1; + end + end + + // Cache hit + else begin + // No available entry in the write buffer (or conflict on pending entry) + if (!wbuf_write_ready_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_not_ready_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // The store can be performed in the write buffer and in the cache + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Respond to the core + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Update the PLRU bit for the accessed set +
st1_req_cachedir_updt_lru_o = 1'b1; + + // Write in the data RAM + st1_req_cachedata_write_enable_o = 1'b1; + + // Performance event + evt_write_req_o = 1'b1; + end + end + end + // }}} + end + // }}} + end + // }}} + + // New request + // {{{ + nop = st1_nop | st2_nop; + + // The cache controller accepts a core request when: + // - The req-refill arbiter grants the request + // - No NOP is being inserted by stage 1 or stage 2 (nop is low) + arb_st0_req_ready_o = arb_st0_req_valid_i & ~nop; + + // The cache controller accepts a refill when: + // - The req-refill arbiter grants the refill + // - The pipeline is empty + arb_refill_ready_o = arb_refill_valid_i & ~(st1_req_valid_i | st2_req_valid_i); + + // Forward the request to stage 1 + // - There is a valid request in stage 0 + st1_req_valid_o = arb_st0_req_ready_o; + + // New cacheable stage 0 request granted + // {{{ + // IMPORTANT: here the RAM is enabled independently if the + // request needs to be put on-hold. + // This increases the power consumption in those cases, but + // removes the timing paths RAM-to-RAM between the cache + // directory and the data array.
+ if (arb_st0_req_valid_i && !st0_req_is_uncacheable_i) begin + st0_req_cachedata_read_o = + st0_req_is_load_i & + ~(st1_req_valid_i & st1_req_is_store_i & ~st1_req_is_uncacheable_i); + if (st0_req_is_load_i | + st0_req_is_cmo_prefetch_i | + st0_req_is_store_i | + st0_req_is_amo_i ) + begin + st0_req_mshr_check_o = 1'b1; + st0_req_cachedir_read_o = ~st0_req_is_amo_i; + end + end + // }}} + // }}} + end + // }}} end of normal pipeline operation + end + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv new file mode 100644 index 0000000..7c7ee65 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv @@ -0,0 +1,120 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Directory and Data Memory Arrays + * History : + */ +module hpdcache_memarray +import hpdcache_pkg::*; + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + + input hpdcache_dir_addr_t dir_addr_i, + input hpdcache_way_vector_t dir_cs_i, + input hpdcache_way_vector_t dir_we_i, + input hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_wentry_i, + output hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_rentry_o, + + input hpdcache_data_addr_t data_addr_i, + input hpdcache_data_enable_t data_cs_i, + input hpdcache_data_enable_t data_we_i, + input hpdcache_data_be_entry_t data_wbyteenable_i, + input hpdcache_data_entry_t data_wentry_i, + output hpdcache_data_entry_t data_rentry_o +); + // }}} + + // Memory arrays + // {{{ + generate + genvar x, y, dir_w; + + // Directory + // One SRAM instance per way; every way is addressed with the same dir_addr_i + for (dir_w = 0; dir_w < int'(HPDCACHE_WAYS); dir_w++) begin : dir_sram_gen + hpdcache_sram #( + .DATA_SIZE (HPDCACHE_DIR_RAM_WIDTH), + .ADDR_SIZE (HPDCACHE_DIR_RAM_ADDR_WIDTH) + ) dir_sram ( + .clk (clk_i), + .rst_n (rst_ni), + .cs (dir_cs_i[dir_w]), + .we (dir_we_i[dir_w]), + .addr (dir_addr_i), + .wdata (dir_wentry_i[dir_w]), + .rdata (dir_rentry_o[dir_w]) + ); + end + + // Data + // One SRAM instance per (y, x) cut; HPDCACHE_DATA_RAM_WBYTEENABLE selects byte-enable or bit-mask writes + for (y = 0; y < int'(HPDCACHE_DATA_RAM_Y_CUTS); y++) begin : data_sram_row_gen + for (x = 0; x < int'(HPDCACHE_DATA_RAM_X_CUTS); x++) begin : data_sram_col_gen + if (HPDCACHE_DATA_RAM_WBYTEENABLE) begin : data_sram_wbyteenable_gen + hpdcache_sram_wbyteenable #( + .DATA_SIZE (HPDCACHE_DATA_RAM_WIDTH), + .ADDR_SIZE (HPDCACHE_DATA_RAM_ADDR_WIDTH) + ) data_sram ( + .clk (clk_i), + .rst_n (rst_ni), + .cs (data_cs_i[y][x]), + .we (data_we_i[y][x]), + .addr (data_addr_i[y][x]), + .wdata (data_wentry_i[y][x]), + .wbyteenable (data_wbyteenable_i[y][x]), + .rdata (data_rentry_o[y][x]) + ); + end else begin : data_sram_wmask_gen + hpdcache_data_ram_data_t data_wmask; + + // Build the bit-level write mask: each byte-enable bit is replicated over its 8 data bits
+ always_comb + begin : data_wmask_comb + for (int w = 0; w < HPDCACHE_DATA_WAYS_PER_RAM_WORD; w++) begin + for (int b = 0; b < HPDCACHE_WORD_WIDTH/8; b++) begin + data_wmask[w][8*b +: 8] = {8{data_wbyteenable_i[y][x][w][b]}}; + end + end + end + + hpdcache_sram_wmask #( + .DATA_SIZE (HPDCACHE_DATA_RAM_WIDTH), + .ADDR_SIZE (HPDCACHE_DATA_RAM_ADDR_WIDTH) + ) data_sram ( + .clk (clk_i), + .rst_n (rst_ni), + .cs (data_cs_i[y][x]), + .we (data_we_i[y][x]), + .addr (data_addr_i[y][x]), + .wdata (data_wentry_i[y][x]), + .wmask (data_wmask), + .rdata (data_rentry_o[y][x]) + ); + end + end + end + endgenerate + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv new file mode 100644 index 0000000..c87cc3f --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv @@ -0,0 +1,656 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Directory and Data Memory RAMs Controller + * History : + */ +module hpdcache_memctrl +import hpdcache_pkg::*; + // Ports + // {{{ +( + // Global clock and reset signals + // {{{ + input logic clk_i, + input logic rst_ni, + // }}} + + // Global control signals + // {{{ + output logic ready_o, + // }}} + + // DIR array access interface + // {{{ + input logic dir_match_i, + input hpdcache_set_t dir_match_set_i, + input hpdcache_tag_t dir_match_tag_i, + input logic dir_update_lru_i, + output hpdcache_way_vector_t dir_hit_way_o, + + input logic dir_amo_match_i, + input hpdcache_set_t dir_amo_match_set_i, + input hpdcache_tag_t dir_amo_match_tag_i, + input logic dir_amo_update_plru_i, + output hpdcache_way_vector_t dir_amo_hit_way_o, + + input logic dir_refill_i, + input hpdcache_set_t dir_refill_set_i, + input hpdcache_dir_entry_t dir_refill_entry_i, + input logic dir_refill_updt_plru_i, + output hpdcache_way_vector_t dir_victim_way_o, + + input logic dir_cmo_check_i, + input hpdcache_set_t dir_cmo_check_set_i, + input hpdcache_tag_t dir_cmo_check_tag_i, + output hpdcache_way_vector_t dir_cmo_check_hit_way_o, + + input logic dir_cmo_inval_i, + input hpdcache_set_t dir_cmo_inval_set_i, + input hpdcache_way_vector_t dir_cmo_inval_way_i, + + // }}} + + // DATA array access interface + // {{{ + input logic data_req_read_i, + input hpdcache_set_t data_req_read_set_i, + input hpdcache_req_size_t data_req_read_size_i, + input hpdcache_word_t data_req_read_word_i, + output hpdcache_req_data_t data_req_read_data_o, + + input logic data_req_write_i, + input logic data_req_write_enable_i, + input hpdcache_set_t data_req_write_set_i, + input hpdcache_req_size_t data_req_write_size_i, + input hpdcache_word_t data_req_write_word_i, + input hpdcache_req_data_t data_req_write_data_i, + input hpdcache_req_be_t data_req_write_be_i, + + input logic data_amo_write_i, + input logic 
data_amo_write_enable_i, + input hpdcache_set_t data_amo_write_set_i, + input hpdcache_req_size_t data_amo_write_size_i, + input hpdcache_word_t data_amo_write_word_i, + input logic [63:0] data_amo_write_data_i, + input logic [7:0] data_amo_write_be_i, + + input logic data_refill_i, + input hpdcache_way_vector_t data_refill_way_i, + input hpdcache_set_t data_refill_set_i, + input hpdcache_word_t data_refill_word_i, + input hpdcache_refill_data_t data_refill_data_i + // }}} +); + // }}} + + // Definition of constants + // {{{ + localparam int unsigned HPDCACHE_ALL_CUTS = HPDCACHE_DATA_RAM_X_CUTS*HPDCACHE_DATA_RAM_Y_CUTS; + localparam int unsigned HPDCACHE_DATA_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS; + // }}} + + // Definition of functions + // {{{ + + // hpdcache_compute_data_ram_cs + // + // description: This function computes the chip-select signal for data + // RAMs depending on the request size and the word offset + function automatic hpdcache_data_row_enable_t hpdcache_compute_data_ram_cs( + input hpdcache_req_size_t size_i, + input hpdcache_word_t word_i); + + localparam hpdcache_uint32 off_width = + HPDCACHE_ACCESS_WORDS > 1 ? $clog2(HPDCACHE_ACCESS_WORDS) : 1; + + hpdcache_data_row_enable_t ret; + hpdcache_uint32 off; + + case (size_i) + 3'h0, + 3'h1, + 3'h2, + 3'h3: ret = hpdcache_data_row_enable_t'({ 64/HPDCACHE_WORD_WIDTH{1'b1}}); + 3'h4: ret = hpdcache_data_row_enable_t'({128/HPDCACHE_WORD_WIDTH{1'b1}}); + 3'h5: ret = hpdcache_data_row_enable_t'({256/HPDCACHE_WORD_WIDTH{1'b1}}); + default: ret = hpdcache_data_row_enable_t'({512/HPDCACHE_WORD_WIDTH{1'b1}}); + endcase + + off = HPDCACHE_ACCESS_WORDS > 1 ? 
hpdcache_uint'(word_i[0 +: off_width]) : 0; + return hpdcache_data_row_enable_t'(ret << off); + endfunction + + function automatic hpdcache_data_ram_row_idx_t hpdcache_way_to_data_ram_row( + input hpdcache_way_vector_t way); + for (hpdcache_uint i = 0; i < HPDCACHE_WAYS; i++) begin + if (way[i]) return hpdcache_data_ram_row_idx_t'(i / HPDCACHE_DATA_WAYS_PER_RAM_WORD); + end + return 0; + endfunction + + function automatic hpdcache_data_ram_way_idx_t hpdcache_way_to_data_ram_word( + input hpdcache_way_vector_t way); + for (hpdcache_uint i = 0; i < HPDCACHE_WAYS; i++) begin + if (way[i]) return hpdcache_data_ram_way_idx_t'(i % HPDCACHE_DATA_WAYS_PER_RAM_WORD); + end + return 0; + endfunction + + function automatic hpdcache_data_ram_addr_t hpdcache_set_to_data_ram_addr( + input hpdcache_set_t set, + input hpdcache_word_t word); + hpdcache_uint ret; + + ret = (hpdcache_uint'(set)*(HPDCACHE_CL_WORDS / HPDCACHE_ACCESS_WORDS)) + + (hpdcache_uint'(word) / HPDCACHE_ACCESS_WORDS); + + return hpdcache_data_ram_addr_t'(ret); + endfunction + // }}} + + // Definition of internal signals and registers + // {{{ + genvar gen_i, gen_j, gen_k; + + // Directory initialization signals and registers + logic init_q, init_d; + hpdcache_dir_addr_t init_set_q, init_set_d; + hpdcache_way_vector_t init_dir_cs; + hpdcache_way_vector_t init_dir_we; + hpdcache_dir_entry_t init_dir_wentry; + + // Directory valid bit vector (one bit per set and way) + hpdcache_way_vector_t [HPDCACHE_SETS-1:0] dir_valid_q, dir_valid_d; + hpdcache_set_t dir_req_set_q, dir_req_set_d; + hpdcache_dir_addr_t dir_addr; + hpdcache_way_vector_t dir_cs; + hpdcache_way_vector_t dir_we; + hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_wentry; + hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_rentry; + + hpdcache_data_addr_t data_addr; + hpdcache_data_enable_t data_cs; + hpdcache_data_enable_t data_we; + hpdcache_data_be_entry_t data_wbyteenable; + hpdcache_data_entry_t data_wentry; + hpdcache_data_entry_t data_rentry; + + logic 
data_write; + logic data_write_enable; + hpdcache_set_t data_write_set; + hpdcache_req_size_t data_write_size; + hpdcache_word_t data_write_word; + hpdcache_refill_data_t data_write_data; + hpdcache_refill_be_t data_write_be; + + hpdcache_refill_data_t data_req_write_data; + hpdcache_refill_be_t data_req_write_be; + + hpdcache_refill_data_t data_amo_write_data; + hpdcache_refill_be_t data_amo_write_be; + + hpdcache_way_vector_t data_way; + + hpdcache_data_ram_row_idx_t data_ram_row; + hpdcache_data_ram_way_idx_t data_ram_word; + + // }}} + + // Init FSM + // {{{ + always_comb + begin : init_comb + init_dir_wentry.tag = '0; + init_dir_wentry.reserved = '0; + init_dir_cs = '0; + init_dir_we = '0; + init_d = init_q; + init_set_d = init_set_q; + + case (init_q) + 1'b0: begin + init_d = (hpdcache_uint'(init_set_q) == (HPDCACHE_SETS - 1)); + init_set_d = init_set_q + 1; + init_dir_cs = '1; + init_dir_we = '1; + end + + 1'b1: begin + init_d = 1'b1; + init_set_d = init_set_q; + end + endcase + end + + assign ready_o = init_q; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : init_ff + if (!rst_ni) begin + init_q <= 1'b0; + init_set_q <= 0; + dir_valid_q <= '0; + end else begin + init_q <= init_d; + init_set_q <= init_set_d; + dir_valid_q <= dir_valid_d; + end + end + // }}} + + // Memory arrays + // {{{ + hpdcache_memarray hpdcache_memarray_i( + .clk_i, + .rst_ni, + + .dir_addr_i (dir_addr), + .dir_cs_i (dir_cs), + .dir_we_i (dir_we), + .dir_wentry_i (dir_wentry), + .dir_rentry_o (dir_rentry), + + .data_addr_i (data_addr), + .data_cs_i (data_cs), + .data_we_i (data_we), + .data_wbyteenable_i (data_wbyteenable), + .data_wentry_i (data_wentry), + .data_rentry_o (data_rentry) + ); + // }}} + + // Directory RAM request mux + // {{{ + always_comb + begin : dir_ctrl_comb + case (1'b1) + // Cache directory initialization + ~init_q: begin + dir_addr = init_set_q; + dir_cs = init_dir_cs; + dir_we = init_dir_we; + dir_wentry = {HPDCACHE_WAYS{init_dir_wentry}}; + end + + // 
Cache directory match tag -> hit + dir_match_i: begin + dir_addr = dir_match_set_i; + dir_cs = '1; + dir_we = '0; + dir_wentry = '0; + end + + // Cache directory AMO match tag -> hit + dir_amo_match_i: begin + dir_addr = dir_amo_match_set_i; + dir_cs = '1; + dir_we = '0; + dir_wentry = '0; + end + + // Cache directory update + dir_refill_i: begin + dir_addr = dir_refill_set_i; + dir_cs = dir_victim_way_o; + dir_we = dir_victim_way_o; + dir_wentry = {HPDCACHE_WAYS{dir_refill_entry_i}}; + end + + // Cache directory CMO match tag + dir_cmo_check_i: begin + dir_addr = dir_cmo_check_set_i; + dir_cs = '1; + dir_we = '0; + dir_wentry = '0; + end + + // Do nothing + default: begin + dir_addr = '0; + dir_cs = '0; + dir_we = '0; + dir_wentry = '0; + end + endcase + end + // }}} + + // Directory valid logic + // {{{ + always_comb + begin : dir_valid_comb + dir_valid_d = dir_valid_q; + + unique case (1'b1) + // Refill the cache after a miss + dir_refill_i: begin + dir_valid_d[dir_refill_set_i] = dir_valid_q[dir_refill_set_i] | dir_victim_way_o; + end + // CMO invalidate a set + dir_cmo_inval_i: begin + dir_valid_d[dir_cmo_inval_set_i] = dir_valid_q[dir_cmo_inval_set_i] & ~dir_cmo_inval_way_i; + end + default: begin + // do nothing + end + endcase + end + // }}} + + // Directory hit logic + // {{{ + assign dir_req_set_d = dir_match_i ? dir_match_set_i : + dir_amo_match_i ? dir_amo_match_set_i : + dir_cmo_check_i ? 
dir_cmo_check_set_i : + dir_req_set_q ; + + generate + hpdcache_way_vector_t req_hit; + hpdcache_way_vector_t amo_hit; + hpdcache_way_vector_t cmo_hit; + + for (gen_i = 0; gen_i < int'(HPDCACHE_WAYS); gen_i++) + begin : dir_match_tag_gen + assign req_hit[gen_i] = (dir_rentry[gen_i].tag == dir_match_tag_i), + amo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_amo_match_tag_i), + cmo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_cmo_check_tag_i); + + assign dir_hit_way_o [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & req_hit[gen_i], + dir_amo_hit_way_o [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & amo_hit[gen_i], + dir_cmo_check_hit_way_o[gen_i] = dir_valid_q[dir_req_set_q][gen_i] & cmo_hit[gen_i]; + end + endgenerate + // }}} + + // Directory victim select logic + // {{{ + logic plru_updt; + hpdcache_way_vector_t plru_updt_way; + + assign plru_updt = dir_update_lru_i | dir_amo_update_plru_i, + plru_updt_way = dir_update_lru_i ? dir_hit_way_o : dir_amo_hit_way_o; + + hpdcache_plru #( + .SETS (HPDCACHE_SETS), + .WAYS (HPDCACHE_WAYS) + ) plru_i ( + .clk_i, + .rst_ni, + + .updt_i (plru_updt), + .updt_set_i (dir_req_set_q), + .updt_way_i (plru_updt_way), + + .repl_i (dir_refill_i), + .repl_set_i (dir_refill_set_i), + .repl_dir_valid_i (dir_valid_q[dir_refill_set_i]), + .repl_updt_plru_i (dir_refill_updt_plru_i), + + .victim_way_o (dir_victim_way_o) + ); + // }}} + + // Data RAM request multiplexor + // {{{ + + // Upsize the request interface to match the maximum access width of the data RAM + generate + if (HPDCACHE_DATA_REQ_RATIO > 1) begin : upsize_data_req_write_gen + // demux request DATA + assign data_req_write_data = {HPDCACHE_DATA_REQ_RATIO{data_req_write_data_i}}; + + // demux request BE + hpdcache_demux #( + .NOUTPUT (HPDCACHE_DATA_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH/8), + .ONE_HOT_SEL (1'b0) + ) data_req_write_be_demux_i ( + .data_i (data_req_write_be_i), + .sel_i (data_req_write_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +: + 
$clog2(HPDCACHE_DATA_REQ_RATIO)]), + .data_o (data_req_write_be) + ); + end else begin + assign data_req_write_data = data_req_write_data_i, + data_req_write_be = data_req_write_be_i; + end + endgenerate + + // Upsize the AMO data interface to match the maximum access width of the data RAM + generate + localparam hpdcache_uint AMO_DATA_RATIO = HPDCACHE_DATA_RAM_ACCESS_WIDTH/64; + localparam hpdcache_uint AMO_DATA_INDEX_WIDTH = $clog2(AMO_DATA_RATIO); + + if (AMO_DATA_RATIO > 1) begin + assign data_amo_write_data = {AMO_DATA_RATIO{data_amo_write_data_i}}; + + hpdcache_demux #( + .NOUTPUT (AMO_DATA_RATIO), + .DATA_WIDTH (8), + .ONE_HOT_SEL (1'b0) + ) amo_be_demux_i ( + .data_i (data_amo_write_be_i), + .sel_i (data_amo_write_word_i[0 +: AMO_DATA_INDEX_WIDTH]), + .data_o (data_amo_write_be) + ); + end else begin + assign data_amo_write_data = data_amo_write_data_i, + data_amo_write_be = data_amo_write_be_i; + end + endgenerate + + // Multiplex between data write requests + always_comb + begin : data_write_comb + case (1'b1) + data_refill_i: begin + data_write = 1'b1; + data_write_enable = 1'b1; + data_write_set = data_refill_set_i; + data_write_size = hpdcache_req_size_t'($clog2(HPDCACHE_DATA_RAM_ACCESS_WIDTH/8)); + data_write_word = data_refill_word_i; + data_write_data = data_refill_data_i; + data_write_be = '1; + end + + data_req_write_i: begin + data_write = 1'b1; + data_write_enable = data_req_write_enable_i; + data_write_set = data_req_write_set_i; + data_write_size = data_req_write_size_i; + data_write_word = data_req_write_word_i; + data_write_data = data_req_write_data; + data_write_be = data_req_write_be; + end + + data_amo_write_i: begin + data_write = 1'b1; + data_write_enable = data_amo_write_enable_i; + data_write_set = data_amo_write_set_i; + data_write_size = data_amo_write_size_i; + data_write_word = data_amo_write_word_i; + data_write_data = data_amo_write_data; + data_write_be = data_amo_write_be; + end + + default: begin + data_write = 1'b0; + 
data_write_enable = 1'b0; + data_write_set = '0; + data_write_size = '0; + data_write_word = '0; + data_write_data = '0; + data_write_be = '0; + end + endcase + end + + // Multiplex between read and write access on the data RAM + assign data_way = data_refill_i ? data_refill_way_i : + data_amo_write_i ? dir_amo_hit_way_o : + dir_hit_way_o; + + // Decode way index + assign data_ram_word = hpdcache_way_to_data_ram_word(data_way), + data_ram_row = hpdcache_way_to_data_ram_row(data_way); + + always_comb + begin : data_ctrl_comb + case (1'b1) + // Select data read inputs + data_req_read_i: begin + data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_req_read_set_i, + data_req_read_word_i)}}; + + data_we = '0; + data_wbyteenable = '0; + data_wentry = '0; + for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin + data_cs[i] = hpdcache_compute_data_ram_cs(data_req_read_size_i, + data_req_read_word_i); + end + end + + // Select data write inputs + data_write: begin + data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_write_set, + data_write_word)}}; + + for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin + for (int unsigned j = 0; j < HPDCACHE_DATA_RAM_X_CUTS; j++) begin + data_wentry[i][j] = {HPDCACHE_DATA_WAYS_PER_RAM_WORD{data_write_data[j]}}; + end + end + + for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin + data_cs[i] = hpdcache_compute_data_ram_cs(data_write_size, data_write_word); + + if (i == hpdcache_uint'(data_ram_row)) begin + data_we[i] = data_write_enable ? data_cs[i] : '0; + end else begin + data_we[i] = '0; + end + + // Build the write mask + for (int unsigned j = 0; j < HPDCACHE_ACCESS_WORDS; j++) begin + for (int unsigned k = 0; k < HPDCACHE_DATA_WAYS_PER_RAM_WORD; k++) begin + data_wbyteenable[i][j][k] = (k == hpdcache_uint'(data_ram_word)) ? 
+ data_write_be[j] : '0; + end + end + end + end + + // Do nothing + default: begin + data_addr = '0; + data_cs = '0; + data_we = '0; + data_wbyteenable = '0; + data_wentry = '0; + end + endcase + end + // }}} + + // Data RAM read data multiplexor + // {{{ + generate + hpdcache_req_data_t [HPDCACHE_DATA_REQ_RATIO-1:0][HPDCACHE_WAYS-1:0] data_read_words; + hpdcache_req_data_t [HPDCACHE_WAYS-1:0] data_read_req_word; + + // Organize the read data by words (all ways for the same word are contiguous) + for (gen_i = 0; gen_i < int'(HPDCACHE_DATA_REQ_RATIO); gen_i++) begin + for (gen_j = 0; gen_j < int'(HPDCACHE_WAYS); gen_j++) begin + for (gen_k = 0; gen_k < int'(HPDCACHE_REQ_WORDS); gen_k++) begin + assign data_read_words[gen_i][gen_j][gen_k] = + data_rentry[(gen_j / HPDCACHE_DATA_WAYS_PER_RAM_WORD)] + [(gen_i * HPDCACHE_REQ_WORDS ) + gen_k] + [(gen_j % HPDCACHE_DATA_WAYS_PER_RAM_WORD)]; + end + end + end + + // Mux the data according to the access word + if (HPDCACHE_DATA_REQ_RATIO > 1) begin : req_width_lt_ram_width + typedef logic [$clog2(HPDCACHE_DATA_REQ_RATIO)-1:0] data_req_word_t; + data_req_word_t data_read_req_word_index_q; + + hpdcache_mux #( + .NINPUT (HPDCACHE_DATA_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH*HPDCACHE_WAYS) + ) data_read_req_word_mux_i( + .data_i (data_read_words), + .sel_i (data_read_req_word_index_q), + .data_o (data_read_req_word) + ); + + always_ff @(posedge clk_i) + begin : data_req_read_word_ff + data_read_req_word_index_q <= + data_req_read_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +: + $clog2(HPDCACHE_DATA_REQ_RATIO)]; + end + end + + // Request data interface width is equal to the data RAM width + else begin : req_width_eq_ram_width + assign data_read_req_word = data_read_words; + end + + // Mux the data according to the hit way + hpdcache_mux #( + .NINPUT (HPDCACHE_WAYS), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH), + .ONE_HOT_SEL (1'b1) + ) data_read_req_word_way_mux_i( + .data_i (data_read_req_word), + .sel_i (dir_hit_way_o), + 
.data_o (data_req_read_data_o) + ); + endgenerate + + + // Delay the accessed set for checking the tag from the directory in the + // next cycle (hit logic) + always_ff @(posedge clk_i) + begin : req_read_ff + if (dir_match_i || dir_amo_match_i || dir_cmo_check_i) begin + dir_req_set_q <= dir_req_set_d; + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + concurrent_dir_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + $onehot0({dir_match_i, dir_amo_match_i, dir_cmo_check_i, dir_refill_i})) else + $error("hpdcache_memctrl: more than one process is accessing the cache directory"); + + concurrent_data_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + $onehot0({data_req_read_i, data_req_write_i, data_amo_write_i, data_refill_i})) else + $error("hpdcache_memctrl: more than one process is accessing the cache data"); + // pragma translate_on + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv new file mode 100644 index 0000000..97ecf46 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv @@ -0,0 +1,659 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Miss Handler (sends refill read requests to memory, writes refill data/directory, answers the core)
+ *  History       :
+ */
+module hpdcache_miss_handler
+//  {{{
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter int HPDcacheMemIdWidth = 8,
+    parameter int HPDcacheMemDataWidth = 512,
+    parameter type hpdcache_mem_req_t = logic,
+    parameter type hpdcache_mem_resp_r_t = logic,
+    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
+)
+//  }}}
+//  Ports
+//  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    //      Global control signals
+    //      {{{
+    output logic                  mshr_empty_o,
+    output logic                  mshr_full_o,
+    //      }}}
+
+    //      Configuration signals
+    //      {{{
+    input  logic                  cfg_prefetch_updt_plru_i,
+    //      }}}
+
+    //      CHECK interface
+    //      {{{
+    input  logic                  mshr_check_i,
+    input  mshr_set_t             mshr_check_set_i,
+    input  mshr_tag_t             mshr_check_tag_i,
+    output logic                  mshr_check_hit_o,
+    //      }}}
+
+    //      MISS interface
+    //      {{{
+    //          MISS request interface
+    output logic                  mshr_alloc_ready_o,
+    input  logic                  mshr_alloc_i,
+    input  logic                  mshr_alloc_cs_i,
+    input  hpdcache_nline_t       mshr_alloc_nline_i,
+    output logic                  mshr_alloc_full_o,
+    input  hpdcache_req_tid_t     mshr_alloc_tid_i,
+    input  hpdcache_req_sid_t     mshr_alloc_sid_i,
+    input  hpdcache_word_t        mshr_alloc_word_i,
+    input  logic                  mshr_alloc_need_rsp_i,
+    input  logic                  mshr_alloc_is_prefetch_i,
+
+    //          REFILL MISS interface
+    input  logic                  refill_req_ready_i,
+    output logic                  refill_req_valid_o,
+    output logic                  refill_busy_o,
+    output logic                  refill_updt_plru_o,
+    output hpdcache_set_t         refill_set_o,
+    output hpdcache_dir_entry_t   refill_dir_entry_o,
+    input  hpdcache_way_vector_t  refill_victim_way_i,
+    output logic                  refill_write_dir_o,
+    output logic                  refill_write_data_o,
+    output hpdcache_way_vector_t  refill_victim_way_o,
+    output hpdcache_refill_data_t refill_data_o,
+    output hpdcache_word_t        refill_word_o,
+    output hpdcache_nline_t       refill_nline_o,
+    output logic                  refill_updt_rtab_o,
+
+    //          REFILL core response interface
+    output logic                  refill_core_rsp_valid_o,
+    output hpdcache_rsp_t         refill_core_rsp_o,
+    //      }}}
+
+    //      MEMORY interface
+    //      {{{
+    input  logic                  mem_req_ready_i,
+    output logic                  mem_req_valid_o,
+    output hpdcache_mem_req_t     mem_req_o,
+
+    output logic                  mem_resp_ready_o,
+    input  logic                  mem_resp_valid_i,
+    input  hpdcache_mem_resp_r_t  mem_resp_i
+    //      }}}
+);
+//  }}}
+
+    //  Declaration of constants and types
+    //  {{{
+    localparam int unsigned REFILL_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS;
+
+    typedef enum logic {
+        MISS_REQ_IDLE = 1'b0,  // ready to accept an MSHR allocation
+        MISS_REQ_SEND = 1'b1   // memory request held valid until mem_req_ready_i
+    } miss_req_fsm_e;
+
+    typedef enum {
+        REFILL_IDLE,       // wait for a response in the metadata FIFO and for the refill grant
+        REFILL_WRITE,      // write the refill data chunks into the cache
+        REFILL_WRITE_DIR   // write the directory one cycle after a single-chunk refill
+    } refill_fsm_e;
+
+    typedef struct packed {
+        hpdcache_mem_error_e r_error;
+        hpdcache_mem_id_t    r_id;
+    } mem_resp_metadata_t;
+
+    function automatic mshr_set_t get_ack_mshr_set(hpdcache_mem_id_t id);
+        return id[0 +: HPDCACHE_MSHR_SET_WIDTH];  // low-order ID bits carry the MSHR set
+    endfunction
+
+    function automatic mshr_way_t get_ack_mshr_way(hpdcache_mem_id_t id);
+        return id[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_WAY_WIDTH];  // MSHR way sits just above the set
+    endfunction
+    //  }}}
+
+    //  Declaration of internal signals and registers
+    //  {{{
+    miss_req_fsm_e         miss_req_fsm_q, miss_req_fsm_d;
+    mshr_way_t             mshr_alloc_way_q, mshr_alloc_way_d;
+    mshr_set_t             mshr_alloc_set_q, mshr_alloc_set_d;
+    mshr_tag_t             mshr_alloc_tag_q, mshr_alloc_tag_d;
+
+    refill_fsm_e           refill_fsm_q, refill_fsm_d;
+    hpdcache_set_t         refill_set_q;
+    hpdcache_tag_t         refill_tag_q;
+    hpdcache_way_vector_t  refill_way_q;
+    hpdcache_req_sid_t     refill_sid_q;
+    hpdcache_req_tid_t     refill_tid_q;
+    hpdcache_word_t        refill_cnt_q, refill_cnt_d;
+    logic                  refill_need_rsp_q;
+    logic                  refill_is_prefetch_q;
+    hpdcache_word_t        refill_core_rsp_word_q;
+    logic                  refill_way_bypass;
+
+    mem_resp_metadata_t    refill_fifo_resp_meta_wdata, refill_fifo_resp_meta_rdata;
+    logic                  refill_fifo_resp_meta_w, refill_fifo_resp_meta_wok;
+    logic                  refill_fifo_resp_meta_r, refill_fifo_resp_meta_rok;
+
+    logic                  refill_fifo_resp_data_w, refill_fifo_resp_data_wok;
+    hpdcache_refill_data_t refill_fifo_resp_data_rdata;
+    logic                  refill_fifo_resp_data_r;
+
+    logic                  refill_core_rsp_valid;
+    hpdcache_req_data_t    refill_core_rsp_rdata;
+    hpdcache_req_sid_t     refill_core_rsp_sid;
+    hpdcache_req_tid_t     refill_core_rsp_tid;
+    logic                  refill_core_rsp_error;
+    hpdcache_word_t        refill_core_rsp_word;
+    hpdcache_rsp_t         refill_core_rsp;
+
+    logic                  refill_is_error;
+
+    logic                  mshr_alloc;
+    logic                  mshr_alloc_cs;
+    logic                  mshr_ack;
+    logic                  mshr_ack_cs;
+    mshr_set_t             mshr_ack_set;
+    mshr_way_t             mshr_ack_way;
+    hpdcache_nline_t       mshr_ack_nline;
+    hpdcache_req_sid_t     mshr_ack_src_id;
+    hpdcache_req_tid_t     mshr_ack_req_id;
+    hpdcache_word_t        mshr_ack_word;
+    logic                  mshr_ack_need_rsp;
+    logic                  mshr_ack_is_prefetch;
+    logic                  mshr_empty;
+    //  }}}
+
+    //  Miss Request FSM: accept an MSHR allocation in IDLE, then hold the memory request in SEND
+    //  {{{
+    always_comb
+    begin : miss_req_fsm_comb
+        mshr_alloc_ready_o = 1'b0;
+        mshr_alloc         = 1'b0;
+        mshr_alloc_cs      = 1'b0;
+        mem_req_valid_o    = 1'b0;
+
+        miss_req_fsm_d     = miss_req_fsm_q;
+
+        case (miss_req_fsm_q)
+            MISS_REQ_IDLE: begin
+                mshr_alloc_ready_o = 1'b1;
+                mshr_alloc         = mshr_alloc_i;
+                mshr_alloc_cs      = mshr_alloc_cs_i;
+                if (mshr_alloc_i) begin
+                    miss_req_fsm_d = MISS_REQ_SEND;
+                end else begin
+                    miss_req_fsm_d = MISS_REQ_IDLE;
+                end
+            end
+            MISS_REQ_SEND: begin
+                mem_req_valid_o = 1'b1;
+                if (mem_req_ready_i) begin
+                    miss_req_fsm_d = MISS_REQ_IDLE;
+                end else begin
+                    miss_req_fsm_d = MISS_REQ_SEND;
+                end
+            end
+        endcase
+    end
+
+    localparam hpdcache_uint REFILL_REQ_SIZE = $clog2(HPDcacheMemDataWidth/8);
+    localparam hpdcache_uint REFILL_REQ_LEN  = HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth;
+
+    assign mem_req_o.mem_req_addr      = {mshr_alloc_tag_q, mshr_alloc_set_q, {HPDCACHE_OFFSET_WIDTH{1'b0}} },
+           mem_req_o.mem_req_len       = hpdcache_mem_len_t'(REFILL_REQ_LEN-1),
+           mem_req_o.mem_req_size      = hpdcache_mem_size_t'(REFILL_REQ_SIZE),
+           mem_req_o.mem_req_id        = hpdcache_mem_id_t'({mshr_alloc_way_q, mshr_alloc_set_q}),  // decoded by get_ack_mshr_set/way()
+           mem_req_o.mem_req_command   = HPDCACHE_MEM_READ,
+           mem_req_o.mem_req_atomic    = HPDCACHE_MEM_ATOMIC_ADD,  // NOTE(review): presumably ignored for READ commands - confirm
+           mem_req_o.mem_req_cacheable = 1'b1;
+
+    always_ff @(posedge clk_i)
+    begin : miss_req_fsm_internal_ff
+        if (mshr_alloc) begin  // capture the MSHR slot used to build the memory request
+            mshr_alloc_way_q <= mshr_alloc_way_d;
+            mshr_alloc_set_q <= mshr_alloc_set_d;
+            mshr_alloc_tag_q <= mshr_alloc_tag_d;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : miss_req_fsm_ff
+        if (!rst_ni) begin
+            miss_req_fsm_q <= MISS_REQ_IDLE;
+        end else begin
+            miss_req_fsm_q <= miss_req_fsm_d;
+        end
+    end
+    //  }}}
+
+    //  Refill FSM
+    //  {{{
+
+    //  ask the refill arbiter for permission when idle and a refill response is pending
+    assign refill_req_valid_o = refill_fsm_q == REFILL_IDLE ? refill_fifo_resp_meta_rok : 1'b0;
+
+    //  forward the victim way directly from the victim selection logic or
+    //  from the internal register
+    assign refill_victim_way_o = refill_way_bypass ? refill_victim_way_i : refill_way_q;
+
+    always_comb
+    begin : miss_resp_fsm_comb
+        automatic hpdcache_uint REFILL_LAST_CHUNK_WORD;
+        REFILL_LAST_CHUNK_WORD = HPDCACHE_CL_WORDS - HPDCACHE_ACCESS_WORDS;  // word index of the last chunk
+
+        refill_updt_plru_o      = 1'b0;
+        refill_set_o            = '0;
+        refill_write_dir_o      = 1'b0;
+        refill_write_data_o     = 1'b0;
+        refill_updt_rtab_o      = 1'b0;
+        refill_cnt_d            = refill_cnt_q;
+        refill_way_bypass       = 1'b0;
+
+        refill_core_rsp_valid   = 1'b0;
+        refill_core_rsp_sid     = '0;
+        refill_core_rsp_tid     = '0;
+        refill_core_rsp_error   = 1'b0;
+        refill_core_rsp_word    = 0;
+
+        refill_fifo_resp_meta_r = 1'b0;
+        refill_fifo_resp_data_r = 1'b0;
+
+        mshr_ack_cs             = 1'b0;
+        mshr_ack                = 1'b0;
+
+        refill_fsm_d            = refill_fsm_q;
+
+        case (refill_fsm_q)
+            //  Wait for refill responses
+            //  {{{
+            REFILL_IDLE: begin
+                if (refill_fifo_resp_meta_rok) begin
+                    //  anticipate the activation of the MSHR independently of the grant signal from
+                    //  the refill arbiter. This is to avoid the introduction of unnecessary timing
+                    //  paths (however there could be a minor increase of the power
+                    //  consumption).
+                    mshr_ack_cs = 1'b1;
+
+                    //  if the permission is granted, start refilling
+                    if (refill_req_ready_i) begin
+                        refill_fsm_d = REFILL_WRITE;
+
+                        //  read the MSHR and reset the valid bit for the
+                        //  corresponding entry
+                        mshr_ack = 1'b1;
+
+                        //  initialize the counter for refill words
+                        refill_cnt_d = 0;
+                    end
+                end
+            end
+            //  }}}
+
+            //  Write refill data into the cache
+            //  {{{
+            REFILL_WRITE: begin
+                automatic logic is_prefetch;
+
+                //  Respond to the core (when needed); on the first chunk use the MSHR outputs directly
+                if (refill_cnt_q == 0) begin
+                    automatic hpdcache_uint _core_rsp_word;
+                    _core_rsp_word = hpdcache_uint'(mshr_ack_word)/HPDCACHE_ACCESS_WORDS;
+
+                    if (mshr_ack_need_rsp) begin
+                        refill_core_rsp_valid = (hpdcache_uint'(_core_rsp_word) == 0);
+                    end
+
+                    refill_core_rsp_sid   = mshr_ack_src_id;
+                    refill_core_rsp_tid   = mshr_ack_req_id;
+                    refill_core_rsp_error = refill_is_error;
+                    refill_core_rsp_word  = hpdcache_word_t'(
+                        hpdcache_uint'(mshr_ack_word)/HPDCACHE_REQ_WORDS);
+                end else begin
+                    automatic hpdcache_uint _core_rsp_word;
+                    _core_rsp_word = hpdcache_uint'(refill_core_rsp_word_q)/
+                                     HPDCACHE_ACCESS_WORDS;
+
+                    if (refill_need_rsp_q) begin
+                        automatic hpdcache_uint _refill_cnt;
+                        _refill_cnt = hpdcache_uint'(refill_cnt_q)/HPDCACHE_ACCESS_WORDS;
+                        refill_core_rsp_valid = (_core_rsp_word == _refill_cnt);
+                    end
+
+                    refill_core_rsp_sid   = refill_sid_q;
+                    refill_core_rsp_tid   = refill_tid_q;
+                    refill_core_rsp_error = refill_is_error;
+                    refill_core_rsp_word  = hpdcache_word_t'(
+                        hpdcache_uint'(refill_core_rsp_word_q)/HPDCACHE_REQ_WORDS);
+                end
+
+                //  Write the data into the cache data array
+                if (refill_cnt_q == 0) begin
+                    refill_set_o = mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH];
+                    refill_way_bypass = 1'b1;
+                    is_prefetch = mshr_ack_is_prefetch;
+                end else begin
+                    refill_set_o = refill_set_q;
+                    refill_way_bypass = 1'b0;
+                    is_prefetch = refill_is_prefetch_q;
+                end
+                refill_write_data_o = ~refill_is_error;
+
+                //  Consume chunk of data from the FIFO buffer in the memory interface
+                refill_fifo_resp_data_r = 1'b1;
+
+                //  Advance the word counter; the directory is updated on the last chunk of data
+                refill_cnt_d = refill_cnt_q + hpdcache_word_t'(HPDCACHE_ACCESS_WORDS);
+
+                if (hpdcache_uint'(refill_cnt_q) == REFILL_LAST_CHUNK_WORD) begin
+                    if (REFILL_LAST_CHUNK_WORD == 0) begin
+                        //  Special case: if the cache-line data can be written in a single cycle,
+                        //  wait an additional cycle to write the directory. This prevents
+                        //  a RAM-to-RAM timing path between the MSHR and the DIR.
+                        refill_fsm_d = REFILL_WRITE_DIR;
+                    end else begin
+                        //  Write the new entry in the cache directory
+                        refill_write_dir_o  = ~refill_is_error;
+
+                        //  Update the PLRU bits. Only in the following cases:
+                        //  - There is no error in response AND
+                        //  - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR
+                        //  - It is a read miss.
+                        refill_updt_plru_o  = ~refill_is_error &
+                                              (~is_prefetch | cfg_prefetch_updt_plru_i);
+
+                        //  Update dependency flags in the retry table
+                        refill_updt_rtab_o  = 1'b1;
+
+                        //  consume the response from the network
+                        refill_fifo_resp_meta_r = 1'b1;
+
+                        refill_fsm_d = REFILL_IDLE;
+                    end
+                end
+            end
+            //  }}}
+
+            //  Write cache directory (this state is only visited when ACCESS_WORDS == CL_WORDS,
+            //  that is, when the entire cache-line can be written in a single cycle)
+            //  {{{
+            REFILL_WRITE_DIR: begin
+                automatic logic is_prefetch;
+                is_prefetch = refill_is_prefetch_q;
+
+                //  Select the target set and way
+                refill_set_o        = refill_set_q;
+                refill_way_bypass   = 1'b0;
+
+                //  Write the new entry in the cache directory
+                refill_write_dir_o  = ~refill_is_error;
+
+                //  Update the PLRU bits. Only in the following cases:
+                //  - There is no error in response AND
+                //  - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR
+                //  - It is a read miss.
+                refill_updt_plru_o  = ~refill_is_error &
+                                      (~is_prefetch | cfg_prefetch_updt_plru_i);
+
+                //  Update dependency flags in the retry table
+                refill_updt_rtab_o  = 1'b1;
+
+                //  consume the response from the network
+                refill_fifo_resp_meta_r = 1'b1;
+
+                refill_fsm_d = REFILL_IDLE;
+            end
+            //  }}}
+
+            default: begin
+                // pragma translate_off
+                $error("Illegal state");
+                // pragma translate_on
+            end
+        endcase
+    end
+
+    assign refill_is_error = (refill_fifo_resp_meta_rdata.r_error == HPDCACHE_MEM_RESP_NOK);
+
+    assign refill_busy_o  = (refill_fsm_q != REFILL_IDLE),
+           refill_nline_o = {refill_tag_q, refill_set_q},
+           refill_word_o  = refill_cnt_q;
+
+    assign mshr_ack_set = get_ack_mshr_set(refill_fifo_resp_meta_rdata.r_id),
+           mshr_ack_way = get_ack_mshr_way(refill_fifo_resp_meta_rdata.r_id);
+
+    assign refill_dir_entry_o.tag      = refill_tag_q,
+           refill_dir_entry_o.reserved = '0;
+
+    assign refill_core_rsp.rdata   = refill_core_rsp_rdata,
+           refill_core_rsp.sid     = refill_core_rsp_sid,
+           refill_core_rsp.tid     = refill_core_rsp_tid,
+           refill_core_rsp.error   = refill_core_rsp_error,
+           refill_core_rsp.aborted = 1'b0;
+
+    hpdcache_fifo_reg #(
+        .FIFO_DEPTH  (1),
+        .FEEDTHROUGH (HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH),
+        .fifo_data_t (hpdcache_rsp_t)
+    ) i_refill_core_rsp_buf(
+        .clk_i,
+        .rst_ni,
+        .w_i     (refill_core_rsp_valid),
+        .wok_o   (/*unused*/),
+        .wdata_i (refill_core_rsp),
+        .r_i     (1'b1),  // core shall always be ready to consume a response
+        .rok_o   (refill_core_rsp_valid_o),
+        .rdata_o (refill_core_rsp_o)
+    );
+
+    generate
+        //  refill's width is bigger than the width of the core's interface
+        if (REFILL_REQ_RATIO > 1) begin : core_rsp_data_mux_gen
+            hpdcache_mux #(
+                .NINPUT      (REFILL_REQ_RATIO),
+                .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH)
+            ) data_read_rsp_mux_i(
+                .data_i      (refill_data_o),
+                .sel_i       (refill_core_rsp_word[0 +: $clog2(REFILL_REQ_RATIO)]),
+                .data_o      (refill_core_rsp_rdata)
+            );
+        end
+
+        //  refill's width is equal to the width of the core's interface
+        else begin
+            assign refill_core_rsp_rdata = refill_data_o;
+        end
+    endgenerate
+
+    /* FIXME: when multiple chunks, in case of error, the error bit is not
+     *        necessarily set on all chunks */
+    assign refill_fifo_resp_meta_wdata = '{
+        r_error: mem_resp_i.mem_resp_r_error,
+        r_id   : mem_resp_i.mem_resp_r_id
+    };
+
+    hpdcache_fifo_reg #(
+        .FIFO_DEPTH  (2),
+        .fifo_data_t (mem_resp_metadata_t)
+    ) i_r_metadata_fifo (
+        .clk_i,
+        .rst_ni,
+
+        .w_i    (refill_fifo_resp_meta_w),
+        .wok_o  (refill_fifo_resp_meta_wok),
+        .wdata_i(refill_fifo_resp_meta_wdata),
+
+        .r_i    (refill_fifo_resp_meta_r),
+        .rok_o  (refill_fifo_resp_meta_rok),
+        .rdata_o(refill_fifo_resp_meta_rdata)
+    );
+
+    generate
+        if (HPDcacheMemDataWidth < HPDCACHE_REFILL_DATA_WIDTH) begin
+            hpdcache_data_upsize #(
+                .WR_WIDTH(HPDcacheMemDataWidth),
+                .RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH),
+                .DEPTH(2*(HPDCACHE_CL_WIDTH/HPDCACHE_REFILL_DATA_WIDTH))
+            ) i_rdata_upsize (
+                .clk_i,
+                .rst_ni,
+
+                .w_i     (refill_fifo_resp_data_w),
+                .wlast_i (mem_resp_i.mem_resp_r_last),
+                .wok_o   (refill_fifo_resp_data_wok),
+                .wdata_i (mem_resp_i.mem_resp_r_data),
+
+                .r_i     (refill_fifo_resp_data_r),
+                .rok_o   (/* unused */),
+                .rdata_o (refill_fifo_resp_data_rdata)
+            );
+        end else if (HPDcacheMemDataWidth > HPDCACHE_REFILL_DATA_WIDTH) begin
+            hpdcache_data_downsize #(
+                .WR_WIDTH(HPDcacheMemDataWidth),
+                .RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH),
+                .DEPTH(2*(HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth))
+            ) i_rdata_downsize (
+                .clk_i,
+                .rst_ni,
+
+                .w_i     (refill_fifo_resp_data_w),
+                .wok_o   (refill_fifo_resp_data_wok),
+                .wdata_i (mem_resp_i.mem_resp_r_data),
+
+                .r_i     (refill_fifo_resp_data_r),
+                .rok_o   (/* unused */),
+                .rdata_o (refill_fifo_resp_data_rdata)
+            );
+        end else begin
+            hpdcache_fifo_reg #(
+                .FIFO_DEPTH  (2),
+                .fifo_data_t (hpdcache_refill_data_t)
+            ) i_rdata_fifo (
+                .clk_i,
+                .rst_ni,
+
+                .w_i     (refill_fifo_resp_data_w),
+                .wok_o   (refill_fifo_resp_data_wok),
+                .wdata_i (mem_resp_i.mem_resp_r_data),
+
+                .r_i     (refill_fifo_resp_data_r),
+                .rok_o   (/* unused */),
+                .rdata_o (refill_fifo_resp_data_rdata)
+            );
+        end
+    endgenerate
+
+    assign refill_data_o = refill_fifo_resp_data_rdata;
+
+    assign refill_fifo_resp_data_w = mem_resp_valid_i &
+                   (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last),  // the last beat also needs room in the metadata FIFO
+           refill_fifo_resp_meta_w = mem_resp_valid_i &
+                   (refill_fifo_resp_data_wok & mem_resp_i.mem_resp_r_last),   // metadata is pushed only on the last beat
+           mem_resp_ready_o = refill_fifo_resp_data_wok &
+                   (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last);
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : miss_resp_fsm_ff
+        if (!rst_ni) begin
+            refill_fsm_q <= REFILL_IDLE;
+        end else begin
+            refill_fsm_q <= refill_fsm_d;
+        end
+    end
+
+    always_ff @(posedge clk_i)
+    begin : miss_resp_fsm_internal_ff
+        if ((refill_fsm_q == REFILL_WRITE) && (refill_cnt_q == 0)) begin  // first chunk: latch the MSHR outputs
+            refill_set_q <= mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH];
+            refill_tag_q <= mshr_ack_nline[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];;
+            refill_way_q <= refill_victim_way_i;
+            refill_sid_q <= mshr_ack_src_id;
+            refill_tid_q <= mshr_ack_req_id;
+            refill_need_rsp_q <= mshr_ack_need_rsp;
+            refill_is_prefetch_q <= mshr_ack_is_prefetch;
+            refill_core_rsp_word_q <= mshr_ack_word;
+        end
+        refill_cnt_q <= refill_cnt_d;  // no reset: REFILL_IDLE drives refill_cnt_d to 0 when a refill starts
+    end
+    //  }}}
+
+    //  Miss Status Holding Register component
+    //  {{{
+    hpdcache_mshr hpdcache_mshr_i (
+        .clk_i,
+        .rst_ni,
+
+        .empty_o                  (mshr_empty),
+        .full_o                   (mshr_full_o),
+
+        .check_i                  (mshr_check_i),
+        .check_set_i              (mshr_check_set_i),
+        .check_tag_i              (mshr_check_tag_i),
+        .hit_o                    (mshr_check_hit_o),
+        .alloc_i                  (mshr_alloc),
+        .alloc_cs_i               (mshr_alloc_cs),
+        .alloc_nline_i            (mshr_alloc_nline_i),
+        .alloc_req_id_i           (mshr_alloc_tid_i),
+        .alloc_src_id_i           (mshr_alloc_sid_i),
+        .alloc_word_i             (mshr_alloc_word_i),
+        .alloc_need_rsp_i         (mshr_alloc_need_rsp_i),
+        .alloc_is_prefetch_i      (mshr_alloc_is_prefetch_i),
+        .alloc_full_o             (mshr_alloc_full_o),
+        .alloc_set_o              (mshr_alloc_set_d),
+        .alloc_tag_o              (mshr_alloc_tag_d),
+        .alloc_way_o              (mshr_alloc_way_d),
+
+        .ack_i                    (mshr_ack),
+        .ack_cs_i                 (mshr_ack_cs),
+        .ack_set_i                (mshr_ack_set),
+        .ack_way_i                (mshr_ack_way),
+        .ack_req_id_o             (mshr_ack_req_id),
+        .ack_src_id_o             (mshr_ack_src_id),
+        .ack_nline_o              (mshr_ack_nline),
+        .ack_word_o               (mshr_ack_word),
+        .ack_need_rsp_o           (mshr_ack_need_rsp),
+        .ack_is_prefetch_o        (mshr_ack_is_prefetch)
+    );
+
+    //  Indicate to the cache controller that there is no pending miss. That
+    //  is, when the MSHR is empty and the miss handler has finished
+    //  processing the last miss response.
+    assign mshr_empty_o = mshr_empty & ~refill_busy_o;
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    initial assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_SET_WIDTH + HPDCACHE_MSHR_WAY_WIDTH)) else
+            $error("miss_handler: not enough ID bits in the memory interface");
+    //  pragma translate_on
+    //  }}}
+
+endmodule
+//  }}}
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv
new file mode 100644
index 0000000..f63e408
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv
@@ -0,0 +1,385 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Miss Status Holding Register (MSHR)
+ *  History       :
+ */
+module hpdcache_mshr
+import hpdcache_pkg::*;
+    //  Ports
+    //  {{{
+(
+    //  Clock and reset signals
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    //  Global control signals
+    output logic                  empty_o,
+    output logic                  full_o,
+
+    //  Check and allocation interface
+    input  logic                  check_i,
+    input  mshr_set_t             check_set_i,
+    input  mshr_tag_t             check_tag_i,
+    output logic                  hit_o,
+    input  logic                  alloc_i,
+    input  logic                  alloc_cs_i,
+    input  hpdcache_nline_t       alloc_nline_i,
+    input  hpdcache_req_tid_t     alloc_req_id_i,
+    input  hpdcache_req_sid_t     alloc_src_id_i,
+    input  hpdcache_word_t        alloc_word_i,
+    input  logic                  alloc_need_rsp_i,
+    input  logic                  alloc_is_prefetch_i,
+    output logic                  alloc_full_o,
+    output mshr_set_t             alloc_set_o,
+    output mshr_tag_t             alloc_tag_o,
+    output mshr_way_t             alloc_way_o,
+
+    //  Acknowledge interface
+    input  logic                  ack_i,
+    input  logic                  ack_cs_i,
+    input  mshr_set_t             ack_set_i,
+    input  mshr_way_t             ack_way_i,
+    output hpdcache_req_tid_t     ack_req_id_o,
+    output hpdcache_req_sid_t     ack_src_id_o,
+    output hpdcache_nline_t       ack_nline_o,
+    output hpdcache_word_t        ack_word_o,
+    output logic                  ack_need_rsp_o,
+    output logic                  ack_is_prefetch_o
+);
+    //  }}}
+
+    //  Definition of constants and types
+    //  {{{
+    typedef struct packed {
+        mshr_tag_t         tag;
+        hpdcache_req_tid_t req_id;
+        hpdcache_req_sid_t src_id;
+        hpdcache_word_t    word_idx;
+        logic              need_rsp;
+        logic              is_prefetch;
+    } mshr_entry_t;
+
+
+    //  Compute the width of the MSHR RAM entries: rounded up to a multiple of 8 bits
+    //  when the RAM uses write byte-enables, exact entry width when it uses a write bitmask
+    localparam int unsigned HPDCACHE_MSHR_ENTRY_BITS = $bits(mshr_entry_t);
+
+    localparam int unsigned HPDCACHE_MSHR_RAM_ENTRY_BITS =
+        HPDCACHE_MSHR_RAM_WBYTEENABLE ?
+            ((HPDCACHE_MSHR_ENTRY_BITS + 7)/8) * 8 : // align to 8 bits
+            HPDCACHE_MSHR_ENTRY_BITS; // or use the exact number of bits
+
+    typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_data_t;
+    //  }}}
+
+    //  Definition of internal wires and registers
+    //  {{{
+    logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_q, mshr_valid_d;
+    mshr_set_t             check_set_q;
+    mshr_set_t             alloc_set;
+    mshr_tag_t             alloc_tag;
+    hpdcache_set_t         alloc_dcache_set;
+    mshr_way_t             ack_way_q;
+    mshr_set_t             ack_set_q;
+    hpdcache_set_t         ack_dcache_set;
+    hpdcache_tag_t         ack_dcache_tag;
+
+    logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_set, mshr_valid_rst;
+    mshr_entry_t     [HPDCACHE_MSHR_WAYS-1:0] mshr_wentry;
+    mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wdata;
+    mshr_entry_t     [HPDCACHE_MSHR_WAYS-1:0] mshr_rentry;
+    mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rdata;
+
+    logic                  mshr_we;
+    logic                  mshr_cs;
+    mshr_set_t             mshr_addr;
+    logic                  check;
+    //  }}}
+
+    //  Control part for the allocation and check operations
+    //  {{{
+
+    //  The allocation operation has priority over the check operation
+    assign check = check_i & ~alloc_i;
+
+    assign alloc_set        = alloc_nline_i[0 +: HPDCACHE_MSHR_SET_WIDTH],
+           alloc_tag        = alloc_nline_i[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH],
+           alloc_dcache_set = alloc_nline_i[0 +: HPDCACHE_SET_WIDTH];
+
+    //  Look for an available way in case of allocation (lowest free way of alloc_set, 0 if none)
+    always_comb
+    begin
+        automatic mshr_way_t found_available_way;
+
+        found_available_way = 0;
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+            if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(alloc_set)]) begin
+                found_available_way = mshr_way_t'(i);
+                break;
+            end
+        end
+        alloc_way_o = found_available_way;
+    end
+
+    //  Check whether the set of the last registered check (check_set_q) still has a free way
+    always_comb
+    begin
+        automatic bit found_available;
+
+        found_available = 1'b0;
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+            if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(check_set_q)]) begin
+                found_available = 1'b1;
+                break;
+            end
+        end
+        alloc_full_o = ~found_available;
+    end
+
+    assign alloc_set_o = alloc_set,
+           alloc_tag_o = alloc_tag;
+
+    //  Write when there is an allocation operation
+    assign mshr_we = alloc_i;
+
+    //  MSHR (set, way) to HPDcache set translation table, written on allocation
+    hpdcache_mshr_to_cache_set trlt_i (
+        .clk_i,
+        .write_i            (mshr_we),
+        .write_dcache_set_i (alloc_dcache_set),
+        .write_mshr_way_i   (alloc_way_o),
+        .read_mshr_set_i    (ack_set_q),
+        .read_mshr_way_i    (ack_way_q),
+        .read_dcache_set_o  (ack_dcache_set)
+    );
+
+
+    //  Generate write data: the same entry is replicated on all ways (the write mask selects the way)
+    always_comb
+    begin
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+            mshr_wentry[i].tag = alloc_tag;
+            mshr_wentry[i].req_id = alloc_req_id_i;
+            mshr_wentry[i].src_id = alloc_src_id_i;
+            mshr_wentry[i].word_idx = alloc_word_i;
+            mshr_wentry[i].need_rsp = alloc_need_rsp_i;
+            mshr_wentry[i].is_prefetch = alloc_is_prefetch_i;
+        end
+    end
+    //  }}}
+
+    //  Shared control signals
+    //  {{{
+    assign mshr_cs   = check_i | alloc_cs_i | ack_cs_i;
+    assign mshr_addr = ack_i ? ack_set_i :
+                       (alloc_i ? alloc_set : check_set_i);  // address priority: ack, then alloc, then check
+
+    always_comb
+    begin : mshr_valid_comb
+        automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_alloc_slot;
+        automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_ack_slot;
+
+        mshr_alloc_slot = {alloc_way_o, alloc_set};
+        mshr_ack_slot   = {  ack_way_i, ack_set_i};
+
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS; i++) begin
+            mshr_valid_rst[i] = (i == hpdcache_uint'(mshr_ack_slot))   ? ack_i   : 1'b0;
+            mshr_valid_set[i] = (i == hpdcache_uint'(mshr_alloc_slot)) ? alloc_i : 1'b0;
+        end
+    end
+    assign mshr_valid_d = (~mshr_valid_q & mshr_valid_set) | (mshr_valid_q & ~mshr_valid_rst);  // set on alloc, clear on ack
+    //  }}}
+
+    //  Read interface (ack)
+    //  {{{
+    generate
+        //  extract the HPDcache tag from the MSbs of the MSHR tag
+        if (HPDCACHE_SETS >= HPDCACHE_MSHR_SETS) begin : ack_dcache_set_ge_mshr_set_gen
+            assign ack_dcache_tag = mshr_rentry[ack_way_q].tag[
+                    HPDCACHE_MSHR_TAG_WIDTH - 1 :
+                    HPDCACHE_MSHR_TAG_WIDTH - HPDCACHE_TAG_WIDTH];
+        end
+
+        //  extract the HPDcache tag from the MSHR tag concatenated with the MSbs of the MSHR set
+        else begin : ack_dcache_set_lt_mshr_set_gen
+            assign ack_dcache_tag = {
+                    mshr_rentry[ack_way_q].tag ,
+                    ack_set_q[HPDCACHE_MSHR_SET_WIDTH - 1:HPDCACHE_SET_WIDTH]};
+        end
+    endgenerate
+
+    assign ack_req_id_o       = mshr_rentry[ack_way_q].req_id,
+           ack_src_id_o       = mshr_rentry[ack_way_q].src_id,
+           ack_nline_o        = {ack_dcache_tag, ack_dcache_set},
+           ack_word_o         = mshr_rentry[ack_way_q].word_idx,
+           ack_need_rsp_o     = mshr_rentry[ack_way_q].need_rsp,
+           ack_is_prefetch_o  = mshr_rentry[ack_way_q].is_prefetch;
+    //  }}}
+
+    //  Global control signals
+    //  {{{
+    assign empty_o = ~|mshr_valid_q;
+    assign full_o  =   &mshr_valid_q;
+
+    always_comb
+    begin : hit_comb
+        automatic bit [HPDCACHE_MSHR_WAYS-1:0] __hit_way;
+
+        for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
+            automatic bit __valid;
+            automatic bit __match;
+            __valid = mshr_valid_q[w*HPDCACHE_MSHR_SETS + int'(check_set_q)];
+            __match = (mshr_rentry[w].tag == check_tag_i);
+            __hit_way[w] = (__valid && __match);
+        end
+
+        hit_o = |__hit_way;
+    end
+    //  }}}
+
+    //  Internal state assignment
+    //  {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : mshr_ff_set
+        if (!rst_ni) begin
+            mshr_valid_q <= '0;
+            ack_way_q    <= '0;
+            ack_set_q    <= '0;
+            check_set_q  <= '0;
+        end else begin
+            mshr_valid_q <= mshr_valid_d;
+            if (ack_i) begin
+                ack_way_q <= ack_way_i;
+                ack_set_q <= ack_set_i;
+            end
+            if (check) begin
+                check_set_q <= check_set_i;
+            end
+        end
+    end
+    //  }}}
+
+    //  Internal components
+    //  {{{
+    generate
+        if (HPDCACHE_MSHR_RAM_WBYTEENABLE) begin : mshr_wbyteenable_gen
+            typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS/8-1:0] mshr_sram_wbyteenable_t;
+            mshr_sram_wbyteenable_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wbyteenable;
+
+            always_comb
+            begin : mshr_wbyteenable_comb
+                for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+                    mshr_wbyteenable[i] = (int'(alloc_way_o) == i) ? '1 : '0;  // enable only the allocated way
+                end
+            end
+
+            if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen
+                hpdcache_regbank_wbyteenable_1rw #(
+                    .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
+                    .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH)
+                ) mshr_mem(
+                    .clk           (clk_i),
+                    .rst_n         (rst_ni),
+                    .cs            (mshr_cs),
+                    .we            (mshr_we),
+                    .addr          (mshr_addr),
+                    .wbyteenable   (mshr_wbyteenable),
+                    .wdata         (mshr_wdata),
+                    .rdata         (mshr_rdata)
+                );
+            end else begin : mshr_sram_gen
+                hpdcache_sram_wbyteenable #(
+                    .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
+                    .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH)
+                ) mshr_mem(
+                    .clk           (clk_i),
+                    .rst_n         (rst_ni),
+                    .cs            (mshr_cs),
+                    .we            (mshr_we),
+                    .addr          (mshr_addr),
+                    .wbyteenable   (mshr_wbyteenable),
+                    .wdata         (mshr_wdata),
+                    .rdata         (mshr_rdata)
+                );
+            end
+        end else begin : mshr_wmask_gen
+            typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_wmask_t;
+            mshr_sram_wmask_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wmask;
+
+            always_comb
+            begin : mshr_wmask_comb
+                for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+                    mshr_wmask[i] = (int'(alloc_way_o) == i) ? '1 : '0;  // unmask only the allocated way
+                end
+            end
+
+            if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen
+                hpdcache_regbank_wmask_1rw #(
+                    .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
+                    .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH)
+                ) mshr_mem(
+                    .clk           (clk_i),
+                    .rst_n         (rst_ni),
+                    .cs            (mshr_cs),
+                    .we            (mshr_we),
+                    .addr          (mshr_addr),
+                    .wmask         (mshr_wmask),
+                    .wdata         (mshr_wdata),
+                    .rdata         (mshr_rdata)
+                );
+            end else begin : mshr_sram_gen
+                hpdcache_sram_wmask #(
+                    .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
+                    .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH)
+                ) mshr_mem(
+                    .clk           (clk_i),
+                    .rst_n         (rst_ni),
+                    .cs            (mshr_cs),
+                    .we            (mshr_we),
+                    .addr          (mshr_addr),
+                    .wmask         (mshr_wmask),
+                    .wdata         (mshr_wdata),
+                    .rdata         (mshr_rdata)
+                );
+            end
+        end
+    endgenerate
+
+    always_comb
+    begin : ram_word_fitting_comb
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+            mshr_wdata[i]  = mshr_sram_data_t'(mshr_wentry[i]);
+            mshr_rentry[i] = mshr_entry_t'(mshr_rdata[i][0 +: HPDCACHE_MSHR_ENTRY_BITS]);
+        end
+    end
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    one_command_assert: assert property (@(posedge clk_i)
+            (ack_i -> !(alloc_i || check_i))) else
+            $error("MSHR: ack with concurrent alloc or check");
+    //  pragma translate_on
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv
new file mode 100644
index 0000000..3dc8b73
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv
@@ -0,0 +1,105 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache MSHR set translation table (recovers the HPDcache set index from an MSHR set/way pair) + * History : + */ +module hpdcache_mshr_to_cache_set +import hpdcache_pkg::*; +// Ports +// {{{ +( + // Clock signals + input logic clk_i, + + // Write interface (used only when a translation table is generated; entry = [low bits of write_dcache_set_i][write_mshr_way_i]) + input logic write_i, + input hpdcache_set_t write_dcache_set_i, + input mshr_way_t write_mshr_way_i, + + // Read interface + input mshr_way_t read_mshr_way_i, + input mshr_set_t read_mshr_set_i, + output hpdcache_set_t read_dcache_set_o +); +// }}} + // Translation logic, selected at elaboration time by comparing HPDCACHE_SETS with HPDCACHE_MSHR_SETS + + generate + // Number of HPDcache sets is bigger than the MSHR sets + // In this case, a translation table (in flip-flops) is needed + // {{{ + // Write most significant bits of the HPDcache set into the + // translation table + if (HPDCACHE_SETS > HPDCACHE_MSHR_SETS) begin : hpdcache_sets_gt_mshr_sets_gen + localparam hpdcache_uint TRLT_TAB_ENTRY_WIDTH = + HPDCACHE_SET_WIDTH - HPDCACHE_MSHR_SET_WIDTH; + typedef logic [TRLT_TAB_ENTRY_WIDTH-1:0] trlt_entry_t; + + + // Translation table + // One entry per MSHR (set, way) + // This table is used to store the most significant bits of the HPDcache set + trlt_entry_t [HPDCACHE_MSHR_SETS-1:0][HPDCACHE_MSHR_WAYS-1:0] tab; + trlt_entry_t tab_wdata; + mshr_set_t write_mshr_set; + + // Write operation + // {{{ + // Write most significant bits of the HPDcache set into the + // translation table + always_ff @(posedge clk_i) // no reset term: an entry changes only when write_i is asserted + begin + if (write_i) begin + tab[write_mshr_set][write_mshr_way_i] <= tab_wdata; + end + end + + assign tab_wdata = write_dcache_set_i[HPDCACHE_MSHR_SET_WIDTH +: + TRLT_TAB_ENTRY_WIDTH],
+ write_mshr_set = write_dcache_set_i[0 +: HPDCACHE_MSHR_SET_WIDTH]; + // }}} + + // Read operation + // {{{ + // Concatenate the mshr set with the most significant bits of the + // dcache set stored in the translation table + assign read_dcache_set_o = {tab[read_mshr_set_i][read_mshr_way_i], read_mshr_set_i}; + // }}} + end + // }}} + + // Number of HPDcache sets is smaller or equal than the MSHR sets + // In this case, no translation table is needed (the MSHR set is cast directly to hpdcache_set_t) + // {{{ + else begin : hpdcache_sets_le_mshr_sets_gen + assign read_dcache_set_o = hpdcache_set_t'(read_mshr_set_i); + end + // }}} + endgenerate + +// Assertions (none are defined between the pragmas below) +// {{{ +// pragma translate_off +// pragma translate_on +// }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv new file mode 100755 index 0000000..8a96a16 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv @@ -0,0 +1,623 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Write-Through (WT), High-Throughput (HTPUT) HPDcache Package + * History : + */ +package hpdcache_pkg; + // Definition of global constants for the HPDcache data and directory (values are read from hpdcache_params_pkg) + // {{{ + + // HPDcache physical address width (bits) + localparam int unsigned HPDCACHE_PA_WIDTH = hpdcache_params_pkg::PARAM_PA_WIDTH; + + // HPDcache number of sets + localparam int unsigned HPDCACHE_SETS = hpdcache_params_pkg::PARAM_SETS; + + // HPDcache number of ways + localparam int unsigned HPDCACHE_WAYS = hpdcache_params_pkg::PARAM_WAYS; + + // HPDcache word width (bits) + localparam int unsigned HPDCACHE_WORD_WIDTH = hpdcache_params_pkg::PARAM_WORD_WIDTH; + + // HPDcache cache-line size (number of words) + localparam int unsigned HPDCACHE_CL_WORDS = hpdcache_params_pkg::PARAM_CL_WORDS; + + // HPDcache number of words in the request data channels (request and response) + localparam int unsigned HPDCACHE_REQ_WORDS = hpdcache_params_pkg::PARAM_REQ_WORDS; + + // HPDcache request transaction ID width (bits) + localparam int unsigned HPDCACHE_REQ_TRANS_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_TRANS_ID_WIDTH; + + // HPDcache request source ID width (bits) + localparam int unsigned HPDCACHE_REQ_SRC_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_SRC_ID_WIDTH; + // }}} + + // Utility definitions (fixed-width 32/64-bit signed and unsigned 4-state vectors) + // {{{ + typedef logic unsigned [31:0] hpdcache_uint; + typedef logic signed [31:0] hpdcache_int; + typedef logic unsigned [31:0] hpdcache_uint32; + typedef logic signed [31:0] hpdcache_int32; + typedef logic unsigned [63:0] hpdcache_uint64; + typedef logic signed [63:0] hpdcache_int64; + // }}} + + // Definition of constants and types for HPDcache directory memory + // {{{ + localparam int unsigned HPDCACHE_CL_WIDTH = HPDCACHE_CL_WORDS*HPDCACHE_WORD_WIDTH; // cache-line width (bits) + localparam int unsigned HPDCACHE_OFFSET_WIDTH = $clog2(HPDCACHE_CL_WIDTH/8); // byte offset within a cache line (bits) + localparam int unsigned HPDCACHE_NLINE_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_OFFSET_WIDTH; // cache-line index: address without the line offset (bits) +
localparam int unsigned HPDCACHE_SET_WIDTH = $clog2(HPDCACHE_SETS); + localparam int unsigned HPDCACHE_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_SET_WIDTH; // line-index bits above the set index + localparam int unsigned HPDCACHE_WORD_IDX_WIDTH = $clog2(HPDCACHE_CL_WORDS); + + typedef logic unsigned [ HPDCACHE_OFFSET_WIDTH-1:0] hpdcache_offset_t; + typedef logic unsigned [ HPDCACHE_NLINE_WIDTH-1:0] hpdcache_nline_t; + typedef logic unsigned [ HPDCACHE_SET_WIDTH-1:0] hpdcache_set_t; + typedef logic unsigned [ HPDCACHE_TAG_WIDTH-1:0] hpdcache_tag_t; + typedef logic unsigned [ $clog2(HPDCACHE_WAYS)-1:0] hpdcache_way_t; // NOTE(review): $clog2 is 0 when HPDCACHE_WAYS == 1, giving a [-1:0] range - confirm single-way configurations are not used + typedef logic unsigned [ HPDCACHE_WAYS-1:0] hpdcache_way_vector_t; + typedef logic unsigned [HPDCACHE_WORD_IDX_WIDTH-1:0] hpdcache_word_t; + + typedef struct packed { + hpdcache_tag_t tag; + logic [1:0] reserved; + } hpdcache_dir_entry_t; + + localparam int unsigned HPDCACHE_DIR_RAM_WIDTH = $bits(hpdcache_dir_entry_t); + localparam int unsigned HPDCACHE_DIR_RAM_DEPTH = HPDCACHE_SETS; + localparam int unsigned HPDCACHE_DIR_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DIR_RAM_DEPTH); + + typedef logic [HPDCACHE_DIR_RAM_ADDR_WIDTH-1:0] hpdcache_dir_addr_t; + // Returns the index of the lowest set bit of 'way'; returns 0 when no bit is set (same result as way 0) + function automatic hpdcache_way_t hpdcache_way_vector_to_index(input hpdcache_way_vector_t way); + for (int unsigned i = 0; i < HPDCACHE_WAYS; i++) begin + if (way[i]) return hpdcache_way_t'(i); + end + return 0; + endfunction + + // }}} + + // Definition of constants and types for HPDcache data memory + // {{{ + localparam int unsigned HPDCACHE_DATA_WAYS_PER_RAM_WORD = + hpdcache_params_pkg::PARAM_DATA_WAYS_PER_RAM_WORD; + + localparam int unsigned HPDCACHE_DATA_SETS_PER_RAM = /* FIXME this parameter is currently ignored */ + hpdcache_params_pkg::PARAM_DATA_SETS_PER_RAM; + + // HPDcache DATA RAM implements write byte enable + localparam bit HPDCACHE_DATA_RAM_WBYTEENABLE = + hpdcache_params_pkg::PARAM_DATA_RAM_WBYTEENABLE; + + // Define the number of memory contiguous words that can be accessed + // simultaneously from the cache.
+ // - This limits the maximum width for the data channel from requesters + // - This impacts the refill latency + localparam int unsigned HPDCACHE_ACCESS_WORDS = hpdcache_params_pkg::PARAM_ACCESS_WORDS; + + + localparam int unsigned HPDCACHE_DATA_RAM_WIDTH = + HPDCACHE_DATA_WAYS_PER_RAM_WORD*HPDCACHE_WORD_WIDTH; + localparam int unsigned HPDCACHE_DATA_RAM_Y_CUTS = HPDCACHE_WAYS/HPDCACHE_DATA_WAYS_PER_RAM_WORD; // integer division: assumes WAYS is a multiple of WAYS_PER_RAM_WORD - TODO confirm + localparam int unsigned HPDCACHE_DATA_RAM_X_CUTS = HPDCACHE_ACCESS_WORDS; + localparam int unsigned HPDCACHE_DATA_RAM_ACCESS_WIDTH = HPDCACHE_ACCESS_WORDS*HPDCACHE_WORD_WIDTH; + localparam int unsigned HPDCACHE_DATA_RAM_ENTR_PER_SET = HPDCACHE_CL_WORDS/HPDCACHE_ACCESS_WORDS; // integer division: assumes CL_WORDS is a multiple of ACCESS_WORDS - TODO confirm + localparam int unsigned HPDCACHE_DATA_RAM_DEPTH = HPDCACHE_SETS*HPDCACHE_DATA_RAM_ENTR_PER_SET; + localparam int unsigned HPDCACHE_DATA_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DATA_RAM_DEPTH); + + typedef logic [ HPDCACHE_WORD_WIDTH-1:0] hpdcache_data_word_t; + typedef logic [ HPDCACHE_WORD_WIDTH/8-1:0] hpdcache_data_be_t; + typedef logic [ $clog2(HPDCACHE_DATA_RAM_Y_CUTS)-1:0] hpdcache_data_ram_row_idx_t; + typedef logic [ $clog2(HPDCACHE_DATA_WAYS_PER_RAM_WORD)-1:0] hpdcache_data_ram_way_idx_t; + + typedef logic [HPDCACHE_DATA_RAM_ADDR_WIDTH-1:0] hpdcache_data_ram_addr_t; + typedef hpdcache_data_word_t[HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_data_t; + typedef hpdcache_data_be_t [HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_be_t; + + typedef hpdcache_data_ram_data_t + [HPDCACHE_DATA_RAM_Y_CUTS-1:0] + [HPDCACHE_DATA_RAM_X_CUTS-1:0] + hpdcache_data_entry_t; + + typedef hpdcache_data_ram_be_t + [HPDCACHE_DATA_RAM_Y_CUTS-1:0] + [HPDCACHE_DATA_RAM_X_CUTS-1:0] + hpdcache_data_be_entry_t; + + typedef logic + [HPDCACHE_DATA_RAM_X_CUTS-1:0] + hpdcache_data_row_enable_t; + + typedef hpdcache_data_row_enable_t + [HPDCACHE_DATA_RAM_Y_CUTS-1:0] + hpdcache_data_enable_t; + + typedef hpdcache_data_ram_addr_t + [HPDCACHE_DATA_RAM_Y_CUTS-1:0] + [HPDCACHE_DATA_RAM_X_CUTS-1:0] +
hpdcache_data_addr_t; + // }}} + + // Definition of interface with miss handler + // {{{ + localparam int unsigned HPDCACHE_REFILL_DATA_WIDTH = HPDCACHE_DATA_RAM_ACCESS_WIDTH; + + // Use feedthrough FIFOs from the refill handler to the core. This + // reduces the latency (by one cycle) but adds an additional timing path + localparam bit HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH = + hpdcache_params_pkg::PARAM_REFILL_CORE_RSP_FEEDTHROUGH; + + typedef hpdcache_data_word_t[HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_data_t; + typedef hpdcache_data_be_t [HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_be_t; + // }}} + + // Definition of interface with requesters + // {{{ + localparam int unsigned HPDCACHE_REQ_DATA_WIDTH = HPDCACHE_REQ_WORDS*HPDCACHE_WORD_WIDTH; + localparam int unsigned HPDCACHE_REQ_DATA_BYTES = HPDCACHE_REQ_DATA_WIDTH/8; + localparam int unsigned HPDCACHE_REQ_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_WORDS); + localparam int unsigned HPDCACHE_REQ_BYTE_OFFSET_WIDTH = $clog2(HPDCACHE_REQ_DATA_BYTES); + localparam int unsigned HPDCACHE_REQ_OFFSET_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_TAG_WIDTH; // address bits below the tag (set index + line offset) + + typedef logic [HPDCACHE_PA_WIDTH-1:0] hpdcache_req_addr_t; + typedef logic [HPDCACHE_REQ_OFFSET_WIDTH-1:0] hpdcache_req_offset_t; + typedef hpdcache_data_word_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_data_t; + typedef hpdcache_data_be_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_be_t; + typedef logic [2:0] hpdcache_req_size_t; + typedef logic [HPDCACHE_REQ_SRC_ID_WIDTH-1:0] hpdcache_req_sid_t; + typedef logic [HPDCACHE_REQ_TRANS_ID_WIDTH-1:0] hpdcache_req_tid_t; + + // Definition of operation codes + // {{{ + typedef enum logic [3:0] { + HPDCACHE_REQ_LOAD = 4'h0, + HPDCACHE_REQ_STORE = 4'h1, + // RESERVED = 4'h2, + // RESERVED = 4'h3, + HPDCACHE_REQ_AMO_LR = 4'h4, + HPDCACHE_REQ_AMO_SC = 4'h5, + HPDCACHE_REQ_AMO_SWAP = 4'h6, + HPDCACHE_REQ_AMO_ADD = 4'h7, + HPDCACHE_REQ_AMO_AND = 4'h8, + HPDCACHE_REQ_AMO_OR = 4'h9, + HPDCACHE_REQ_AMO_XOR = 4'ha, + HPDCACHE_REQ_AMO_MAX = 4'hb, +
HPDCACHE_REQ_AMO_MAXU = 4'hc, + HPDCACHE_REQ_AMO_MIN = 4'hd, + HPDCACHE_REQ_AMO_MINU = 4'he, + HPDCACHE_REQ_CMO = 4'hf + } hpdcache_req_op_t; + // }}} + + // Definition of CMO codes (enum base type is hpdcache_req_size_t; the is_cmo_* predicates compare their 'sz' argument against these) + // {{{ + typedef enum hpdcache_req_size_t { + HPDCACHE_REQ_CMO_FENCE = 3'h0, + // RESERVED = 3'h1, + HPDCACHE_REQ_CMO_INVAL_NLINE = 3'h2, + HPDCACHE_REQ_CMO_INVAL_SET_WAY = 3'h3, + HPDCACHE_REQ_CMO_INVAL_ALL = 3'h4, + HPDCACHE_REQ_CMO_PREFETCH = 3'h5 + } hpdcache_req_cmo_t; + // }}} + + // Definition of PMA flags + // {{{ + typedef struct packed + { + logic uncacheable; + logic io; // FIXME: for future use + } hpdcache_pma_t; + // }}} + + // Definition of interfaces + // {{{ + // Request Interface + typedef struct packed + { + hpdcache_req_offset_t addr_offset; + hpdcache_req_data_t wdata; + hpdcache_req_op_t op; + hpdcache_req_be_t be; + hpdcache_req_size_t size; + hpdcache_req_sid_t sid; + hpdcache_req_tid_t tid; + logic need_rsp; + + // only valid in case of physically indexed requests + logic phys_indexed; + hpdcache_tag_t addr_tag; + hpdcache_pma_t pma; + } hpdcache_req_t; + + // Response Interface + typedef struct packed + { + hpdcache_req_data_t rdata; + hpdcache_req_sid_t sid; + hpdcache_req_tid_t tid; + logic error; + logic aborted; + } hpdcache_rsp_t; + // }}} + + // Definition of functions (each is_* predicate returns 1'b1 only for the listed operation code(s)) + // {{{ + function automatic logic is_load(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_LOAD: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_store(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_STORE: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_LR, + HPDCACHE_REQ_AMO_SC, + HPDCACHE_REQ_AMO_SWAP, + HPDCACHE_REQ_AMO_ADD, + HPDCACHE_REQ_AMO_AND, + HPDCACHE_REQ_AMO_OR, + HPDCACHE_REQ_AMO_XOR, + HPDCACHE_REQ_AMO_MAX, + HPDCACHE_REQ_AMO_MAXU, + HPDCACHE_REQ_AMO_MIN, + HPDCACHE_REQ_AMO_MINU: + return 1'b1; + default: +
return 1'b0; + endcase + endfunction + + function automatic logic is_amo_lr(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_LR: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_sc(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_SC: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_swap(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_SWAP: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_add(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_ADD: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_and(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_AND: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_or(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_OR: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_xor(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_XOR: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_max(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_MAX: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_maxu(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_MAXU: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_min(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_MIN: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_minu(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_MINU: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_cmo_inval( + input hpdcache_req_op_t op, + input hpdcache_req_size_t sz); + case (op) +
HPDCACHE_REQ_CMO: + case (sz) + HPDCACHE_REQ_CMO_INVAL_NLINE, + HPDCACHE_REQ_CMO_INVAL_SET_WAY, + HPDCACHE_REQ_CMO_INVAL_ALL: begin + return 1'b1; + end + default: begin + return 1'b0; + end + endcase + default: begin + return 1'b0; + end + endcase + endfunction + // NOTE: the next three predicates test only the CMO code in sz; they do not check that op is HPDCACHE_REQ_CMO + function automatic logic is_cmo_inval_by_nline(input hpdcache_req_size_t sz); + return (sz == HPDCACHE_REQ_CMO_INVAL_NLINE); + endfunction + + function automatic logic is_cmo_inval_by_set(input hpdcache_req_size_t sz); + return (sz == HPDCACHE_REQ_CMO_INVAL_SET_WAY); + endfunction + + function automatic logic is_cmo_inval_all(input hpdcache_req_size_t sz); + return (sz == HPDCACHE_REQ_CMO_INVAL_ALL); + endfunction + + function automatic logic is_cmo_fence( + input hpdcache_req_op_t op, + input hpdcache_req_size_t sz); + case (op) + HPDCACHE_REQ_CMO: begin + return (sz == HPDCACHE_REQ_CMO_FENCE); + end + default: begin + return 1'b0; + end + endcase + endfunction + + function automatic logic is_cmo_prefetch( + input hpdcache_req_op_t op, + input hpdcache_req_size_t sz); + case (op) + HPDCACHE_REQ_CMO: begin + return (sz == HPDCACHE_REQ_CMO_PREFETCH); + end + default: begin + return 1'b0; + end + endcase + endfunction + // Address field extractors: each returns a bit slice of a full request address (addr) or of a request offset + function automatic hpdcache_tag_t hpdcache_get_req_addr_tag(input hpdcache_req_addr_t addr); + return addr[(HPDCACHE_OFFSET_WIDTH + HPDCACHE_SET_WIDTH) +: HPDCACHE_TAG_WIDTH]; + endfunction + + function automatic hpdcache_set_t hpdcache_get_req_addr_set(input hpdcache_req_addr_t addr); + return addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH]; + endfunction + + function automatic hpdcache_word_t hpdcache_get_req_addr_word(input hpdcache_req_addr_t addr); + return addr[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH]; + endfunction + + function automatic hpdcache_offset_t hpdcache_get_req_addr_offset(input hpdcache_req_addr_t addr); + return addr[0 +: HPDCACHE_OFFSET_WIDTH]; + endfunction + + function automatic hpdcache_nline_t hpdcache_get_req_addr_nline(input
hpdcache_req_addr_t addr); + return addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH]; + endfunction + + function automatic hpdcache_set_t hpdcache_get_req_offset_set(input hpdcache_req_offset_t offset); + return offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH]; + endfunction + + function automatic hpdcache_word_t hpdcache_get_req_offset_word(input hpdcache_req_offset_t offset); + return offset[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH]; + endfunction + + // }}} + // }}} + + // Definition of constants and types for the Miss Status Holding Register (MSHR) + // {{{ + + // HPDcache MSHR number of sets + localparam int unsigned HPDCACHE_MSHR_SETS = + hpdcache_params_pkg::PARAM_MSHR_SETS; + + // HPDcache MSHR number of ways + localparam int unsigned HPDCACHE_MSHR_WAYS = + hpdcache_params_pkg::PARAM_MSHR_WAYS; + + // HPDcache MSHR number of ways in the same SRAM word + localparam int unsigned HPDCACHE_MSHR_WAYS_PER_RAM_WORD = + hpdcache_params_pkg::PARAM_MSHR_WAYS_PER_RAM_WORD; /* FIXME this parameter is currently ignored */ + + // HPDcache MSHR number of sets in the same SRAM + localparam int unsigned HPDCACHE_MSHR_SETS_PER_RAM = + hpdcache_params_pkg::PARAM_MSHR_SETS_PER_RAM; /* FIXME this parameter is currently ignored */ + + // HPDcache MSHR implements write byte enable + localparam bit HPDCACHE_MSHR_RAM_WBYTEENABLE = + hpdcache_params_pkg::PARAM_MSHR_RAM_WBYTEENABLE; + localparam bit HPDCACHE_MSHR_USE_REGBANK = + hpdcache_params_pkg::PARAM_MSHR_USE_REGBANK; + + localparam int unsigned HPDCACHE_MSHR_SET_WIDTH = $clog2(HPDCACHE_MSHR_SETS); + localparam int unsigned HPDCACHE_MSHR_WAY_WIDTH = $clog2(HPDCACHE_MSHR_WAYS); + localparam int unsigned HPDCACHE_MSHR_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_MSHR_SET_WIDTH; // line-index bits above the MSHR set index + + typedef logic unsigned [HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_set_t; + typedef logic unsigned [HPDCACHE_MSHR_TAG_WIDTH-1:0] mshr_tag_t; + typedef logic unsigned [HPDCACHE_MSHR_WAY_WIDTH-1:0] mshr_way_t; + // }}} + + // Definition of
interface with memory + // {{{ + typedef logic [7:0] hpdcache_mem_len_t; + typedef logic [2:0] hpdcache_mem_size_t; + + typedef enum logic [1:0] { + HPDCACHE_MEM_RESP_OK = 2'b00, + HPDCACHE_MEM_RESP_NOK = 2'b01 + } hpdcache_mem_error_e; + + typedef enum logic [1:0] { + HPDCACHE_MEM_READ = 2'b00, + HPDCACHE_MEM_WRITE = 2'b01, + HPDCACHE_MEM_ATOMIC = 2'b10 + // Reserved = 2'b11 - TODO: CMO ? + } hpdcache_mem_command_e; + + typedef enum logic [3:0] { + HPDCACHE_MEM_ATOMIC_ADD = 4'b0000, + HPDCACHE_MEM_ATOMIC_CLR = 4'b0001, + HPDCACHE_MEM_ATOMIC_SET = 4'b0010, + HPDCACHE_MEM_ATOMIC_EOR = 4'b0011, + HPDCACHE_MEM_ATOMIC_SMAX = 4'b0100, + HPDCACHE_MEM_ATOMIC_SMIN = 4'b0101, + HPDCACHE_MEM_ATOMIC_UMAX = 4'b0110, + HPDCACHE_MEM_ATOMIC_UMIN = 4'b0111, + HPDCACHE_MEM_ATOMIC_SWAP = 4'b1000, + // Reserved = 4'b1001, + // Reserved = 4'b1010, + // Reserved = 4'b1011, + HPDCACHE_MEM_ATOMIC_LDEX = 4'b1100, + HPDCACHE_MEM_ATOMIC_STEX = 4'b1101 + // Reserved = 4'b1110, + // Reserved = 4'b1111 + } hpdcache_mem_atomic_e; + // Returns the smallest size code s with 2**s >= bytes (0 for 0 or 1 byte, up to 7 for 128 bytes); above 128 bytes only the simulation-time $error runs and no value is returned + function automatic hpdcache_mem_size_t get_hpdcache_mem_size(int unsigned bytes); + if (bytes <= 1) return 0; + else if (bytes <= 2) return 1; + else if (bytes <= 4) return 2; + else if (bytes <= 8) return 3; + else if (bytes <= 16) return 4; + else if (bytes <= 32) return 5; + else if (bytes <= 64) return 6; + else if (bytes <= 128) return 7; + // pragma translate_off + else $error("hpdcache: unsupported number of bytes"); + // pragma translate_on + endfunction + // }}} + + // Definition of constants and types for the Write Buffer (WBUF) + // {{{ + localparam int unsigned HPDCACHE_WBUF_DIR_ENTRIES = + hpdcache_params_pkg::PARAM_WBUF_DIR_ENTRIES; + + localparam int unsigned HPDCACHE_WBUF_DATA_ENTRIES = + hpdcache_params_pkg::PARAM_WBUF_DATA_ENTRIES; + + localparam int unsigned HPDCACHE_WBUF_WORDS = + hpdcache_params_pkg::PARAM_WBUF_WORDS; + + localparam int unsigned HPDCACHE_WBUF_TIMECNT_WIDTH = + hpdcache_params_pkg::PARAM_WBUF_TIMECNT_WIDTH; + + // Use
feedthrough FIFOs from the write-buffer to the NoC. This reduces + // the latency (by one cycle) but adds an additional timing path + localparam bit HPDCACHE_WBUF_SEND_FEEDTHROUGH = + hpdcache_params_pkg::PARAM_WBUF_SEND_FEEDTHROUGH; + + localparam int unsigned HPDCACHE_WBUF_DATA_WIDTH = HPDCACHE_REQ_DATA_WIDTH* + HPDCACHE_WBUF_WORDS; // width in bits of one wbuf_data_buf_t + localparam int unsigned HPDCACHE_WBUF_DATA_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DATA_ENTRIES); + localparam int unsigned HPDCACHE_WBUF_DIR_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DIR_ENTRIES); + + typedef hpdcache_req_addr_t wbuf_addr_t; + typedef hpdcache_nline_t wbuf_match_t; + typedef hpdcache_req_data_t wbuf_data_t; + typedef hpdcache_req_be_t wbuf_be_t; + typedef wbuf_data_t[HPDCACHE_WBUF_WORDS-1:0] wbuf_data_buf_t; + typedef wbuf_be_t [HPDCACHE_WBUF_WORDS-1:0] wbuf_be_buf_t; + typedef logic unsigned [ HPDCACHE_WBUF_TIMECNT_WIDTH-1:0] wbuf_timecnt_t; + typedef logic unsigned [ HPDCACHE_WBUF_DIR_PTR_WIDTH-1:0] wbuf_dir_ptr_t; + typedef logic unsigned [HPDCACHE_WBUF_DATA_PTR_WIDTH-1:0] wbuf_data_ptr_t; + // }}} + + // Definition of constants and types for the Replay Table (RTAB) + // {{{ + localparam int HPDCACHE_RTAB_ENTRIES = hpdcache_params_pkg::PARAM_RTAB_ENTRIES; // NOTE(review): declared 'int' (signed), unlike the 'int unsigned' constants above - confirm intent + + typedef logic [$clog2(HPDCACHE_RTAB_ENTRIES)-1:0] rtab_ptr_t; + // }}} + + // Definition of constants and types for the uncacheable request handler (UC) + // {{{ + typedef struct packed { // one flag per request operation; presumably at most one is set at a time - TODO confirm + logic is_ld; + logic is_st; + logic is_amo_lr; + logic is_amo_sc; + logic is_amo_swap; + logic is_amo_add; + logic is_amo_and; + logic is_amo_or; + logic is_amo_xor; + logic is_amo_max; + logic is_amo_maxu; + logic is_amo_min; + logic is_amo_minu; + } hpdcache_uc_op_t; + // }}} + + // Definition of constants and types for the CMO request handler (CMOH) + // {{{ + typedef struct packed { // one flag per CMO kind handled (no prefetch flag is declared here) + logic is_inval_by_nline; + logic is_inval_by_set; + logic is_inval_all; + logic is_fence; + } hpdcache_cmoh_op_t; + // }}} +endpackage diff --git
a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv new file mode 100644 index 0000000..7697737 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv @@ -0,0 +1,138 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : May, 2021 + * Description : HPDcache Pseudo-LRU replacement policy (one PLRU bit per way and per set) + * History : + */ +module hpdcache_plru + // Parameters + // {{{ +#( + parameter int unsigned SETS = 0, // default 0 looks like a placeholder - presumably must be overridden; TODO confirm + parameter int unsigned WAYS = 0, // default 0 looks like a placeholder - presumably must be overridden; TODO confirm + + localparam type set_t = logic [$clog2(SETS)-1:0], + localparam type way_vector_t = logic [WAYS-1:0] +) + // }}} + + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + + // PLRU update interface (updt_way_i is OR-ed into the PLRU bits of set updt_set_i) + input logic updt_i, + input set_t updt_set_i, + input way_vector_t updt_way_i, + + // Victim replacement interface (a 0 bit in repl_dir_valid_i marks a free way, which is preferred as victim) + input logic repl_i, + input set_t repl_set_i, + input way_vector_t repl_dir_valid_i, + input logic repl_updt_plru_i, + + output way_vector_t victim_way_o +); + // }}} + + // Internal signals and registers + // {{{ + way_vector_t [SETS-1:0] plru_q, plru_d; + way_vector_t updt_plru; + way_vector_t repl_plru; + way_vector_t used_victim_way, unused_victim_way; + // }}} + + // Victim way selection + // {{{ + hpdcache_prio_1hot_encoder #(.N(WAYS)) + used_victim_select_i ( + .val_i (~plru_q[repl_set_i]), + .val_o (used_victim_way) + ); + + hpdcache_prio_1hot_encoder #(.N(WAYS)) + unused_victim_select_i ( + .val_i (~repl_dir_valid_i), + .val_o (unused_victim_way) + ); + + // If there is a free entry in the directory (valid == 0), choose it as victim + assign victim_way_o = |unused_victim_way ?
unused_victim_way : used_victim_way; + // }}} + + // Pseudo-LRU update process + // {{{ + assign updt_plru = plru_q[updt_set_i] | updt_way_i; + assign repl_plru = plru_q[repl_set_i] | victim_way_o; + + always_comb + begin : plru_update_comb + plru_d = plru_q; + + case (1'b1) // first matching item wins: repl_i is tested before updt_i + // When replacing a cache-line, set the PLRU bit of the new line + repl_i: + if (repl_updt_plru_i) begin + // If all PLRU bits of a given set would be set, reset them all + // but the currently accessed way + if (&repl_plru) begin + plru_d[repl_set_i] = victim_way_o; + end else begin + plru_d[repl_set_i] = repl_plru; + end + end + + // When accessing a cache-line, set the corresponding PLRU bit + updt_i: + // If all PLRU bits of a given set would be set, reset them all + // but the currently accessed way + if (&updt_plru) begin + plru_d[updt_set_i] = updt_way_i; + end else begin + plru_d[updt_set_i] = updt_plru; + end + + default: begin + // do nothing + end + endcase + end + // }}} + + // Set state process (asynchronous active-low reset clears all PLRU bits) + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : lru_ff + if (!rst_ni) begin + plru_q <= '0; + end else begin + if (updt_i || repl_i) begin + plru_q <= plru_d; + end + end + end + // }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv new file mode 100755 index 0000000..d7d9d64 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv @@ -0,0 +1,666 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : September, 2021 + * Description : HPDcache Replay Table + * History : + */ +module hpdcache_rtab +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter type rtab_entry_t = logic +) +// }}} +// Ports +// {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, // RTAB is empty + output logic full_o, // RTAB is full + + // Check RTAB signals + // This interface allows to check if there is an address-overlapping + // request in the RTAB with respect to the given nline. 
+ input logic check_i, // Check for hit (nline) in the RTAB + input hpdcache_nline_t check_nline_i, + output logic check_hit_o, + + // Allocate signals + // This interface allows to allocate a new request in a new linked list + input logic alloc_i, + input logic alloc_and_link_i, + input rtab_entry_t alloc_req_i, + input logic alloc_mshr_hit_i, + input logic alloc_mshr_full_i, + input logic alloc_mshr_ready_i, + input logic alloc_wbuf_hit_i, + input logic alloc_wbuf_not_ready_i, + + // Pop signals + // This interface allows to read (and remove) a request from the RTAB + output logic pop_try_valid_o, // Request ready to be replayed + input logic pop_try_i, + output rtab_entry_t pop_try_req_o, + output rtab_ptr_t pop_try_ptr_o, + + // Pop Commit signals + // This interface allows to actually remove a popped request + input logic pop_commit_i, + input rtab_ptr_t pop_commit_ptr_i, + + // Pop Rollback signals + // This interface allows to put back a popped request + input logic pop_rback_i, + input rtab_ptr_t pop_rback_ptr_i, + input logic pop_rback_mshr_hit_i, + input logic pop_rback_mshr_full_i, + input logic pop_rback_mshr_ready_i, + input logic pop_rback_wbuf_hit_i, + input logic pop_rback_wbuf_not_ready_i, + + + // Control signals from/to WBUF + output hpdcache_req_addr_t wbuf_addr_o, // address to check against ongoing writes + output logic wbuf_is_read_o, // monitored request is read + input logic wbuf_hit_open_i, // Hit on open entry in the write buf + input logic wbuf_hit_pend_i, // Hit on pend entry in the write buf + input logic wbuf_hit_sent_i, // Hit on sent entry in the write buf + input logic wbuf_not_ready_i, // Write buffer cannot accept the write + + // Control signals from the Miss Handler + input logic miss_ready_i, // Miss Handler is ready + + // Control signals from the Refill Handler + input logic refill_i, // Active refill + input hpdcache_nline_t refill_nline_i, // Cache-line index being refilled + + // Configuration parameters + input logic 
cfg_single_entry_i // Enable only one entry of the table +); +// }}} + +// Definition of constants, types and functions +// {{{ + localparam int N = HPDCACHE_RTAB_ENTRIES; + + function automatic rtab_ptr_t rtab_bv_to_index( + input logic [N-1:0] bv); + for (int i = 0; i < N; i++) begin + if (bv[i]) return rtab_ptr_t'(i); + end + return 0; + endfunction + + function automatic logic [N-1:0] rtab_index_to_bv( + input rtab_ptr_t index); + logic [N-1:0] bv; + + for (int i = 0; i < N; i++) begin + bv[i] = (rtab_ptr_t'(i) == index); + end + return bv; + endfunction + + function automatic bit rtab_mshr_set_equal( + input hpdcache_nline_t x, + input hpdcache_nline_t y); + return (x[0 +: HPDCACHE_MSHR_SET_WIDTH] == y[0 +: HPDCACHE_MSHR_SET_WIDTH]); + endfunction + + function automatic logic [N-1:0] rtab_next(rtab_ptr_t [N-1:0] next, rtab_ptr_t x); + return rtab_index_to_bv(next[x]); + endfunction + + typedef enum { + POP_TRY_HEAD, + POP_TRY_NEXT, + POP_TRY_NEXT_WAIT + } rtab_pop_try_state_e; +// }}} + +// Internal signals and registers +// {{{ + rtab_entry_t [N-1:0] req_q; + rtab_ptr_t [N-1:0] next_q; + + rtab_pop_try_state_e pop_try_state_q, pop_try_state_d; + logic [N-1:0] pop_try_next_q, pop_try_next_d; + + logic [N-1:0] valid_q; + logic [N-1:0] valid_set, valid_rst; + logic [N-1:0] alloc_valid_set; + logic [N-1:0] pop_commit_valid_rst; + + // Bits indicating if the corresponding entry is the head of a linked list + logic [N-1:0] head_q; + logic [N-1:0] head_set, head_rst; + logic [N-1:0] alloc_head_set, alloc_head_rst; + logic [N-1:0] pop_try_head_rst; + logic [N-1:0] pop_commit_head_set; + logic [N-1:0] pop_rback_head_set; + + // Bits indicating if the corresponding entry is the tail of a linked list + logic [N-1:0] tail_q; + logic [N-1:0] tail_set, tail_rst; + logic [N-1:0] alloc_tail_set, alloc_tail_rst; + + // There is a pending miss on the target nline + logic [N-1:0] deps_mshr_hit_q; + logic [N-1:0] deps_mshr_hit_set, deps_mshr_hit_rst; + logic [N-1:0]
alloc_deps_mshr_hit_set; + logic [N-1:0] pop_rback_deps_mshr_hit_set; + + // The MSHR has no available slot for the new miss + logic [N-1:0] deps_mshr_full_q; + logic [N-1:0] deps_mshr_full_set, deps_mshr_full_rst; + logic [N-1:0] alloc_deps_mshr_full_set; + logic [N-1:0] pop_rback_deps_mshr_full_set; + + // The MSHR is not ready to send a new miss request + logic [N-1:0] deps_mshr_ready_q; + logic [N-1:0] deps_mshr_ready_set, deps_mshr_ready_rst; + logic [N-1:0] alloc_deps_mshr_ready_set; + logic [N-1:0] pop_rback_deps_mshr_ready_set; + + // Hit on a non-empty entry of the write buffer + logic [N-1:0] deps_wbuf_hit_q; + logic [N-1:0] deps_wbuf_hit_set, deps_wbuf_hit_rst; + logic [N-1:0] alloc_deps_wbuf_hit_set; + logic [N-1:0] pop_rback_deps_wbuf_hit_set; + + // Hit on a pend entry of the write buffer + logic [N-1:0] deps_wbuf_not_ready_q; + logic [N-1:0] deps_wbuf_not_ready_set, deps_wbuf_not_ready_rst; + logic [N-1:0] alloc_deps_wbuf_not_ready_set; + logic [N-1:0] pop_rback_deps_wbuf_not_ready_set; + + logic [N-1:0] nodeps; + hpdcache_nline_t [N-1:0] nline; + hpdcache_req_addr_t [N-1:0] addr; + logic [N-1:0] is_read; + logic [N-1:0] check_hit; + logic [N-1:0] match_check_nline; + logic [N-1:0] match_check_tail; + logic [N-1:0] match_refill_nline; + logic [N-1:0] match_refill_mshr_set; + + logic [N-1:0] free; + logic [N-1:0] free_alloc; + logic alloc; + + logic [N-1:0] pop_match_next; + logic [N-1:0] pop_rback_ptr_bv; + logic [N-1:0] pop_try_bv; + logic [N-1:0] ready; + + genvar gen_i; +// }}} + +// Compute global control signals +// {{{ + // compute if entries are ready to be replayed + assign nodeps = ~(deps_mshr_hit_q | + deps_mshr_full_q | + deps_mshr_ready_q | + deps_wbuf_hit_q | + deps_wbuf_not_ready_q); + + assign ready = valid_q & head_q & nodeps; + + assign free = ~valid_q; + + // compute the free vector (one-hot signal) + hpdcache_prio_1hot_encoder #( + .N (N) + ) free_encoder_i ( + .val_i (free), + .val_o (free_alloc) + ); + + // full and empty
signals + assign empty_o = &(~valid_q); + assign full_o = &( valid_q) | (|valid_q & cfg_single_entry_i); +// }}} + +// Check interface +// {{{ + generate + for (gen_i = 0; gen_i < N; gen_i++) begin : check_gen + assign addr[gen_i] = {req_q[gen_i].addr_tag, req_q[gen_i].addr_offset}, + nline[gen_i] = hpdcache_get_req_addr_nline(addr[gen_i]), + match_check_nline[gen_i] = (check_nline_i == nline[gen_i]); + + assign is_read[gen_i] = is_load(req_q[gen_i].op) | + is_cmo_prefetch(req_q[gen_i].op, req_q[gen_i].size); + end + endgenerate + + assign check_hit = valid_q & match_check_nline, + check_hit_o = |check_hit, + match_check_tail = check_hit & tail_q; +// }}} + +// Allocation process +// {{{ + assign alloc = alloc_i | alloc_and_link_i; + + // Set the valid bit-vector of the replay table + assign alloc_valid_set = free_alloc & {N{alloc}}; + + // Set of head and tail bit-vectors during an allocation + // - The head bit is only set when creating a new linked-list + // - The tail bit is always set because new requests are added on the tail. 
+ assign alloc_head_set = free_alloc & {N{alloc_i}}, + alloc_tail_set = alloc_valid_set; + + // Reset of head and tail bit-vectors during an allocation + // - When doing an allocation and link, head bit shall be reset + // - when doing an allocation and link, the "prev" tail shall be reset + assign alloc_head_rst = free_alloc & {N{alloc_and_link_i}}, + alloc_tail_rst = match_check_tail & {N{alloc_and_link_i}}; + + // Set the dependency bits for the allocated entry + assign alloc_deps_mshr_hit_set = alloc_valid_set & {N{ alloc_mshr_hit_i}}, + alloc_deps_mshr_full_set = alloc_valid_set & {N{ alloc_mshr_full_i}}, + alloc_deps_mshr_ready_set = alloc_valid_set & {N{ alloc_mshr_ready_i}}, + alloc_deps_wbuf_hit_set = alloc_valid_set & {N{ alloc_wbuf_hit_i}}, + alloc_deps_wbuf_not_ready_set = alloc_valid_set & {N{alloc_wbuf_not_ready_i}}; +// }}} + +// Update replay table dependencies +// {{{ + // Update write buffer hit dependencies + // {{{ + // Build a bit-vector with HEAD requests waiting for a conflict in the wbuf + logic [N-1:0] wbuf_rd_pending, wbuf_wr_pending; + logic [N-1:0] wbuf_rd_gnt, wbuf_wr_gnt; + logic [ 1:0] wbuf_pending; + logic [ 1:0] wbuf_gnt; + logic wbuf_ready; + logic [N-1:0] wbuf_sel; + + assign wbuf_rd_pending = valid_q & head_q & deps_wbuf_hit_q, + wbuf_wr_pending = valid_q & head_q & deps_wbuf_not_ready_q; + + // Choose in a round-robin manner a ready transaction waiting for a conflict in the wbuf + hpdcache_rrarb #( + .N (N) + ) wbuf_rd_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i (wbuf_rd_pending), + .gnt_o (wbuf_rd_gnt), + .ready_i (wbuf_gnt[0] & wbuf_ready) + ); + + hpdcache_rrarb #( + .N (N) + ) wbuf_wr_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i (wbuf_wr_pending), + .gnt_o (wbuf_wr_gnt), + .ready_i (wbuf_gnt[1] & wbuf_ready) + ); + + assign wbuf_pending = {|wbuf_wr_gnt, |wbuf_rd_gnt}, + wbuf_ready = |(pop_try_bv & (wbuf_rd_gnt | wbuf_wr_gnt)); + + hpdcache_fxarb #( + .N (2) + ) wbuf_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i 
(wbuf_pending), + .gnt_o (wbuf_gnt), + .ready_i (wbuf_ready) + ); + + assign wbuf_sel = wbuf_gnt[0] ? wbuf_rd_gnt : + wbuf_gnt[1] ? wbuf_wr_gnt : '0; + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(hpdcache_req_addr_t)), + .ONE_HOT_SEL (1'b1) + ) wbuf_pending_addr_mux_i ( + .data_i (addr), + .sel_i (wbuf_sel), + .data_o (wbuf_addr_o) + ); + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH (1), + .ONE_HOT_SEL (1'b1) + ) wbuf_pending_is_read_mux_i ( + .data_i (is_read), + .sel_i (wbuf_sel), + .data_o (wbuf_is_read_o) + ); + + // reset write buffer dependency bits with the output from the write buffer + assign deps_wbuf_hit_rst = + wbuf_sel & ~{N{wbuf_hit_open_i | wbuf_hit_pend_i | wbuf_hit_sent_i}}; + assign deps_wbuf_not_ready_rst = + wbuf_sel & ~{N{wbuf_not_ready_i}}; + // }}} + + // Update miss handler dependency + // {{{ + assign deps_mshr_ready_rst = {N{miss_ready_i}}; + // }}} + + // Update refill dependencies + // {{{ + generate + for (gen_i = 0; gen_i < N; gen_i++) begin : match_refill_gen + assign match_refill_mshr_set[gen_i] = + rtab_mshr_set_equal(refill_nline_i, nline[gen_i]); + assign match_refill_nline[gen_i] = + (refill_nline_i == nline[gen_i]); + end + endgenerate + + assign deps_mshr_full_rst = {N{refill_i}} & match_refill_mshr_set; + assign deps_mshr_hit_rst = {N{refill_i}} & match_refill_nline; + // }}} +// }}} + +// Pop interface +// {{{ + logic [N-1:0] pop_sel; + logic [N-1:0] pop_commit_bv; + + assign pop_commit_bv = rtab_index_to_bv(pop_commit_ptr_i); + + // Pop try process + // {{{ + logic [N-1:0] pop_gnt; + logic pop_head; + + hpdcache_rrarb #( + .N (N) + ) pop_arb_i ( + .clk_i, + .rst_ni, + .req_i (ready), + .gnt_o (pop_gnt), + .ready_i (pop_head) + ); + + always_comb + begin : req_valid_comb + case(pop_try_state_q) + POP_TRY_HEAD : pop_try_valid_o = |ready; + POP_TRY_NEXT : pop_try_valid_o = 1'b1; + POP_TRY_NEXT_WAIT: pop_try_valid_o = 1'b1; + default : pop_try_valid_o = 1'b0; + endcase + end + + always_comb + begin : 
pop_entry_sel_comb + pop_try_state_d = pop_try_state_q; + pop_try_next_d = pop_try_next_q; + pop_head = 1'b0; + pop_sel = '0; + + case (pop_try_state_q) + POP_TRY_HEAD: begin + // This FSM may be in this state after forwarding the tail of + // a list. In that case, a rollback may arrive in this cycle. + pop_sel = pop_gnt; + if (!pop_rback_i && pop_try_valid_o) begin + if (pop_try_i) begin + // If the request interface accepts the request, go to the next request + // in the list (if the current request is not the tail). Otherwise, stay in + // the same state to forward a request from a new list + pop_head = 1'b1; + if ((pop_gnt & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end + end + end + end + POP_TRY_NEXT: begin + pop_sel = pop_try_next_q; + if (pop_rback_i) begin + pop_try_state_d = POP_TRY_HEAD; + end else begin + if (pop_try_i) begin + // If the request interface accepts the new request, go to the next request + // in the list (if the current request is not the tail). Otherwise, return + // to the POP_TRY_HEAD state to forward a request from a new list + if ((pop_try_next_q & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end else begin + pop_try_state_d = POP_TRY_HEAD; + end + end else begin + // If the request interface is not ready to consume the new request, wait + // until it is + pop_try_state_d = POP_TRY_NEXT_WAIT; + end + end + end + POP_TRY_NEXT_WAIT: begin + // Wait for the current request to be accepted.
Then go to the next request in the + // list or to a new list + pop_sel = pop_try_next_q; + if (pop_try_i) begin + if ((pop_try_next_q & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end else begin + pop_try_state_d = POP_TRY_HEAD; + end + end + end + default: begin + end + endcase + end + + assign pop_commit_head_set = '0; + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(rtab_entry_t)), + .ONE_HOT_SEL (1'b1) + ) pop_mux_i ( + .data_i (req_q), + .sel_i (pop_sel), + .data_o (pop_try_req_o) + ); + + // Temporarily unset the head bit of the popped request to prevent it to be rescheduled + assign pop_try_bv = pop_sel & {N{pop_try_i}}, + pop_try_head_rst = pop_try_bv; + + + // Forward the index of the entry being popped. This is used later by the + // commit or rollback operations + assign pop_try_ptr_o = rtab_bv_to_index(pop_sel); + + // }}} + + // Pop commit process + // {{{ + // Invalidate the entry being popped (head of the linked list) + assign pop_commit_valid_rst = {N{pop_commit_i}} & rtab_index_to_bv(pop_commit_ptr_i); + // }}} + + // Pop rollback process + // {{{ + // Set again the head bit of the rolled-back request + assign pop_rback_ptr_bv = rtab_index_to_bv(pop_rback_ptr_i); + + assign pop_rback_head_set = {N{pop_rback_i}} & pop_rback_ptr_bv; + + assign pop_rback_deps_mshr_hit_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_hit_i}}, + pop_rback_deps_mshr_full_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_full_i}}, + pop_rback_deps_mshr_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_ready_i}}, + pop_rback_deps_wbuf_hit_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_hit_i}}, + pop_rback_deps_wbuf_not_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_not_ready_i}}; + // }}} +// }}} + +// Internal state assignment +// {{{ + assign head_set = alloc_head_set | pop_commit_head_set | pop_rback_head_set, + 
head_rst = alloc_head_rst | pop_try_head_rst; + + assign tail_set = alloc_tail_set, + tail_rst = alloc_tail_rst; + + assign valid_set = alloc_valid_set, + valid_rst = pop_commit_valid_rst; + + assign deps_mshr_hit_set = alloc_deps_mshr_hit_set | pop_rback_deps_mshr_hit_set, + deps_mshr_full_set = alloc_deps_mshr_full_set | pop_rback_deps_mshr_full_set, + deps_mshr_ready_set = alloc_deps_mshr_ready_set | pop_rback_deps_mshr_ready_set, + deps_wbuf_hit_set = alloc_deps_wbuf_hit_set | pop_rback_deps_wbuf_hit_set, + deps_wbuf_not_ready_set = alloc_deps_wbuf_not_ready_set | pop_rback_deps_wbuf_not_ready_set; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : rtab_valid_ff + if (!rst_ni) begin + valid_q <= '0; + head_q <= '0; + tail_q <= '0; + deps_mshr_hit_q <= '0; + deps_mshr_full_q <= '0; + deps_mshr_ready_q <= '0; + deps_wbuf_hit_q <= '0; + deps_wbuf_not_ready_q <= '0; + next_q <= '0; + end else begin + valid_q <= (~valid_q & valid_set) | + ( valid_q & ~valid_rst); + + // update head and tail flags + head_q <= (~head_q & head_set) | + ( head_q & ~head_rst); + + tail_q <= (~tail_q & tail_set) | + ( tail_q & ~tail_rst); + + // update dependency flags + deps_mshr_hit_q <= (~deps_mshr_hit_q & deps_mshr_hit_set) | + ( deps_mshr_hit_q & ~deps_mshr_hit_rst); + deps_mshr_full_q <= (~deps_mshr_full_q & deps_mshr_full_set) | + ( deps_mshr_full_q & ~deps_mshr_full_rst); + deps_mshr_ready_q <= (~deps_mshr_ready_q & deps_mshr_ready_set) | + ( deps_mshr_ready_q & ~deps_mshr_ready_rst); + deps_wbuf_hit_q <= (~deps_wbuf_hit_q & deps_wbuf_hit_set) | + ( deps_wbuf_hit_q & ~deps_wbuf_hit_rst); + deps_wbuf_not_ready_q <= (~deps_wbuf_not_ready_q & deps_wbuf_not_ready_set) | + ( deps_wbuf_not_ready_q & ~deps_wbuf_not_ready_rst); + + // update the next pointers + for (int i = 0; i < N; i++) begin + if (alloc_and_link_i && match_check_tail[i]) begin + next_q[i] <= rtab_bv_to_index(free_alloc); + end + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : 
pop_try_ff + if (!rst_ni) begin + pop_try_state_q <= POP_TRY_HEAD; + pop_try_next_q <= '0; + end else begin + pop_try_state_q <= pop_try_state_d; + pop_try_next_q <= pop_try_next_d; + end + end + + always_ff @(posedge clk_i) + begin : rtab_ff + for (int i = 0; i < N; i++) begin + // update the request array + if (valid_set[i]) begin + req_q[i] <= alloc_req_i; + end + end + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) disable iff (!rst_ni) + check_i |-> $onehot0(match_check_tail)) else + $error("rtab: more than one entry matching"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc_and_link_i |-> (check_i & check_hit_o)) else + $error("rtab: alloc and link shall be performed in case of check hit"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc_and_link_i |-> + ({alloc_req_i.addr_tag, hpdcache_get_req_offset_set(alloc_req_i.addr_offset)} == + check_nline_i)) else + $error("rtab: nline for alloc and link shall match the one being checked"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc_i |-> !alloc_and_link_i) else + $error("rtab: only one allocation per cycle is allowed"); + +`ifndef VERILATOR + assert property (@(posedge clk_i) disable iff (!rst_ni) + pop_try_i |-> ##1 (pop_commit_i | pop_rback_i)) else + $error("rtab: a pop try shall be followed by a commit or rollback"); +`endif + + assert property (@(posedge clk_i) disable iff (!rst_ni) + pop_commit_i |-> valid_q[pop_commit_ptr_i]) else + $error("rtab: commiting an invalid entry"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + pop_rback_i |-> valid_q[pop_rback_ptr_i]) else + $error("rtab: rolling-back an invalid entry"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + pop_rback_i |-> !pop_try_i) else + $error("rtab: cache shall not accept a new request while rolling back"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc |-> ~full_o) else + 
$error("rtab: trying to allocate while the table is full"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc_and_link_i |-> ~cfg_single_entry_i) else + $error("rtab: trying to link a request in single entry mode"); +// pragma translate_on +// }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv new file mode 100644 index 0000000..17519e6 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv @@ -0,0 +1,965 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : May, 2021 + * Description : HPDcache uncached and AMO request handler + * History : + */ +module hpdcache_uncached +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + parameter type hpdcache_mem_resp_w_t = logic, + + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) + // }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Global control signals + // {{{ + input logic wbuf_empty_i, + input logic mshr_empty_i, + input logic rtab_empty_i, + input logic ctrl_empty_i, + // }}} + + // Cache-side request interface + // {{{ + input logic req_valid_i, + output logic req_ready_o, + input hpdcache_uc_op_t req_op_i, + input hpdcache_req_addr_t req_addr_i, + input hpdcache_req_size_t req_size_i, + input hpdcache_req_data_t req_data_i, + input hpdcache_req_be_t req_be_i, + input logic req_uc_i, + input hpdcache_req_sid_t req_sid_i, + input hpdcache_req_tid_t req_tid_i, + input logic req_need_rsp_i, + // }}} + + // Write buffer interface + // {{{ + output logic wbuf_flush_all_o, + // }}} + + // AMO Cache Interface + // {{{ + output logic dir_amo_match_o, + output hpdcache_set_t dir_amo_match_set_o, + output hpdcache_tag_t dir_amo_match_tag_o, + output logic dir_amo_update_plru_o, + input hpdcache_way_vector_t dir_amo_hit_way_i, + + output logic data_amo_write_o, + output logic data_amo_write_enable_o, + output hpdcache_set_t data_amo_write_set_o, + output hpdcache_req_size_t data_amo_write_size_o, + output hpdcache_word_t data_amo_write_word_o, + output logic [63:0] data_amo_write_data_o, + output logic [7:0] data_amo_write_be_o, + // }}} + + // LR/SC reservation buffer + // {{{ + input logic lrsc_snoop_i, + input hpdcache_req_addr_t lrsc_snoop_addr_i, + 
input hpdcache_req_size_t lrsc_snoop_size_i, + // }}} + + // Core response interface + // {{{ + input logic core_rsp_ready_i, + output logic core_rsp_valid_o, + output hpdcache_rsp_t core_rsp_o, + // }}} + + // MEMORY interfaces + // {{{ + // Memory request unique identifier + input hpdcache_mem_id_t mem_read_id_i, + input hpdcache_mem_id_t mem_write_id_i, + + // Read interface + input logic mem_req_read_ready_i, + output logic mem_req_read_valid_o, + output hpdcache_mem_req_t mem_req_read_o, + + output logic mem_resp_read_ready_o, + input logic mem_resp_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_read_i, + + // Write interface + input logic mem_req_write_ready_i, + output logic mem_req_write_valid_o, + output hpdcache_mem_req_t mem_req_write_o, + + input logic mem_req_write_data_ready_i, + output logic mem_req_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_write_data_o, + + output logic mem_resp_write_ready_o, + input logic mem_resp_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_write_i, + // }}} + + // Configuration interface + // {{{ + input logic cfg_error_on_cacheable_amo_i + // }}} +); +// }}} + +// Definition of constants and types +// {{{ + localparam hpdcache_uint MEM_REQ_RATIO = HPDcacheMemDataWidth/HPDCACHE_REQ_DATA_WIDTH; + localparam hpdcache_uint MEM_REQ_WORD_INDEX_WIDTH = $clog2(MEM_REQ_RATIO); + + typedef enum { + UC_IDLE, + UC_WAIT_PENDING, + UC_MEM_REQ, + UC_MEM_W_REQ, + UC_MEM_WDATA_REQ, + UC_MEM_WAIT_RSP, + UC_CORE_RSP, + UC_AMO_READ_DIR, + UC_AMO_WRITE_DATA + } hpdcache_uc_fsm_t; + + localparam logic AMO_SC_SUCCESS = 1'b0; + localparam logic AMO_SC_FAILURE = 1'b1; + + function automatic logic [63:0] prepare_amo_data_operand( + input logic [63:0] data_i, + input hpdcache_req_size_t size_i, + input hpdcache_req_addr_t addr_i, + input logic sign_extend_i + ); + // 64-bits AMOs are already aligned, thus do nothing + if (size_i == hpdcache_req_size_t'(3)) begin + return data_i; + end + + // 32-bits AMOs + else begin + if 
(addr_i[2] == 1'b1) begin + if (sign_extend_i) begin + return {{32{data_i[63]}}, data_i[63:32]}; + end else begin + return {{32{ 1'b0}}, data_i[63:32]}; + end + end else begin + if (sign_extend_i) begin + return {{32{data_i[31]}}, data_i[31: 0]}; + end else begin + return {{32{ 1'b0}}, data_i[31: 0]}; + end + end + end + endfunction; + + function automatic logic [63:0] prepare_amo_data_result( + input logic [63:0] data_i, + input hpdcache_req_size_t size_i + ); + // 64-bits AMOs are already aligned, thus do nothing + if (size_i == hpdcache_req_size_t'(3)) begin + return data_i; + end + + // 32-bits AMOs + else begin + return {2{data_i[31:0]}}; + end + endfunction; + + function automatic logic amo_need_sign_extend(hpdcache_uc_op_t op); + unique case (1'b1) + op.is_amo_add, + op.is_amo_max, + op.is_amo_min: return 1'b1; + default : return 1'b0; + endcase; + endfunction +// }}} + +// Internal signals and registers +// {{{ + hpdcache_uc_fsm_t uc_fsm_q, uc_fsm_d; + hpdcache_uc_op_t req_op_q; + hpdcache_req_addr_t req_addr_q; + hpdcache_req_size_t req_size_q; + hpdcache_req_data_t req_data_q; + hpdcache_req_be_t req_be_q; + logic req_uc_q; + hpdcache_req_sid_t req_sid_q; + hpdcache_req_tid_t req_tid_q; + logic req_need_rsp_q; + + logic uc_sc_retcode_q, uc_sc_retcode_d; + + hpdcache_req_data_t rsp_rdata_q, rsp_rdata_d; + logic rsp_error_set, rsp_error_rst; + logic rsp_error_q; + logic mem_resp_write_valid_q, mem_resp_write_valid_d; + logic mem_resp_read_valid_q, mem_resp_read_valid_d; + + hpdcache_req_data_t mem_req_write_data; + logic [63:0] amo_req_ld_data; + logic [63:0] amo_ld_data; + logic [63:0] amo_req_st_data; + logic [63:0] amo_st_data; + logic [ 7:0] amo_st_be; + logic [63:0] amo_result; +// }}} + +// LR/SC reservation buffer logic +// {{{ + logic lrsc_rsrv_valid_q; + hpdcache_req_addr_t lrsc_rsrv_addr_q, lrsc_rsrv_addr_d; + hpdcache_nline_t lrsc_rsrv_nline; + hpdcache_offset_t lrsc_rsrv_word; + + hpdcache_offset_t lrsc_snoop_words; + hpdcache_nline_t 
lrsc_snoop_nline; + hpdcache_offset_t lrsc_snoop_base, lrsc_snoop_end; + logic lrsc_snoop_hit; + logic lrsc_snoop_reset; + + hpdcache_nline_t lrsc_uc_nline; + hpdcache_offset_t lrsc_uc_word; + logic lrsc_uc_hit; + logic lrsc_uc_set, lrsc_uc_reset; + + // NOTE: Reservation set for LR instruction is always 8-bytes in this + // implementation. + assign lrsc_rsrv_nline = hpdcache_get_req_addr_nline(lrsc_rsrv_addr_q), + lrsc_rsrv_word = hpdcache_get_req_addr_offset(lrsc_rsrv_addr_q) >> 3; + + // Check hit on LR/SC reservation for snoop port (normal write accesses) + assign lrsc_snoop_words = (lrsc_snoop_size_i < 3) ? 1 : hpdcache_offset_t'((8'h1 << lrsc_snoop_size_i) >> 3), + lrsc_snoop_nline = hpdcache_get_req_addr_nline(lrsc_snoop_addr_i), + lrsc_snoop_base = hpdcache_get_req_addr_offset(lrsc_snoop_addr_i) >> 3, + lrsc_snoop_end = lrsc_snoop_base + lrsc_snoop_words; + + assign lrsc_snoop_hit = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_snoop_nline) & + (lrsc_rsrv_word >= lrsc_snoop_base) & + (lrsc_rsrv_word < lrsc_snoop_end ); + + assign lrsc_snoop_reset = lrsc_snoop_i & lrsc_snoop_hit; + + // Check hit on LR/SC reservation for AMOs and SC + assign lrsc_uc_nline = hpdcache_get_req_addr_nline(req_addr_i), + lrsc_uc_word = hpdcache_get_req_addr_offset(req_addr_i) >> 3; + + assign lrsc_uc_hit = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_uc_nline) & + (lrsc_rsrv_word == lrsc_uc_word); +// }}} + +// Uncacheable request FSM +// {{{ + always_comb + begin : uc_fsm_comb + mem_resp_write_valid_d = mem_resp_write_valid_q; + mem_resp_read_valid_d = mem_resp_read_valid_q; + rsp_error_set = 1'b0; + rsp_error_rst = 1'b0; + lrsc_rsrv_addr_d = lrsc_rsrv_addr_q; + uc_sc_retcode_d = uc_sc_retcode_q; + wbuf_flush_all_o = 1'b0; + lrsc_uc_set = 1'b0; + lrsc_uc_reset = 1'b0; + + uc_fsm_d = uc_fsm_q; + + case (uc_fsm_q) + // Wait for a request + // {{{ + UC_IDLE: begin + + if (req_valid_i) begin + wbuf_flush_all_o = 1'b1; + + unique case (1'b1) + req_op_i.is_ld, + req_op_i.is_st: begin + 
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + + req_op_i.is_amo_swap, + req_op_i.is_amo_add, + req_op_i.is_amo_and, + req_op_i.is_amo_or, + req_op_i.is_amo_xor, + req_op_i.is_amo_max, + req_op_i.is_amo_maxu, + req_op_i.is_amo_min, + req_op_i.is_amo_minu, + req_op_i.is_amo_lr: begin + // Reset LR/SC reservation if AMO matches its address + lrsc_uc_reset = ~req_op_i.is_amo_lr & lrsc_uc_hit; + + if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end else begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + end + + req_op_i.is_amo_sc: begin + if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end else begin + // Reset previous reservation (if any) + lrsc_uc_reset = 1'b1; + + // SC with valid reservation + if (lrsc_uc_hit) begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + // SC with no valid reservation, thus respond with the failure code + else begin + uc_sc_retcode_d = AMO_SC_FAILURE; + uc_fsm_d = UC_CORE_RSP; + end + end + end + + default: begin + if (req_need_rsp_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end + end + endcase + end + end + // }}} + + // Wait for the write buffer to be empty + // {{{ + UC_WAIT_PENDING: begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + // }}} + + // Send request to memory + // {{{ + UC_MEM_REQ: begin + uc_fsm_d = UC_MEM_REQ; + + mem_resp_write_valid_d = 1'b0; + mem_resp_read_valid_d = 1'b0; + + case (1'b1) + req_op_q.is_ld, + req_op_q.is_amo_lr: begin + if 
(mem_req_read_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end + end + + req_op_q.is_st, + req_op_q.is_amo_sc, + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, + req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + if (mem_req_write_ready_i && mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else if (mem_req_write_ready_i) begin + uc_fsm_d = UC_MEM_WDATA_REQ; + end else if (mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_W_REQ; + end + end + endcase + end + // }}} + + // Send write address + // {{{ + UC_MEM_W_REQ: begin + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + if (mem_req_write_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else begin + uc_fsm_d = UC_MEM_W_REQ; + end + end + // }}} + + // Send write data + // {{{ + UC_MEM_WDATA_REQ: begin + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + if (mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else begin + uc_fsm_d = UC_MEM_WDATA_REQ; + end + end + // }}} + + // Wait for the response from the memory + // {{{ + UC_MEM_WAIT_RSP: begin + automatic bit rd_error; + automatic bit wr_error; + + uc_fsm_d = UC_MEM_WAIT_RSP; + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + rd_error = mem_resp_read_valid_i && + ( mem_resp_read_i.mem_resp_r_error == HPDCACHE_MEM_RESP_NOK); + wr_error = mem_resp_write_valid_i && + (mem_resp_write_i.mem_resp_w_error == HPDCACHE_MEM_RESP_NOK); + rsp_error_set = req_need_rsp_q & (rd_error | wr_error); + + case (1'b1) + req_op_q.is_ld: begin + if (mem_resp_read_valid_i) begin + if (req_need_rsp_q) begin + uc_fsm_d = 
UC_CORE_RSP; + end else begin + uc_fsm_d = UC_IDLE; + end + end + end + req_op_q.is_st: begin + if (mem_resp_write_valid_i) begin + if (req_need_rsp_q) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_IDLE; + end + end + end + req_op_q.is_amo_lr: begin + if (mem_resp_read_valid_i) begin + // set a new reservation + if (!rd_error) + begin + lrsc_uc_set = 1'b1; + lrsc_rsrv_addr_d = req_addr_q; + end + // in case of a memory error, do not make the reservation and + // invalidate an existing one (if valid) + else begin + lrsc_uc_reset = 1'b1; + end + + if (req_uc_q || rd_error) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + req_op_q.is_amo_sc: begin + if (mem_resp_write_valid_i) begin + automatic bit is_atomic; + + is_atomic = mem_resp_write_i.mem_resp_w_is_atomic && !wr_error; + uc_sc_retcode_d = is_atomic ? AMO_SC_SUCCESS : AMO_SC_FAILURE; + + if (req_uc_q || !is_atomic) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, + req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + // wait for both old data and write acknowledged were received + if ((mem_resp_read_valid_i && mem_resp_write_valid_i) || + (mem_resp_read_valid_i && mem_resp_write_valid_q) || + (mem_resp_read_valid_q && mem_resp_write_valid_i)) + begin + if (req_uc_q || rsp_error_q || rd_error || wr_error) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + endcase + end + // }}} + + // Send the response to the requester + // {{{ + UC_CORE_RSP: begin + if (core_rsp_ready_i) begin + rsp_error_rst = 1'b1; + uc_fsm_d = UC_IDLE; + end else begin + uc_fsm_d = UC_CORE_RSP; + end + end + // }}} + + // Check for a cache hit on the AMO target address + // {{{ + UC_AMO_READ_DIR: begin + uc_fsm_d = 
UC_AMO_WRITE_DATA; + end + // }}} + + // Write the locally computed AMO result in the cache + // {{{ + UC_AMO_WRITE_DATA: begin + uc_fsm_d = UC_CORE_RSP; + end + // }}} + endcase + end +// }}} + +// AMO unit +// {{{ + localparam hpdcache_uint AMO_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_DATA_WIDTH/64); + + generate + if (AMO_WORD_INDEX_WIDTH > 0) begin : amo_operand_mux_gen + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (64), + .ONE_HOT_SEL (1'b0) + ) amo_ld_data_mux_i ( + .data_i (rsp_rdata_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_req_ld_data) + ); + + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (64), + .ONE_HOT_SEL (1'b0) + ) amo_st_data_mux_i ( + .data_i (req_data_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_req_st_data) + ); + + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (8), + .ONE_HOT_SEL (1'b0) + ) amo_st_be_mux_i ( + .data_i (req_be_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_st_be) + ); + + end else begin + assign amo_req_ld_data = rsp_rdata_q; + assign amo_req_st_data = req_data_q; + assign amo_st_be = req_be_q; + end + endgenerate + + assign amo_ld_data = prepare_amo_data_operand(amo_req_ld_data, req_size_q, + req_addr_q, amo_need_sign_extend(req_op_q)); + assign amo_st_data = prepare_amo_data_operand(amo_req_st_data, req_size_q, + req_addr_q, amo_need_sign_extend(req_op_q)); + + hpdcache_amo amo_unit_i ( + .ld_data_i (amo_ld_data), + .st_data_i (amo_st_data), + .op_i (req_op_q), + .result_o (amo_result) + ); + + assign dir_amo_match_o = (uc_fsm_q == UC_AMO_READ_DIR), + dir_amo_match_set_o = hpdcache_get_req_addr_set(req_addr_q), + dir_amo_match_tag_o = hpdcache_get_req_addr_tag(req_addr_q), + dir_amo_update_plru_o = dir_amo_match_o; + + assign data_amo_write_o = (uc_fsm_q == UC_AMO_WRITE_DATA), + data_amo_write_enable_o = |dir_amo_hit_way_i, + data_amo_write_set_o = 
hpdcache_get_req_addr_set(req_addr_q), + data_amo_write_size_o = req_size_q, + data_amo_write_word_o = hpdcache_get_req_addr_word(req_addr_q), + data_amo_write_data_o = prepare_amo_data_result(amo_result, req_size_q), + data_amo_write_be_o = amo_st_be; +// }}} + +// Core response outputs +// {{{ + assign req_ready_o = (uc_fsm_q == UC_IDLE), + core_rsp_valid_o = (uc_fsm_q == UC_CORE_RSP); +// }}} + +// Memory read request outputs +// {{{ + always_comb + begin : mem_req_read_comb + mem_req_read_o.mem_req_addr = req_addr_q; + mem_req_read_o.mem_req_len = 0; + mem_req_read_o.mem_req_size = req_size_q; + mem_req_read_o.mem_req_id = mem_read_id_i; + mem_req_read_o.mem_req_cacheable = 1'b0; + mem_req_read_o.mem_req_command = HPDCACHE_MEM_READ; + mem_req_read_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; + + unique case (1'b1) + req_op_q.is_ld: begin + mem_req_read_valid_o = (uc_fsm_q == UC_MEM_REQ); + end + req_op_q.is_amo_lr: begin + mem_req_read_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_read_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_LDEX; + mem_req_read_valid_o = (uc_fsm_q == UC_MEM_REQ); + end + default: begin + mem_req_read_valid_o = 1'b0; + end + endcase + end +// }}} + +// Memory write request outputs +// {{{ + always_comb + begin : mem_req_write_comb + mem_req_write_data = req_data_q; + mem_req_write_o.mem_req_addr = req_addr_q; + mem_req_write_o.mem_req_len = 0; + mem_req_write_o.mem_req_size = req_size_q; + mem_req_write_o.mem_req_id = mem_write_id_i; + mem_req_write_o.mem_req_cacheable = 1'b0; + unique case (1'b1) + req_op_q.is_amo_sc: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_STEX; + end + req_op_q.is_amo_swap: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SWAP; + end + req_op_q.is_amo_add: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; 
+ end + req_op_q.is_amo_and: begin + mem_req_write_data = ~req_data_q; + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_CLR; + end + req_op_q.is_amo_or: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SET; + end + req_op_q.is_amo_xor: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_EOR; + end + req_op_q.is_amo_max: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SMAX; + end + req_op_q.is_amo_maxu: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_UMAX; + end + req_op_q.is_amo_min: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SMIN; + end + req_op_q.is_amo_minu: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_UMIN; + end + default: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_WRITE; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; + end + endcase + + unique case (uc_fsm_q) + UC_MEM_REQ: begin + unique case (1'b1) + req_op_q.is_st, + req_op_q.is_amo_sc, + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, + req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + mem_req_write_data_valid_o = 1'b1; + mem_req_write_valid_o = 1'b1; + end + + default: begin + mem_req_write_data_valid_o = 1'b0; + mem_req_write_valid_o = 1'b0; + end + endcase + end + + UC_MEM_W_REQ: begin + mem_req_write_valid_o = 1'b1; + mem_req_write_data_valid_o = 1'b0; + end + + UC_MEM_WDATA_REQ: begin + mem_req_write_valid_o = 1'b0; + mem_req_write_data_valid_o = 1'b1; + end + + default: begin + 
mem_req_write_valid_o = 1'b0; + mem_req_write_data_valid_o = 1'b0; + end + endcase + end + + generate + // memory data width is bigger than the width of the core's interface + if (MEM_REQ_RATIO > 1) begin : mem_req_data_gen + // replicate data + assign mem_req_write_data_o.mem_req_w_data = {MEM_REQ_RATIO{mem_req_write_data}}; + + // demultiplex the byte-enable + hpdcache_demux #( + .NOUTPUT (MEM_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH/8) + ) mem_write_be_demux_i ( + .data_i (req_be_q), + .sel_i (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]), + .data_o (mem_req_write_data_o.mem_req_w_be) + ); + end + + // memory data width is equal to the width of the core's interface + else begin + assign mem_req_write_data_o.mem_req_w_data = mem_req_write_data; + assign mem_req_write_data_o.mem_req_w_be = req_be_q; + end + + assign mem_req_write_data_o.mem_req_w_last = 1'b1; + endgenerate +// }}} + +// Response handling +// {{{ + logic [63:0] sc_retcode; + logic [63:0] sc_rdata; + + assign sc_retcode = {{63{1'b0}}, uc_sc_retcode_q}, + sc_rdata = prepare_amo_data_result(sc_retcode, req_size_q); + + assign core_rsp_o.rdata = req_op_q.is_amo_sc ? 
{HPDCACHE_REQ_WORDS{sc_rdata}} : rsp_rdata_q, + core_rsp_o.sid = req_sid_q, + core_rsp_o.tid = req_tid_q, + core_rsp_o.error = rsp_error_q, + core_rsp_o.aborted = 1'b0; + + // Resize the memory response data to the core response width + generate + // memory data width is bigger than the width of the core's interface + if (MEM_REQ_RATIO > 1) begin : core_rsp_data_gen + hpdcache_mux #( + .NINPUT (MEM_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH) + ) data_read_rsp_mux_i( + .data_i (mem_resp_read_i.mem_resp_r_data), + .sel_i (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]), + .data_o (rsp_rdata_d) + ); + end + + // memory data width is equal to the width of the core's interface + else begin + assign rsp_rdata_d = mem_resp_read_i.mem_resp_r_data; + end + endgenerate + + // This FSM is always ready to accept the response + assign mem_resp_read_ready_o = 1'b1, + mem_resp_write_ready_o = 1'b1; +// }}} + +// Set cache request registers +// {{{ + always_ff @(posedge clk_i) + begin : req_ff + if (req_valid_i && req_ready_o) begin + req_op_q <= req_op_i; + req_addr_q <= req_addr_i; + req_size_q <= req_size_i; + req_data_q <= req_data_i; + req_be_q <= req_be_i; + req_uc_q <= req_uc_i; + req_sid_q <= req_sid_i; + req_tid_q <= req_tid_i; + req_need_rsp_q <= req_need_rsp_i; + end + end +// }}} + +// Uncacheable request FSM set state +// {{{ + logic lrsc_rsrv_valid_set, lrsc_rsrv_valid_reset; + + assign lrsc_rsrv_valid_set = lrsc_uc_set, + lrsc_rsrv_valid_reset = lrsc_uc_reset | lrsc_snoop_reset; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : uc_fsm_ff + if (!rst_ni) begin + uc_fsm_q <= UC_IDLE; + lrsc_rsrv_valid_q <= 1'b0; + end else begin + uc_fsm_q <= uc_fsm_d; + lrsc_rsrv_valid_q <= (~lrsc_rsrv_valid_q & lrsc_rsrv_valid_set ) | + ( lrsc_rsrv_valid_q & ~lrsc_rsrv_valid_reset); + end + end + + always_ff @(posedge clk_i) + begin : uc_amo_ff + lrsc_rsrv_addr_q <= lrsc_rsrv_addr_d; + uc_sc_retcode_q <= uc_sc_retcode_d; + end +// }}} + +// 
Response registers +// {{{ + always_ff @(posedge clk_i) + begin + if (mem_resp_read_valid_i) begin + rsp_rdata_q <= rsp_rdata_d; + end + mem_resp_write_valid_q <= mem_resp_write_valid_d; + mem_resp_read_valid_q <= mem_resp_read_valid_d; + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + rsp_error_q <= 1'b0; + end else begin + rsp_error_q <= (~rsp_error_q & rsp_error_set) | + ( rsp_error_q & ~rsp_error_rst); + end + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) disable iff (!rst_ni) + (req_valid_i && req_op_i.is_ld) -> req_uc_i) else + $error("uc_handler: unexpected load request on cacheable region"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + (req_valid_i && req_op_i.is_st) -> req_uc_i) else + $error("uc_handler: unexpected store request on cacheable region"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + (req_valid_i && (req_op_i.is_amo_lr || + req_op_i.is_amo_sc || + req_op_i.is_amo_swap || + req_op_i.is_amo_add || + req_op_i.is_amo_and || + req_op_i.is_amo_or || + req_op_i.is_amo_xor || + req_op_i.is_amo_max || + req_op_i.is_amo_maxu || + req_op_i.is_amo_min || + req_op_i.is_amo_minu )) -> req_need_rsp_i) else + $error("uc_handler: amo requests shall need a response"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + (req_valid_i && (req_op_i.is_amo_lr || + req_op_i.is_amo_sc || + req_op_i.is_amo_swap || + req_op_i.is_amo_add || + req_op_i.is_amo_and || + req_op_i.is_amo_or || + req_op_i.is_amo_xor || + req_op_i.is_amo_max || + req_op_i.is_amo_maxu || + req_op_i.is_amo_min || + req_op_i.is_amo_minu )) -> (req_size_i inside {2,3})) else + $error("uc_handler: amo requests shall be 4 or 8 bytes wide"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + (mem_resp_write_valid_i || mem_resp_read_valid_i) -> (uc_fsm_q == UC_MEM_WAIT_RSP)) else + $error("uc_handler: unexpected response from memory"); +// pragma translate_on 
+// }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv new file mode 100644 index 0000000..0607440 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv @@ -0,0 +1,678 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Write Buffer (coalesces writes into directory/data slots and sends them through two FIFOs: meta-data and data) + * History : + */ +module hpdcache_wbuf + // Parameters + // {{{ +#( + // Number of entries in the directory part of the Write Buffer + parameter int unsigned WBUF_DIR_ENTRIES = 0, + // Number of entries in the data part of the Write Buffer + parameter int unsigned WBUF_DATA_ENTRIES = 0, + // Width in bits of the write words + parameter int unsigned WBUF_WORD_WIDTH = 0, + // Number of words per line in the write buffer + parameter int unsigned WBUF_WORDS = 0, + // Width in bits of the physical address + parameter int unsigned WBUF_PA_WIDTH = 0, + // Maximum value of the time counter (the counter width is $clog2 of this value) + parameter int unsigned WBUF_TIMECNT_MAX = 8, + // Number of most significant bits to check for read conflicts + parameter int unsigned WBUF_READ_MATCH_WIDTH = 0, + // Use a feedthrough FIFO on the send interface (forwarded to both send FIFOs) + parameter bit WBUF_SEND_FEEDTHROUGH = 0, + + localparam int unsigned WBUF_OFFSET_WIDTH = $clog2((WBUF_WORD_WIDTH*WBUF_WORDS)/8), /* byte-offset bits within a write-buffer line */ + localparam int unsigned WBUF_TAG_WIDTH = WBUF_PA_WIDTH - WBUF_OFFSET_WIDTH, /* address bits above the line offset */ + localparam int unsigned WBUF_WORD_OFFSET = $clog2(WBUF_WORD_WIDTH/8), /* byte-offset bits within a word */ + localparam int unsigned WBUF_DATA_PTR_WIDTH = $clog2(WBUF_DATA_ENTRIES), + localparam int unsigned WBUF_DIR_PTR_WIDTH = $clog2(WBUF_DIR_ENTRIES), + localparam int unsigned WBUF_TIMECNT_WIDTH = $clog2(WBUF_TIMECNT_MAX), + localparam type wbuf_addr_t = logic unsigned [ WBUF_PA_WIDTH-1:0], + localparam type wbuf_dir_ptr_t = logic unsigned [ WBUF_DIR_PTR_WIDTH-1:0], + localparam type wbuf_data_ptr_t = logic unsigned [ WBUF_DATA_PTR_WIDTH-1:0], + localparam type wbuf_data_t = logic [ WBUF_WORD_WIDTH-1:0], + localparam type wbuf_be_t = logic [ WBUF_WORD_WIDTH/8-1:0], + localparam type wbuf_data_buf_t = wbuf_data_t [ WBUF_WORDS-1:0], + localparam type wbuf_be_buf_t = wbuf_be_t [ WBUF_WORDS-1:0], + localparam type wbuf_tag_t = logic unsigned [ WBUF_TAG_WIDTH-1:0], + localparam type wbuf_match_t = 
logic unsigned [WBUF_READ_MATCH_WIDTH-1:0], + localparam type wbuf_timecnt_t = logic unsigned [ WBUF_TIMECNT_WIDTH-1:0] +) + // }}} + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, /* every directory slot is FREE */ + output logic full_o, /* no directory slot is FREE */ + input logic flush_all_i, /* OPEN slots go to PEND; new slots are allocated directly as PEND */ + + // Configuration signals + // Timer threshold + input wbuf_timecnt_t cfg_threshold_i, + // Reset timer on write + input logic cfg_reset_timecnt_on_write_i, + // Sequentialize write-after-write hazards + input logic cfg_sequential_waw_i, + // Inhibit write coalescing + input logic cfg_inhibit_write_coalescing_i, + + // Write interface + input logic write_i, + output logic write_ready_o, + input wbuf_addr_t write_addr_i, + input wbuf_data_t write_data_i, + input wbuf_be_t write_be_i, // byte-enable + input logic write_uc_i, // uncacheable write + + // Read hit interface + input wbuf_addr_t read_addr_i, + output logic read_hit_o, + input logic read_flush_hit_i, /* sends to PEND the OPEN slot whose tag matches write_addr_i */ + + // Replay hit interface + input wbuf_addr_t replay_addr_i, + input logic replay_is_read_i, + output logic replay_open_hit_o, + output logic replay_pend_hit_o, + output logic replay_sent_hit_o, + output logic replay_not_ready_o, + + // Send interface + input logic send_meta_ready_i, + output logic send_meta_valid_o, + output wbuf_addr_t send_addr_o, + output wbuf_dir_ptr_t send_id_o, + output logic send_uc_o, + + input logic send_data_ready_i, + output logic send_data_valid_o, + output wbuf_addr_t send_data_tag_o, /* slot tag cast to the address type (not shifted by WBUF_OFFSET_WIDTH) */ + output wbuf_data_buf_t send_data_o, + output wbuf_be_buf_t send_be_o, + + // Acknowledge interface + input logic ack_i, + input wbuf_dir_ptr_t ack_id_i, + input logic ack_error_i /* NOTE(review): not referenced inside this module */ +); + // }}} + + // Definition of constants, types and functions + // {{{ + localparam int WBUF_SEND_FIFO_DEPTH = WBUF_DATA_ENTRIES; + + typedef logic unsigned [31:0] wbuf_uint; /* NOTE(review): not referenced inside this module */ + + typedef enum logic [1:0] { + WBUF_FREE = 2'b00, // unused/free slot + WBUF_OPEN = 2'b01, // there are pending 
writes in this slot + WBUF_PEND = 2'b10, // the slot is waiting to be sent + WBUF_SENT = 2'b11 // the slot is sent and waits for the memory acknowledge + } wbuf_state_e; + + typedef struct packed { + wbuf_data_ptr_t ptr; /* index of the data buffer used by this slot */ + wbuf_timecnt_t cnt; /* time counter, advanced while the slot is OPEN and compared with cfg_threshold_i - 1 */ + wbuf_tag_t tag; /* write_addr_i bits above the line offset */ + logic uc; /* write_uc_i captured at allocation */ + } wbuf_dir_entry_t; + + typedef struct packed { + wbuf_data_buf_t data; + wbuf_be_buf_t be; + } wbuf_data_entry_t; + + typedef struct packed { + wbuf_data_ptr_t send_data_ptr; + wbuf_tag_t send_data_tag; + } wbuf_send_data_t; + + typedef struct packed { + wbuf_tag_t send_meta_tag; + wbuf_dir_ptr_t send_meta_id; + logic send_meta_uc; + } wbuf_send_meta_t; + + function automatic wbuf_dir_ptr_t wbuf_dir_find_next( /* first directory slot in the given state, searching from curr_ptr+1 with wrap-around; returns curr_ptr if none */ + input wbuf_dir_ptr_t curr_ptr, + input wbuf_state_e [WBUF_DIR_ENTRIES-1:0] dir_state, + input wbuf_state_e state); + automatic wbuf_dir_ptr_t next_ptr; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + next_ptr = wbuf_dir_ptr_t'((i + int'(curr_ptr) + 1) % WBUF_DIR_ENTRIES); + if (dir_state[next_ptr] == state) begin + return next_ptr; + end + end + return curr_ptr; + endfunction + + function automatic wbuf_data_ptr_t wbuf_data_find_next( /* same wrap-around search over the data_valid bits */ + input wbuf_data_ptr_t curr_ptr, + input logic [WBUF_DATA_ENTRIES-1:0] data_valid, + input logic state); + automatic wbuf_data_ptr_t next_ptr; + for (int unsigned i = 0; i < WBUF_DATA_ENTRIES; i++) begin + next_ptr = wbuf_data_ptr_t'((i + int'(curr_ptr) + 1) % WBUF_DATA_ENTRIES); + if (data_valid[next_ptr] == state) begin + return next_ptr; + end + end + return curr_ptr; + endfunction + + function automatic void wbuf_data_write( /* byte-wise merge: bytes enabled in wbuf_new_be take the new data, the others keep the old data */ + output wbuf_data_buf_t wbuf_ret_data, + output wbuf_be_buf_t wbuf_ret_be, + input wbuf_data_buf_t wbuf_old_data, + input wbuf_be_buf_t wbuf_old_be, + input wbuf_data_buf_t wbuf_new_data, + input wbuf_be_buf_t wbuf_new_be); + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + for (int unsigned b = 0; b < WBUF_WORD_WIDTH/8; b++) begin + wbuf_ret_data[w][b*8 +: 8] = wbuf_new_be[w][b] ? 
+ wbuf_new_data[w][b*8 +: 8] : + wbuf_old_data[w][b*8 +: 8]; + end + wbuf_ret_be[w] = wbuf_old_be[w] | wbuf_new_be[w]; /* byte-enables accumulate */ + end + endfunction + + function automatic wbuf_match_t wbuf_tag_to_match_addr(wbuf_tag_t tag); /* WBUF_READ_MATCH_WIDTH most significant bits of the tag */ + return tag[WBUF_TAG_WIDTH - 1:WBUF_TAG_WIDTH - WBUF_READ_MATCH_WIDTH]; + endfunction + // }}} + + // Definition of internal wires and registers + // {{{ + wbuf_state_e [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_state_q, wbuf_dir_state_d; + wbuf_dir_entry_t [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_q, wbuf_dir_d; + logic [WBUF_DATA_ENTRIES-1:0] wbuf_data_valid_q, wbuf_data_valid_d; + wbuf_data_entry_t [WBUF_DATA_ENTRIES-1:0] wbuf_data_q, wbuf_data_d; + + wbuf_dir_ptr_t wbuf_dir_free_ptr_q, wbuf_dir_free_ptr_d; /* directory slot used by the next allocation */ + logic wbuf_dir_free; + wbuf_dir_ptr_t wbuf_dir_send_ptr_q, wbuf_dir_send_ptr_d; /* directory slot considered for sending */ + wbuf_data_ptr_t wbuf_data_free_ptr_q, wbuf_data_free_ptr_d; /* data buffer used by the next allocation */ + logic wbuf_data_free; + + logic wbuf_write_free; + logic wbuf_write_hit_open; + logic wbuf_write_hit_pend; + logic wbuf_write_hit_sent; + wbuf_dir_ptr_t wbuf_write_hit_open_dir_ptr; + wbuf_dir_ptr_t wbuf_write_hit_pend_dir_ptr; + + logic send_meta_valid; + logic send_meta_ready; + wbuf_send_meta_t send_meta_wdata, send_meta_rdata; + + logic send_data_wok; + logic send_data_w; + wbuf_send_data_t send_data_d; + wbuf_send_data_t send_data_q; + + wbuf_tag_t write_tag; + wbuf_data_buf_t write_data; + wbuf_be_buf_t write_be; + + logic [WBUF_DIR_ENTRIES-1:0] replay_match; + logic [WBUF_DIR_ENTRIES-1:0] replay_open_hit; + logic [WBUF_DIR_ENTRIES-1:0] replay_pend_hit; + logic [WBUF_DIR_ENTRIES-1:0] replay_sent_hit; + + genvar gen_i; + // }}} + + // Global control signals + // {{{ + always_comb + begin : empty_comb + empty_o = 1'b1; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + empty_o &= (wbuf_dir_state_q[i] == WBUF_FREE); + end + end + + always_comb + begin : full_comb + full_o = 1'b1; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + full_o &= (wbuf_dir_state_q[i] != WBUF_FREE); + end + end + // }}} + 
+ // Write control + // {{{ + assign write_tag = write_addr_i[WBUF_PA_WIDTH-1:WBUF_OFFSET_WIDTH]; + + always_comb + begin : wbuf_write_data_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + write_data[w] = write_data_i; /* same word on every lane; write_be selects the lane */ + end + end + + generate + if (WBUF_OFFSET_WIDTH > WBUF_WORD_OFFSET) begin : wbuf_write_be_gt_gen + always_comb + begin : wbuf_write_be_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + if (w == int'(write_addr_i[WBUF_OFFSET_WIDTH-1:WBUF_WORD_OFFSET])) begin + write_be[w] = write_be_i; + end else begin + write_be[w] = '0; + end + end + end + end else begin : wbuf_write_be_le_gen + always_comb + begin : wbuf_write_be_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + write_be[w] = write_be_i; + end + end + end + endgenerate + + always_comb + begin : wbuf_free_comb + wbuf_dir_free_ptr_d = wbuf_dir_free_ptr_q; + if (ack_i) begin /* the acknowledged slot becomes the next free slot */ + wbuf_dir_free_ptr_d = ack_id_i; + end else if (write_i && wbuf_write_free) begin + wbuf_dir_free_ptr_d = wbuf_dir_find_next(wbuf_dir_free_ptr_q, wbuf_dir_state_q, WBUF_FREE); + end + + wbuf_data_free_ptr_d = wbuf_data_free_ptr_q; + if (send_data_valid_o && send_data_ready_i) begin /* the data buffer just sent becomes the next free buffer */ + wbuf_data_free_ptr_d = send_data_q.send_data_ptr; + end else if (write_i && wbuf_write_free) begin + wbuf_data_free_ptr_d = wbuf_data_find_next(wbuf_data_free_ptr_q, wbuf_data_valid_q, 1'b0); + end + end + + assign wbuf_dir_free = (wbuf_dir_state_q[wbuf_dir_free_ptr_q] == WBUF_FREE); + assign wbuf_data_free = ~wbuf_data_valid_q[wbuf_data_free_ptr_q]; + + always_comb + begin : wbuf_write_hit_comb + wbuf_write_hit_open = 1'b0; + wbuf_write_hit_pend = 1'b0; + wbuf_write_hit_sent = 1'b0; + + wbuf_write_hit_open_dir_ptr = 0; + wbuf_write_hit_pend_dir_ptr = 0; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + if (wbuf_dir_q[i].tag == write_tag) begin + unique case (wbuf_dir_state_q[i]) + WBUF_OPEN: begin + wbuf_write_hit_open = 1'b1; + wbuf_write_hit_open_dir_ptr = wbuf_dir_ptr_t'(i); + end + WBUF_PEND: begin 
+ wbuf_write_hit_pend = 1'b1; + wbuf_write_hit_pend_dir_ptr = wbuf_dir_ptr_t'(i); + end + WBUF_SENT: begin + wbuf_write_hit_sent = 1'b1; + end + default: begin + /* do nothing */ + end + endcase + end + end + end + + // Check if there is a match between the read address and the tag of one + // of the used slots in the write buffer directory + always_comb + begin : read_hit_comb + automatic logic [WBUF_DIR_ENTRIES-1:0] read_hit; + + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + read_hit[i] = 1'b0; + unique case (wbuf_dir_state_q[i]) + WBUF_OPEN, WBUF_PEND, WBUF_SENT: begin + automatic wbuf_addr_t wbuf_addr; + automatic wbuf_match_t wbuf_tag; + automatic wbuf_match_t read_tag; + + wbuf_addr = wbuf_addr_t'(wbuf_dir_q[i].tag) << WBUF_OFFSET_WIDTH; /* line-aligned address rebuilt from the tag */ + read_tag = read_addr_i[WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]; + wbuf_tag = wbuf_addr [WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]; + read_hit[i] = (read_tag == wbuf_tag) ? 1'b1 : 1'b0; + end + default: begin + /* do nothing */ + end + endcase + end + + read_hit_o = |read_hit; + end + + // Check if there is a match between the replay address and the tag of one + // of the used slots in the write buffer directory + generate + for (gen_i = 0; gen_i < WBUF_DIR_ENTRIES; gen_i++) begin : replay_match_gen + assign replay_match[gen_i] = replay_is_read_i ? + /* replay is read: compare address block tag (e.g. 
cache line) */ + (wbuf_tag_to_match_addr(wbuf_dir_q[gen_i].tag) == + replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]) : + /* replay is write: compare wbuf tag */ + (wbuf_dir_q[gen_i].tag == + replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_TAG_WIDTH]); + + assign replay_open_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_OPEN); + assign replay_pend_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_PEND); + assign replay_sent_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_SENT); + end + endgenerate + + assign replay_open_hit_o = |replay_open_hit, + replay_pend_hit_o = |replay_pend_hit, + replay_sent_hit_o = |replay_sent_hit; + + always_comb + begin : replay_wbuf_not_ready_comb + replay_not_ready_o = 1'b0; + if (replay_pend_hit_o) begin + replay_not_ready_o = 1'b1; + end else if (replay_sent_hit_o && cfg_sequential_waw_i) begin + replay_not_ready_o = 1'b1; + end else if (!replay_open_hit_o && (!wbuf_dir_free || !wbuf_data_free)) begin /* no OPEN slot hit and no free directory or data entry */ + replay_not_ready_o = 1'b1; + end + end + + assign wbuf_write_free = /* a new slot can be allocated for the incoming write */ + wbuf_dir_free + & wbuf_data_free + & ~wbuf_write_hit_open + & ~wbuf_write_hit_pend + & ~(wbuf_write_hit_sent & cfg_sequential_waw_i); + + assign write_ready_o = wbuf_write_free /* allocate a new slot, or merge into an OPEN/PEND slot */ + | ((wbuf_write_hit_open | wbuf_write_hit_pend) + & ~cfg_inhibit_write_coalescing_i); + // }}} + + // Update control + // {{{ + always_comb + begin : wbuf_update_comb + automatic bit timeout; + automatic bit write_hit; + automatic bit read_hit; + automatic bit match_open_ptr; + automatic bit match_pend_ptr; + automatic bit match_free; + automatic bit send; + + timeout = 1'b0; + write_hit = 1'b0; + read_hit = 1'b0; + match_open_ptr = 1'b0; + match_pend_ptr = 1'b0; + match_free = 1'b0; + send = 1'b0; + + wbuf_dir_state_d = wbuf_dir_state_q; + wbuf_dir_d = wbuf_dir_q; + wbuf_data_d = wbuf_data_q; + + send_data_w = 1'b0; + send_meta_valid = 1'b0; + + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; 
i++) begin + case (wbuf_dir_state_q[i]) + WBUF_FREE: begin /* allocation of a new slot */ + match_free = wbuf_write_free && (i == int'(wbuf_dir_free_ptr_q)); + + if (write_i && match_free) begin + send = (cfg_threshold_i == 0) + | write_uc_i + | flush_all_i + | cfg_inhibit_write_coalescing_i; + + wbuf_dir_state_d[i] = send ? WBUF_PEND : WBUF_OPEN; + wbuf_dir_d[i].tag = write_tag; + wbuf_dir_d[i].cnt = 0; + wbuf_dir_d[i].ptr = wbuf_data_free_ptr_q; + wbuf_dir_d[i].uc = write_uc_i; + + wbuf_data_write( + wbuf_data_d[wbuf_data_free_ptr_q].data, + wbuf_data_d[wbuf_data_free_ptr_q].be, + '0, /* no previous data */ + '0, /* no previous byte-enables */ + write_data, + write_be + ); + end + end + + WBUF_OPEN: begin /* merge write hits; leave to PEND on timeout, read flush, flush_all or inhibited coalescing */ + match_open_ptr = (i == int'(wbuf_write_hit_open_dir_ptr)); + timeout = (wbuf_dir_q[i].cnt == (cfg_threshold_i - 1)); + read_hit = read_flush_hit_i & wbuf_write_hit_open & match_open_ptr; + write_hit = write_i + & wbuf_write_hit_open + & match_open_ptr + & ~cfg_inhibit_write_coalescing_i; + + if (!flush_all_i) begin + if (write_hit && cfg_reset_timecnt_on_write_i) begin + timeout = 1'b0; + wbuf_dir_d[i].cnt = 0; + end else if (!timeout) begin + wbuf_dir_d[i].cnt = wbuf_dir_q[i].cnt + 1; + end + + if (read_hit | timeout | cfg_inhibit_write_coalescing_i) begin + wbuf_dir_state_d[i] = WBUF_PEND; + end + end else begin + wbuf_dir_state_d[i] = WBUF_PEND; + end + + if (write_hit) begin + wbuf_data_write( + wbuf_data_d[wbuf_dir_q[i].ptr].data, + wbuf_data_d[wbuf_dir_q[i].ptr].be, + wbuf_data_q[wbuf_dir_q[i].ptr].data, + wbuf_data_q[wbuf_dir_q[i].ptr].be, + write_data, + write_be + ); + end + end + + WBUF_PEND: begin /* still merges write hits; sent when selected by the send pointer */ + match_pend_ptr = (i == int'(wbuf_write_hit_pend_dir_ptr)); + write_hit = write_i + & wbuf_write_hit_pend + & match_pend_ptr + & ~cfg_inhibit_write_coalescing_i; + + if (write_hit) begin + wbuf_data_write( + wbuf_data_d[wbuf_dir_q[i].ptr].data, + wbuf_data_d[wbuf_dir_q[i].ptr].be, + wbuf_data_q[wbuf_dir_q[i].ptr].data, + wbuf_data_q[wbuf_dir_q[i].ptr].be, + write_data, + write_be + ); + end + + if (i == int'(wbuf_dir_send_ptr_q)) begin + 
send_data_w = send_meta_ready; /* each FIFO is written only when the other one can accept */ + send_meta_valid = send_data_wok; + if (send_meta_ready && send_data_wok) begin + wbuf_dir_state_d[i] = WBUF_SENT; + end + end + end + + WBUF_SENT: begin /* freed by the matching acknowledge */ + if (ack_i && (i == int'(ack_id_i))) begin + wbuf_dir_state_d[i] = WBUF_FREE; + end + end + endcase + end + end + + always_comb + begin : wbuf_data_valid_comb + wbuf_data_valid_d = wbuf_data_valid_q; + + // allocate a free data buffer on new write + if (write_i && wbuf_write_free) begin + wbuf_data_valid_d[wbuf_data_free_ptr_q] = 1'b1; + end + + // de-allocate a data buffer as soon as it is sent + if (send_data_valid_o && send_data_ready_i) begin + wbuf_data_valid_d[send_data_q.send_data_ptr] = 1'b0; + end + end + // }}} + + // Send control + // {{{ + // Data channel + hpdcache_fifo_reg #( + .FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH), + .FEEDTHROUGH (WBUF_SEND_FEEDTHROUGH), + .fifo_data_t (wbuf_send_data_t) + ) send_data_ptr_fifo_i ( + .clk_i, + .rst_ni, + .w_i (send_data_w), + .wok_o (send_data_wok), + .wdata_i (send_data_d), + .r_i (send_data_ready_i), + .rok_o (send_data_valid_o), + .rdata_o (send_data_q) + ); + + assign send_data_d.send_data_ptr = wbuf_dir_q[wbuf_dir_send_ptr_q].ptr, + send_data_d.send_data_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag; + + assign send_data_tag_o = wbuf_addr_t'(send_data_q.send_data_tag), + send_data_o = wbuf_data_q[send_data_q.send_data_ptr].data, + send_be_o = wbuf_data_q[send_data_q.send_data_ptr].be; + + // Meta-data channel + hpdcache_fifo_reg #( + .FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH), + .FEEDTHROUGH (WBUF_SEND_FEEDTHROUGH), + .fifo_data_t (wbuf_send_meta_t) + ) send_meta_fifo_i ( + .clk_i, + .rst_ni, + .w_i (send_meta_valid), + .wok_o (send_meta_ready), + .wdata_i (send_meta_wdata), + .r_i (send_meta_ready_i), + .rok_o (send_meta_valid_o), + .rdata_o (send_meta_rdata) + ); + + assign send_meta_wdata.send_meta_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag, + send_meta_wdata.send_meta_id = wbuf_dir_send_ptr_q, + send_meta_wdata.send_meta_uc = 
wbuf_dir_q[wbuf_dir_send_ptr_q].uc; + + assign send_addr_o = { send_meta_rdata.send_meta_tag, {WBUF_OFFSET_WIDTH{1'b0}} }, + send_id_o = send_meta_rdata.send_meta_id, + send_uc_o = send_meta_rdata.send_meta_uc; + + // Send pointer: moves to the next PEND slot, holding on a PEND slot until it is pushed into the send FIFOs + always_comb + begin : wbuf_send_comb + wbuf_dir_send_ptr_d = wbuf_dir_find_next(wbuf_dir_send_ptr_q, wbuf_dir_state_q, WBUF_PEND); + if (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND) begin + if (!send_meta_valid || !send_meta_ready) begin + wbuf_dir_send_ptr_d = wbuf_dir_send_ptr_q; + end + end + end + // }}} + + // Internal state assignment + // {{{ + always_ff @(posedge clk_i) wbuf_data_q <= wbuf_data_d; /* data buffers have no reset */ + + always_ff @(posedge clk_i or negedge rst_ni) + begin : wbuf_state_ff + if (!rst_ni) begin + wbuf_dir_q <= '0; + wbuf_dir_state_q <= {WBUF_DIR_ENTRIES{WBUF_FREE}}; + wbuf_data_valid_q <= '0; + wbuf_dir_free_ptr_q <= 0; + wbuf_dir_send_ptr_q <= 0; + wbuf_data_free_ptr_q <= 0; + end else begin + wbuf_dir_q <= wbuf_dir_d; + wbuf_dir_state_q <= wbuf_dir_state_d; + wbuf_data_valid_q <= wbuf_data_valid_d; + wbuf_dir_free_ptr_q <= wbuf_dir_free_ptr_d; + wbuf_dir_send_ptr_q <= wbuf_dir_send_ptr_d; + wbuf_data_free_ptr_q <= wbuf_data_free_ptr_d; + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert(WBUF_WORDS inside {1, 2, 4, 8, 16}) else + $error("WBUF: width of data buffers must be a power of 2"); + ack_sent_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + (ack_i -> (wbuf_dir_state_q[ack_id_i] == WBUF_SENT))) else + $error("WBUF: acknowledging a not SENT slot"); + send_pend_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + (send_meta_valid -> (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND))) else + $error("WBUF: sending a not PEND slot"); + send_valid_data_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + (send_data_valid_o -> (wbuf_data_valid_q[send_data_q.send_data_ptr] == 1'b1))) else + $error("WBUF: sending a not valid data"); + // pragma 
translate_on + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv new file mode 100644 index 0000000..1792ff4 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv @@ -0,0 +1,228 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Write Buffer Wrapper + * History : + */ +/* This wrapper adapts the send interface of the write buffer to the memory + * interface of the cache. 
+ */ +module hpdcache_wbuf_wrapper +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_w_t = logic, + + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) + // }}} + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, + output logic full_o, + input logic flush_all_i, + + // Configuration signals + // Timer threshold + input wbuf_timecnt_t cfg_threshold_i, + // Reset timer on write + input logic cfg_reset_timecnt_on_write_i, + // Sequentialize write-after-write hazards + input logic cfg_sequential_waw_i, + // Inhibit write coalescing + input logic cfg_inhibit_write_coalescing_i, + + // Write interface + input logic write_i, + output logic write_ready_o, + input wbuf_addr_t write_addr_i, + input wbuf_data_t write_data_i, + input wbuf_be_t write_be_i, // byte-enable + input logic write_uc_i, // uncacheable write + + // Read hit interface + input wbuf_addr_t read_addr_i, + output logic read_hit_o, + input logic read_flush_hit_i, + + // Replay hit interface + input wbuf_addr_t replay_addr_i, + input logic replay_is_read_i, + output logic replay_open_hit_o, + output logic replay_pend_hit_o, + output logic replay_sent_hit_o, + output logic replay_not_ready_o, + + // Memory interface + input logic mem_req_write_ready_i, + output logic mem_req_write_valid_o, + output hpdcache_mem_req_t mem_req_write_o, + + input logic mem_req_write_data_ready_i, + output logic mem_req_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_write_data_o, + + output logic mem_resp_write_ready_o, + input logic mem_resp_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_write_i +); + // }}} + + // Internal signals + // {{{ + // send_* carry the request produced by the wrapped write buffer; ack_* are decoded from the memory write response + wbuf_addr_t send_addr; + wbuf_dir_ptr_t
send_id; + logic send_uc; + wbuf_addr_t send_data_tag; + wbuf_data_buf_t send_data; + wbuf_be_buf_t send_be; + wbuf_dir_ptr_t ack_id; + logic ack_error; + // }}} + + // Wrapped write buffer + // {{{ + hpdcache_wbuf #( + .WBUF_DIR_ENTRIES (HPDCACHE_WBUF_DIR_ENTRIES), + .WBUF_DATA_ENTRIES (HPDCACHE_WBUF_DATA_ENTRIES), + .WBUF_WORD_WIDTH (HPDCACHE_REQ_DATA_WIDTH), + .WBUF_WORDS (HPDCACHE_WBUF_WORDS), + .WBUF_PA_WIDTH (HPDCACHE_PA_WIDTH), + .WBUF_TIMECNT_MAX ((2**HPDCACHE_WBUF_TIMECNT_WIDTH) - 1), + .WBUF_READ_MATCH_WIDTH (HPDCACHE_NLINE_WIDTH), + .WBUF_SEND_FEEDTHROUGH (HPDCACHE_WBUF_SEND_FEEDTHROUGH) + ) hpdcache_wbuf_i ( + .clk_i, + .rst_ni, + .empty_o, + .full_o, + .flush_all_i, + .cfg_threshold_i, + .cfg_reset_timecnt_on_write_i, + .cfg_sequential_waw_i, + .cfg_inhibit_write_coalescing_i, + .write_i, + .write_ready_o, + .write_addr_i, + .write_data_i, + .write_be_i, + .write_uc_i, + .read_addr_i, + .read_hit_o, + .read_flush_hit_i, + .replay_addr_i, + .replay_is_read_i, + .replay_open_hit_o, + .replay_pend_hit_o, + .replay_sent_hit_o, + .replay_not_ready_o, + .send_meta_ready_i (mem_req_write_ready_i), + .send_meta_valid_o (mem_req_write_valid_o), + .send_addr_o (send_addr), + .send_id_o (send_id), + .send_uc_o (send_uc), + .send_data_ready_i (mem_req_write_data_ready_i), + .send_data_valid_o (mem_req_write_data_valid_o), + .send_data_tag_o (send_data_tag), + .send_data_o (send_data), + .send_be_o (send_be), + .ack_i (mem_resp_write_valid_i), + .ack_id_i (ack_id), + .ack_error_i (ack_error) + ); + // }}} + + // Memory interface + // {{{ + // Request metadata: the length field is zero, the ID is the write-buffer directory pointer cast to the memory ID type, and the request is cacheable unless the entry is flagged uncacheable + // NOTE(review): mem_req_atomic is tied to HPDCACHE_MEM_ATOMIC_ADD although the command is a plain write - presumably a don't-care value, confirm + assign mem_req_write_o.mem_req_addr = send_addr, + mem_req_write_o.mem_req_len = 0, + mem_req_write_o.mem_req_size = get_hpdcache_mem_size(HPDCACHE_WBUF_DATA_WIDTH/8), + mem_req_write_o.mem_req_id = hpdcache_mem_id_t'(send_id), + mem_req_write_o.mem_req_command = HPDCACHE_MEM_WRITE, + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD, + mem_req_write_o.mem_req_cacheable = ~send_uc; + + generate + localparam int unsigned
WBUF_MEM_DATA_RATIO = HPDcacheMemDataWidth/HPDCACHE_WBUF_DATA_WIDTH; + localparam int unsigned WBUF_MEM_DATA_WORD_INDEX_WIDTH = $clog2(WBUF_MEM_DATA_RATIO); + + assign mem_req_write_data_o.mem_req_w_last = 1'b1; + + if (WBUF_MEM_DATA_RATIO > 1) + begin : wbuf_data_upsizing_gen + logic [HPDCACHE_WBUF_DATA_WIDTH/8-1:0][WBUF_MEM_DATA_RATIO-1:0] mem_req_be; + + // demux send BE + // NOTE(review): the demux is assumed to place send_be on the lane selected by the low bits of send_data_tag - confirm against hpdcache_demux + // NOTE(review): mem_req_be is declared [bytes][ratio]; the total width matches NOUTPUT*DATA_WIDTH but the dimension order looks transposed - confirm + hpdcache_demux #( + .NOUTPUT (WBUF_MEM_DATA_RATIO), + .DATA_WIDTH (HPDCACHE_WBUF_DATA_WIDTH/8), + .ONE_HOT_SEL (1'b0) + ) mem_write_be_demux_i ( + .data_i (send_be), + .sel_i (send_data_tag[0 +: WBUF_MEM_DATA_WORD_INDEX_WIDTH]), + .data_o (mem_req_be) + ); + + // The same data word is replicated WBUF_MEM_DATA_RATIO times across the memory data bus + assign mem_req_write_data_o.mem_req_w_data = {WBUF_MEM_DATA_RATIO{send_data}}, + mem_req_write_data_o.mem_req_w_be = mem_req_be; + + end else if (WBUF_MEM_DATA_RATIO == 1) + begin : wbuf_data_forwarding_gen + assign mem_req_write_data_o.mem_req_w_data = send_data, + mem_req_write_data_o.mem_req_w_be = send_be; + end + + // Assertions + // {{{ + // pragma translate_off + initial assert(WBUF_MEM_DATA_RATIO > 0) else + $error($sformatf("WBUF: data width of mem interface (%d) shall be g.e.
to wbuf data width(%d)", + HPDcacheMemDataWidth, HPDCACHE_WBUF_DATA_WIDTH)); + // pragma translate_on + // }}} + endgenerate + + // Write responses are always accepted; the low HPDCACHE_WBUF_DIR_PTR_WIDTH bits of the response ID give the acknowledged directory entry + assign mem_resp_write_ready_o = 1'b1, + ack_id = mem_resp_write_i.mem_resp_w_id[0 +: HPDCACHE_WBUF_DIR_PTR_WIDTH], + ack_error = (mem_resp_write_i.mem_resp_w_error != HPDCACHE_MEM_RESP_OK); + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert (HPDCACHE_WBUF_DIR_PTR_WIDTH <= HPDcacheMemIdWidth) else + $fatal("HPDcacheMemIdWidth is not wide enough to fit all possible write buffer transactions"); + // pragma translate_on + // }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv new file mode 100644 index 0000000..dfef92d --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv @@ -0,0 +1,374 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Riccardo Alidori, Cesar Fuguet + * Maintainers(s): Cesar Fuguet + * Creation Date : June, 2021 + * Description : HPDcache Linear Hardware Memory Prefetcher.
+ * History : + */ +module hwpf_stride +import hwpf_stride_pkg::*; +import hpdcache_pkg::*; +// Stride prefetcher engine: once armed through its CSRs it waits for a snoop match on the base cache line, then issues CMO prefetch requests line by line and block by block, throttled by the nwait / ninflight settings. +// Parameters +// {{{ +#( + parameter int CACHE_LINE_BYTES = 64 +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // CSR + input logic csr_base_set_i, + input hwpf_stride_base_t csr_base_i, + input logic csr_param_set_i, + input hwpf_stride_param_t csr_param_i, + input logic csr_throttle_set_i, + input hwpf_stride_throttle_t csr_throttle_i, + + output hwpf_stride_base_t csr_base_o, + output hwpf_stride_param_t csr_param_o, + output hwpf_stride_throttle_t csr_throttle_o, + + // If high, the prefetcher is enabled and active + output logic busy_o, + + // Snooping + // Address to snoop on requests ports + output hpdcache_nline_t snoop_nline_o, + // If set to one, the snoop address matched one of the requests + input snoop_match_i, + + // D-Cache interface + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_req_t hpdcache_req_o, + input logic hpdcache_rsp_valid_i, + input hpdcache_rsp_t hpdcache_rsp_i +); +// }}} + + import hpdcache_pkg::hpdcache_req_addr_t; + + // Definition of constants + // {{{ + localparam int STRIDE_WIDTH = $bits(csr_param_i.stride); + localparam int NBLOCKS_WIDTH = $bits(csr_param_i.nblocks); + localparam int NLINES_WIDTH = $bits(csr_param_i.nlines); + localparam int NWAIT_WIDTH = $bits(csr_throttle_i.nwait); + localparam int INFLIGHT_WIDTH = $bits(csr_throttle_i.ninflight); + localparam int NLINES_CNT_WIDTH = NLINES_WIDTH; + // }}} + + // Internal registers and signals + // {{{ + // FSM + enum { + IDLE, + SNOOP, + SEND_REQ, + WAIT, + DONE, + ABORT + } state_d, state_q; + + logic [NBLOCKS_WIDTH-1:0] nblocks_cnt_d, nblocks_cnt_q; + logic [NLINES_CNT_WIDTH-1:0] nlines_cnt_d, nlines_cnt_q; + logic [NWAIT_WIDTH-1:0] nwait_cnt_d, nwait_cnt_q; + logic [INFLIGHT_WIDTH-1:0] inflight_cnt_d, inflight_cnt_q; + logic inflight_inc, inflight_dec; + + hwpf_stride_base_t csr_base_q; + hwpf_stride_base_t
shadow_base_q, shadow_base_d; + hwpf_stride_param_t csr_param_q; + hwpf_stride_param_t shadow_param_q, shadow_param_d; + hwpf_stride_throttle_t csr_throttle_q; + hwpf_stride_throttle_t shadow_throttle_q, shadow_throttle_d; + hpdcache_nline_t request_nline_q, request_nline_d; + + hpdcache_set_t hpdcache_req_set; + hpdcache_tag_t hpdcache_req_tag; + + logic csr_base_update; + hpdcache_nline_t increment_stride; + logic is_inflight_max; + + // Default assignment + // The distance between the bases of consecutive blocks is the programmed stride plus one; a ninflight setting of zero disables the in-flight limit + assign increment_stride = hpdcache_nline_t'(shadow_param_q.stride) + 1'b1; + assign inflight_dec = hpdcache_rsp_valid_i; + assign snoop_nline_o = shadow_base_q.base_cline; + assign is_inflight_max = ( shadow_throttle_q.ninflight == '0 ) ? + 1'b0 : ( inflight_cnt_q >= shadow_throttle_q.ninflight ); + assign csr_base_o = csr_base_q; + assign csr_param_o = csr_param_q; + assign csr_throttle_o = csr_throttle_q; + // }}} + + // Dcache outputs + // {{{ + // The request line number is split into set (low bits) and tag (high bits); the in-line offset is forced to zero + assign hpdcache_req_set = request_nline_q[0 +: HPDCACHE_SET_WIDTH], + hpdcache_req_tag = request_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH]; + + assign hpdcache_req_o.addr_offset = { hpdcache_req_set, {HPDCACHE_OFFSET_WIDTH{1'b0}} }, + hpdcache_req_o.wdata = '0, + hpdcache_req_o.op = HPDCACHE_REQ_CMO, + hpdcache_req_o.be = '1, + hpdcache_req_o.size = HPDCACHE_REQ_CMO_PREFETCH, + hpdcache_req_o.sid = '0, // this is set when connecting to the dcache + hpdcache_req_o.tid = '0, // this is set by the wrapper of the prefetcher + hpdcache_req_o.need_rsp = 1'b1, + hpdcache_req_o.phys_indexed = 1'b1, + hpdcache_req_o.addr_tag = hpdcache_req_tag, + hpdcache_req_o.pma.uncacheable = 1'b0, + hpdcache_req_o.pma.io = 1'b0; + // }}} + + // Set state of internal registers + // {{{ + // csr_throttle_q is cleared on reset together with the other CSRs so that csr_throttle_o and the shadow copy loaded in IDLE are defined after reset + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + csr_base_q <= '0; + csr_param_q <= '0; + csr_throttle_q <= '0; + shadow_base_q <= '0; + shadow_param_q <= '0; + shadow_throttle_q <= '0; + request_nline_q <= '0; + state_q <= IDLE; + end else begin + if (csr_base_set_i) csr_base_q <= csr_base_i;
+ else if (csr_base_update) csr_base_q <= shadow_base_d; + if (csr_param_set_i) csr_param_q <= csr_param_i; + if (csr_throttle_set_i) csr_throttle_q <= csr_throttle_i; + shadow_base_q <= shadow_base_d; + shadow_param_q <= shadow_param_d; + shadow_throttle_q <= shadow_throttle_d; + request_nline_q <= request_nline_d; + state_q <= state_d; + end + end + // }}} + + // Update internal counters + // {{{ + always_comb begin : inflight_cnt + inflight_cnt_d = inflight_cnt_q; + + // Every time we send a dcache request, increment the counter + if ( inflight_inc ) begin + inflight_cnt_d++; + end + + // Every time we got a response from the cache, decrement the counter + if ( inflight_dec && ( inflight_cnt_q > 0 )) begin + inflight_cnt_d--; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + nblocks_cnt_q <= '0; + nlines_cnt_q <= '0; + nwait_cnt_q <= '0; + inflight_cnt_q <= '0; + end else begin + nblocks_cnt_q <= nblocks_cnt_d; + nlines_cnt_q <= nlines_cnt_d; + nwait_cnt_q <= nwait_cnt_d; + inflight_cnt_q <= inflight_cnt_d; + end + end + // }}} + + // FSM + // {{{ + // IDLE -> SNOOP when enabled with a non-zero nlines or nblocks; SNOOP -> SEND_REQ on a snoop match; SEND_REQ <-> WAIT for throttling; SEND_REQ -> DONE after the last request; any active state -> ABORT when the enable bit is cleared, ABORT -> IDLE once no request is in flight + always_comb begin : fsm_control + // default assignments + hpdcache_req_valid_o = 1'b0; + nblocks_cnt_d = nblocks_cnt_q; + nlines_cnt_d = nlines_cnt_q; + nwait_cnt_d = nwait_cnt_q; + inflight_inc = 1'b0; + busy_o = 1'b0; + csr_base_update = 1'b0; + + shadow_base_d = shadow_base_q; + shadow_param_d = shadow_param_q; + shadow_throttle_d = shadow_throttle_q; + request_nline_d = request_nline_q; + state_d = state_q; + + case ( state_q ) + + IDLE: begin + // If enabled, go snooping the dcache ports + if ( csr_base_q.enable ) begin + shadow_base_d = csr_base_q; + if (( csr_param_q.nlines > 0 ) || ( csr_param_q.nblocks > 0 )) begin + shadow_param_d = csr_param_q; + shadow_throttle_d = csr_throttle_q; + state_d = SNOOP; + end else begin + // no prefetch needed, disarm immediately + shadow_base_d.enable = 1'b0; + csr_base_update = 1'b1; + end + end + end + + + SNOOP: begin + if (
csr_base_q.enable ) begin + // If a snooper matched an address, send the request + if ( snoop_match_i ) begin + state_d = SEND_REQ; + + if ( shadow_param_q.nlines == 0 ) begin + // skip the first block + request_nline_d = shadow_base_q.base_cline + + hpdcache_nline_t'(increment_stride); + nblocks_cnt_d = ( shadow_param_q.nblocks > 0 ) ? + shadow_param_q.nblocks - 1 : 0; + nlines_cnt_d = 0; + + // update the base cacheline to the first one of the next block + shadow_base_d.base_cline = request_nline_d; + end else begin + // skip the first cacheline (of the first block) + request_nline_d = shadow_base_q.base_cline + 1'b1; + nblocks_cnt_d = shadow_param_q.nblocks; + nlines_cnt_d = shadow_param_q.nlines - 1; + end + end + end else begin + state_d = IDLE; + end + end + + + SEND_REQ: begin + busy_o = 1'b1; + + // make the prefetch request to memory + hpdcache_req_valid_o = 1'b1; + + // we've got a grant, so we can move to the next request + if ( hpdcache_req_ready_i ) begin + inflight_inc = 1'b1; + + if ( nlines_cnt_q == 0 ) begin + // go to the first cacheline of the next block + request_nline_d = shadow_base_q.base_cline + + hpdcache_nline_t'(increment_stride); + nblocks_cnt_d = ( nblocks_cnt_q > 0 ) ? nblocks_cnt_q - 1 : 0; + nlines_cnt_d = shadow_param_q.nlines; + + // update the base cacheline to the first one of the next block + shadow_base_d.base_cline = request_nline_d; + end else begin + // go to the next cacheline (within the same block) + request_nline_d = request_nline_q + 1'b1; + nlines_cnt_d = nlines_cnt_q - 1; + end + + // if the NWAIT parameter is equal 0, we can issue a request every cycle + if (( nblocks_cnt_q == 0 ) && ( nlines_cnt_q == 0 )) begin + state_d = DONE; + end else if ( shadow_throttle_q.nwait == 0 ) begin + // Wait if the number of inflight requests is greater than + // the maximum indicated. Otherwise, send the next request + state_d = is_inflight_max ?
WAIT : SEND_REQ; + end else begin + // Wait the indicated cycles before sending the next request + nwait_cnt_d = shadow_throttle_q.nwait; + state_d = WAIT; + end + + if ( !csr_base_q.enable ) state_d = ABORT; + end + end + + + WAIT: begin + // Wait until: + // - the indicated number of wait cycles between requests is reached (nwait) + // - the number of inflight requests is below the indicated maximum (ninflight) + busy_o = 1'b1; + if ( csr_base_q.enable ) begin + if ( !is_inflight_max && ( nwait_cnt_q == 0 )) begin + state_d = SEND_REQ; + end + + if ( nwait_cnt_q > 0 ) begin + nwait_cnt_d = nwait_cnt_q - 1; + end + end else begin + state_d = ABORT; + end + end + + + DONE: begin + busy_o = 1'b1; + if ( csr_base_q.enable ) begin + if (( inflight_cnt_q == 0 ) && !is_inflight_max && ( nwait_cnt_q == 0 )) begin + // Copy back shadow base register into the user visible one + csr_base_update = 1'b1; + + // Check the rearm bit + if ( shadow_base_q.rearm ) begin + state_d = SNOOP; + end else begin + state_d = IDLE; + + // disarm the prefetcher + shadow_base_d.enable = 1'b0; + end + + // Check the cycle bit + if ( shadow_base_q.cycle ) begin + // restore the base address + shadow_base_d.base_cline = csr_base_q.base_cline; + end + end + + if ( nwait_cnt_q > 0 ) begin + nwait_cnt_d = nwait_cnt_q - 1; + end + end else begin + state_d = ABORT; + end + end + + ABORT: begin + busy_o = 1'b1; + if ( inflight_cnt_q == 0 ) begin + state_d = IDLE; + end + end + endcase + end + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv new file mode 100644 index 0000000..1aa9df4 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv @@ -0,0 +1,117 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0
WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Author(s) : Riccardo Alidori, Cesar Fuguet + * Creation Date : June, 2021 + * Description : Hw prefetchers arbiter + * History : + */ +module hwpf_stride_arb +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter NUM_HW_PREFETCH = 4 +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Dcache input interface + input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid_i, + output logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready_o, + input hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req_i, + output logic [NUM_HW_PREFETCH-1:0] hwpf_stride_rsp_valid_o, + output hpdcache_rsp_t [NUM_HW_PREFETCH-1:0] hwpf_stride_rsp_o, // Not used + + // Dcache output interface + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_req_t hpdcache_req_o, + input logic hpdcache_rsp_valid_i, + input hpdcache_rsp_t hpdcache_rsp_i // TID selects the target engine; payload is forwarded unchanged +); +// }}} + + // Internal signals + // {{{ + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid; + hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req; + logic [NUM_HW_PREFETCH-1:0] arb_req_gnt; + // }}} + + // Requesters arbiter + // {{{ + // Pack request ports + // A requester is told ready only when it holds the grant and the cache accepts the request + genvar gen_i; + generate + for (gen_i = 0; gen_i < NUM_HW_PREFETCH; gen_i++) begin : gen_hwpf_stride_req + assign hwpf_stride_req_ready_o[gen_i] = arb_req_gnt[gen_i] &
hpdcache_req_ready_i, + hwpf_stride_req_valid[gen_i] = hwpf_stride_req_valid_i[gen_i], + hwpf_stride_req[gen_i] = hwpf_stride_req_i[gen_i]; + end + endgenerate + + // Arbiter + // NOTE(review): the module name suggests a round-robin policy and a one-hot grant - confirm against hpdcache_rrarb + hpdcache_rrarb #( + .N (NUM_HW_PREFETCH) + ) hwpf_stride_req_arbiter_i ( + .clk_i, + .rst_ni, + .req_i (hwpf_stride_req_valid), + .gnt_o (arb_req_gnt), + .ready_i (hpdcache_req_ready_i) + ); + + // Request Multiplexor + hpdcache_mux #( + .NINPUT (NUM_HW_PREFETCH), + .DATA_WIDTH ($bits(hpdcache_req_t)), + .ONE_HOT_SEL (1'b1) + ) hwpf_stride_req_mux_i ( + .data_i (hwpf_stride_req), + .sel_i (arb_req_gnt), + .data_o (hpdcache_req_o) + ); + + // The cache sees a valid request whenever any grant bit is set + assign hpdcache_req_valid_o = |arb_req_gnt; + // }}} + + // Response demultiplexor + // {{{ + // As the HW prefetcher does not need the TID field in the request, we + // use it to transport the identifier of the specific hardware + // prefetcher. + // This way we share the same SID for all HW prefetchers. Using + // different SIDs means that we need different ports to the cache and + // we actually want to reduce those.
+ always_comb + begin : resp_demux + // The response payload is broadcast to every engine; only the engine whose index equals the response TID sees valid + for (int unsigned i = 0; i < NUM_HW_PREFETCH; i++) begin + hwpf_stride_rsp_valid_o[i] = hpdcache_rsp_valid_i && (i == int'(hpdcache_rsp_i.tid)); + hwpf_stride_rsp_o[i] = hpdcache_rsp_i; + end + end + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv new file mode 100644 index 0000000..3470b78 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv @@ -0,0 +1,68 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : January, 2023 + * Description : High-Performance, Data-cache (HPDcache) HW memory + * prefetcher package + * History : + */ +package hwpf_stride_pkg; + // Base address configuration register of the hardware memory prefetcher + // {{{ + // 64-bit packed layout, MSB first: base_cline in bits 63:6, three unused bits, then cycle (bit 2), rearm (bit 1) and enable (bit 0) + typedef struct packed { + logic [63:6] base_cline; + logic [5:3] unused; + logic cycle; + logic rearm; + logic enable; + } hwpf_stride_base_t; + // }}} + + // Parameters configuration register of the hardware memory prefetcher + // {{{ + // 64-bit packed layout, MSB first: nblocks (16 bits), nlines (16 bits), stride (32 bits) + typedef struct packed { + logic [63:48] nblocks; + logic [47:32] nlines; + logic [31:0] stride; + } hwpf_stride_param_t; + // }}} + + // Throttle configuration register of the hardware memory prefetcher + // {{{ + // 32-bit packed layout, MSB first: ninflight (16 bits), nwait (16 bits) + typedef struct packed { + logic [31:16] ninflight; + logic [15:0] nwait; + } hwpf_stride_throttle_t; + // }}} + + // Status register of the hardware memory prefetcher + // {{{ + // 64-bit packed layout, MSB first: unused1 [63:48], busy [47:32], free [31], unused0 [30:20], free_index [19:16], enabled [15:0] + typedef struct packed { + logic [63:48] unused1; + logic [47:32] busy; + logic free; + logic [30:20] unused0; + logic [19:16] free_index; + logic [15:0] enabled; + } hwpf_stride_status_t; + // }}} + +endpackage diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv new file mode 100644 index 0000000..ba995b5 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv @@ -0,0 +1,38 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Riccardo Alidori, Cesar Fuguet + * Creation Date : June, 2021 + * Description : Snooper used by the hardware memory prefetcher + * History : + */ +module hwpf_stride_snooper +import hpdcache_pkg::*; +( + input logic en_i, // Qualifies the comparison result + input hpdcache_nline_t base_nline_i, // Cache-line number being watched + input hpdcache_nline_t snoop_addr_i, // Cache-line number seen on the snooped port + output snoop_match_o // High when enabled and both line numbers are equal +); + + // Compare the two line numbers first, then gate the result with the enable bit + logic nline_equal; + assign nline_equal = ( snoop_addr_i == base_nline_i ); + assign snoop_match_o = nline_equal && en_i; + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv new file mode 100644 index 0000000..fa1cfa4 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv @@ -0,0 +1,265 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Riccardo Alidori, Cesar Fuguet + * Creation Date : June, 2021 + * Description : Linear Hardware Memory Prefetcher wrapper. + * History : + */ +module hwpf_stride_wrapper +import hwpf_stride_pkg::*; +import hpdcache_pkg::*; +// Instantiates NUM_HW_PREFETCH stride engines, computes their snoop-match inputs from the snooped request ports, builds the status register and arbitrates the engines' requests onto a single cache port. +// Parameters +// {{{ +#( + parameter NUM_HW_PREFETCH = 4, + parameter NUM_SNOOP_PORTS = 1 +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // CSR + // {{{ + input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_base_set_i, + input hwpf_stride_base_t [NUM_HW_PREFETCH-1:0] hwpf_stride_base_i, + output hwpf_stride_base_t [NUM_HW_PREFETCH-1:0] hwpf_stride_base_o, + + input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_param_set_i, + input hwpf_stride_param_t [NUM_HW_PREFETCH-1:0] hwpf_stride_param_i, + output hwpf_stride_param_t [NUM_HW_PREFETCH-1:0] hwpf_stride_param_o, + + input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_set_i, + input hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_i, + output hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_o, + + output hwpf_stride_status_t hwpf_stride_status_o, + // }}} + + // Snooping + // {{{ + input logic [NUM_SNOOP_PORTS-1:0] snoop_valid_i, + input logic [NUM_SNOOP_PORTS-1:0] snoop_abort_i, + input hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_i, + input hpdcache_tag_t [NUM_SNOOP_PORTS-1:0] snoop_addr_tag_i, + input logic [NUM_SNOOP_PORTS-1:0] snoop_phys_indexed_i, + // }}} + + // Dcache interface + // {{{ + input hpdcache_req_sid_t hpdcache_req_sid_i, + output logic hpdcache_req_valid_o, +
input logic hpdcache_req_ready_i, + output hpdcache_req_t hpdcache_req_o, + output logic hpdcache_req_abort_o, + output hpdcache_tag_t hpdcache_req_tag_o, + output hpdcache_pma_t hpdcache_req_pma_o, + input logic hpdcache_rsp_valid_i, + input hpdcache_rsp_t hpdcache_rsp_i + // }}} +); +// }}} + + // Internal registers + // {{{ + logic [NUM_SNOOP_PORTS-1:0] snoop_valid_q; + hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_q; + // }}} + + // Internal signals + // {{{ + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_enable; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_free; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_status_busy; + logic [3:0] hwpf_stride_status_free_idx; + + hpdcache_nline_t [NUM_HW_PREFETCH-1:0] hwpf_snoop_nline; + logic [NUM_HW_PREFETCH-1:0] hwpf_snoop_match; + + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready; + hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req; + + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_valid; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_ready; + hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp_valid; + hpdcache_rsp_t [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial + begin + max_hwpf_stride_assert: assert (NUM_HW_PREFETCH <= 16) else + $error("hwpf_stride: maximum number of HW prefetchers is 16"); + end + // pragma translate_on + // }}} + + // Compute the status information + // {{{ + // free_idx is the lowest-numbered engine that is neither enabled nor busy (0 when no engine is free) + always_comb begin: hwpf_stride_priority_encoder + hwpf_stride_status_free_idx = '0; + for (int unsigned i = 0; i < NUM_HW_PREFETCH; i++) begin + if (hwpf_stride_free[i]) begin + hwpf_stride_status_free_idx = i; + break; + end + end + end + + // Free flag of engines + assign hwpf_stride_free = ~(hwpf_stride_enable | hwpf_stride_status_busy); + // Busy flags + assign hwpf_stride_status_o[63:32] = {{32-NUM_HW_PREFETCH{1'b0}},
hwpf_stride_status_busy}; + // Global free flag + assign hwpf_stride_status_o[31] = |hwpf_stride_free; + // Free Index + assign hwpf_stride_status_o[30:16] = {11'b0, hwpf_stride_status_free_idx}; + // Enable flags + assign hwpf_stride_status_o[15:0] = {{16-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_enable}; + // }}} + + // Hardware prefetcher engines + // {{{ + // NOTE(review): the snoop registers below are only updated while the snooped request is physically indexed, yet they are only consumed when it is not - confirm the intended polarity against the cache request protocol + generate + for (genvar j = 0; j < NUM_SNOOP_PORTS; j++) begin + always_ff @(posedge clk_i or negedge rst_ni) + begin : snoop_ff + if (!rst_ni) begin + snoop_valid_q[j] <= 1'b0; + snoop_addr_offset_q[j] <= '0; + end else begin + if (snoop_phys_indexed_i[j]) begin + snoop_valid_q[j] <= snoop_valid_i[j]; + snoop_addr_offset_q[j] <= snoop_addr_offset_i[j]; + end + end + end + end + + for (genvar i = 0; i < NUM_HW_PREFETCH; i++) begin + assign hwpf_stride_enable[i] = hwpf_stride_base_o[i].enable; + + // Compute snoop match signals + // {{{ + // An engine matches when any valid, non-aborted snoop port carries the line number the engine is watching + // NOTE(review): {tag, req_offset} is assigned to a hpdcache_nline_t; if hpdcache_req_offset_t also carries the in-line byte offset the concatenation is wider than a line number and gets truncated - confirm the widths in hpdcache_pkg + always_comb + begin : snoop_comb + hwpf_snoop_match[i] = 1'b0; + for (int j = 0; j < NUM_SNOOP_PORTS; j++) begin + automatic logic snoop_valid; + automatic hpdcache_req_offset_t snoop_offset; + automatic hpdcache_nline_t snoop_nline; + + if (snoop_phys_indexed_i[j]) begin + snoop_valid = snoop_valid_i[j]; + snoop_offset = snoop_addr_offset_i[j]; + end else begin + snoop_valid = snoop_valid_q[j]; + snoop_offset = snoop_addr_offset_q[j]; + end + snoop_nline = {snoop_addr_tag_i[j], snoop_offset}; + hwpf_snoop_match[i] |= (snoop_valid && !snoop_abort_i[j] && + (hwpf_snoop_nline[i] == snoop_nline)); + end + end + // }}} + + hwpf_stride #( + .CACHE_LINE_BYTES ( HPDCACHE_CL_WIDTH/8 ) + ) hwpf_stride_i( + .clk_i, + .rst_ni, + + .csr_base_set_i ( hwpf_stride_base_set_i[i] ), + .csr_base_i ( hwpf_stride_base_i[i] ), + .csr_param_set_i ( hwpf_stride_param_set_i[i] ), + .csr_param_i ( hwpf_stride_param_i[i] ), + .csr_throttle_set_i ( hwpf_stride_throttle_set_i[i] ), + .csr_throttle_i ( hwpf_stride_throttle_i[i] ), + + .csr_base_o ( hwpf_stride_base_o[i] ), + .csr_param_o (
hwpf_stride_param_o[i] ), + .csr_throttle_o ( hwpf_stride_throttle_o[i] ), + + .busy_o ( hwpf_stride_status_busy[i] ), + + .snoop_nline_o ( hwpf_snoop_nline[i] ), + .snoop_match_i ( hwpf_snoop_match[i] ), + + .hpdcache_req_valid_o ( hwpf_stride_req_valid[i] ), + .hpdcache_req_ready_i ( hwpf_stride_req_ready[i] ), + .hpdcache_req_o ( hwpf_stride_req[i] ), + .hpdcache_rsp_valid_i ( hwpf_stride_arb_in_rsp_valid[i] ), + .hpdcache_rsp_i ( hwpf_stride_arb_in_rsp[i] ) + ); + + // Forward the engine request to the arbiter, overriding SID with the port identifier and TID with the engine index (the arbiter uses TID to route responses back) + // The engine issues physically indexed requests, so its computed addr_tag must travel with the request; zeroing it here would send every prefetch to tag 0 + assign hwpf_stride_req_ready[i] = hwpf_stride_arb_in_req_ready[i], + hwpf_stride_arb_in_req_valid[i] = hwpf_stride_req_valid[i], + hwpf_stride_arb_in_req[i].addr_offset = hwpf_stride_req[i].addr_offset, + hwpf_stride_arb_in_req[i].wdata = hwpf_stride_req[i].wdata, + hwpf_stride_arb_in_req[i].op = hwpf_stride_req[i].op, + hwpf_stride_arb_in_req[i].be = hwpf_stride_req[i].be, + hwpf_stride_arb_in_req[i].size = hwpf_stride_req[i].size, + hwpf_stride_arb_in_req[i].sid = hpdcache_req_sid_i, + hwpf_stride_arb_in_req[i].tid = hpdcache_req_tid_t'(i), + hwpf_stride_arb_in_req[i].need_rsp = hwpf_stride_req[i].need_rsp, + hwpf_stride_arb_in_req[i].phys_indexed = hwpf_stride_req[i].phys_indexed, + hwpf_stride_arb_in_req[i].addr_tag = hwpf_stride_req[i].addr_tag, + hwpf_stride_arb_in_req[i].pma = '0; + end + endgenerate + // }}} + + // Hardware prefetcher arbiter between engines + // {{{ + hwpf_stride_arb #( + .NUM_HW_PREFETCH ( NUM_HW_PREFETCH ) + ) hwpf_stride_arb_i ( + .clk_i, + .rst_ni, + + // DCache input interface + .hwpf_stride_req_valid_i ( hwpf_stride_arb_in_req_valid ), + .hwpf_stride_req_ready_o ( hwpf_stride_arb_in_req_ready ), + .hwpf_stride_req_i ( hwpf_stride_arb_in_req ), + .hwpf_stride_rsp_valid_o ( hwpf_stride_arb_in_rsp_valid ), + .hwpf_stride_rsp_o ( hwpf_stride_arb_in_rsp ), + + // DCache output interface + .hpdcache_req_valid_o, + .hpdcache_req_ready_i, + .hpdcache_req_o, + .hpdcache_rsp_valid_i, + .hpdcache_rsp_i + ); + + assign hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests +
hpdcache_req_tag_o = '0, // unused on physically indexed requests + hpdcache_req_pma_o = '0; // unused on physically indexed requests + // }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv new file mode 100644 index 0000000..cb32acf --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv @@ -0,0 +1,103 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Dcache Memory Read Request Channel Arbiter (N requester ports in, one memory read request port out)
+ * History :
+ */
+module hpdcache_mem_req_read_arbiter
+import hpdcache_pkg::*;
+// Parameters
+// {{{
+#(
+    parameter hpdcache_uint N = 0, // number of requester ports; sizes every [N-1:0] array below
+    parameter type hpdcache_mem_req_t = logic // request payload type, overridden by the instantiating module
+)
+// }}}
+
+// Ports
+// {{{
+(
+    input logic clk_i, // only forwarded to the arbiter instance
+    input logic rst_ni, // only forwarded to the arbiter instance
+
+    output logic mem_req_read_ready_o [N-1:0], // per-requester ready
+    input logic mem_req_read_valid_i [N-1:0], // per-requester valid
+    input hpdcache_mem_req_t mem_req_read_i [N-1:0], // per-requester request payload
+
+    input logic mem_req_read_ready_i, // ready from the single downstream port
+    output logic mem_req_read_valid_o, // valid towards the single downstream port
+    output hpdcache_mem_req_t mem_req_read_o // payload picked by the request multiplexer
+);
+// }}}
+
+    logic [N-1:0] mem_read_arb_req_valid; // packed copy of mem_req_read_valid_i
+    hpdcache_mem_req_t [N-1:0] mem_read_arb_req; // packed copy of mem_req_read_i
+    logic [N-1:0] mem_read_arb_req_gnt; // grant vector driven by the arbiter
+
+    logic req_valid; // high when at least one granted requester is also valid
+
+    genvar gen_i;
+
+
+    // Pack inputs: copy the unpacked port arrays into packed vectors, one element per requester
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen
+            assign mem_read_arb_req_valid[gen_i] = mem_req_read_valid_i[gen_i],
+                   mem_read_arb_req [gen_i] = mem_req_read_i[gen_i];
+        end
+    endgenerate
+
+    assign req_valid = |(mem_read_arb_req_gnt & mem_read_arb_req_valid);
+
+    // Fixed-priority arbiter. NOTE(review): the instance name below says "write" inside the read arbiter - looks like a copy/paste leftover; confirm before renaming, hierarchical paths would change
+    hpdcache_fxarb #(
+        .N (N)
+    ) hpdcache_fxarb_mem_req_write_i (
+        .clk_i,
+        .rst_ni,
+        .req_i (mem_read_arb_req_valid),
+        .gnt_o (mem_read_arb_req_gnt),
+        .ready_i (mem_req_read_ready_i)
+    );
+
+    // Demultiplexor for the ready signal: a requester sees ready only while it is granted, valid and the downstream port is ready
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen
+            assign mem_req_read_ready_o[gen_i] = mem_req_read_ready_i &
+                   mem_read_arb_req_gnt[gen_i] & mem_read_arb_req_valid[gen_i];
+        end
+    endgenerate
+
+    assign mem_req_read_valid_o = req_valid;
+
+    // Multiplexor for requests: the grant vector is used as the (one-hot) select of the payload mux
+    hpdcache_mux #(
+        .NINPUT (N),
+        .DATA_WIDTH ($bits(hpdcache_mem_req_t)),
+        .ONE_HOT_SEL (1'b1)
+    ) mem_read_req_mux_i (
+        .data_i (mem_read_arb_req),
+        .sel_i (mem_read_arb_req_gnt),
+        .data_o (mem_req_read_o)
+    );
+
+endmodule
diff --git
a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv new file mode 100644 index 0000000..a7916ec --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv @@ -0,0 +1,193 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Dcache Memory Write Channels Arbiter (N requesters, each with a request channel and a write-data channel, onto one downstream pair)
+ * History :
+ */
+module hpdcache_mem_req_write_arbiter
+import hpdcache_pkg::*;
+// Parameters
+// {{{
+#(
+    parameter hpdcache_uint N = 0, // number of requester ports; sizes every [N-1:0] array and the arbiter below
+    parameter type hpdcache_mem_req_t = logic, // request (metadata) payload type
+    parameter type hpdcache_mem_req_w_t = logic // write-data payload type
+)
+// }}}
+
+// Ports
+// {{{
+(
+    input logic clk_i,
+    input logic rst_ni, // asynchronous, active-low reset of the FSM register (see req_send_fsm_ff)
+
+    output logic mem_req_write_ready_o [N-1:0], // per-requester ready, request channel
+    input logic mem_req_write_valid_i [N-1:0], // per-requester valid, request channel
+    input hpdcache_mem_req_t mem_req_write_i [N-1:0], // per-requester request payload
+
+    output logic mem_req_write_data_ready_o [N-1:0], // per-requester ready, data channel
+    input logic mem_req_write_data_valid_i [N-1:0], // per-requester valid, data channel
+    input hpdcache_mem_req_w_t mem_req_write_data_i [N-1:0], // per-requester data payload
+
+    input logic mem_req_write_ready_i, // downstream ready, request channel
+    output logic mem_req_write_valid_o, // downstream valid, request channel
+    output hpdcache_mem_req_t mem_req_write_o, // request payload of the granted requester
+
+    input logic mem_req_write_data_ready_i, // downstream ready, data channel
+    output logic mem_req_write_data_valid_o, // downstream valid, data channel
+    output hpdcache_mem_req_w_t mem_req_write_data_o // data payload of the granted requester
+);
+// }}}
+
+    typedef enum {
+        REQ_IDLE, // neither half of the current transfer has been sent alone
+        REQ_META_SENT, // request accepted downstream, data still pending
+        REQ_DATA_SENT // data accepted downstream, request still pending
+    } req_send_fsm_t;
+
+    req_send_fsm_t req_send_fsm_q, req_send_fsm_d;
+    logic req_valid; // granted requester has a valid request
+    logic req_data_valid; // granted requester has valid data
+
+    logic [N-1:0] mem_write_arb_req_valid; // packed copy of mem_req_write_valid_i
+    hpdcache_mem_req_t [N-1:0] mem_write_arb_req; // packed copy of mem_req_write_i
+    logic [N-1:0] mem_write_arb_req_data_valid; // packed copy of mem_req_write_data_valid_i
+    hpdcache_mem_req_w_t [N-1:0] mem_write_arb_req_data; // packed copy of mem_req_write_data_i
+    logic [N-1:0] mem_write_arb_req_gnt; // grant vector driven by the arbiter
+    logic mem_write_arb_req_ready; // pulsed by the FSM once request and data have both been sent
+
+    genvar gen_i;
+
+
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen
+            assign mem_write_arb_req_valid [gen_i] = mem_req_write_valid_i[gen_i],
+                   mem_write_arb_req [gen_i] = mem_req_write_i[gen_i],
+                   mem_write_arb_req_data_valid[gen_i] = mem_req_write_data_valid_i[gen_i],
+                   mem_write_arb_req_data [gen_i] = mem_req_write_data_i[gen_i];
+        end
+    endgenerate
+
+    // Fixed-priority arbiter, sized with N so it matches the [N-1:0] request/grant vectors (was hard-coded to 2)
+    hpdcache_fxarb #(
+        .N (N)
+    ) hpdcache_fxarb_mem_req_write_i (
+        .clk_i,
+        .rst_ni,
+        .req_i (mem_write_arb_req_valid),
+        .gnt_o (mem_write_arb_req_gnt),
+        .ready_i (mem_write_arb_req_ready)
+    );
+
+    assign req_valid = |(mem_write_arb_req_gnt & mem_write_arb_req_valid);
+    assign req_data_valid = |(mem_write_arb_req_gnt & mem_write_arb_req_data_valid);
+
+    // Request sent FSM
+    //
+    // This FSM makes sure that the request and its corresponding
+    // data are sent in order. That is, when a requester sends a request, this
+    // FSM keeps the grant signal on this requester until it has sent the
+    // corresponding data.
+    //
+    // {{{
+    always_comb
+    begin : req_send_fsm_comb
+        req_send_fsm_d = req_send_fsm_q;
+        mem_write_arb_req_ready = 1'b0;
+        case (req_send_fsm_q)
+            REQ_IDLE:
+                if (req_valid && mem_req_write_ready_i) begin
+                    if (req_data_valid) begin
+                        if (mem_req_write_data_ready_i) begin
+                            mem_write_arb_req_ready = 1'b1;
+                            req_send_fsm_d = REQ_IDLE;
+                        end else begin
+                            req_send_fsm_d = REQ_META_SENT;
+                        end
+                    end // NOTE(review): request accepted while no data is valid leaves the state at REQ_IDLE - confirm this is intended
+                end else if (req_data_valid && mem_req_write_data_ready_i) begin
+                    req_send_fsm_d = REQ_DATA_SENT;
+                end
+
+            REQ_META_SENT:
+                if (req_data_valid && mem_req_write_data_ready_i) begin
+                    mem_write_arb_req_ready = 1'b1;
+                    req_send_fsm_d = REQ_IDLE;
+                end
+
+            REQ_DATA_SENT:
+                if (req_valid && mem_req_write_ready_i) begin
+                    mem_write_arb_req_ready = 1'b1;
+                    req_send_fsm_d = REQ_IDLE;
+                end
+        endcase
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : req_send_fsm_ff
+        if (!rst_ni) begin
+            req_send_fsm_q <= REQ_IDLE;
+        end else begin
+            req_send_fsm_q <= req_send_fsm_d;
+        end
+    end
+    // }}}
+
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen
+            assign mem_req_write_ready_o[gen_i] =
+                   (mem_write_arb_req_gnt[gen_i] & mem_req_write_ready_i) &
+                   (req_send_fsm_q != REQ_META_SENT);
+
+            assign mem_req_write_data_ready_o[gen_i] =
+                   (mem_write_arb_req_gnt[gen_i] & mem_req_write_data_ready_i) &
+                   (req_send_fsm_q != REQ_DATA_SENT);
+        end
+    endgenerate
+
+    // Output assignments: each channel is masked once its own half of the transfer has already been sent
+    // {{{
+    assign mem_req_write_valid_o = req_valid & (req_send_fsm_q != REQ_META_SENT);
+    assign mem_req_write_data_valid_o = req_data_valid & (req_send_fsm_q != REQ_DATA_SENT);
+
+    hpdcache_mux #(
+        .NINPUT (N),
+        .DATA_WIDTH ($bits(hpdcache_mem_req_t)),
+        .ONE_HOT_SEL (1'b1)
+    ) mem_write_req_mux_i (
+        .data_i (mem_write_arb_req),
+        .sel_i (mem_write_arb_req_gnt),
+        .data_o (mem_req_write_o)
+    );
+
+    hpdcache_mux #(
+        .NINPUT (N),
+        .DATA_WIDTH ($bits(hpdcache_mem_req_w_t)),
+        .ONE_HOT_SEL (1'b1)
+    ) mem_write_data_req_mux_i (
+        .data_i (mem_write_arb_req_data),
+        .sel_i (mem_write_arb_req_gnt),
+        .data_o (mem_req_write_data_o)
+    );
+    // }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv
new file mode 100644
index 0000000..c1502a9
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : June, 2022
+ * Description : Dcache Memory Response Demultiplexer (steers one response port to one of N outputs using a routing table indexed by the response ID)
+ * History :
+ */
+module hpdcache_mem_resp_demux
+// Parameters
+// {{{
+#(
+    parameter int N = 0, // number of output ports
+    parameter type resp_t = logic, // response payload type
+    parameter type resp_id_t = logic, // response ID type; its bit width sets the routing table depth
+
+    localparam int RT_DEPTH = (1 << $bits(resp_id_t)), // one routing table entry per possible ID value
+    localparam type rt_t = resp_id_t [RT_DEPTH-1:0] // routing table: packed array of RT_DEPTH entries of type resp_id_t
+)
+// }}}
+
+// Ports
+// {{{
+(
+    input logic clk_i, // not referenced in this module body
+    input logic rst_ni, // not referenced in this module body
+
+    output logic mem_resp_ready_o, // ready of the selected output, returned to the response source
+    input logic mem_resp_valid_i,
+    input resp_id_t mem_resp_id_i, // index into mem_resp_rt_i
+    input resp_t mem_resp_i,
+
+    input logic mem_resp_ready_i [N-1:0], // per-output ready
+    output logic mem_resp_valid_o [N-1:0], // per-output valid
+    output resp_t mem_resp_o [N-1:0], // per-output response payload
+
+    input rt_t mem_resp_rt_i // routing table: entry [id] is used as the output select
+);
+// }}}
+
+    typedef logic [$clog2(N)-1:0] sel_t; // binary select wide enough to address N outputs
+
+    logic [N-1:0] mem_resp_demux_valid; // packed per-output valid
+    resp_t [N-1:0] mem_resp_demux; // packed per-output payload
+    logic [N-1:0] mem_resp_demux_ready; // packed copy of mem_resp_ready_i
+    sel_t mem_resp_demux_sel;
+
+    // Route the response according to the response ID and the routing table. NOTE(review): table entries are resp_id_t while sel_t is $clog2(N) bits, so this assignment may truncate or extend - confirm the widths agree
+    assign mem_resp_demux_sel = mem_resp_rt_i[int'(mem_resp_id_i)];
+
+    // Forward the response to the corresponding output port (valid bit first, payload second, both with a binary select)
+    hpdcache_demux #(
+        .NOUTPUT (N),
+        .DATA_WIDTH (1),
+        .ONE_HOT_SEL (0)
+    ) i_resp_valid_demux (
+        .data_i (mem_resp_valid_i),
+        .sel_i (mem_resp_demux_sel),
+        .data_o (mem_resp_demux_valid)
+    );
+
+    hpdcache_demux #(
+        .NOUTPUT (N),
+        .DATA_WIDTH ($bits(resp_t)),
+        .ONE_HOT_SEL (0)
+    ) i_resp_demux (
+        .data_i (mem_resp_i),
+        .sel_i (mem_resp_demux_sel),
+        .data_o (mem_resp_demux)
+    );
+
+    hpdcache_mux #(
+        .NINPUT (N),
+        .DATA_WIDTH (1),
+        .ONE_HOT_SEL (0)
+    ) i_resp_ready_mux (
+        .data_i (mem_resp_demux_ready),
+        .sel_i (mem_resp_demux_sel),
+        .data_o (mem_resp_ready_o)
+    );
+
+    // Pack/unpack responses: bridge the packed internal vectors and the unpacked port arrays
+    generate
+        for (genvar gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_unpack_resp_gen
+            assign mem_resp_valid_o [gen_i] = mem_resp_demux_valid [gen_i];
+            assign mem_resp_o [gen_i] = mem_resp_demux [gen_i];
+            assign mem_resp_demux_ready [gen_i] = mem_resp_ready_i [gen_i];
+        end
+    endgenerate
+
+endmodule : hpdcache_mem_resp_demux
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv
new file mode 100644
index 0000000..ec3fad7
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Dcache memory request to axi read channels (purely combinational mapping onto the AR and R channel structs)
+ * History :
+ */
+module hpdcache_mem_to_axi_read
+import hpdcache_pkg::*;
+#(
+    parameter type hpdcache_mem_req_t = logic, // memory request type; fields mem_req_* are read below
+    parameter type hpdcache_mem_resp_r_t = logic, // read response type; fields mem_resp_r_* are driven below
+    parameter type ar_chan_t = logic, // AXI AR channel struct type
+    parameter type r_chan_t = logic // AXI R channel struct type
+)
+(
+    output logic req_ready_o, // follows axi_ar_ready_i
+    input logic req_valid_i,
+    input hpdcache_mem_req_t req_i,
+
+    input logic resp_ready_i,
+    output logic resp_valid_o, // follows axi_r_valid_i
+    output hpdcache_mem_resp_r_t resp_o,
+
+    output logic axi_ar_valid_o, // follows req_valid_i
+    output ar_chan_t axi_ar_o,
+    input logic axi_ar_ready_i,
+
+    input logic axi_r_valid_i,
+    input r_chan_t axi_r_i,
+    output logic axi_r_ready_o // follows resp_ready_i
+);
+
+    logic lock; // drives axi_ar_o.lock
+    axi_pkg::cache_t cache; // drives axi_ar_o.cache
+    hpdcache_mem_error_e resp; // decoded error status of the R beat
+
+    // lock is set only for an atomic command whose atomic operation is LDEX
+    assign lock = (req_i.mem_req_command == HPDCACHE_MEM_ATOMIC) &&
+                  (req_i.mem_req_atomic == HPDCACHE_MEM_ATOMIC_LDEX);
+
+    // cacheable requests get all four attribute flags OR-ed together, others get all zeros
+    assign cache = req_i.mem_req_cacheable ?
+                   axi_pkg::CACHE_BUFFERABLE |
+                   axi_pkg::CACHE_MODIFIABLE |
+                   axi_pkg::CACHE_RD_ALLOC |
+                   axi_pkg::CACHE_WR_ALLOC : '0;
+
+    // SLVERR and DECERR map to NOK, every other response code maps to OK
+    always_comb
+    begin : resp_decode_comb
+        case (axi_r_i.resp)
+            axi_pkg::RESP_SLVERR,
+            axi_pkg::RESP_DECERR: resp = HPDCACHE_MEM_RESP_NOK;
+            default: resp = HPDCACHE_MEM_RESP_OK;
+        endcase
+    end
+
+    // request side: handshake passed straight through, burst type fixed to INCR, prot/qos/region/user tied to zero
+    assign req_ready_o = axi_ar_ready_i,
+           axi_ar_valid_o = req_valid_i,
+           axi_ar_o.id = req_i.mem_req_id,
+           axi_ar_o.addr = req_i.mem_req_addr,
+           axi_ar_o.len = req_i.mem_req_len,
+           axi_ar_o.size = req_i.mem_req_size,
+           axi_ar_o.burst = axi_pkg::BURST_INCR,
+           axi_ar_o.lock = lock,
+           axi_ar_o.cache = cache,
+           axi_ar_o.prot = '0,
+           axi_ar_o.qos = '0,
+           axi_ar_o.region = '0,
+           axi_ar_o.user = '0;
+
+    // response side: handshake passed straight through, id/data/last copied from the R beat
+    assign axi_r_ready_o = resp_ready_i,
+           resp_valid_o = axi_r_valid_i,
+           resp_o.mem_resp_r_error = resp,
+           resp_o.mem_resp_r_id = axi_r_i.id,
+           resp_o.mem_resp_r_data = axi_r_i.data,
+           resp_o.mem_resp_r_last = axi_r_i.last;
+
+endmodule
diff --git
a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv new file mode 100644 index 0000000..8d8eb9f --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv @@ -0,0 +1,148 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Dcache memory request to axi write channels (purely combinational mapping onto the AW, W and B channel structs)
+ * History :
+ */
+module hpdcache_mem_to_axi_write
+import hpdcache_pkg::*;
+#(
+    parameter type hpdcache_mem_req_t = logic, // memory request type; fields mem_req_* are read below
+    parameter type hpdcache_mem_req_w_t = logic, // write data type; fields mem_req_w_* are read below
+    parameter type hpdcache_mem_resp_w_t = logic, // write response type; fields mem_resp_w_* are driven below
+    parameter type aw_chan_t = logic, // AXI AW channel struct type
+    parameter type w_chan_t = logic, // AXI W channel struct type
+    parameter type b_chan_t = logic // AXI B channel struct type
+)
+(
+    output logic req_ready_o, // follows axi_aw_ready_i
+    input logic req_valid_i,
+    input hpdcache_mem_req_t req_i,
+
+    output logic req_data_ready_o, // follows axi_w_ready_i
+    input logic req_data_valid_i,
+    input hpdcache_mem_req_w_t req_data_i,
+
+    input logic resp_ready_i,
+    output logic resp_valid_o, // follows axi_b_valid_i
+    output hpdcache_mem_resp_w_t resp_o,
+
+    output logic axi_aw_valid_o, // follows req_valid_i
+    output aw_chan_t axi_aw_o,
+    input logic axi_aw_ready_i,
+
+    output logic axi_w_valid_o, // follows req_data_valid_i
+    output w_chan_t axi_w_o,
+    input logic axi_w_ready_i,
+
+    input logic axi_b_valid_i,
+    input b_chan_t axi_b_i,
+    output logic axi_b_ready_o // follows resp_ready_i
+);
+
+    logic lock; // drives axi_aw_o.lock
+    axi_pkg::atop_t atop; // drives axi_aw_o.atop
+    axi_pkg::cache_t cache; // drives axi_aw_o.cache
+    hpdcache_mem_error_e resp; // decoded error status of the B response
+
+    // Decode the atomic operation; lock and atop are defaulted first, so the default-less case statements leave them at zero for every unlisted value
+    always_comb
+    begin : atop_comb
+        lock = 1'b0;
+        atop = '0;
+        case (req_i.mem_req_command)
+            HPDCACHE_MEM_ATOMIC: begin
+                case (req_i.mem_req_atomic)
+                    HPDCACHE_MEM_ATOMIC_STEX: lock = 1'b1; // STEX only sets lock, atop stays zero
+                    HPDCACHE_MEM_ATOMIC_ADD : atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_ADD};
+                    HPDCACHE_MEM_ATOMIC_CLR : atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_CLR};
+                    HPDCACHE_MEM_ATOMIC_SET : atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_SET};
+                    HPDCACHE_MEM_ATOMIC_EOR : atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_EOR};
+                    HPDCACHE_MEM_ATOMIC_SMAX: atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_SMAX};
+                    HPDCACHE_MEM_ATOMIC_SMIN: atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_SMIN};
+                    HPDCACHE_MEM_ATOMIC_UMAX: atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_UMAX};
+                    HPDCACHE_MEM_ATOMIC_UMIN: atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_UMIN};
+                    HPDCACHE_MEM_ATOMIC_SWAP: atop = axi_pkg::ATOP_ATOMICSWAP;
+                endcase
+            end
+        endcase
+    end
+
+    // cache attributes are forced to zero whenever lock is set, even for a cacheable request
+    assign cache = (req_i.mem_req_cacheable && !lock) ?
+                   axi_pkg::CACHE_BUFFERABLE |
+                   axi_pkg::CACHE_MODIFIABLE |
+                   axi_pkg::CACHE_RD_ALLOC |
+                   axi_pkg::CACHE_WR_ALLOC : '0;
+
+    // SLVERR and DECERR map to NOK, every other response code maps to OK
+    always_comb
+    begin : resp_decode_comb
+        case (axi_b_i.resp)
+            axi_pkg::RESP_SLVERR,
+            axi_pkg::RESP_DECERR: resp = HPDCACHE_MEM_RESP_NOK;
+            default: resp = HPDCACHE_MEM_RESP_OK;
+        endcase
+    end
+
+    // address channel: handshake passed straight through, burst type fixed to INCR, prot/qos/region/user tied to zero
+    assign req_ready_o = axi_aw_ready_i,
+           axi_aw_valid_o = req_valid_i,
+           axi_aw_o.id = req_i.mem_req_id,
+           axi_aw_o.addr = req_i.mem_req_addr,
+           axi_aw_o.len = req_i.mem_req_len,
+           axi_aw_o.size = req_i.mem_req_size,
+           axi_aw_o.burst = axi_pkg::BURST_INCR,
+           axi_aw_o.lock = lock,
+           axi_aw_o.cache = cache,
+           axi_aw_o.prot = '0,
+           axi_aw_o.qos = '0,
+           axi_aw_o.region = '0,
+           axi_aw_o.atop = atop,
+           axi_aw_o.user = '0;
+
+    // data channel: handshake passed straight through, byte enables drive strb
+    assign req_data_ready_o = axi_w_ready_i,
+           axi_w_valid_o = req_data_valid_i,
+           axi_w_o.data = req_data_i.mem_req_w_data,
+           axi_w_o.strb = req_data_i.mem_req_w_be,
+           axi_w_o.last = req_data_i.mem_req_w_last,
+           axi_w_o.user = '0;
+
+    // response channel: mem_resp_w_is_atomic is set exactly when the B response code is EXOKAY
+    assign axi_b_ready_o = resp_ready_i,
+           resp_valid_o = axi_b_valid_i,
+           resp_o.mem_resp_w_error = resp,
+           resp_o.mem_resp_w_id = axi_b_i.id,
+           resp_o.mem_resp_w_is_atomic = (axi_b_i.resp == axi_pkg::RESP_EXOKAY);
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/miss_handler.sv b/test/type_param/core/cache_subsystem/miss_handler.sv
new file mode 100644
index 0000000..4755d0b
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/miss_handler.sv
@@ -0,0 +1,826 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 12.11.2017 +// Description: Handles cache misses. + +// -------------- +// MISS Handler +// -------------- + +module miss_handler + import ariane_pkg::*; + import std_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NR_PORTS = 4, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, // flush request + output logic flush_ack_o, // acknowledge successful flush + output logic miss_o, + input logic busy_i, // dcache is busy with something + // Bypass or miss + input logic [NR_PORTS-1:0][$bits(miss_req_t)-1:0] miss_req_i, + // Bypass handling + output logic [NR_PORTS-1:0] bypass_gnt_o, + output logic [NR_PORTS-1:0] bypass_valid_o, + output logic [NR_PORTS-1:0][63:0] bypass_data_o, + + // AXI port + output axi_req_t axi_bypass_o, + input axi_rsp_t axi_bypass_i, + + // Miss handling (~> cacheline refill) + output logic [NR_PORTS-1:0] miss_gnt_o, + output logic [NR_PORTS-1:0] active_serving_o, + + output logic [63:0] critical_word_o, + output logic critical_word_valid_o, + output axi_req_t axi_data_o, + input axi_rsp_t axi_data_i, + + input logic [NR_PORTS-1:0][55:0] mshr_addr_i, + output logic [NR_PORTS-1:0] mshr_addr_matches_o, + output logic [NR_PORTS-1:0] 
mshr_index_matches_o, + // AMO + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, + // Port to SRAMs, for refill and eviction + output logic [DCACHE_SET_ASSOC-1:0] req_o, + output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array + output cache_line_t data_o, + output cl_be_t be_o, + input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, + output logic we_o +); + + // Three MSHR ports + AMO port + parameter NR_BYPASS_PORTS = NR_PORTS + 1; + + // FSM states + enum logic [3:0] { + IDLE, // 0 + FLUSHING, // 1 + FLUSH, // 2 + WB_CACHELINE_FLUSH, // 3 + FLUSH_REQ_STATUS, // 4 + WB_CACHELINE_MISS, // 5 + WAIT_GNT_SRAM, // 6 + MISS, // 7 + REQ_CACHELINE, // 8 + MISS_REPL, // 9 + SAVE_CACHELINE, // A + INIT, // B + AMO_REQ, // C + AMO_WAIT_RESP // D + } + state_d, state_q; + + // Registers + mshr_t mshr_d, mshr_q; + logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q; + logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q; + // cache line to evict + cache_line_t evict_cl_d, evict_cl_q; + + logic serve_amo_d, serve_amo_q; + // Request from one FSM + logic [ NR_PORTS-1:0] miss_req_valid; + logic [ NR_PORTS-1:0] miss_req_bypass; + logic [ NR_PORTS-1:0][63:0] miss_req_addr; + logic [ NR_PORTS-1:0][63:0] miss_req_wdata; + logic [ NR_PORTS-1:0] miss_req_we; + logic [ NR_PORTS-1:0][ 7:0] miss_req_be; + logic [ NR_PORTS-1:0][ 1:0] miss_req_size; + + // Bypass AMO port + bypass_req_t amo_bypass_req; + bypass_rsp_t amo_bypass_rsp; + + // Bypass ports <-> Arbiter + bypass_req_t [ NR_BYPASS_PORTS-1:0] bypass_ports_req; + bypass_rsp_t [ NR_BYPASS_PORTS-1:0] bypass_ports_rsp; + + // Arbiter <-> Bypass AXI adapter + bypass_req_t bypass_adapter_req; + bypass_rsp_t bypass_adapter_rsp; + + // Cache Line Refill <-> AXI + logic req_fsm_miss_valid; + logic [ 63:0] req_fsm_miss_addr; + logic [ DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata; + logic req_fsm_miss_we; + logic [ (DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be; + ariane_pkg::ad_req_t req_fsm_miss_req; + logic [ 1:0] 
req_fsm_miss_size; + + logic gnt_miss_fsm; + logic valid_miss_fsm; + logic [ (DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm; + + // Cache Management <-> LFSR + logic lfsr_enable; + logic [ DCACHE_SET_ASSOC-1:0] lfsr_oh; + logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin; + // AMOs + ariane_pkg::amo_t amo_op; + logic [ 63:0] amo_operand_b; + + // ------------------------------ + // Cache Management + // ------------------------------ + always_comb begin : cache_management + automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way; + + for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin + evict_way[i] = data_i[i].valid & data_i[i].dirty; + valid_way[i] = data_i[i].valid; + end + // ---------------------- + // Default Assignments + // ---------------------- + // memory array + req_o = '0; + addr_o = '0; + data_o = '0; + be_o = '0; + we_o = '0; + // Cache controller + miss_gnt_o = '0; + active_serving_o = '0; + // LFSR replacement unit + lfsr_enable = 1'b0; + // to AXI refill + req_fsm_miss_valid = 1'b0; + req_fsm_miss_addr = '0; + req_fsm_miss_wdata = '0; + req_fsm_miss_we = 1'b0; + req_fsm_miss_be = '0; + req_fsm_miss_req = ariane_pkg::CACHE_LINE_REQ; + req_fsm_miss_size = 2'b11; + // to AXI bypass + amo_bypass_req.req = 1'b0; + amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ; + amo_bypass_req.amo = ariane_pkg::AMO_NONE; + amo_bypass_req.addr = '0; + amo_bypass_req.we = 1'b0; + amo_bypass_req.wdata = '0; + amo_bypass_req.be = '0; + amo_bypass_req.size = 2'b11; + amo_bypass_req.id = 4'b1011; + // core + flush_ack_o = 1'b0; + miss_o = 1'b0; // to performance counter + serve_amo_d = serve_amo_q; + // -------------------------------- + // Flush and Miss operation + // -------------------------------- + state_d = state_q; + cnt_d = cnt_q; + evict_way_d = evict_way_q; + evict_cl_d = evict_cl_q; + mshr_d = mshr_q; + // communicate to the requester which unit we are currently serving + active_serving_o[mshr_q.id] = mshr_q.valid; + // AMOs + amo_resp_o.ack = 1'b0; + 
amo_resp_o.result = '0; + amo_operand_b = '0; + + case (state_q) + + IDLE: begin + // lowest priority are AMOs, wait until everything else is served before going for the AMOs + if (amo_req_i.req && !busy_i) begin + // 1. Flush the cache + state_d = FLUSH_REQ_STATUS; + serve_amo_d = 1'b1; + cnt_d = '0; + end + // check if we want to flush and can flush e.g.: we are not busy anymore + // TODO: Check that the busy flag is indeed needed + if (flush_i && !busy_i) begin + state_d = FLUSH_REQ_STATUS; + cnt_d = '0; + end + + // check if one of the state machines missed + for (int unsigned i = 0; i < NR_PORTS; i++) begin + // here comes the refill portion of code + if (miss_req_valid[i] && !miss_req_bypass[i]) begin + state_d = MISS; + // we are taking another request so don't take the AMO + serve_amo_d = 1'b0; + // save to MSHR + mshr_d.valid = 1'b1; + mshr_d.we = miss_req_we[i]; + mshr_d.id = i; + mshr_d.addr = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0]; + mshr_d.wdata = miss_req_wdata[i]; + mshr_d.be = miss_req_be[i]; + break; + end + end + end + + // ~> we missed on the cache + MISS: begin + // 1. Check if there is an empty cache-line + // 2. If not -> evict one + req_o = '1; + addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; + state_d = MISS_REPL; + miss_o = 1'b1; + end + + // ~> second miss cycle + MISS_REPL: begin + // if all are valid we need to evict one, pseudo random from LFSR + if (&valid_way) begin + lfsr_enable = 1'b1; + evict_way_d = lfsr_oh; + // do we need to write back the cache line? 
+ if (data_i[lfsr_bin].dirty) begin + state_d = WB_CACHELINE_MISS; + evict_cl_d.tag = data_i[lfsr_bin].tag; + evict_cl_d.data = data_i[lfsr_bin].data; + cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; + // no - we can request a cache line now + end else state_d = REQ_CACHELINE; + // we have at least one free way + end else begin + // get victim cache-line by looking for the first non-valid bit + evict_way_d = get_victim_cl(~valid_way); + state_d = REQ_CACHELINE; + end + end + + // ~> we can just load the cache-line, the way is store in evict_way_q + REQ_CACHELINE: begin + req_fsm_miss_valid = 1'b1; + req_fsm_miss_addr = mshr_q.addr; + + if (gnt_miss_fsm) begin + state_d = SAVE_CACHELINE; + miss_gnt_o[mshr_q.id] = 1'b1; + end + end + + // ~> replace the cacheline + SAVE_CACHELINE: begin + // calculate cacheline offset + automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; + cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6; + // we've got a valid response from refill unit + if (valid_miss_fsm) begin + + addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; + req_o = evict_way_q; + we_o = 1'b1; + be_o = '1; + be_o.vldrty = evict_way_q; + data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]; + data_o.data = data_miss_fsm; + data_o.valid = 1'b1; + data_o.dirty = 1'b0; + + // is this a write? 
+ if (mshr_q.we) begin + // Yes, so safe the updated data now + for (int i = 0; i < 8; i++) begin + // check if we really want to write the corresponding byte + if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i]; + end + // its immediately dirty if we write + data_o.dirty = 1'b1; + end + // reset MSHR + mshr_d.valid = 1'b0; + // go back to idle + state_d = IDLE; + end + end + + // ------------------------------ + // Write Back Operation + // ------------------------------ + // ~> evict a cache line from way saved in evict_way_q + WB_CACHELINE_FLUSH, WB_CACHELINE_MISS: begin + + req_fsm_miss_valid = 1'b1; + req_fsm_miss_addr = { + evict_cl_q.tag, + cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], + {{DCACHE_BYTE_OFFSET} {1'b0}} + }; + req_fsm_miss_be = '1; + req_fsm_miss_we = 1'b1; + req_fsm_miss_wdata = evict_cl_q.data; + + // we've got a grant --> this is timing critical, think about it + if (gnt_miss_fsm) begin + // write status array + addr_o = cnt_q; + req_o = 1'b1; + we_o = 1'b1; + data_o.valid = INVALIDATE_ON_FLUSH ? 1'b0 : 1'b1; + // invalidate + be_o.vldrty = evict_way_q; + // go back to handling the miss or flushing, depending on where we came from + state_d = (state_q == WB_CACHELINE_MISS) ? 
MISS : FLUSH_REQ_STATUS; + end + end + + // ------------------------------ + // Flushing & Initialization + // ------------------------------ + // ~> make another request to check the same cache-line if there are still some valid entries + FLUSH_REQ_STATUS: begin + req_o = '1; + addr_o = cnt_q; + state_d = FLUSHING; + end + + FLUSHING: begin + // this has priority + // at least one of the cache lines is dirty + if (|evict_way) begin + // evict cache line, look for the first cache-line which is dirty + evict_way_d = get_victim_cl(evict_way); + evict_cl_d = data_i[one_hot_to_bin(evict_way)]; + state_d = WB_CACHELINE_FLUSH; + // not dirty ~> increment and continue + end else begin + // increment and re-request + cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); + state_d = FLUSH_REQ_STATUS; + addr_o = cnt_q; + req_o = 1'b1; + be_o.vldrty = INVALIDATE_ON_FLUSH ? '1 : '0; + we_o = 1'b1; + // finished with flushing operation, go back to idle + if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) begin + // only acknowledge if the flush wasn't triggered by an atomic + flush_ack_o = ~serve_amo_q; + // if we are flushing because of an AMO go to serve it + if (serve_amo_q) begin + state_d = AMO_REQ; + serve_amo_d = 1'b0; + end else begin + state_d = IDLE; + end + end + end + end + + // ~> only called after reset + INIT: begin + // initialize status array + addr_o = cnt_q; + req_o = 1'b1; + we_o = 1'b1; + // only write the dirty array + be_o.vldrty = '1; + cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); + // finished initialization + if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) state_d = IDLE; + end + // ---------------------- + // AMOs + // ---------------------- + // ~> we are here because we need to do the AMO, the cache is clean at this point + AMO_REQ: begin + amo_bypass_req.req = 1'b1; + amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ; + amo_bypass_req.amo = amo_req_i.amo_op; + // address is in operand a + 
amo_bypass_req.addr = amo_req_i.operand_a; + if (amo_req_i.amo_op != AMO_LR) begin + amo_bypass_req.we = 1'b1; + end + amo_bypass_req.size = amo_req_i.size; + // AXI implements CLR op instead of AND, negate operand + if (amo_req_i.amo_op == AMO_AND) begin + amo_operand_b = ~amo_req_i.operand_b; + end else begin + amo_operand_b = amo_req_i.operand_b; + end + // align data and byte-enable to correct byte lanes + amo_bypass_req.wdata = amo_operand_b; + if (amo_req_i.size == 2'b11) begin + // 64b transfer + amo_bypass_req.be = 8'b11111111; + end else begin + // 32b transfer + if (amo_req_i.operand_a[2:0] == '0) begin + // 64b aligned -> activate lower 4 byte lanes + amo_bypass_req.be = 8'b00001111; + end else begin + // 64b unaligned -> activate upper 4 byte lanes + amo_bypass_req.be = 8'b11110000; + amo_bypass_req.wdata = amo_operand_b[31:0] << 32; + end + end + + // when request is accepted, wait for response + if (amo_bypass_rsp.gnt) begin + if (amo_bypass_rsp.valid) begin + state_d = IDLE; + amo_resp_o.ack = 1'b1; + amo_resp_o.result = amo_bypass_rsp.rdata; + end else begin + state_d = AMO_WAIT_RESP; + end + end + end + AMO_WAIT_RESP: begin + if (amo_bypass_rsp.valid) begin + state_d = IDLE; + amo_resp_o.ack = 1'b1; + // Request is assumed to be still valid (ack not granted yet) + if (amo_req_i.size == 2'b10) begin + // 32b request + logic [31:0] halfword; + if (amo_req_i.operand_a[2:0] == '0) begin + // 64b aligned -> activate lower 4 byte lanes + halfword = amo_bypass_rsp.rdata[31:0]; + end else begin + // 64b unaligned -> activate upper 4 byte lanes + halfword = amo_bypass_rsp.rdata[63:32]; + end + // Sign-extend 32b requests as per RISC-V spec + amo_resp_o.result = {{32{halfword[31]}}, halfword}; + end else begin + // 64b request + amo_resp_o.result = amo_bypass_rsp.rdata; + end + end + end + endcase + end + + // check MSHR for aliasing + always_comb begin + + mshr_addr_matches_o = 'b0; + mshr_index_matches_o = 'b0; + + for (int i = 0; i < NR_PORTS; i++) begin 
+ // check mshr for potential matching of other units, exclude the unit currently being served + if (mshr_q.valid && mshr_addr_i[i][55:DCACHE_BYTE_OFFSET] == mshr_q.addr[55:DCACHE_BYTE_OFFSET]) begin + mshr_addr_matches_o[i] = 1'b1; + end + + // same as previous, but checking only the index + if (mshr_q.valid && mshr_addr_i[i][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]) begin + mshr_index_matches_o[i] = 1'b1; + end + end + end + // -------------------- + // Sequential Process + // -------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + mshr_q <= '0; + state_q <= INIT; + cnt_q <= '0; + evict_way_q <= '0; + evict_cl_q <= '0; + serve_amo_q <= 1'b0; + end else begin + mshr_q <= mshr_d; + state_q <= state_d; + cnt_q <= cnt_d; + evict_way_q <= evict_way_d; + evict_cl_q <= evict_cl_d; + serve_amo_q <= serve_amo_d; + end + end + + //pragma translate_off +`ifndef VERILATOR + // assert that cache only hits on one way + assert property (@(posedge clk_i) $onehot0(evict_way_q)) + else $warning("Evict-way should be one-hot encoded"); +`endif + //pragma translate_on + + // ---------------------- + // Pack bypass ports + // ---------------------- + always_comb begin + logic [$clog2(NR_BYPASS_PORTS)-1:0] id; + + // Pack MHSR ports first + for (id = 0; id < NR_PORTS; id++) begin + bypass_ports_req[id].req = miss_req_valid[id] & miss_req_bypass[id]; + bypass_ports_req[id].reqtype = ariane_pkg::SINGLE_REQ; + bypass_ports_req[id].amo = AMO_NONE; + bypass_ports_req[id].id = 4'b1000 | 4'(id); + bypass_ports_req[id].addr = miss_req_addr[id]; + bypass_ports_req[id].wdata = miss_req_wdata[id]; + bypass_ports_req[id].we = miss_req_we[id]; + bypass_ports_req[id].be = miss_req_be[id]; + bypass_ports_req[id].size = miss_req_size[id]; + + bypass_gnt_o[id] = bypass_ports_rsp[id].gnt; + bypass_valid_o[id] = bypass_ports_rsp[id].valid; + bypass_data_o[id] = bypass_ports_rsp[id].rdata; + end + + // AMO 
port has lowest priority + bypass_ports_req[id] = amo_bypass_req; + amo_bypass_rsp = bypass_ports_rsp[id]; + end + + // ---------------------- + // Arbitrate bypass ports + // ---------------------- + axi_adapter_arbiter #( + .NR_PORTS (NR_BYPASS_PORTS), + .MAX_OUTSTANDING_REQ(CVA6Cfg.MaxOutstandingStores), + .req_t (bypass_req_t), + .rsp_t (bypass_rsp_t) + ) i_bypass_arbiter ( + .clk_i (clk_i), + .rst_ni(rst_ni), + // Master Side + .req_i (bypass_ports_req), + .rsp_o (bypass_ports_rsp), + // Slave Side + .req_o (bypass_adapter_req), + .rsp_i (bypass_adapter_rsp) + ); + + // ---------------------- + // Bypass AXI Interface + // ---------------------- + // Cast bypass_adapter_req.addr to axi_adapter port size + logic [riscv::XLEN-1:0] bypass_addr; + assign bypass_addr = bypass_adapter_req.addr; + + axi_adapter #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (64), + .CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET), + .axi_req_t (axi_req_t), + .axi_rsp_t (axi_rsp_t) + ) i_bypass_axi_adapter ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .req_i(bypass_adapter_req.req), + .type_i(bypass_adapter_req.reqtype), + .amo_i(bypass_adapter_req.amo), + .id_i(({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, bypass_adapter_req.id})), + .addr_i(bypass_addr), + .wdata_i(bypass_adapter_req.wdata), + .we_i(bypass_adapter_req.we), + .be_i(bypass_adapter_req.be), + .size_i(bypass_adapter_req.size), + .gnt_o(bypass_adapter_rsp.gnt), + .valid_o(bypass_adapter_rsp.valid), + .rdata_o(bypass_adapter_rsp.rdata), + .id_o(), // not used, single outstanding request in arbiter + .critical_word_o(), // not used for single requests + .critical_word_valid_o(), // not used for single requests + .axi_req_o(axi_bypass_o), + .axi_resp_i(axi_bypass_i) + ); + + // ---------------------- + // Cache Line AXI Refill + // ---------------------- + // Cast req_fsm_miss_addr to axi_adapter port size + logic [riscv::XLEN-1:0] miss_addr; + assign miss_addr = req_fsm_miss_addr; + + axi_adapter #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH 
(DCACHE_LINE_WIDTH), + .CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET), + .axi_req_t (axi_req_t), + .axi_rsp_t (axi_rsp_t) + ) i_miss_axi_adapter ( + .clk_i, + .rst_ni, + .req_i (req_fsm_miss_valid), + .type_i (req_fsm_miss_req), + .amo_i (AMO_NONE), + .gnt_o (gnt_miss_fsm), + .addr_i (miss_addr), + .we_i (req_fsm_miss_we), + .wdata_i (req_fsm_miss_wdata), + .be_i (req_fsm_miss_be), + .size_i (req_fsm_miss_size), + .id_i ({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, 4'b0111}), + .valid_o (valid_miss_fsm), + .rdata_o (data_miss_fsm), + .id_o (), + .critical_word_o (critical_word_o), + .critical_word_valid_o(critical_word_valid_o), + .axi_req_o (axi_data_o), + .axi_resp_i (axi_data_i) + ); + + // ----------------- + // Replacement LFSR + // ----------------- + lfsr_8bit #( + .WIDTH(DCACHE_SET_ASSOC) + ) i_lfsr ( + .en_i (lfsr_enable), + .refill_way_oh (lfsr_oh), + .refill_way_bin(lfsr_bin), + .* + ); + + // ----------------- + // Struct Split + // ----------------- + // Hack as system verilog support in modelsim seems to be buggy here + always_comb begin + automatic miss_req_t miss_req; + + for (int unsigned i = 0; i < NR_PORTS; i++) begin + miss_req = miss_req_t'(miss_req_i[i]); + miss_req_valid[i] = miss_req.valid; + miss_req_bypass[i] = miss_req.bypass; + miss_req_addr[i] = miss_req.addr; + miss_req_wdata[i] = miss_req.wdata; + miss_req_we[i] = miss_req.we; + miss_req_be[i] = miss_req.be; + miss_req_size[i] = miss_req.size; + end + end +endmodule +
+// --------------
+// AXI Arbiter
+// --------------
+// Funnels NR_PORTS request/response port pairs onto one downstream port; in IDLE the lowest-indexed requester is picked.
+// Description: Arbitrates access to AXI refill/bypass
+// A picked port stays selected (state SERVING) until its outstanding-transaction counter is back at zero.
+module axi_adapter_arbiter #(
+    parameter NR_PORTS = 4,  // number of upstream (master-side) ports
+    parameter MAX_OUTSTANDING_REQ = 0,  // sizes the outstanding counter; SERVING stops accepting at MAX_OUTSTANDING_REQ - 1
+    parameter type req_t = std_cache_pkg::bypass_req_t,  // request struct; this module reads its .req and .addr fields
+    parameter type rsp_t = std_cache_pkg::bypass_rsp_t  // response struct; this module uses its .gnt, .valid and .rdata fields
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    // Master ports
+    input req_t [NR_PORTS-1:0] req_i,
+    output rsp_t [NR_PORTS-1:0] rsp_o,
+    // Slave port
+    output req_t req_o,
+    input rsp_t rsp_i
+);
+
+  localparam MAX_OUTSTANDING_CNT_WIDTH = $clog2(
+      MAX_OUTSTANDING_REQ + 1
+  ) > 0 ? $clog2(
+      MAX_OUTSTANDING_REQ + 1
+  ) : 1;  // counter width, clamped to at least one bit
+
+  typedef logic [MAX_OUTSTANDING_CNT_WIDTH-1:0] outstanding_cnt_t;
+
+  enum logic {
+    IDLE,  // no transaction in flight, looking for a requesting port
+    SERVING  // port sel_q is selected and has a pending or outstanding transaction
+  }
+      state_d, state_q;
+
+  req_t req_d, req_q;  // latched copy of the selected port's request, driven downstream by default
+  logic [NR_PORTS-1:0] sel_d, sel_q;  // NOTE(review): NR_PORTS bits wide but used as a binary port index (array index, shift amount), not one-hot
+  outstanding_cnt_t outstanding_cnt_d, outstanding_cnt_q;
+
+  logic [NR_PORTS-1:0] req_flat;  // one request bit per port
+  logic any_unselected_port_valid;
+
+  for (genvar i = 0; i < NR_PORTS; i++) begin : gen_req_flat
+    assign req_flat[i] = req_i[i].req;
+  end
+  assign any_unselected_port_valid = |(req_flat & ~(1 << sel_q));  // any port other than sel_q requesting
+
+
+  always_comb begin
+    sel_d = sel_q;  // defaults: hold all registered state
+    outstanding_cnt_d = outstanding_cnt_q;
+
+    state_d = state_q;
+    req_d = req_q;
+
+    req_o = req_q;  // keep presenting the latched request unless a state overrides it below
+
+    rsp_o = '0;  // no gnt/valid towards any port by default
+    rsp_o[sel_q].rdata = rsp_i.rdata;  // read data is always routed to the currently selected port
+
+    case (state_q)
+
+      IDLE: begin
+        // wait for incoming requests; the loop stops at the first (lowest-index) requesting port
+        for (int unsigned i = 0; i < NR_PORTS; i++) begin
+          if (req_i[i].req == 1'b1) begin
+            sel_d = i[$bits(sel_d)-1:0];
+            state_d = SERVING;
+            break;
+          end
+        end
+
+        req_d = req_i[sel_d];  // sel_d is already updated here; it still equals sel_q when nobody requests
+        req_o = req_i[sel_d];  // forward the request combinationally in the same cycle
+        rsp_o[sel_d].gnt = req_i[sel_d].req;  // the port is granted as soon as it is picked, independent of rsp_i.gnt
+
+        // Count outstanding transactions, i.e. requests which have been
+        // granted but response hasn't arrived yet
+        if (req_o.req && rsp_i.gnt) begin
+          req_d.req = 1'b0;  // accepted downstream: do not re-issue the latched request
+          outstanding_cnt_d += 1;
+        end
+      end
+
+      SERVING: begin
+        // We can accept multiple outstanding transactions from same port.
+        // To ensure fairness, we allow this only if all other ports are idle
+        if ((!req_o.req) && !any_unselected_port_valid &&
+            (outstanding_cnt_q != (MAX_OUTSTANDING_REQ - 1))) begin
+          if (req_i[sel_q].req) begin
+            req_d = req_i[sel_q];
+            req_o = req_i[sel_q];
+            rsp_o[sel_q].gnt = 1'b1;
+            state_d = SERVING;  // same as the default assignment above
+          end
+        end
+
+        // Count outstanding transactions, i.e. requests which have been
+        // granted but response hasn't arrived yet
+        if (req_o.req && rsp_i.gnt) begin
+          req_d.req = 1'b0;
+          outstanding_cnt_d += 1;
+        end
+        if (rsp_i.valid) begin  // a response retires one outstanding transaction
+          outstanding_cnt_d -= 1;
+          rsp_o[sel_q].valid = 1'b1;
+
+          if ((outstanding_cnt_d == 0) && (!req_o.req || rsp_i.gnt)) state_d = IDLE;  // counter (after this cycle's +1/-1) is zero and no request is still waiting for a grant
+        end
+      end
+
+      default:  /* default */;
+    endcase
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      state_q <= IDLE;
+      sel_q <= '0;
+      req_q <= '0;
+      outstanding_cnt_q <= '0;
+    end else begin
+      state_q <= state_d;
+      sel_q <= sel_d;
+      req_q <= req_d;
+      outstanding_cnt_q <= outstanding_cnt_d;
+    end
+  end
+  // ------------
+  // Assertions (simulation only, compiled out for Verilator)
+  // ------------
+
+  //pragma translate_off
+`ifndef VERILATOR
+  // make sure that we eventually get an rvalid after we received a grant (NOTE(review): ##[1:$] requires valid in a later cycle than gnt - confirm same-cycle responses cannot occur)
+  assert property (@(posedge clk_i) rsp_i.gnt |-> ##[1:$] rsp_i.valid)
+  else begin
+    $error("There was a grant without a rvalid");
+    $stop();
+  end
+  // assert that there is no downstream grant without a request being presented (sampled on the falling clock edge)
+  assert property (@(negedge clk_i) rsp_i.gnt |-> req_o.req)
+  else begin
+    $error("There was a grant without a request.");
+    $stop();
+  end
+  // assert that the address does not contain X when request is sent
+  assert property (@(posedge clk_i) (req_o.req) |-> (!$isunknown(req_o.addr)))
+  else begin
+    $error("address contains X when request is set");
+    $stop();
+  end
+
+`endif
+  //pragma translate_on
+endmodule
diff --git a/test/type_param/core/cache_subsystem/std_cache_subsystem.sv b/test/type_param/core/cache_subsystem/std_cache_subsystem.sv new file mode 100644 index 0000000..45ba8bd --- /dev/null +++ b/test/type_param/core/cache_subsystem/std_cache_subsystem.sv @@ -0,0 +1,315 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License.
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba , ETH Zurich +// Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Standard Ariane cache subsystem with instruction cache and +// write-back data cache. + +
+module std_cache_subsystem
+  import ariane_pkg::*;
+  import std_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned NumPorts = 4,  // number of D$ request ports, forwarded to std_nbdcache
+    parameter type axi_ar_chan_t = logic,  // payload type of the AR stream arbiter
+    parameter type axi_aw_chan_t = logic,  // payload type of the AW stream arbiter
+    parameter type axi_w_chan_t = logic,  // payload type of the W stream mux
+    parameter type axi_req_t = logic,  // AXI request bundle (ar/aw/w payloads plus valid/ready bits)
+    parameter type axi_rsp_t = logic  // AXI response bundle (r/b payloads plus valid/ready bits)
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input riscv::priv_lvl_t priv_lvl_i,
+    // I$
+    input logic icache_en_i,  // enable icache (or bypass e.g: in debug mode)
+    input logic icache_flush_i,  // flush the icache, flush and kill have to be asserted together
+    output logic icache_miss_o,  // to performance counter
+    // address translation requests
+    input icache_areq_t icache_areq_i,  // to/from frontend
+    output icache_arsp_t icache_areq_o,
+    // data requests
+    input icache_dreq_t icache_dreq_i,  // to/from frontend
+    output icache_drsp_t icache_dreq_o,
+    // AMOs
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+    // D$
+    // Cache management
+    input logic dcache_enable_i,  // from CSR
+    input logic dcache_flush_i,  // high until acknowledged
+    output logic dcache_flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
+    output logic dcache_miss_o,  // we missed on a ld/st
+    output logic wbuffer_empty_o,  // statically set to 1, as there is no wbuffer in this cache system
+    // Request ports
+    input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i,  // to/from LSU
+    output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o,  // to/from LSU
+    // memory side
+    output axi_req_t axi_req_o,
+    input axi_rsp_t axi_resp_i
+);
+
+  assign wbuffer_empty_o = 1'b1;
+
+  axi_req_t axi_req_icache;  // three internal AXI masters (icache, dcache bypass, dcache refill) merged onto axi_req_o below
+  axi_rsp_t axi_resp_icache;
+  axi_req_t axi_req_bypass;
+  axi_rsp_t axi_resp_bypass;
+  axi_req_t axi_req_data;
+  axi_rsp_t axi_resp_data;
+
+  cva6_icache_axi_wrapper #(
+      .CVA6Cfg (CVA6Cfg),
+      .axi_req_t(axi_req_t),
+      .axi_rsp_t(axi_rsp_t)
+  ) i_cva6_icache_axi_wrapper (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .priv_lvl_i(priv_lvl_i),
+      .flush_i (icache_flush_i),
+      .en_i (icache_en_i),
+      .miss_o (icache_miss_o),
+      .areq_i (icache_areq_i),
+      .areq_o (icache_areq_o),
+      .dreq_i (icache_dreq_i),
+      .dreq_o (icache_dreq_o),
+      .axi_req_o (axi_req_icache),
+      .axi_resp_i(axi_resp_icache)
+  );
+
+  // decreasing priority
+  // Port 0: PTW
+  // Port 1: Load Unit
+  // Port 2: Accelerator
+  // Port 3: Store Unit
+  std_nbdcache #(
+      .CVA6Cfg (CVA6Cfg),
+      .NumPorts (NumPorts),
+      .axi_req_t(axi_req_t),
+      .axi_rsp_t(axi_rsp_t)
+  ) i_nbdcache (
+      .clk_i,
+      .rst_ni,
+      .enable_i (dcache_enable_i),
+      .flush_i (dcache_flush_i),
+      .flush_ack_o (dcache_flush_ack_o),
+      .miss_o (dcache_miss_o),
+      .axi_bypass_o(axi_req_bypass),
+      .axi_bypass_i(axi_resp_bypass),
+      .axi_data_o (axi_req_data),
+      .axi_data_i (axi_resp_data),
+      .req_ports_i (dcache_req_ports_i),
+      .req_ports_o (dcache_req_ports_o),
+      .amo_req_i,
+      .amo_resp_o
+  );
+
+  // -----------------------
+  // Arbitrate AXI Ports
+  // -----------------------
+  logic [1:0] w_select, w_select_fifo, w_select_arbiter;  // W-channel master select: 0 = icache, 1 = bypass, 2 = dcache
+  logic [1:0] w_fifo_usage;
+  logic w_fifo_empty, w_fifo_full;
+
+
+  // AR Channel
+  stream_arbiter #(
+      .DATA_T(axi_ar_chan_t),
+      .N_INP (3)
+  ) i_stream_arbiter_ar (
+      .clk_i,
+      .rst_ni,
+      .inp_data_i ({axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar}),
+      .inp_valid_i({axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid}),
+      .inp_ready_o({axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready}),
+      .oup_data_o (axi_req_o.ar),
+      .oup_valid_o(axi_req_o.ar_valid),
+      .oup_ready_i(axi_resp_i.ar_ready)
+  );
+
+  // AW Channel
+  stream_arbiter #(
+      .DATA_T(axi_aw_chan_t),
+      .N_INP (3)
+  ) i_stream_arbiter_aw (
+      .clk_i,
+      .rst_ni,
+      .inp_data_i ({axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw}),
+      .inp_valid_i({axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid}),
+      .inp_ready_o({axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready}),
+      .oup_data_o (axi_req_o.aw),
+      .oup_valid_o(axi_req_o.aw_valid),
+      .oup_ready_i(axi_resp_i.aw_ready)
+  );
+
+  // WID has been removed in AXI 4 so we need to keep track which AW request has been accepted
+  // to forward the correct write data. The master is recovered from the ID of the arbitrated AW request.
+  always_comb begin
+    w_select = 0;
+    unique casez (axi_req_o.aw.id)
+      4'b0111: w_select = 2;  // dcache
+      4'b1???: w_select = 1;  // bypass
+      default: w_select = 0;  // icache
+    endcase
+  end
+
+  // W Channel: FIFO of master selects, one entry per accepted AW whose write data is still being sent
+  fifo_v3 #(
+      .DATA_WIDTH (2),
+      // we can have a maximum of 4 outstanding transactions as each port is blocking
+      .DEPTH (4),
+      .FALL_THROUGH(1'b1)
+  ) i_fifo_w_channel (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i (1'b0),
+      .testmode_i(1'b0),
+      .full_o (w_fifo_full),
+      .empty_o (),  // leave open
+      .usage_o (w_fifo_usage),
+      .data_i (w_select),
+      // a new transaction was requested and granted
+      .push_i (axi_req_o.aw_valid & axi_resp_i.aw_ready),
+      // write ID to select the output MUX
+      .data_o (w_select_fifo),
+      // transaction has finished
+      .pop_i (axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last)
+  );
+
+  // In fall-through mode, the empty_o will be low when push_i is high (on zero usage).
+  // We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero.
+  assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full;
+
+  // icache will never write so select it as default (e.g.: when no arbitration is active)
+  // this is equal to setting it to zero
+  assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo;
+
+  stream_mux #(
+      .DATA_T(axi_w_chan_t),
+      .N_INP (3)
+  ) i_stream_mux_w (
+      .inp_data_i ({axi_req_data.w, axi_req_bypass.w, axi_req_icache.w}),  // reverse order of the AW/AR arbiters: element 0 (rightmost) is the icache here
+      .inp_valid_i({axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid}),
+      .inp_ready_o({axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready}),
+      .inp_sel_i (w_select_arbiter),
+      .oup_data_o (axi_req_o.w),
+      .oup_valid_o(axi_req_o.w_valid),
+      .oup_ready_i(axi_resp_i.w_ready)
+  );
+
+  // Route responses based on ID
+  // 0000 -> I$
+  // 0111 -> D$
+  // 1??? -> Bypass
+  // R Channel: the payload is broadcast to all three masters, only the valid/ready handshake is demultiplexed
+  assign axi_resp_icache.r = axi_resp_i.r;
+  assign axi_resp_bypass.r = axi_resp_i.r;
+  assign axi_resp_data.r = axi_resp_i.r;
+
+  logic [1:0] r_select;  // indexes the concatenations below: element 0 (rightmost) is the dcache, 2 the icache
+
+  always_comb begin
+    r_select = 0;
+    unique casez (axi_resp_i.r.id)
+      4'b0111: r_select = 0;  // dcache
+      4'b1???: r_select = 1;  // bypass
+      4'b0000: r_select = 2;  // icache
+      default: r_select = 0;
+    endcase
+  end
+
+  stream_demux #(
+      .N_OUP(3)
+  ) i_stream_demux_r (
+      .inp_valid_i(axi_resp_i.r_valid),
+      .inp_ready_o(axi_req_o.r_ready),
+      .oup_sel_i (r_select),
+      .oup_valid_o({axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid}),
+      .oup_ready_i({axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready})
+  );
+
+  // B Channel: same broadcast-payload / demuxed-handshake scheme as the R channel
+  logic [1:0] b_select;
+
+  assign axi_resp_icache.b = axi_resp_i.b;
+  assign axi_resp_bypass.b = axi_resp_i.b;
+  assign axi_resp_data.b = axi_resp_i.b;
+
+  always_comb begin
+    b_select = 0;
+    unique casez (axi_resp_i.b.id)
+      4'b0111: b_select = 0;  // dcache
+      4'b1???: b_select = 1;  // bypass
+      4'b0000: b_select = 2;  // icache
+      default: b_select = 0;
+    endcase
+  end
+
+  stream_demux #(
+      .N_OUP(3)
+  ) i_stream_demux_b (
+      .inp_valid_i(axi_resp_i.b_valid),
+      .inp_ready_o(axi_req_o.b_ready),
+      .oup_sel_i (b_select),
+      .oup_valid_o({axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid}),
+      .oup_ready_i({axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready})
+  );
+
+  ///////////////////////////////////////////////////////
+  // assertions (simulation only, compiled out for Verilator)
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+  a_invalid_instruction_fetch :
+  assert property (
+    @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
+  else
+    $warning(
+        // no leading finish_number: only $fatal takes one, $warning would print it as part of the message
+        "[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X",
+        icache_dreq_o.vaddr,
+        icache_dreq_o.data
+    );
+
+  a_invalid_write_data :
+  assert property (
+    @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> |dcache_req_ports_i[NumPorts-1].data_be |-> (|dcache_req_ports_i[NumPorts-1].data_wdata) !== 1'hX)
+  else
+    $warning(
+        // no leading finish_number: only $fatal takes one, $warning would print it as part of the message
+        "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
+        {
+          dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
+        },
+        dcache_req_ports_i[NumPorts-1].data_be,
+        dcache_req_ports_i[NumPorts-1].data_wdata
+    );
+  generate
+    for (genvar j = 0; j < NumPorts - 1; j++) begin
+      a_invalid_read_data :
+      assert property (
+        @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
+      else
+        $warning(
+            // no leading finish_number: only $fatal takes one, $warning would print it as part of the message
+            "[l1 dcache] reading invalid data on port %01d: data=%016X",
+            j,
+            dcache_req_ports_o[j].data_rdata
+        );
+    end
+  endgenerate
+
+`endif
+  //pragma translate_on
+endmodule  // std_cache_subsystem
diff --git a/test/type_param/core/cache_subsystem/std_nbdcache.sv b/test/type_param/core/cache_subsystem/std_nbdcache.sv new file mode 100644 index 0000000..367c67c --- /dev/null +++ b/test/type_param/core/cache_subsystem/std_nbdcache.sv @@ -0,0 +1,279 @@ +// Copyright 2018 ETH
Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 13.10.2017 +// Description: Nonblocking private L1 dcache + +
+module std_nbdcache
+  import std_cache_pkg::*;
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned NumPorts = 4,  // number of request ports; one cache_ctrl instance is generated per port
+    parameter type axi_req_t = logic,  // AXI request bundle type, passed on to the miss handler
+    parameter type axi_rsp_t = logic  // AXI response bundle type, passed on to the miss handler
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    // Cache management
+    input logic enable_i,  // from CSR
+    input logic flush_i,  // high until acknowledged
+    output logic flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
+    output logic miss_o,  // we missed on a LD/ST
+    // AMOs
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+    // Request ports
+    input dcache_req_i_t [NumPorts-1:0] req_ports_i,  // request ports
+    output dcache_req_o_t [NumPorts-1:0] req_ports_o,  // request ports
+    // Cache AXI refill port
+    output axi_req_t axi_data_o,
+    input axi_rsp_t axi_data_i,
+    output axi_req_t axi_bypass_o,
+    input axi_rsp_t axi_bypass_i
+);
+
+  import std_cache_pkg::*;  // NOTE(review): std_cache_pkg is already imported in the module header; this looks redundant
+
+  // -------------------------------
+  // Controller <-> Arbiter (index 0 = miss handler, index i+1 = cache controller of request port i)
+  // -------------------------------
+  // 1. Miss handler
+  // 2. PTW
+  // 3. Load Unit
+  // 4. Accelerator
+  // 5. Store unit
+  logic [ NumPorts:0][ DCACHE_SET_ASSOC-1:0] req;
+  logic [ NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr;
+  logic [ NumPorts:0] gnt;
+  cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata;  // read data of all ways, shared by every requester
+  logic [ NumPorts:0][ DCACHE_TAG_WIDTH-1:0] tag;
+
+  cache_line_t [ NumPorts:0] wdata;
+  logic [ NumPorts:0] we;
+  cl_be_t [ NumPorts:0] be;
+  logic [ DCACHE_SET_ASSOC-1:0] hit_way;
+  // -------------------------------
+  // Controller <-> Miss unit
+  // -------------------------------
+  logic [ NumPorts-1:0] busy;
+  logic [ NumPorts-1:0][ 55:0] mshr_addr;
+  logic [ NumPorts-1:0] mshr_addr_matches;
+  logic [ NumPorts-1:0] mshr_index_matches;
+  logic [ 63:0] critical_word;
+  logic critical_word_valid;
+
+  logic [ NumPorts-1:0][ $bits(miss_req_t)-1:0] miss_req;  // miss requests are carried as flat vectors of $bits(miss_req_t)
+  logic [ NumPorts-1:0] miss_gnt;
+  logic [ NumPorts-1:0] active_serving;
+
+  logic [ NumPorts-1:0] bypass_gnt;
+  logic [ NumPorts-1:0] bypass_valid;
+  logic [ NumPorts-1:0][ 63:0] bypass_data;
+  // -------------------------------
+  // Arbiter <-> Data RAM
+  // -------------------------------
+  logic [ DCACHE_SET_ASSOC-1:0] req_ram;  // one request bit per way
+  logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
+  logic we_ram;
+  cache_line_t wdata_ram;
+  cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram;
+  cl_be_t be_ram;
+
+  // ------------------
+  // Cache Controller (one per request port)
+  // ------------------
+  generate
+    for (genvar i = 0; i < NumPorts; i++) begin : master_ports
+      cache_ctrl #(
+          .CVA6Cfg(CVA6Cfg)
+      ) i_cache_ctrl (
+          .bypass_i (~enable_i),  // controller bypasses the cache while it is disabled
+          .busy_o (busy[i]),
+          // from core
+          .req_port_i(req_ports_i[i]),
+          .req_port_o(req_ports_o[i]),
+          // to SRAM array (slot i+1, slot 0 belongs to the miss handler)
+          .req_o (req[i+1]),
+          .addr_o (addr[i+1]),
+          .gnt_i (gnt[i+1]),
+          .data_i (rdata),
+          .tag_o (tag[i+1]),
+          .data_o (wdata[i+1]),
+          .we_o (we[i+1]),
+          .be_o (be[i+1]),
+          .hit_way_i (hit_way),
+
+          .miss_req_o (miss_req[i]),
+          .miss_gnt_i (miss_gnt[i]),
+          .active_serving_i (active_serving[i]),
+          .critical_word_i (critical_word),
+          .critical_word_valid_i(critical_word_valid),
+          .bypass_gnt_i (bypass_gnt[i]),
+          .bypass_valid_i (bypass_valid[i]),
+          .bypass_data_i (bypass_data[i]),
+
+          .mshr_addr_o (mshr_addr[i]),
+          .mshr_addr_matches_i (mshr_addr_matches[i]),
+          .mshr_index_matches_i(mshr_index_matches[i]),
+          .*
+      );
+    end
+  endgenerate
+
+  // ------------------
+  // Miss Handling Unit
+  // ------------------
+  miss_handler #(
+      .CVA6Cfg (CVA6Cfg),
+      .NR_PORTS (NumPorts),
+      .axi_req_t(axi_req_t),
+      .axi_rsp_t(axi_rsp_t)
+  ) i_miss_handler (
+      .flush_i (flush_i),
+      .busy_i (|busy),  // high while any cache controller reports busy
+      // AMOs
+      .amo_req_i (amo_req_i),
+      .amo_resp_o (amo_resp_o),
+      .miss_req_i (miss_req),
+      .miss_gnt_o (miss_gnt),
+      .bypass_gnt_o (bypass_gnt),
+      .bypass_valid_o (bypass_valid),
+      .bypass_data_o (bypass_data),
+      .critical_word_o (critical_word),
+      .critical_word_valid_o(critical_word_valid),
+      .mshr_addr_i (mshr_addr),
+      .mshr_addr_matches_o (mshr_addr_matches),
+      .mshr_index_matches_o (mshr_index_matches),
+      .active_serving_o (active_serving),
+      .req_o (req[0]),
+      .addr_o (addr[0]),
+      .data_i (rdata),
+      .be_o (be[0]),
+      .data_o (wdata[0]),
+      .we_o (we[0]),
+      .axi_bypass_o,
+      .axi_bypass_i,
+      .axi_data_o,
+      .axi_data_i,
+      .*
+  );
+
+  assign tag[0] = '0;  // slot 0 (miss handler) carries no tag; tie it to zero
+
+  // --------------
+  // Memory Arrays (one data SRAM and one tag SRAM per way; address, write data and byte enables are shared)
+  // --------------
+  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
+    sram #(
+        .DATA_WIDTH(DCACHE_LINE_WIDTH),
+        .NUM_WORDS (DCACHE_NUM_WORDS)
+    ) data_sram (
+        .req_i (req_ram[i]),
+        .rst_ni (rst_ni),
+        .we_i (we_ram),
+        .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),  // byte-offset bits are dropped: one word per cache line
+        .wuser_i('0),
+        .wdata_i(wdata_ram.data),
+        .be_i (be_ram.data),
+        .ruser_o(),
+        .rdata_o(rdata_ram[i].data),
+        .*
+    );
+
+    sram #(
+        .DATA_WIDTH(DCACHE_TAG_WIDTH),
+        .NUM_WORDS (DCACHE_NUM_WORDS)
+    ) tag_sram (
+        .req_i (req_ram[i]),
+        .rst_ni (rst_ni),
+        .we_i (we_ram),
+        .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
+        .wuser_i('0),
+        .wdata_i(wdata_ram.tag),
+        .be_i (be_ram.tag),
+        .ruser_o(),
+        .rdata_o(rdata_ram[i].tag),
+        .*
+    );
+
+  end
+
+  // ----------------
+  // Valid/Dirty Regs
+  // ----------------
+
+  // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
+  // note: if you have an SRAM that supports flat bit enables for your target technology,
+  // you can use it here to save the extra 4x overhead introduced by this workaround.
+  logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;  // presumably DCACHE_DIRTY_WIDTH = 2 * DCACHE_SET_ASSOC so each way owns one byte - confirm in ariane_pkg
+
+  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
+    assign dirty_wdata[8*i] = wdata_ram.dirty;  // bit 0 of way i's byte: dirty
+    assign dirty_wdata[8*i+1] = wdata_ram.valid;  // bit 1 of way i's byte: valid (remaining bits are not driven here)
+    assign rdata_ram[i].dirty = dirty_rdata[8*i];
+    assign rdata_ram[i].valid = dirty_rdata[8*i+1];
+  end
+
+  sram #(
+      .USER_WIDTH(1),
+      .DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
+      .NUM_WORDS (DCACHE_NUM_WORDS)
+  ) valid_dirty_sram (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .req_i (|req_ram),  // single shared array: accessed whenever any way is requested
+      .we_i (we_ram),
+      .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
+      .wuser_i('0),
+      .wdata_i(dirty_wdata),
+      .be_i (be_ram.vldrty),
+      .ruser_o(),
+      .rdata_o(dirty_rdata)
+  );
+
+  // ------------------------------------------------
+  // Tag Comparison and memory arbitration (NumPorts controllers plus the miss handler)
+  // ------------------------------------------------
+  tag_cmp #(
+      .CVA6Cfg (CVA6Cfg),
+      .NR_PORTS (NumPorts + 1),
+      .ADDR_WIDTH (DCACHE_INDEX_WIDTH),
+      .DCACHE_SET_ASSOC(DCACHE_SET_ASSOC)
+  ) i_tag_cmp (
+      .req_i (req),
+      .gnt_o (gnt),
+      .addr_i (addr),
+      .wdata_i (wdata),
+      .we_i (we),
+      .be_i (be),
+      .rdata_o (rdata),
+      .tag_i (tag),
+      .hit_way_o(hit_way),
+
+      .req_o (req_ram),
+      .addr_o (addr_ram),
+      .wdata_o(wdata_ram),
+      .we_o (we_ram),
+      .be_o (be_ram),
+      .rdata_i(rdata_ram),
+      .*
+  );
+
+
+  //pragma translate_off
+  initial begin
+    assert (DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16})  // start-of-simulation check of the line-to-AXI width ratio
+    else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth");
+  end
+  //pragma translate_on
+endmodule
diff --git a/test/type_param/core/cache_subsystem/tag_cmp.sv b/test/type_param/core/cache_subsystem/tag_cmp.sv new file mode 100644 index 0000000..a378c13 --- /dev/null
+++ b/test/type_param/core/cache_subsystem/tag_cmp.sv @@ -0,0 +1,106 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// Author: Florian Zaruba +// -------------- +// Tag Compare +// -------------- +// +// Description: Arbitrates access to cache memories, simplified request grant protocol +// checks for hit or miss on cache +// Ports are served by fixed priority (port 0 first); the tag compare uses the tag of the port latched in id_q, i.e. the port that was selected on the previous clock edge. +module tag_cmp #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NR_PORTS = 3, + parameter int unsigned ADDR_WIDTH = 64, + parameter type l_data_t = std_cache_pkg::cache_line_t, + parameter type l_be_t = std_cache_pkg::cl_be_t, + parameter int unsigned DCACHE_SET_ASSOC = 8 +) ( + input logic clk_i, + input logic rst_ni, + + input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i, + output logic [NR_PORTS-1:0] gnt_o, + input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i, + input l_data_t [NR_PORTS-1:0] wdata_i, + input logic [NR_PORTS-1:0] we_i, + input l_be_t [NR_PORTS-1:0] be_i, + output l_data_t [DCACHE_SET_ASSOC-1:0] rdata_o, + input logic [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later + output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way + + + output logic [DCACHE_SET_ASSOC-1:0] req_o, + output logic [ ADDR_WIDTH-1:0] addr_o, + output l_data_t wdata_o, + output logic we_o, + output l_be_t
be_o, + input l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i +); + + assign rdata_o = rdata_i; + // one hot encoded + logic [NR_PORTS-1:0] id_d, id_q; + logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag; + + always_comb begin : tag_sel + sel_tag = '0; + for (int unsigned i = 0; i < NR_PORTS; i++) if (id_q[i]) sel_tag = tag_i[i]; + end + + for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp + assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0; + end + + always_comb begin + + gnt_o = '0; + id_d = '0; + wdata_o = '0; + req_o = '0; + addr_o = '0; + be_o = '0; + we_o = '0; + // Request Side + // priority select: the lowest-numbered port with any req bit set wins and drives the RAM-side outputs; gnt_o is also set for every idle port visited before it (and for all ports when none requests) + for (int unsigned i = 0; i < NR_PORTS; i++) begin + req_o = req_i[i]; + id_d = (1'b1 << i); + gnt_o[i] = 1'b1; + addr_o = addr_i[i]; + be_o = be_i[i]; + we_o = we_i[i]; + wdata_o = wdata_i[i]; + + if (req_i[i]) break; + end + +`ifndef SYNTHESIS +`ifndef VERILATOR + // assert that cache only hits on one way + // this only needs to be checked one cycle after all ways have been requested + onehot : + assert property (@(posedge clk_i) disable iff (!rst_ni) &req_i |=> $onehot0(hit_way_o)) + else begin + $fatal(1, "Hit should be one-hot encoded"); + end +`endif +`endif + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + id_q <= 0; + end else begin + id_q <= id_d; + end + end + +endmodule diff --git a/test/type_param/core/cache_subsystem/wt_axi_adapter.sv b/test/type_param/core/cache_subsystem/wt_axi_adapter.sv new file mode 100644 index 0000000..1647f1d --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_axi_adapter.sv @@ -0,0 +1,712 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 08.08.2018 +// Description: adapter module to connect the L1D$ and L1I$ to a 64bit AXI bus. +// AXI ID bit 0 distinguishes icache (0) from dcache (1) traffic; ID bit 1 is additionally set for atomic (AMO/LR/SC) transactions. + + +module wt_axi_adapter + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned ReqFifoDepth = 2, + parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + // icache + input logic icache_data_req_i, + output logic icache_data_ack_o, + input icache_req_t icache_data_i, + // returning packets must be consumed immediately + output logic icache_rtrn_vld_o, + output icache_rtrn_t icache_rtrn_o, + + // dcache + input logic dcache_data_req_i, + output logic dcache_data_ack_o, + input dcache_req_t dcache_data_i, + // returning packets must be consumed immediately + output logic dcache_rtrn_vld_o, + output dcache_rtrn_t dcache_rtrn_o, + + // AXI port + output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i, + + // Invalidations + input logic [63:0] inval_addr_i, + input logic inval_valid_i, + output logic inval_ready_o +); + + // support up to 512bit cache lines + localparam AxiNumWords = (ariane_pkg::ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH > ariane_pkg::DCACHE_LINE_WIDTH) + + (ariane_pkg::DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH <= ariane_pkg::DCACHE_LINE_WIDTH) ; + localparam MaxNumWords = $clog2(CVA6Cfg.AxiDataWidth / 8); // log2(bytes per AXI beat); used below as the maximum rd_size encoding + localparam AxiRdBlenIcache = ariane_pkg::ICACHE_LINE_WIDTH /
CVA6Cfg.AxiDataWidth - 1; + localparam AxiRdBlenDcache = ariane_pkg::DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1; + + /////////////////////////////////////////////////////// + // request path + /////////////////////////////////////////////////////// + + icache_req_t icache_data; + logic icache_data_full, icache_data_empty; + dcache_req_t dcache_data; + logic dcache_data_full, dcache_data_empty; + + logic [1:0] arb_req, arb_ack; + logic arb_idx, arb_gnt; + + logic axi_rd_req, axi_rd_gnt; + logic axi_wr_req, axi_wr_gnt; + logic axi_wr_valid, axi_rd_valid, axi_rd_rdy, axi_wr_rdy; + logic axi_rd_lock, axi_wr_lock, axi_rd_exokay, axi_wr_exokay, wr_exokay; + logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr, axi_wr_addr; + logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen; + logic [2:0] axi_rd_size, axi_wr_size; + logic [CVA6Cfg.AxiIdWidth-1:0] + axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out; + logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] axi_wr_data; + logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] axi_wr_user; + logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data; + logic [CVA6Cfg.AxiUserWidth-1:0] axi_rd_user; + logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be; + logic [5:0] axi_wr_atop; + logic invalidate; + logic [$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] amo_off_d, amo_off_q; + // AMO generates r beat + logic amo_gen_r_d, amo_gen_r_q; + + logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] icache_rtrn_tid_d, icache_rtrn_tid_q; + logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_tid_d, dcache_rtrn_tid_q; + logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_rd_tid, dcache_rtrn_wr_tid; + logic dcache_rd_pop, dcache_wr_pop; + logic icache_rd_full, icache_rd_empty; + logic dcache_rd_full, dcache_rd_empty; + logic dcache_wr_full, dcache_wr_empty; + + assign icache_data_ack_o = icache_data_req_i & ~icache_data_full; + assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full; + + // arbiter + assign arb_req = { + ~(dcache_data_empty |
dcache_wr_full | dcache_rd_full), ~(icache_data_empty | icache_rd_full) + }; + + assign arb_gnt = axi_rd_gnt | axi_wr_gnt; + + rr_arb_tree #( + .NumIn (2), + .DataWidth(1), + .AxiVldRdy(1'b1), + .LockIn (1'b1) + ) i_rr_arb_tree ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (arb_req), + .gnt_o (arb_ack), + .data_i ('0), + .gnt_i (arb_gnt), + .req_o (), + .data_o (), + .idx_o (arb_idx) + ); + + // request side + always_comb begin : p_axi_req + // write channel + axi_wr_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx}; + axi_wr_data[0] = {(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}}; + axi_wr_user[0] = dcache_data.user; + // Cast to AXI address width + axi_wr_addr = {{CVA6Cfg.AxiAddrWidth-riscv::PLEN{1'b0}}, dcache_data.paddr}; + axi_wr_size = dcache_data.size; + axi_wr_req = 1'b0; + axi_wr_blen = '0;// single word writes + axi_wr_be = '0; + axi_wr_lock = '0; + axi_wr_atop = '0; + amo_off_d = amo_off_q; + amo_gen_r_d = amo_gen_r_q; + + // read channel + axi_rd_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx}; + axi_rd_req = 1'b0; + axi_rd_lock = '0; + axi_rd_blen = '0; + + if (dcache_data.paddr[2] == 1'b0) begin + axi_wr_user = {{64 - CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user}; + end else begin + axi_wr_user = {dcache_data.user, {64 - CVA6Cfg.AxiUserWidth{1'b0}}}; + end + + // arbiter mux + if (arb_idx) begin + // Cast to AXI address width + axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, dcache_data.paddr}; + // If dcache_data.size MSB is set, we want to read as much as possible + axi_rd_size = dcache_data.size[2] ?
MaxNumWords[2:0] : dcache_data.size; + if (dcache_data.size[2]) begin + axi_rd_blen = AxiRdBlenDcache[$clog2(AxiNumWords)-1:0]; + end + end else begin + // Cast to AXI address width + axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, icache_data.paddr}; + axi_rd_size = MaxNumWords[2:0]; // always request max number of words in case of ifill + if (!icache_data.nc) begin + axi_rd_blen = AxiRdBlenIcache[$clog2(AxiNumWords)-1:0]; + end + end + + // signal that an invalidation message + // needs to be generated + invalidate = 1'b0; + + // decode message type + if (|arb_req) begin + if (arb_idx == 0) begin + ////////////////////////////////////// + // IMISS + axi_rd_req = 1'b1; + ////////////////////////////////////// + end else begin + unique case (dcache_data.rtype) + ////////////////////////////////////// + wt_cache_pkg::DCACHE_LOAD_REQ: begin + axi_rd_req = 1'b1; + end + ////////////////////////////////////// + wt_cache_pkg::DCACHE_STORE_REQ: begin + axi_wr_req = 1'b1; + axi_wr_be = '0; + unique case (dcache_data.size[1:0]) + 2'b00: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte + 2'b01: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1; // hword + 2'b10: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1; // word + default: + if (riscv::IS_XLEN64) + axi_wr_be[0][dcache_data.paddr[$clog2( + CVA6Cfg.AxiDataWidth/8 + )-1:0]+:8] = '1; // dword + endcase + end + ////////////////////////////////////// + wt_cache_pkg::DCACHE_ATOMIC_REQ: begin + if (CVA6Cfg.RVA) begin + // default + // push back an invalidation here. + // since we only keep one read tx in flight, and since + // the dcache drains all writes/reads before executing + // an atomic, this is safe.
+ invalidate = arb_gnt; + axi_wr_req = 1'b1; + axi_wr_be = '0; + unique case (dcache_data.size[1:0]) + 2'b00: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte + 2'b01: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = + '1; // hword + 2'b10: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = + '1; // word + default: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] = + '1; // dword + endcase + amo_gen_r_d = 1'b1; + // need to use a separate ID here, so concat an additional bit + axi_wr_id_in[1] = 1'b1; + + unique case (dcache_data.amo_op) + AMO_LR: begin + axi_rd_lock = 1'b1; + axi_rd_req = 1'b1; + axi_rd_id_in[1] = 1'b1; + // tie to zero in this special case + axi_wr_req = 1'b0; + axi_wr_be = '0; + end + AMO_SC: begin + axi_wr_lock = 1'b1; + amo_gen_r_d = 1'b0; + // needed to properly encode success. store the result at offset within the returned + // AXI data word aligned with the requested word size.
+ amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)- + 1:0] & ~((1 << dcache_data.size[1:0]) - 1); + end + // RISC-V atops have a load semantic + AMO_SWAP: axi_wr_atop = axi_pkg::ATOP_ATOMICSWAP; + AMO_ADD: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD + }; + AMO_AND: begin + // in this case we need to invert the data to get a "CLR" + axi_wr_data[0] = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.data}}; + axi_wr_user = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.user}}; + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR + }; + end + AMO_OR: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET + }; + AMO_XOR: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR + }; + AMO_MAX: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX + }; + AMO_MAXU: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX + }; + AMO_MIN: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN + }; + AMO_MINU: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN + }; + default: ; // Do nothing + endcase + end + end + default: ; // Do nothing + ////////////////////////////////////// + endcase + end + end + end + + fifo_v3 #( + .dtype(icache_req_t), + .DEPTH(ReqFifoDepth) + ) i_icache_data_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (icache_data_full), + .empty_o (icache_data_empty), + .usage_o (), + .data_i (icache_data_i), + .push_i (icache_data_ack_o), + .data_o (icache_data), + .pop_i (arb_ack[0]) + ); + + fifo_v3 #( + .dtype(dcache_req_t), + .DEPTH(ReqFifoDepth) + ) i_dcache_data_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (dcache_data_full), + .empty_o
(dcache_data_empty), + .usage_o (), + .data_i (dcache_data_i), + .push_i (dcache_data_ack_o), + .data_o (dcache_data), + .pop_i (arb_ack[1]) + ); + + /////////////////////////////////////////////////////// + // meta info feedback fifos + /////////////////////////////////////////////////////// + + logic icache_rtrn_rd_en, dcache_rtrn_rd_en; + logic icache_rtrn_vld_d, icache_rtrn_vld_q, dcache_rtrn_vld_d, dcache_rtrn_vld_q; + + fifo_v3 #( + .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), + .DEPTH (MetaFifoDepth) + ) i_rd_icache_id ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (icache_rd_full), + .empty_o (icache_rd_empty), + .usage_o (), + .data_i (icache_data.tid), + .push_i (arb_ack[0] & axi_rd_gnt), + .data_o (icache_rtrn_tid_d), + .pop_i (icache_rtrn_vld_d) + ); + + fifo_v3 #( + .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), + .DEPTH (MetaFifoDepth) + ) i_rd_dcache_id ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (dcache_rd_full), + .empty_o (dcache_rd_empty), + .usage_o (), + .data_i (dcache_data.tid), + .push_i (arb_ack[1] & axi_rd_gnt), + .data_o (dcache_rtrn_rd_tid), + .pop_i (dcache_rd_pop) + ); + + fifo_v3 #( + .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), + .DEPTH (MetaFifoDepth) + ) i_wr_dcache_id ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (dcache_wr_full), + .empty_o (dcache_wr_empty), + .usage_o (), + .data_i (dcache_data.tid), + .push_i (arb_ack[1] & axi_wr_gnt), + .data_o (dcache_rtrn_wr_tid), + .pop_i (dcache_wr_pop) + ); + + // select correct tid to return + assign dcache_rtrn_tid_d = (dcache_wr_pop) ?
dcache_rtrn_wr_tid : dcache_rtrn_rd_tid; + + /////////////////////////////////////////////////////// + // return path + /////////////////////////////////////////////////////// + + // buffer write responses + logic b_full, b_empty, b_push, b_pop; + assign axi_wr_rdy = ~b_full; + assign b_push = axi_wr_valid & axi_wr_rdy; + + fifo_v3 #( + .DATA_WIDTH (CVA6Cfg.AxiIdWidth + 1), + .DEPTH (MetaFifoDepth), + .FALL_THROUGH(1'b1) + ) i_b_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (b_full), + .empty_o (b_empty), + .usage_o (), + .data_i ({axi_wr_exokay, axi_wr_id_out}), + .push_i (b_push), + .data_o ({wr_exokay, wr_id_out}), + .pop_i (b_pop) + ); + + // buffer read responses in shift regs + logic icache_first_d, icache_first_q, dcache_first_d, dcache_first_q; + logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] + icache_rd_shift_user_d, icache_rd_shift_user_q; + logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] + dcache_rd_shift_user_d, dcache_rd_shift_user_q; + logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] + icache_rd_shift_d, icache_rd_shift_q; + logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] + dcache_rd_shift_d, dcache_rd_shift_q; + wt_cache_pkg::dcache_in_t dcache_rtrn_type_d, dcache_rtrn_type_q; + wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q; + logic dcache_sc_rtrn, axi_rd_last; + + always_comb begin : p_axi_rtrn_shift + // output directly from regs + icache_rtrn_o = '0; + icache_rtrn_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK; + icache_rtrn_o.tid = icache_rtrn_tid_q; + icache_rtrn_o.data = icache_rd_shift_q; + icache_rtrn_o.user = icache_rd_shift_user_q; + icache_rtrn_vld_o = icache_rtrn_vld_q; + + dcache_rtrn_o = '0; + dcache_rtrn_o.rtype = dcache_rtrn_type_q; + dcache_rtrn_o.inv = dcache_rtrn_inv_q; + dcache_rtrn_o.tid = dcache_rtrn_tid_q; + dcache_rtrn_o.data =
dcache_rd_shift_q; + dcache_rtrn_o.user = dcache_rd_shift_user_q; + dcache_rtrn_vld_o = dcache_rtrn_vld_q; + + // read shift registers + icache_rd_shift_d = icache_rd_shift_q; + icache_rd_shift_user_d = icache_rd_shift_user_q; + dcache_rd_shift_d = dcache_rd_shift_q; + dcache_rd_shift_user_d = dcache_rd_shift_user_q; + icache_first_d = icache_first_q; + dcache_first_d = dcache_first_q; + + if (icache_rtrn_rd_en) begin + icache_first_d = axi_rd_last; + if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin + icache_rd_shift_d[0] = axi_rd_data; + end else begin + icache_rd_shift_d = { + axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1] + }; + end + icache_rd_shift_user_d = { + axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1] + }; + // if this is a single word transaction, we need to make sure that word is placed at offset 0 + if (icache_first_q) begin + icache_rd_shift_d[0] = axi_rd_data; + icache_rd_shift_user_d[0] = axi_rd_user; + end + end + + if (dcache_rtrn_rd_en) begin + dcache_first_d = axi_rd_last; + if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin + dcache_rd_shift_d[0] = axi_rd_data; + end else begin + dcache_rd_shift_d = { + axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1] + }; + end + dcache_rd_shift_user_d = { + axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1] + }; + // if this is a single word transaction, we need to make sure that word is placed at offset 0 + if (dcache_first_q) begin + dcache_rd_shift_d[0] = axi_rd_data; + dcache_rd_shift_user_d[0] = axi_rd_user; + end + end else if (CVA6Cfg.RVA && dcache_sc_rtrn) begin + // encode lr/sc success + dcache_rd_shift_d[0] = '0; + dcache_rd_shift_user_d[0] = '0; + dcache_rd_shift_d[0][amo_off_q*8] = (wr_exokay) ? '0 : 1'b1; + dcache_rd_shift_user_d[0][amo_off_q*8] = (wr_exokay) ?
'0 : 1'b1; + end + end + + // decode virtual read channels of icache + always_comb begin : p_axi_rtrn_decode + // we are not ready when invalidating + // note: b's are buffered separately + axi_rd_rdy = ~invalidate; + + icache_rtrn_rd_en = 1'b0; + icache_rtrn_vld_d = 1'b0; + + // decode virtual icache channel, + // this is independent on dcache decoding below + if (axi_rd_valid && axi_rd_id_out == 0 && axi_rd_rdy) begin + icache_rtrn_rd_en = 1'b1; + icache_rtrn_vld_d = axi_rd_last; + end + + dcache_rtrn_rd_en = 1'b0; + dcache_rtrn_vld_d = 1'b0; + dcache_rd_pop = 1'b0; + dcache_wr_pop = 1'b0; + dcache_rtrn_inv_d = '0; + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK; + b_pop = 1'b0; + dcache_sc_rtrn = 1'b0; + + // External invalidation requests (from coprocessor). This is safe as + // there are no other transactions when a coprocessor has pending stores. + inval_ready_o = 1'b0; + if (inval_valid_i) begin + inval_ready_o = 1'b1; + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; + dcache_rtrn_vld_d = 1'b1; + dcache_rtrn_inv_d.all = 1'b1; + dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + ////////////////////////////////////// + // dcache needs some special treatment + // for arbitration and decoding of atomics + ////////////////////////////////////// + // this is safe, there is no other read tx in flight than this atomic. + // note that this self invalidation is handled in this way due to the + // write-through cache architecture, which is aligned with the openpiton + // cache subsystem.
+ end else if (CVA6Cfg.RVA && invalidate) begin + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; + dcache_rtrn_vld_d = 1'b1; + + dcache_rtrn_inv_d.all = 1'b1; + dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + ////////////////////////////////////// + // read responses + // note that in case of atomics, the dcache sequentializes requests and + // guarantees that there are no other pending transactions in flight + end else if (axi_rd_valid && axi_rd_id_out[0] && axi_rd_rdy) begin + dcache_rtrn_rd_en = 1'b1; + dcache_rtrn_vld_d = axi_rd_last; + + // if this was an atomic op + if (CVA6Cfg.RVA && axi_rd_id_out[1]) begin + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK; + + // check if transaction was issued over write channel and pop that ID + if (!dcache_wr_empty) begin + dcache_wr_pop = axi_rd_last; + // if this is not the case, there MUST be an id in the read channel (LR) + end else begin + dcache_rd_pop = axi_rd_last; + end + end else begin + dcache_rd_pop = axi_rd_last; + end + ////////////////////////////////////// + // write responses, check b fifo + end else if (!b_empty) begin + b_pop = 1'b1; + + // this was an atomic + if (CVA6Cfg.RVA && wr_id_out[1]) begin + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK; + + // silently discard b response if we already popped the fifo + // with a R beat (iff the amo transaction generated an R beat) + if (!amo_gen_r_q) begin + dcache_rtrn_vld_d = 1'b1; + dcache_wr_pop = 1'b1; + dcache_sc_rtrn = 1'b1; + end + end else begin + // regular response + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_STORE_ACK; + dcache_rtrn_vld_d = 1'b1; + dcache_wr_pop = 1'b1; + end + end + ////////////////////////////////////// + end + + // remote invalidations are not supported yet (this needs a cache coherence protocol) + // note that the atomic transactions would also need a "master exclusive monitor" in that case + // assign icache_rtrn_o.inv.idx = '0; + // assign icache_rtrn_o.inv.way = '0; + //
assign icache_rtrn_o.inv.vld = '0; + // assign icache_rtrn_o.inv.all = '0; + + // assign dcache_rtrn_o.inv.idx = '0; + // assign dcache_rtrn_o.inv.way = '0; + // assign dcache_rtrn_o.inv.vld = '0; + // assign dcache_rtrn_o.inv.all = '0; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf + if (!rst_ni) begin + icache_first_q <= 1'b1; + dcache_first_q <= 1'b1; + icache_rd_shift_q <= '0; + icache_rd_shift_user_q <= '0; + dcache_rd_shift_q <= '0; + dcache_rd_shift_user_q <= '0; + icache_rtrn_vld_q <= '0; + dcache_rtrn_vld_q <= '0; + icache_rtrn_tid_q <= '0; + dcache_rtrn_tid_q <= '0; + dcache_rtrn_type_q <= wt_cache_pkg::DCACHE_LOAD_ACK; + dcache_rtrn_inv_q <= '0; + amo_off_q <= '0; + amo_gen_r_q <= 1'b0; + end else begin + icache_first_q <= icache_first_d; + dcache_first_q <= dcache_first_d; + icache_rd_shift_q <= icache_rd_shift_d; + icache_rd_shift_user_q <= icache_rd_shift_user_d; + dcache_rd_shift_q <= dcache_rd_shift_d; + dcache_rd_shift_user_q <= dcache_rd_shift_user_d; + icache_rtrn_vld_q <= icache_rtrn_vld_d; + dcache_rtrn_vld_q <= dcache_rtrn_vld_d; + icache_rtrn_tid_q <= icache_rtrn_tid_d; + dcache_rtrn_tid_q <= dcache_rtrn_tid_d; + dcache_rtrn_type_q <= dcache_rtrn_type_d; + dcache_rtrn_inv_q <= dcache_rtrn_inv_d; + amo_off_q <= amo_off_d; + amo_gen_r_q <= amo_gen_r_d; + end + end + + + /////////////////////////////////////////////////////// + // axi protocol shim + /////////////////////////////////////////////////////// + + axi_shim #( + .CVA6Cfg (CVA6Cfg), + .AxiNumWords(AxiNumWords), + .axi_req_t (axi_req_t), + .axi_rsp_t (axi_rsp_t) + ) i_axi_shim ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .rd_req_i (axi_rd_req), + .rd_gnt_o (axi_rd_gnt), + .rd_addr_i (axi_rd_addr), + .rd_blen_i (axi_rd_blen), + .rd_size_i (axi_rd_size), + .rd_id_i (axi_rd_id_in), + .rd_rdy_i (axi_rd_rdy), + .rd_lock_i (axi_rd_lock), + .rd_last_o (axi_rd_last), + .rd_valid_o (axi_rd_valid), + .rd_data_o (axi_rd_data), + .rd_user_o (axi_rd_user), + .rd_id_o (axi_rd_id_out),
+ .rd_exokay_o(axi_rd_exokay), + .wr_req_i (axi_wr_req), + .wr_gnt_o (axi_wr_gnt), + .wr_addr_i (axi_wr_addr), + .wr_data_i (axi_wr_data), + .wr_user_i (axi_wr_user), + .wr_be_i (axi_wr_be), + .wr_blen_i (axi_wr_blen), + .wr_size_i (axi_wr_size), + .wr_id_i (axi_wr_id_in), + .wr_lock_i (axi_wr_lock), + .wr_atop_i (axi_wr_atop), + .wr_rdy_i (axi_wr_rdy), + .wr_valid_o (axi_wr_valid), + .wr_id_o (axi_wr_id_out), + .wr_exokay_o(axi_wr_exokay), + .axi_req_o (axi_req_o), + .axi_resp_i (axi_resp_i) + ); + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + +`endif + //pragma translate_on + +endmodule // wt_axi_adapter diff --git a/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv b/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv new file mode 100644 index 0000000..ec09467 --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv @@ -0,0 +1,233 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Ariane cache subsystem that is compatible with the OpenPiton +// coherent memory system. +// +// Define PITON_ARIANE if you want to use this cache.
+// Define DCACHE_TYPE if you want to use this cache +// with a standard 64 bit AXI interface instead of the OpenPiton +// L1.5 interface. NOTE: in this module the memory-side adapter is selected by `ifdef PITON_ARIANE (wt_l15_adapter when defined, wt_axi_adapter otherwise). + + +module wt_cache_subsystem + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NumPorts = 4, + parameter type noc_req_t = logic, + parameter type noc_resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + // I$ + input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) + input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together + output logic icache_miss_o, // to performance counter + // address translation requests + input icache_areq_t icache_areq_i, // to/from frontend + output icache_arsp_t icache_areq_o, + // data requests + input icache_dreq_t icache_dreq_i, // to/from frontend + output icache_drsp_t icache_dreq_o, + // D$ + // Cache management + input logic dcache_enable_i, // from CSR + input logic dcache_flush_i, // high until acknowledged + output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic dcache_miss_o, // we missed on a ld/st + // For Performance Counter + output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, + // AMO interface + input amo_req_t dcache_amo_req_i, + output amo_resp_t dcache_amo_resp_o, + // Request ports + input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU + output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU + // writebuffer status + output logic wbuffer_empty_o, + output logic wbuffer_not_ni_o, + // memory side + output noc_req_t noc_req_o, + input noc_resp_t noc_resp_i, + // Invalidations + input logic [63:0] inval_addr_i, + input logic inval_valid_i, + output logic inval_ready_o + // TODO: interrupt interface +); + + logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld; +
wt_cache_pkg::icache_req_t icache_adapter; + wt_cache_pkg::icache_rtrn_t adapter_icache; + + + logic dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld; + wt_cache_pkg::dcache_req_t dcache_adapter; + wt_cache_pkg::dcache_rtrn_t adapter_dcache; + + cva6_icache #( + // use ID 0 for icache reads + .CVA6Cfg(CVA6Cfg), + .RdTxId (0) + ) i_cva6_icache ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (icache_flush_i), + .en_i (icache_en_i), + .miss_o (icache_miss_o), + .areq_i (icache_areq_i), + .areq_o (icache_areq_o), + .dreq_i (icache_dreq_i), + .dreq_o (icache_dreq_o), + .mem_rtrn_vld_i(adapter_icache_rtrn_vld), + .mem_rtrn_i (adapter_icache), + .mem_data_req_o(icache_adapter_data_req), + .mem_data_ack_i(adapter_icache_data_ack), + .mem_data_o (icache_adapter) + ); + + + // Note: + // Ports 0/1 for PTW and LD unit are read only. + // they have equal prio and are RR arbited + // Port 2 is write only and goes into the merging write buffer + wt_dcache #( + .CVA6Cfg (CVA6Cfg), + // use ID 1 for dcache reads and amos. note that the writebuffer + // uses all IDs up to DCACHE_MAX_TX-1 for write transactions. + .RdAmoTxId(1) + ) i_wt_dcache ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .enable_i (dcache_enable_i), + .flush_i (dcache_flush_i), + .flush_ack_o (dcache_flush_ack_o), + .miss_o (dcache_miss_o), + .wbuffer_empty_o (wbuffer_empty_o), + .wbuffer_not_ni_o(wbuffer_not_ni_o), + .amo_req_i (dcache_amo_req_i), + .amo_resp_o (dcache_amo_resp_o), + .req_ports_i (dcache_req_ports_i), + .req_ports_o (dcache_req_ports_o), + .miss_vld_bits_o (miss_vld_bits_o), + .mem_rtrn_vld_i (adapter_dcache_rtrn_vld), + .mem_rtrn_i (adapter_dcache), + .mem_data_req_o (dcache_adapter_data_req), + .mem_data_ack_i (adapter_dcache_data_ack), + .mem_data_o (dcache_adapter) + ); + + + /////////////////////////////////////////////////////// + // memory plumbing, either use 64bit AXI port or native + // L15 cache interface (derived from OpenSPARC CCX).
+ /////////////////////////////////////////////////////// + +`ifdef PITON_ARIANE + wt_l15_adapter #( + .CVA6Cfg(CVA6Cfg) + ) i_adapter ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .icache_data_req_i(icache_adapter_data_req), + .icache_data_ack_o(adapter_icache_data_ack), + .icache_data_i (icache_adapter), + .icache_rtrn_vld_o(adapter_icache_rtrn_vld), + .icache_rtrn_o (adapter_icache), + .dcache_data_req_i(dcache_adapter_data_req), + .dcache_data_ack_o(adapter_dcache_data_ack), + .dcache_data_i (dcache_adapter), + .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld), + .dcache_rtrn_o (adapter_dcache), + .l15_req_o (noc_req_o), + .l15_rtrn_i (noc_resp_i) + ); // NOTE(review): inval_addr_i/inval_valid_i are unused and inval_ready_o is not driven in this branch - confirm this is intended +`else + wt_axi_adapter #( + .CVA6Cfg (CVA6Cfg), + .axi_req_t(noc_req_t), + .axi_rsp_t(noc_resp_t) + ) i_adapter ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .icache_data_req_i(icache_adapter_data_req), + .icache_data_ack_o(adapter_icache_data_ack), + .icache_data_i (icache_adapter), + .icache_rtrn_vld_o(adapter_icache_rtrn_vld), + .icache_rtrn_o (adapter_icache), + .dcache_data_req_i(dcache_adapter_data_req), + .dcache_data_ack_o(adapter_dcache_data_ack), + .dcache_data_i (dcache_adapter), + .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld), + .dcache_rtrn_o (adapter_dcache), + .axi_req_o (noc_req_o), + .axi_resp_i (noc_resp_i), + .inval_addr_i (inval_addr_i), + .inval_valid_i (inval_valid_i), + .inval_ready_o (inval_ready_o) + ); +`endif + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + a_invalid_instruction_fetch : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) + else + $warning( + // no leading number here: unlike $fatal, $warning takes only the message arguments + "[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X", + icache_dreq_o.vaddr, + icache_dreq_o.data + ); + + for (genvar j = 0; j < riscv::XLEN / 8; j++) begin : gen_invalid_write_assertion + a_invalid_write_data : + assert property ( +
@(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX)) + else + $warning( + // no leading number here: unlike $fatal, $warning takes only the message arguments + "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X, databe=%016X", + { + dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index + }, + dcache_req_ports_i[NumPorts-1].data_be, + dcache_req_ports_i[NumPorts-1].data_wdata, + dcache_req_ports_i[NumPorts-1].data_be & dcache_req_ports_i[NumPorts-1].data_wdata + ); + end + + + for (genvar j = 0; j < NumPorts - 1; j++) begin : gen_assertion + a_invalid_read_data : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) + else + $warning( + // no leading number here: unlike $fatal, $warning takes only the message arguments + "[l1 dcache] reading invalid data on port %01d: data=%016X", + j, + dcache_req_ports_o[j].data_rdata + ); + end +`endif + //pragma translate_on + + +endmodule // wt_cache_subsystem diff --git a/test/type_param/core/cache_subsystem/wt_dcache.sv b/test/type_param/core/cache_subsystem/wt_dcache.sv new file mode 100644 index 0000000..af672d8 --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_dcache.sv @@ -0,0 +1,360 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 13.09.2018
+// Description: Write-Through Data cache that is compatible with openpiton. Structural top level that wires together the miss unit, one read controller per enabled read port, the write buffer and the memory arrays.
+
+
+module wt_dcache
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned NumPorts = 4,  // number of miss ports: ports 0..NumPorts-2 are read controllers, port NumPorts-1 is the write buffer
+    // ID to be used for read and AMO transactions.
+    // note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions
+    parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+
+    // Cache management
+    input logic enable_i,  // from CSR
+    input logic flush_i,  // high until acknowledged
+    output logic flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
+    output logic miss_o,  // we missed on a ld/st
+    output logic wbuffer_empty_o,  // driven by empty_o of the write buffer; also fed back to the miss unit
+    output logic wbuffer_not_ni_o,  // driven by not_ni_o of the write buffer
+
+    // AMO interface (connected to the miss unit)
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+
+    // Request ports (index NumPorts-1 is served by the write buffer, the others by read controllers)
+    input dcache_req_i_t [NumPorts-1:0] req_ports_i,
+    output dcache_req_o_t [NumPorts-1:0] req_ports_o,
+
+    output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,  // per-port miss_vld_bits_o of the controllers / write buffer, also routed to the miss unit
+
+    input logic mem_rtrn_vld_i,  // memory interface: these five signals connect straight to the miss unit
+    input dcache_rtrn_t mem_rtrn_i,
+    output logic mem_data_req_o,
+    input logic mem_data_ack_i,
+    output dcache_req_t mem_data_o
+);
+
+  // miss unit <-> read controllers
+  logic cache_en;  // cache_en_o of the miss unit, distributed to the read controllers and the write buffer
+
+  // miss unit <-> memory
+  logic wr_cl_vld;
+  logic wr_cl_nc;
+  logic [DCACHE_SET_ASSOC-1:0] wr_cl_we;
+  logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx;
+  logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
+  logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data;
+  logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user;
+  logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be;
+  logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits;
+  logic [DCACHE_SET_ASSOC-1:0] wr_req;
+  logic wr_ack;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx;
+  logic [DCACHE_OFFSET_WIDTH-1:0] wr_off;
+  riscv::xlen_t wr_data;
+  logic [(riscv::XLEN/8)-1:0] wr_data_be;
+  logic [DCACHE_USER_WIDTH-1:0] wr_user;
+
+  // miss unit <-> controllers/wbuffer
+  logic [NumPorts-1:0] miss_req;
+  logic [NumPorts-1:0] miss_ack;
+  logic [NumPorts-1:0] miss_nc;
+  logic [NumPorts-1:0] miss_we;
+  logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata;
+  logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser;
+  logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr;
+  logic [NumPorts-1:0][2:0] miss_size;
+  logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id;
+  logic [NumPorts-1:0] miss_replay;
+  logic [NumPorts-1:0] miss_rtrn_vld;
+  logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id;
+
+  // memory <-> read controllers/miss unit
+  logic [NumPorts-1:0] rd_prio;
+  logic [NumPorts-1:0] rd_tag_only;
+  logic [NumPorts-1:0] rd_req;
+  logic [NumPorts-1:0] rd_ack;
+  logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag;
+  logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
+  logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
+  riscv::xlen_t rd_data;
+  logic [DCACHE_USER_WIDTH-1:0] rd_user;
+  logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits;
+  logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh;
+
+  // miss unit <-> wbuffer
+  logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr;
+  logic [DCACHE_MAX_TX-1:0] tx_vld;
+
+  // wbuffer <-> memory
+  wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data;
+
+
+  ///////////////////////////////////////////////////////
+  // miss handling unit
+  ///////////////////////////////////////////////////////
+
+  wt_dcache_missunit #(
+      .CVA6Cfg (CVA6Cfg),
+      .AmoTxId (RdAmoTxId),
+      .NumPorts(NumPorts)
+  ) i_wt_dcache_missunit (
+      .clk_i          (clk_i),
+      .rst_ni         (rst_ni),
+      .enable_i       (enable_i),
+      .flush_i        (flush_i),
+      .flush_ack_o    (flush_ack_o),
+      .miss_o         (miss_o),
+      .wbuffer_empty_i(wbuffer_empty_o),
+      .cache_en_o     (cache_en),
+      // amo interface
+      .amo_req_i      (amo_req_i),
+      .amo_resp_o     (amo_resp_o),
+      // miss handling interface
+      .miss_req_i     (miss_req),
+      .miss_ack_o     (miss_ack),
+      .miss_nc_i      (miss_nc),
+      .miss_we_i      (miss_we),
+      .miss_wdata_i   (miss_wdata),
+      .miss_wuser_i   (miss_wuser),
+      .miss_paddr_i   (miss_paddr),
+      .miss_vld_bits_i(miss_vld_bits_o),
+      .miss_size_i    (miss_size),
+      .miss_id_i      (miss_id),
+      .miss_replay_o  (miss_replay),
+      .miss_rtrn_vld_o(miss_rtrn_vld),
+      .miss_rtrn_id_o (miss_rtrn_id),
+      // from writebuffer
+      .tx_paddr_i     (tx_paddr),
+      .tx_vld_i       (tx_vld),
+      // cache memory interface
+      .wr_cl_vld_o    (wr_cl_vld),
+      .wr_cl_nc_o     (wr_cl_nc),
+      .wr_cl_we_o     (wr_cl_we),
+      .wr_cl_tag_o    (wr_cl_tag),
+      .wr_cl_idx_o    (wr_cl_idx),
+      .wr_cl_off_o    (wr_cl_off),
+      .wr_cl_data_o   (wr_cl_data),
+      .wr_cl_user_o   (wr_cl_user),
+      .wr_cl_data_be_o(wr_cl_data_be),
+      .wr_vld_bits_o  (wr_vld_bits),
+      // memory interface
+      .mem_rtrn_vld_i (mem_rtrn_vld_i),
+      .mem_rtrn_i     (mem_rtrn_i),
+      .mem_data_req_o (mem_data_req_o),
+      .mem_data_ack_i (mem_data_ack_i),
+      .mem_data_o     (mem_data_o)
+  );
+
+  ///////////////////////////////////////////////////////
+  // read controllers (LD unit and PTW/MMU)
+  ///////////////////////////////////////////////////////
+
+  // 0 is used by MMU, 1 by READ access requests; port 2 only gets a controller if CVA6Cfg.EnableAccelerator is set
+  for (genvar k = 0; k < NumPorts - 1; k++) begin : gen_rd_ports
+    // set these to high prio ports; every other read port gets no controller and is tied off in the else branch
+    if ((k == 0 && MMU_PRESENT) || (k == 1) || (k == 2 && CVA6Cfg.EnableAccelerator)) begin
+      assign rd_prio[k] = 1'b1;
+      wt_dcache_ctrl #(
+          .CVA6Cfg(CVA6Cfg),
+          .RdTxId (RdAmoTxId)
+      ) i_wt_dcache_ctrl (
+          .clk_i          (clk_i),
+          .rst_ni         (rst_ni),
+          .cache_en_i     (cache_en),
+          // reqs from core
+          .req_port_i     (req_ports_i[k]),
+          .req_port_o     (req_ports_o[k]),
+          // miss interface
+          .miss_req_o     (miss_req[k]),
+          .miss_ack_i     (miss_ack[k]),
+          .miss_we_o      (miss_we[k]),
+          .miss_wdata_o   (miss_wdata[k]),
+          .miss_wuser_o   (miss_wuser[k]),
+          .miss_vld_bits_o(miss_vld_bits_o[k]),
+          .miss_paddr_o   (miss_paddr[k]),
+          .miss_nc_o      (miss_nc[k]),
+          .miss_size_o    (miss_size[k]),
+          .miss_id_o      (miss_id[k]),
+          .miss_replay_i  (miss_replay[k]),
+          .miss_rtrn_vld_i(miss_rtrn_vld[k]),
+          // used to detect readout mux collisions
+          .wr_cl_vld_i    (wr_cl_vld),
+          // cache mem interface
+          .rd_tag_o       (rd_tag[k]),
+          .rd_idx_o       (rd_idx[k]),
+          .rd_off_o       (rd_off[k]),
+          .rd_req_o       (rd_req[k]),
+          .rd_tag_only_o  (rd_tag_only[k]),
+          .rd_ack_i       (rd_ack[k]),
+          .rd_data_i      (rd_data),
+          .rd_user_i      (rd_user),
+          .rd_vld_bits_i  (rd_vld_bits),
+          .rd_hit_oh_i    (rd_hit_oh)
+      );
+    end else begin  // no controller on this port: tie every output of the port to zero
+      assign rd_prio[k] = 1'b0;
+      assign req_ports_o[k] = '0;
+      assign miss_req[k] = 1'b0;
+      assign miss_we[k] = 1'b0;
+      assign miss_wdata[k] = {{riscv::XLEN} {1'b0}};
+      assign miss_wuser[k] = {{DCACHE_USER_WIDTH} {1'b0}};
+      assign miss_vld_bits_o[k] = {{DCACHE_SET_ASSOC} {1'b0}};
+      assign miss_paddr[k] = {{riscv::PLEN} {1'b0}};
+      assign miss_nc[k] = 1'b0;
+      assign miss_size[k] = 3'b0;
+      assign miss_id[k] = {{CACHE_ID_WIDTH} {1'b0}};
+      assign rd_tag[k] = {{DCACHE_TAG_WIDTH} {1'b0}};
+      assign rd_idx[k] = {{DCACHE_CL_IDX_WIDTH} {1'b0}};
+      assign rd_off[k] = {{DCACHE_OFFSET_WIDTH} {1'b0}};
+      assign rd_req[k] = 1'b0;
+      assign rd_tag_only[k] = 1'b0;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // store unit controller
+  ///////////////////////////////////////////////////////
+
+  // set the read port of the write buffer (port NumPorts-1) to low priority
+  assign rd_prio[NumPorts-1] = 1'b0;
+
+  wt_dcache_wbuffer #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_wt_dcache_wbuffer (
+      .clk_i          (clk_i),
+      .rst_ni         (rst_ni),
+      .empty_o        (wbuffer_empty_o),
+      .not_ni_o       (wbuffer_not_ni_o),
+      // TODO: fix this
+      .cache_en_i     (cache_en),
+      // .cache_en_i ( '0 ),
+      // request ports from core (store unit)
+      .req_port_i     (req_ports_i[NumPorts-1]),
+      .req_port_o     (req_ports_o[NumPorts-1]),
+      // miss unit interface
+      .miss_req_o     (miss_req[NumPorts-1]),
+      .miss_ack_i     (miss_ack[NumPorts-1]),
+      .miss_we_o      (miss_we[NumPorts-1]),
+      .miss_wdata_o   (miss_wdata[NumPorts-1]),
+      .miss_wuser_o   (miss_wuser[NumPorts-1]),
+      .miss_vld_bits_o(miss_vld_bits_o[NumPorts-1]),
+      .miss_paddr_o   (miss_paddr[NumPorts-1]),
+      .miss_nc_o      (miss_nc[NumPorts-1]),
+      .miss_size_o    (miss_size[NumPorts-1]),
+      .miss_id_o      (miss_id[NumPorts-1]),
+      .miss_rtrn_vld_i(miss_rtrn_vld[NumPorts-1]),
+      .miss_rtrn_id_i (miss_rtrn_id),
+      // cache read interface
+      .rd_tag_o       (rd_tag[NumPorts-1]),
+      .rd_idx_o       (rd_idx[NumPorts-1]),
+      .rd_off_o       (rd_off[NumPorts-1]),
+      .rd_req_o       (rd_req[NumPorts-1]),
+      .rd_tag_only_o  (rd_tag_only[NumPorts-1]),
+      .rd_ack_i       (rd_ack[NumPorts-1]),
+      .rd_data_i      (rd_data),
+      .rd_vld_bits_i  (rd_vld_bits),
+      .rd_hit_oh_i    (rd_hit_oh),
+      // incoming invalidations/cache refills
+      .wr_cl_vld_i    (wr_cl_vld),
+      .wr_cl_idx_i    (wr_cl_idx),
+      // single word write interface
+      .wr_req_o       (wr_req),
+      .wr_ack_i       (wr_ack),
+      .wr_idx_o       (wr_idx),
+      .wr_off_o       (wr_off),
+      .wr_data_o      (wr_data),
+      .wr_user_o      (wr_user),
+      .wr_data_be_o   (wr_data_be),
+      // write buffer forwarding
+      .wbuffer_data_o (wbuffer_data),
+      .tx_paddr_o     (tx_paddr),
+      .tx_vld_o       (tx_vld)
+  );
+
+  ///////////////////////////////////////////////////////
+  // memory arrays, arbitration and tag comparison
+  ///////////////////////////////////////////////////////
+
+  wt_dcache_mem #(
+      .CVA6Cfg (CVA6Cfg),
+      .NumPorts(NumPorts)
+  ) i_wt_dcache_mem (
+      .clk_i          (clk_i),
+      .rst_ni         (rst_ni),
+      // read ports
+      .rd_prio_i      (rd_prio),
+      .rd_tag_i       (rd_tag),
+      .rd_idx_i       (rd_idx),
+      .rd_off_i       (rd_off),
+      .rd_req_i       (rd_req),
+      .rd_tag_only_i  (rd_tag_only),
+      .rd_ack_o       (rd_ack),
+      .rd_vld_bits_o  (rd_vld_bits),
+      .rd_hit_oh_o    (rd_hit_oh),
+      .rd_data_o      (rd_data),
+      .rd_user_o      (rd_user),
+      // cacheline write port
+      .wr_cl_vld_i    (wr_cl_vld),
+      .wr_cl_nc_i     (wr_cl_nc),
+      .wr_cl_we_i     (wr_cl_we),
+      .wr_cl_tag_i    (wr_cl_tag),
+      .wr_cl_idx_i    (wr_cl_idx),
+      .wr_cl_off_i    (wr_cl_off),
+      .wr_cl_data_i   (wr_cl_data),
+      .wr_cl_user_i   (wr_cl_user),
+      .wr_cl_data_be_i(wr_cl_data_be),
+      .wr_vld_bits_i  (wr_vld_bits),
+      // single word write port
+      .wr_req_i       (wr_req),
+      .wr_ack_o       (wr_ack),
+      .wr_idx_i       (wr_idx),
+      .wr_off_i       (wr_off),
+      .wr_data_i      (wr_data),
+      .wr_user_i      (wr_user),
+      .wr_data_be_i   (wr_data_be),
+      // write buffer forwarding
+      .wbuffer_data_i (wbuffer_data)
+  );
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  // check for concurrency issues
+
+
+  //pragma translate_off
+`ifndef VERILATOR
+  flush :  // while flush_i is high, a flush acknowledge requires an empty write buffer
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o)
+  else $fatal(1, "[l1 dcache] flushed cache implies flushed wbuffer");
+
+  initial begin
+    // assert wrong parameterizations
+    assert (DCACHE_INDEX_WIDTH <= 12)
+    else $fatal(1, "[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache
diff --git a/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv b/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv
new file mode 100644
index 0000000..b5973df
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv
@@ -0,0 +1,299 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 13.09.2018
+// Description: DCache controller for read port. FSM that issues the tag/data lookup for a core read request and hands the request to the miss unit on a miss, a disabled cache or a non-cacheable address.
+
+
+module wt_dcache_ctrl
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 1  // constant transaction ID driven onto miss_id_o
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic cache_en_i,  // when low, hits are ignored and every request is issued as a non-cacheable miss
+    // core request ports
+    input dcache_req_i_t req_port_i,
+    output dcache_req_o_t req_port_o,
+    // interface to miss handler
+    output logic miss_req_o,
+    input logic miss_ack_i,
+    output logic miss_we_o,  // unused (set to 0)
+    output riscv::xlen_t miss_wdata_o,  // unused (set to 0)
+    output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,  // unused (set to 0)
+    output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,  // valid bits at the missed index
+    output logic [riscv::PLEN-1:0] miss_paddr_o,
+    output logic miss_nc_o,  // request to I/O space
+    output logic [2:0] miss_size_o,  // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline (111 is driven for every cacheable request)
+    output logic [CACHE_ID_WIDTH-1:0] miss_id_o,  // set to constant ID
+    input logic miss_replay_i,  // request collided with pending miss - have to replay the request
+    input logic miss_rtrn_vld_i,  // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
+    // used to detect readout mux collisions
+    input logic wr_cl_vld_i,
+    // cache memory interface
+    output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o,  // tag in - comes one cycle later
+    output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
+    output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
+    output logic rd_req_o,  // read the word at offset off_i[:3] in all ways
+    output logic rd_tag_only_o,  // set to zero here
+    input logic rd_ack_i,
+    input riscv::xlen_t rd_data_i,
+    input logic [DCACHE_USER_WIDTH-1:0] rd_user_i,
+    input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
+    input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
+);
+
+  // controller FSM
+  typedef enum logic [2:0] {
+    IDLE,  // wait for an incoming request
+    READ,  // hit check of a granted request
+    MISS_REQ,  // miss request pending at the miss unit
+    MISS_WAIT,  // miss accepted, wait for miss_rtrn_vld_i
+    KILL_MISS,  // request was killed, still wait for the miss to return
+    KILL_MISS_ACK,  // request was killed before the miss unit acked or replayed it
+    REPLAY_REQ,  // re-issue the read request to the memory arrays
+    REPLAY_READ  // hit check of a replayed read (tag is not sampled again)
+  } state_e;
+  state_e state_d, state_q;
+
+  logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q;
+  logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q;
+  logic [DCACHE_TID_WIDTH-1:0] id_d, id_q;
+  logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q;
+  logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q;
+  logic [1:0] data_size_d, data_size_q;
+
+  ///////////////////////////////////////////////////////
+  // misc
+  ///////////////////////////////////////////////////////
+
+  // map address to tag/idx/offset and save
+  assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q;  // capture the valid bits in the cycle after rd_req_o was asserted
+  assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q;  // save_tag is only set in READ (not in REPLAY_READ)
+  assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q;  // index, offset, id and size are sampled when the request is granted
+  assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q;
+  assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q;
+  assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q;
+  assign rd_tag_o = address_tag_d;  // the memory arrays see the unregistered (_d) address fields
+  assign rd_idx_o = address_idx_d;
+  assign rd_off_o = address_off_d;
+
+  assign req_port_o.data_rdata = rd_data_i;
+  assign req_port_o.data_ruser = rd_user_i;
+  assign req_port_o.data_rid = id_q;
+
+  // to miss unit (uses the registered address fields)
+  assign miss_vld_bits_o = vld_data_q;
+  assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q};
+  assign miss_size_o = (miss_nc_o) ? {1'b0, data_size_q} : 3'b111;
+
+  // noncacheable if request goes to I/O space, or if cache is disabled
+  assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions(
+      CVA6Cfg,
+      {{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}}
+  ));
+
+
+  assign miss_we_o = '0;
+  assign miss_wdata_o = '0;
+  assign miss_wuser_o = '0;
+  assign miss_id_o = RdTxId;
+  assign rd_req_d = rd_req_o;
+  assign rd_ack_d = rd_ack_i;
+  assign rd_tag_only_o = '0;
+
+  ///////////////////////////////////////////////////////
+  // main control logic
+  ///////////////////////////////////////////////////////
+
+  always_comb begin : p_fsm
+    // default assignment (each state below only overrides what it needs)
+    state_d = state_q;
+    save_tag = 1'b0;
+    rd_req_o = 1'b0;
+    miss_req_o = 1'b0;
+    req_port_o.data_rvalid = 1'b0;
+    req_port_o.data_gnt = 1'b0;
+
+    // interfaces
+    unique case (state_q)
+      //////////////////////////////////
+      // wait for an incoming request
+      IDLE: begin
+        if (req_port_i.data_req) begin
+          rd_req_o = 1'b1;
+          // if read ack then ack the `req_port_o`, and goto `READ` state
+          if (rd_ack_i) begin
+            state_d = READ;
+            req_port_o.data_gnt = 1'b1;
+          end
+        end
+      end
+      //////////////////////////////////
+      // check whether we have a hit
+      // in case the cache is disabled,
+      // or in case the address is NC, we
+      // reuse the miss mechanism to handle
+      // the request
+      READ, REPLAY_READ: begin
+        // speculatively request cache line
+        rd_req_o = 1'b1;
+
+        // kill -> go back to IDLE
+        if (req_port_i.kill_req) begin
+          state_d = IDLE;
+          req_port_o.data_rvalid = 1'b1;  // a killed request is still completed with rvalid
+        end else if (req_port_i.tag_valid | state_q == REPLAY_READ) begin
+          save_tag = (state_q != REPLAY_READ);
+          if (wr_cl_vld_i || !rd_ack_q) begin  // cache line write in this cycle, or the read was not acked in the previous cycle
+            state_d = REPLAY_REQ;
+            // we've got a hit
+          end else if ((|rd_hit_oh_i) && cache_en_i) begin
+            state_d = IDLE;
+            req_port_o.data_rvalid = 1'b1;
+            // we can handle another request
+            if (rd_ack_i && req_port_i.data_req) begin
+              state_d = READ;
+              req_port_o.data_gnt = 1'b1;
+            end
+            // we've got a miss
+          end else begin
+            state_d = MISS_REQ;
+          end
+        end
+      end
+      //////////////////////////////////
+      // issue request
+      MISS_REQ: begin
+        miss_req_o = 1'b1;
+
+        if (req_port_i.kill_req) begin
+          req_port_o.data_rvalid = 1'b1;
+          if (miss_ack_i) begin
+            state_d = KILL_MISS;  // miss was accepted in this cycle: wait for its return
+          end else begin
+            state_d = KILL_MISS_ACK;  // miss not accepted yet: wait for ack or replay first
+          end
+        end else if (miss_replay_i) begin
+          state_d = REPLAY_REQ;
+        end else if (miss_ack_i) begin
+          state_d = MISS_WAIT;
+        end
+      end
+      //////////////////////////////////
+      // wait until the memory transaction
+      // returns.
+      MISS_WAIT: begin
+        if (req_port_i.kill_req) begin
+          req_port_o.data_rvalid = 1'b1;
+          if (miss_rtrn_vld_i) begin
+            state_d = IDLE;
+          end else begin
+            state_d = KILL_MISS;
+          end
+        end else if (miss_rtrn_vld_i) begin
+          state_d = IDLE;
+          req_port_o.data_rvalid = 1'b1;
+        end
+      end
+      //////////////////////////////////
+      // replay read request
+      REPLAY_REQ: begin
+        rd_req_o = 1'b1;
+        if (req_port_i.kill_req) begin
+          req_port_o.data_rvalid = 1'b1;
+          state_d = IDLE;
+        end else if (rd_ack_i) begin
+          state_d = REPLAY_READ;
+        end
+      end
+      //////////////////////////////////
+      KILL_MISS_ACK: begin
+        miss_req_o = 1'b1;
+        // in this case the miss handler did not issue
+        // a transaction and we can safely go to idle
+        if (miss_replay_i) begin
+          state_d = IDLE;
+        end else if (miss_ack_i) begin
+          state_d = KILL_MISS;
+        end
+      end
+      //////////////////////////////////
+      // killed miss,
+      // wait until miss unit responds and
+      // go back to idle
+      KILL_MISS: begin
+        if (miss_rtrn_vld_i) begin
+          state_d = IDLE;
+        end
+      end
+      default: begin
+        // we should never get here
+        state_d = IDLE;
+      end
+    endcase  // state_q
+  end
+
+  ///////////////////////////////////////////////////////
+  // ff's
+  ///////////////////////////////////////////////////////
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      state_q <= IDLE;
+      address_tag_q <= '0;
+      address_idx_q <= '0;
+      address_off_q <= '0;
+      id_q <= '0;
+      vld_data_q <= '0;
+      data_size_q <= '0;
+      rd_req_q <= '0;
+      rd_ack_q <= '0;
+    end else begin
+      state_q <= state_d;
+      address_tag_q <= address_tag_d;
+      address_idx_q <= address_idx_d;
+      address_off_q <= address_off_d;
+      id_q <= id_d;
+      vld_data_q <= vld_data_d;
+      data_size_q <= data_size_d;
+      rd_req_q <= rd_req_d;
+      rd_ack_q <= rd_ack_d;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+  hot1 :  // the property uses $onehot0, i.e. it allows zero or one hitting way
+  assert property (@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0(
+      rd_hit_oh_i
+  ))
+  else $fatal(1, "[l1 dcache ctrl] rd_hit_oh_i signal must be hot1");
+
+  initial begin
+    // assert wrong parameterizations
+    assert (DCACHE_INDEX_WIDTH <= 12)
+    else
+      $fatal(1, "[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages");
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache_ctrl
diff --git a/test/type_param/core/cache_subsystem/wt_dcache_mem.sv b/test/type_param/core/cache_subsystem/wt_dcache_mem.sv
new file mode 100644
index 0000000..b2b41c3
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_dcache_mem.sv
@@ -0,0 +1,428 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: Michael Schaffner , ETH Zurich +// Date: 13.09.2018 +// Description: Memory arrays, arbiter and tag comparison for WT dcache. +// +// +// Notes: 1) all ports can trigger a readout of all ways, and the way where the tag hits is selected +// +// 2) only port0 can write full cache lines. higher ports are read only. also, port0 can only read the tag array, +// and does not trigger a cache line readout. +// +// 3) the single word write port is a separate port without access to the tag memory. +// these single word writes can interleave with read operations if they go to different +// cacheline offsets, since each word offset is placed into a different SRAM bank. +// +// 4) Read ports with same priority are RR arbited. but high prio ports (rd_prio_i[port_nr] = '1b1) will stall +// low prio ports (rd_prio_i[port_nr] = '1b0) + + +module wt_dcache_mem + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NumPorts = 3 +) ( + input logic clk_i, + input logic rst_ni, + + // ports + input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later + input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i, + input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i, + input logic [NumPorts-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways + input logic [NumPorts-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays + input logic [NumPorts-1:0] rd_prio_i, // 0: low prio, 1: high prio + output logic [NumPorts-1:0] rd_ack_o, + output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o, + output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o, + output riscv::xlen_t rd_data_o, + output logic [DCACHE_USER_WIDTH-1:0] rd_user_o, + + // only available on port 0, uses address signals of port 0 + input logic wr_cl_vld_i, + input logic wr_cl_nc_i, // noncacheable access + input logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // 
writes a full cacheline + input logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i, + input logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i, + input logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i, + input logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data_i, + input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i, + input logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i, + input logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits_i, + + // separate port for single word write, no tag access + input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3] + output logic wr_ack_o, + input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i, + input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i, + input riscv::xlen_t wr_data_i, + input logic [DCACHE_USER_WIDTH-1:0] wr_user_i, + input logic [(riscv::XLEN/8)-1:0] wr_data_be_i, + + // forwarded wbuffer + input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i +); + + // functions + function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh( + input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in); + logic [DCACHE_NUM_BANKS-1:0] out; + out = '0; + out[in] = 1'b1; + return out; + endfunction + + // number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter + // is not needed. Therefore, increment it by one to avoid reverse range select during elaboration. + localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? 
$clog2( + CVA6Cfg.AxiDataWidth / 8 + ) + 1 : $clog2( + CVA6Cfg.AxiDataWidth / 8 + ); + + logic [DCACHE_NUM_BANKS-1:0] bank_req; + logic [DCACHE_NUM_BANKS-1:0] bank_we; + logic [DCACHE_NUM_BANKS-1:0][ DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be; + logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx; + logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q; + logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q; + + logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata; // + logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata; // + logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl; // selected word from each cacheline + logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser; // + logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser; // + logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] ruser_cl; // selected word from each cacheline + + logic [DCACHE_TAG_WIDTH-1:0] rd_tag; + logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs + logic vld_we; // valid bits write enable + logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write + logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem + logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit + + logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q; + + logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh; + logic [ (riscv::XLEN/8)-1:0] wbuffer_be; + riscv::xlen_t wbuffer_rdata, rdata; + logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser; + logic [riscv::PLEN-1:0] wbuffer_cmp_addr; + + logic cmp_en_d, cmp_en_q; + logic rd_acked; + logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio; + + /////////////////////////////////////////////////////// + // arbiter + /////////////////////////////////////////////////////// + + // Priority is highest for lowest read port index + // + // SRAM bank mapping: + // + // Bank 0 
Bank 2 + // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] .. + + // byte enable mapping + for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_bank + for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : gen_bank_way + assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] : + (wr_req_i[j] & wr_ack_o) ? wr_data_be_i : + '0; + assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_i[k*riscv::XLEN +: riscv::XLEN] : + wr_data_i; + assign bank_wuser[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_user_i[k*DCACHE_USER_WIDTH +: DCACHE_USER_WIDTH] : + wr_user_i; + end + end + + assign vld_wdata = wr_vld_bits_i; + assign vld_addr = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d]; + assign rd_tag = rd_tag_i[vld_sel_q]; //delayed by one cycle + assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d]; + assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d]; + assign vld_req = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0; + + + // priority masking + // disable low prio requests when any of the high prio reqs is present + assign rd_req_prio = rd_req_i & rd_prio_i; + assign rd_req_masked = (|rd_req_prio) ? 
rd_req_prio : rd_req_i; + + logic rd_req; + rr_arb_tree #( + .NumIn (NumPorts), + .DataWidth(1) + ) i_rr_arb_tree ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (rd_req_masked), + .gnt_o (rd_ack_o), + .data_i ('0), + .gnt_i (~wr_cl_vld_i), + .req_o (rd_req), + .data_o (), + .idx_o (vld_sel_d) + ); + + assign rd_acked = rd_req & ~wr_cl_vld_i; + + always_comb begin : p_bank_req + vld_we = wr_cl_vld_i; + bank_req = '0; + wr_ack_o = '0; + bank_we = '0; + bank_idx = '{default: wr_idx_i}; + + for (int k = 0; k < NumPorts; k++) begin + bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]; + end + + if (wr_cl_vld_i & |wr_cl_we_i) begin + bank_req = '1; + bank_we = '1; + bank_idx = '{default: wr_cl_idx_i}; + end else begin + if (rd_acked) begin + if (!rd_tag_only_i[vld_sel_d]) begin + bank_req = + dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); + bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d]; + end + end + + if (|wr_req_i) begin + if (rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin + wr_ack_o = 1'b1; + bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); + bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); + end + end + end + end + + /////////////////////////////////////////////////////// + // tag comparison, hit generatio, readoud muxes + /////////////////////////////////////////////////////// + + logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off; + logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off; + logic [ $clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx; + logic [ $clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx; + + assign cmp_en_d = (|vld_req) & ~vld_we; + + // word tag comparison in write buffer + assign wbuffer_cmp_addr = 
(wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} : + {rd_tag, bank_idx_q, bank_off_q}; + // hit generation + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel + // tag comparison of ways >0 + assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q; + // byte offset mux of ways >0 + assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i]; + assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i]; + end + + for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit + assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & ({{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_data_i[k].wtag} == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES)); + end + + lzc #( + .WIDTH(DCACHE_WBUF_DEPTH) + ) i_lzc_wbuffer_hit ( + .in_i (wbuffer_hit_oh), + .cnt_o (wbuffer_hit_idx), + .empty_o() + ); + + lzc #( + .WIDTH(DCACHE_SET_ASSOC) + ) i_lzc_rd_hit ( + .in_i (rd_hit_oh_o), + .cnt_o (rd_hit_idx), + .empty_o() + ); + + assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data; + assign wbuffer_ruser = wbuffer_data_i[wbuffer_hit_idx].user; + assign wbuffer_be = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0; + + if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset + // In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read + assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? 
'0 : + {{DCACHE_OFFSET_WIDTH-AXI_OFFSET_WIDTH{1'b0}}, wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]} : + wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]; + end else begin : gen_piton_offset + assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3]; + end + + always_comb begin + if (wr_cl_vld_i) begin + rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN+:riscv::XLEN]; + ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH+:DCACHE_USER_WIDTH]; + end else begin + rdata = rdata_cl[rd_hit_idx]; + ruser = ruser_cl[rd_hit_idx]; + end + end + + // overlay bytes that hit in the write buffer + for (genvar k = 0; k < (riscv::XLEN / 8); k++) begin : gen_rd_data + assign rd_data_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k+:8] : rdata[8*k+:8]; + end + for (genvar k = 0; k < DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user + assign rd_user_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k+:8] : ruser[8*k+:8]; + end + + /////////////////////////////////////////////////////// + // memory arrays and regs + /////////////////////////////////////////////////////// + + logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata[DCACHE_SET_ASSOC-1:0]; + + for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks + // Data RAM + sram #( + .USER_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH), + .DATA_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN), + .USER_EN (ariane_pkg::DATA_USER_EN), + .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS) + ) i_data_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (bank_req[k]), + .we_i (bank_we[k]), + .addr_i (bank_idx[k]), + .wuser_i(bank_wuser[k]), + .wdata_i(bank_wdata[k]), + .be_i (bank_be[k]), + .ruser_o(bank_ruser[k]), + .rdata_o(bank_rdata[k]) + ); + end + + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_srams + + assign tag_rdata[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0]; + assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH]; + + // Tag RAM + sram #( + // tag + valid bit + .DATA_WIDTH(ariane_pkg::DCACHE_TAG_WIDTH 
+ 1), + .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS) + ) i_tag_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (vld_req[i]), + .we_i (vld_we), + .addr_i (vld_addr), + .wuser_i('0), + .wdata_i({vld_wdata[i], wr_cl_tag_i}), + .be_i ('1), + .ruser_o(), + .rdata_o(vld_tag_rdata[i]) + ); + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + bank_idx_q <= '0; + bank_off_q <= '0; + vld_sel_q <= '0; + cmp_en_q <= '0; + end else begin + bank_idx_q <= bank_idx_d; + bank_off_q <= bank_off_d; + vld_sel_q <= vld_sel_d; + cmp_en_q <= cmp_en_d; + end + end + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + initial begin + cach_line_width_axi : + assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth) + else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width"); + end + + initial begin + axi_xlen : + assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN) + else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN"); + end + + initial begin + cach_line_width_xlen : + assert (DCACHE_LINE_WIDTH > riscv::XLEN) + else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN"); + end + + hit_hot1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0( + rd_hit_oh_o + )) + else $fatal(1, "[l1 dcache] rd_hit_oh_o signal must be hot1"); + + word_write_hot1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i)) + else $fatal(1, "[l1 dcache] wr_req_i signal must be hot1"); + + wbuffer_hit_hot1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0( + wbuffer_hit_oh + )) + else $fatal(1, "[l1 dcache] wbuffer_hit_oh signal must be hot1"); + + // this is only used for verification! 
+ logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0]; + logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0]; + logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror + if (!rst_ni) begin + vld_mirror <= '{default: '0}; + tag_mirror <= '{default: '0}; + end else begin + for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin + if (vld_req[i] & vld_we) begin + vld_mirror[vld_addr][i] <= vld_wdata[i]; + tag_mirror[vld_addr][i] <= wr_cl_tag_i; + end + end + end + end + + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test + assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata); + end + + tag_write_duplicate : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test)) + else $fatal(1, "[l1 dcache] cannot allocate a CL that is already present in the cache"); + +`endif + //pragma translate_on + +endmodule // wt_dcache_mem diff --git a/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv b/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv new file mode 100644 index 0000000..3e06a92 --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv @@ -0,0 +1,645 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 13.09.2018
+// Description: miss controller for WT dcache. Note that the current assumption
+// is that the port with the highest index issues writes instead of reads.
+
+
+module wt_dcache_missunit
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1,  // TX id to be used for AMOs
+    parameter int unsigned NumPorts = 4  // number of miss ports (the assertions below require >= 2)
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    // cache management, signals from/to core
+    input logic enable_i,  // from CSR
+    input logic flush_i,  // flush request, this waits for pending tx (write, read) to finish and will clear the cache
+    output logic flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
+    output logic miss_o,  // we missed on a ld/st
+    // local cache management signals
+    input logic wbuffer_empty_i,
+    output logic cache_en_o,  // local cache enable signal
+    // AMO interface
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+    // miss handling interface (ld, ptw, wbuffer)
+    input logic [NumPorts-1:0] miss_req_i,
+    output logic [NumPorts-1:0] miss_ack_o,
+    input logic [NumPorts-1:0] miss_nc_i,
+    input logic [NumPorts-1:0] miss_we_i,
+    input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i,
+    input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i,
+    input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i,
+    input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
+    input logic [NumPorts-1:0][2:0] miss_size_i,
+    input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i,  // used as transaction ID
+    // signals that the request collided with a pending read
+    output logic [NumPorts-1:0] miss_replay_o,
+    // signals response from memory
+    output logic [NumPorts-1:0] miss_rtrn_vld_o,
+    output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o,  // only used for writes, set to zero for reads (forwarded unmodified from mem_rtrn_i.tid)
+    // from writebuffer
+    input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i,  // used to check for address collisions with read operations
+    input logic [DCACHE_MAX_TX-1:0] tx_vld_i,  // used to check for address collisions with read operations
+    // write interface to cache memory
+    output logic wr_cl_vld_o,  // set on a load response or whenever any bit of wr_cl_we_o is set
+    output logic wr_cl_nc_o,  // NC flag of the current MSHR entry (mshr_q.nc)
+    output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o,  // per-way write enable; all ways during flush and invalidate-all
+    output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
+    output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
+    output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
+    output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
+    output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o,
+    output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,  // all ones on a cacheline refill, zero otherwise
+    output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o,  // one-hot replacement way on a refill, zero on flush/invalidation
+    // memory interface
+    input logic mem_rtrn_vld_i,
+    input dcache_rtrn_t mem_rtrn_i,
+    output logic mem_data_req_o,
+    input logic mem_data_ack_i,
+    output dcache_req_t mem_data_o
+);
+
+  // helper functions
+  function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh(  // binary way index -> one-hot way mask
+      input logic [L1D_WAY_WIDTH-1:0] in);
+    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] out;
+    out = '0;
+    out[in] = 1'b1;
+    return out;
+  endfunction
+
+  // align the physical address to the specified size by clearing the low address bits:
+  // 000: bytes (address returned unchanged, as for every other unlisted encoding)
+  // 001: hword
+  // 010: word
+  // 011: dword
+  // 111: DCACHE line
+  function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(input logic [riscv::PLEN-1:0] paddr,
+                                                            input logic [2:0] size);
+    logic [riscv::PLEN-1:0] out;
+    out = paddr;
+    unique case (size)
+      3'b001: out[0:0] = '0;
+      3'b010: out[1:0] = '0;
+      3'b011: out[2:0] = '0;
+      3'b111: out[DCACHE_OFFSET_WIDTH-1:0] = '0;
+      default: ;
+    endcase
+    return out;
+  endfunction : paddrSizeAlign
+
+  // controller FSM
+  typedef enum logic [2:0] {
+    IDLE,  // wait for misses / AMO ops / flush requests
+    DRAIN,  // only stores are served until the MSHR is cleared and the wbuffer is empty
+    AMO,  // send out the AMO request
+    FLUSH,  // flush the cache (also the state entered out of reset)
+    STORE_WAIT,  // store request issued, waiting for mem_data_ack_i
+    LOAD_WAIT,  // load request issued, waiting for mem_data_ack_i
+    AMO_WAIT  // block until the AMO response returns
+  } state_e;
+  state_e state_d, state_q;
+
+  // MSHR for reads (a single entry, see mshr_vld_d below)
+  typedef struct packed {
+    logic [riscv::PLEN-1:0] paddr;  // miss address
+    logic [2:0] size;
+    logic [DCACHE_SET_ASSOC-1:0] vld_bits;
+    logic [CACHE_ID_WIDTH-1:0] id;  // transaction ID, checked against the response TID by the read_tid assertion
+    logic nc;  // miss was non-cacheable: the response does not write the cache
+    logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;  // way selected for the refill
+    logic [$clog2(NumPorts)-1:0] miss_port_idx;  // port that issued the miss
+  } mshr_t;
+
+  mshr_t mshr_d, mshr_q;
+  logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way;
+  logic mshr_vld_d, mshr_vld_q, mshr_vld_q1;  // mshr_vld_q1 is mshr_vld_q delayed by one cycle
+  logic mshr_allocate;
+  logic update_lfsr, all_ways_valid;
+
+  logic enable_d, enable_q;
+  logic flush_ack_d, flush_ack_q;
+  logic flush_en, flush_done;
+  logic mask_reads, lock_reqs;
+  logic amo_sel, miss_is_write;
+  logic amo_req_d, amo_req_q;
+  logic [63:0] amo_rtrn_mux;
+  riscv::xlen_t amo_data, amo_data_a, amo_data_b;
+  riscv::xlen_t amo_user;  // NOTE(review): declared XLEN wide; the original note asks whether DCACHE_USER_WIDTH / DATA_USER_WIDTH was intended
+  logic [riscv::PLEN-1:0] tmp_paddr;
+  logic [$clog2(NumPorts)-1:0] miss_port_idx;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;  // flush counter, drives wr_cl_idx_o while flush_en is set
+  logic [NumPorts-1:0] miss_req_masked_d, miss_req_masked_q;
+
+  logic inv_vld, inv_vld_all, cl_write_en;
+  logic load_ack, store_ack, amo_ack;
+
+  logic [NumPorts-1:0] mshr_rdrd_collision_d, mshr_rdrd_collision_q;
+  logic [NumPorts-1:0] mshr_rdrd_collision;
+  logic tx_rdwr_collision, mshr_rdwr_collision;
+
+  ///////////////////////////////////////////////////////
+  // input arbitration and general control sigs
+  ///////////////////////////////////////////////////////
+
+  assign cache_en_o = enable_q;
+  assign cnt_d = (flush_en) ? cnt_q + 1 : '0;  // counts only while flushing, cleared otherwise
+  assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS - 1);
+
+  assign miss_req_masked_d = (lock_reqs) ? miss_req_masked_q :  // hold the request vector while a request is pending
+      (mask_reads) ? miss_we_i & miss_req_i : miss_req_i;  // with mask_reads only write requests pass
+  assign miss_is_write = miss_we_i[miss_port_idx];
+
+  // miss port arbiter: the lzc over the masked request vector yields miss_port_idx
+  lzc #(
+      .WIDTH(NumPorts)
+  ) i_lzc_reqs (
+      .in_i   (miss_req_masked_d),
+      .cnt_o  (miss_port_idx),
+      .empty_o()
+  );
+
+  always_comb begin : p_ack  // acks the selected port on a completed memory request handshake (never for AMOs)
+    miss_ack_o = '0;
+    if (!amo_sel) begin
+      miss_ack_o[miss_port_idx] = mem_data_ack_i & mem_data_req_o;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // MSHR and way replacement logic (only for read ops)
+  ///////////////////////////////////////////////////////
+
+  // find invalid cache line (all_ways_valid is set when the inverted valid vector is empty)
+  lzc #(
+      .WIDTH(ariane_pkg::DCACHE_SET_ASSOC)
+  ) i_lzc_inv (
+      .in_i   (~miss_vld_bits_i[miss_port_idx]),
+      .cnt_o  (inv_way),
+      .empty_o(all_ways_valid)
+  );
+
+  // generate random cacheline index
+  lfsr #(
+      .LfsrWidth(8),
+      .OutWidth ($clog2(ariane_pkg::DCACHE_SET_ASSOC))
+  ) i_lfsr_inv (
+      .clk_i (clk_i),
+      .rst_ni(rst_ni),
+      .en_i  (update_lfsr),
+      .out_o (rnd_way)
+  );
+
+  assign repl_way = (all_ways_valid) ? rnd_way : inv_way;  // prefer an invalid way, otherwise evict a random one
+
+  assign mshr_d.size = (mshr_allocate) ? miss_size_i[miss_port_idx] : mshr_q.size;
+  assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i[miss_port_idx] : mshr_q.paddr;
+  assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
+  assign mshr_d.id = (mshr_allocate) ? miss_id_i[miss_port_idx] : mshr_q.id;
+  assign mshr_d.nc = (mshr_allocate) ? miss_nc_i[miss_port_idx] : mshr_q.nc;
+  assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way;
+  assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;
+
+  // currently we only have one outstanding read TX, hence an incoming load clears the MSHR
+  assign mshr_vld_d = (mshr_allocate) ? 1'b1 : (load_ack) ? 1'b0 : mshr_vld_q;
+
+  assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;  // NC requests are not reported as misses
+
+
+  for (genvar k = 0; k < NumPorts; k++) begin : gen_rdrd_collision
+    assign mshr_rdrd_collision[k] = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
+    assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];  // sticky while the request stays asserted
+  end
+
+  // read/write collision, stalls the corresponding request
+  // write port[NumPorts-1] collides with MSHR_Q
+  assign mshr_rdwr_collision = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && mshr_vld_q;
+
+  // read collides with inflight TX
+  always_comb begin : p_tx_coll
+    tx_rdwr_collision = 1'b0;
+    for (int k = 0; k < DCACHE_MAX_TX; k++) begin
+      tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k];
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // to memory
+  ///////////////////////////////////////////////////////
+
+  // if size = 32bit word, select appropriate offset, replicate for openpiton...
+
+  if (CVA6Cfg.RVA) begin
+    if (riscv::IS_XLEN64) begin : gen_amo_64b_data
+      assign amo_data_a = {amo_req_i.operand_b[0+:32], amo_req_i.operand_b[0+:32]};
+      assign amo_data_b = amo_req_i.operand_b;
+    end else begin : gen_amo_32b_data
+      assign amo_data_a = amo_req_i.operand_b[0+:32];
+    end
+  end
+
+  always_comb begin  // NOTE(review): amo_data / amo_user receive no assignment here when CVA6Cfg.RVA is false
+    if (CVA6Cfg.RVA) begin
+      if (riscv::IS_XLEN64) begin
+        if (amo_req_i.size == 2'b10) begin
+          amo_data = amo_data_a;
+        end else begin
+          amo_data = amo_data_b;
+        end
+      end else begin
+        amo_data = amo_data_a;
+      end
+      if (ariane_pkg::DATA_USER_EN) begin
+        amo_user = amo_data;
+      end else begin
+        amo_user = '0;
+      end
+    end
+  end
+
+  if (CVA6Cfg.RVA) begin
+    // note: openpiton returns a full cacheline!
+    if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_rtrn_mux
+      if (CVA6Cfg.AxiDataWidth > 64) begin
+        assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2(
+            CVA6Cfg.AxiDataWidth/8
+        )-1:3]*64+:64];
+      end else begin
+        assign amo_rtrn_mux = mem_rtrn_i.data[0+:64];
+      end
+    end else begin : gen_piton_rtrn_mux
+      assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64+:64];
+    end
+
+    // always sign extend 32bit values
+    assign amo_resp_o.result = (amo_req_i.size==2'b10) ? {{32{amo_rtrn_mux[amo_req_i.operand_a[2]*32 + 31]}},amo_rtrn_mux[amo_req_i.operand_a[2]*32 +: 32]} :
+                                                         amo_rtrn_mux ;
+    assign amo_req_d = amo_req_i.req;
+  end
+
+  // outgoing memory requests (AMOs are always uncached)
+  assign mem_data_o.tid = (CVA6Cfg.RVA && amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
+  assign mem_data_o.nc = (CVA6Cfg.RVA && amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
+  assign mem_data_o.way = (CVA6Cfg.RVA && amo_sel) ? '0 : repl_way;
+  assign mem_data_o.data = (CVA6Cfg.RVA && amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
+  assign mem_data_o.user = (CVA6Cfg.RVA && amo_sel) ? amo_user : miss_wuser_i[miss_port_idx];
+  assign mem_data_o.size = (CVA6Cfg.RVA && amo_sel) ? {1'b0, amo_req_i.size} : miss_size_i [miss_port_idx];
+  assign mem_data_o.amo_op = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.amo_op : AMO_NONE;
+
+  assign tmp_paddr = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx];
+  assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size);  // outgoing address is aligned to the request size
+
+  ///////////////////////////////////////////////////////
+  // back-off mechanism for LR/SC completion guarantee
+  ///////////////////////////////////////////////////////
+
+  logic sc_fail, sc_pass, sc_backoff_over;
+  exp_backoff #(
+      .Seed  (3),
+      .MaxExp(16)
+  ) i_exp_backoff (
+      .clk_i,
+      .rst_ni,
+      .set_i    (sc_fail),
+      .clr_i    (sc_pass),
+      .is_zero_o(sc_backoff_over)
+  );
+
+  ///////////////////////////////////////////////////////
+  // responses from memory
+  ///////////////////////////////////////////////////////
+
+  // keep track of pending stores
+  logic store_sent;
+  logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q;
+  assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ);
+
+  assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q :  // ack and send in the same cycle cancel out
+      (store_ack) ? stores_inflight_q - 1 :
+      (store_sent) ? stores_inflight_q + 1 :
+      stores_inflight_q;
+
+  // incoming responses: decode the response type into ack / invalidation strobes
+  always_comb begin : p_rtrn_logic
+    load_ack = 1'b0;
+    store_ack = 1'b0;
+    amo_ack = 1'b0;
+    inv_vld = 1'b0;
+    inv_vld_all = 1'b0;
+    sc_fail = 1'b0;
+    sc_pass = 1'b0;
+    miss_rtrn_vld_o = '0;
+    if (mem_rtrn_vld_i) begin
+      unique case (mem_rtrn_i.rtype)
+        DCACHE_LOAD_ACK: begin
+          if (mshr_vld_q) begin
+            load_ack = 1'b1;
+            miss_rtrn_vld_o[mshr_q.miss_port_idx] = 1'b1;
+          end
+        end
+        DCACHE_STORE_ACK: begin
+          if (stores_inflight_q > 0) begin
+            store_ack = 1'b1;
+            miss_rtrn_vld_o[NumPorts-1] = 1'b1;  // store responses always go to the last (write) port
+          end
+        end
+        DCACHE_ATOMIC_ACK: begin
+          if (CVA6Cfg.RVA) begin
+            if (amo_req_q) begin
+              amo_ack = 1'b1;
+              // need to set SC backoff counter if
+              // this op failed (a nonzero result is treated as failure)
+              if (amo_req_i.amo_op == AMO_SC) begin
+                if (amo_resp_o.result > 0) begin
+                  sc_fail = 1'b1;
+                end else begin
+                  sc_pass = 1'b1;
+                end
+              end
+            end
+          end
+        end
+        DCACHE_INV_REQ: begin
+          inv_vld = mem_rtrn_i.inv.vld | mem_rtrn_i.inv.all;
+          inv_vld_all = mem_rtrn_i.inv.all;
+        end
+        // TODO:
+        // DCACHE_INT_REQ: begin
+        // end
+        default: begin
+        end
+      endcase
+    end
+  end
+
+  // to write buffer
+  assign miss_rtrn_id_o = mem_rtrn_i.tid;
+
+  ///////////////////////////////////////////////////////
+  // writes to cache memory
+  ///////////////////////////////////////////////////////
+
+  // cacheline write port
+  assign wr_cl_nc_o = mshr_q.nc;
+  assign wr_cl_vld_o = load_ack | (|wr_cl_we_o);
+
+  assign wr_cl_we_o = (flush_en) ? '1 : (inv_vld_all) ? '1 : (inv_vld) ? dcache_way_bin2oh(
+      mem_rtrn_i.inv.way
+  ) : (cl_write_en) ? dcache_way_bin2oh(
+      mshr_q.repl_way
+  ) : '0;
+
+  assign wr_vld_bits_o = (flush_en) ? '0 : (inv_vld) ? '0 : (cl_write_en) ? dcache_way_bin2oh(
+      mshr_q.repl_way
+  ) : '0;
+
+  assign wr_cl_idx_o = (flush_en) ? cnt_q :
+      (inv_vld) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
+      mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
+
+  assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
+  assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
+  assign wr_cl_data_o = mem_rtrn_i.data;
+  assign wr_cl_user_o = mem_rtrn_i.user;
+  assign wr_cl_data_be_o = (cl_write_en) ? '1 : '0;// we only write complete cachelines into the memory
+
+  // only non-NC responses write to the cache
+  assign cl_write_en = load_ack & ~mshr_q.nc;
+
+  ///////////////////////////////////////////////////////
+  // main control logic for generating tx
+  ///////////////////////////////////////////////////////
+
+  always_comb begin : p_fsm
+    // default assignment
+    state_d = state_q;
+
+    flush_ack_o = 1'b0;
+    mem_data_o.rtype = DCACHE_LOAD_REQ;
+    mem_data_req_o = 1'b0;
+    amo_resp_o.ack = 1'b0;
+    miss_replay_o = '0;
+
+    // disabling cache is possible anytime, enabling goes via flush
+    enable_d = enable_q & enable_i;
+    flush_ack_d = flush_ack_q;
+    flush_en = 1'b0;
+    amo_sel = 1'b0;
+    update_lfsr = 1'b0;
+    mshr_allocate = 1'b0;
+    lock_reqs = 1'b0;
+    mask_reads = mshr_vld_q;  // reads are masked by default while the MSHR is occupied
+
+    // interfaces
+    unique case (state_q)
+      //////////////////////////////////
+      // wait for misses / amo ops
+      IDLE: begin
+        if (flush_i || (enable_i && !enable_q)) begin
+          if (wbuffer_empty_i && !mshr_vld_q) begin
+            flush_ack_d = flush_i;
+            state_d = FLUSH;
+          end else begin
+            state_d = DRAIN;
+          end
+        end else if (CVA6Cfg.RVA && amo_req_i.req) begin
+          if (wbuffer_empty_i && !mshr_vld_q) begin
+            state_d = AMO;
+          end else begin
+            state_d = DRAIN;
+          end
+          // we've got a miss to handle
+        end else if (|miss_req_masked_d) begin
+          // this is a write miss, just pass through (but check whether write collides with MSHR)
+          if (miss_is_write) begin
+            // stall in case this write collides with the MSHR address
+            if (!mshr_rdwr_collision) begin
+              mem_data_req_o = 1'b1;
+              mem_data_o.rtype = DCACHE_STORE_REQ;
+              if (!mem_data_ack_i) begin
+                state_d = STORE_WAIT;
+              end
+            end
+            // this is a read miss, can only allocate 1 MSHR
+            // in case of a load_ack we can accept a new miss, since the MSHR is being cleared
+          end else if (!mshr_vld_q || load_ack) begin
+            // replay the read request in case the address has collided with MSHR during the time the request was pending
+            // i.e., the cache state may have been updated in the mean time due to a refill at the same CL address
+            if (mshr_rdrd_collision_d[miss_port_idx]) begin
+              miss_replay_o[miss_port_idx] = 1'b1;
+              // stall in case this CL address overlaps with a write TX that is in flight
+            end else if (!tx_rdwr_collision) begin
+              mem_data_req_o = 1'b1;
+              mem_data_o.rtype = DCACHE_LOAD_REQ;
+              update_lfsr = all_ways_valid & mem_data_ack_i;  // need to evict a random way
+              mshr_allocate = mem_data_ack_i;
+              if (!mem_data_ack_i) begin
+                state_d = LOAD_WAIT;
+              end
+            end
+          end
+        end
+      end
+      //////////////////////////////////
+      // wait until this request is acked
+      STORE_WAIT: begin
+        lock_reqs = 1'b1;
+        mem_data_req_o = 1'b1;
+        mem_data_o.rtype = DCACHE_STORE_REQ;
+        if (mem_data_ack_i) begin
+          state_d = IDLE;
+        end
+      end
+      //////////////////////////////////
+      // wait until this request is acked
+      LOAD_WAIT: begin
+        lock_reqs = 1'b1;
+        mem_data_req_o = 1'b1;
+        mem_data_o.rtype = DCACHE_LOAD_REQ;
+        if (mem_data_ack_i) begin
+          update_lfsr = all_ways_valid;  // need to evict a random way
+          mshr_allocate = 1'b1;
+          state_d = IDLE;
+        end
+      end
+      //////////////////////////////////
+      // only handle stores, do not accept new read requests
+      // wait until MSHR is cleared and wbuffer is empty
+      DRAIN: begin
+        mask_reads = 1'b1;
+        // these are writes, check whether they collide with MSHR
+        if (|miss_req_masked_d && !mshr_rdwr_collision) begin
+          mem_data_req_o = 1'b1;
+          mem_data_o.rtype = DCACHE_STORE_REQ;
+        end
+
+        if (wbuffer_empty_i && !mshr_vld_q) begin
+          state_d = IDLE;
+        end
+      end
+      //////////////////////////////////
+      // flush the cache
+      FLUSH: begin
+        // internal flush signal
+        flush_en = 1'b1;
+        if (flush_done) begin
+          state_d = IDLE;
+          flush_ack_o = flush_ack_q;
+          flush_ack_d = 1'b0;
+          enable_d = enable_i;  // the cache enable is only taken over once the flush has completed
+        end
+      end
+      //////////////////////////////////
+      // send out amo op request
+      AMO: begin
+        if (CVA6Cfg.RVA) begin
+          mem_data_o.rtype = DCACHE_ATOMIC_REQ;
+          amo_sel = 1'b1;
+          // if this is an LR, we need to consult the backoff counter
+          if ((amo_req_i.amo_op != AMO_LR) || sc_backoff_over) begin
+            mem_data_req_o = 1'b1;
+            if (mem_data_ack_i) begin
+              state_d = AMO_WAIT;
+            end
+          end
+        end
+      end
+      //////////////////////////////////
+      // block and wait until AMO OP returns
+      AMO_WAIT: begin
+        if (CVA6Cfg.RVA) begin
+          amo_sel = 1'b1;
+          if (amo_ack) begin
+            amo_resp_o.ack = 1'b1;
+            state_d = IDLE;
+          end
+        end
+      end
+      //////////////////////////////////
+      default: begin
+        // we should never get here
+        state_d = IDLE;
+      end
+    endcase  // state_q
+  end
+
+  ///////////////////////////////////////////////////////
+  // ff's
+  ///////////////////////////////////////////////////////
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      state_q <= FLUSH;  // the FSM enters FLUSH directly out of reset
+      cnt_q <= '0;
+      enable_q <= '0;
+      flush_ack_q <= '0;
+      mshr_vld_q <= '0;
+      mshr_vld_q1 <= '0;
+      mshr_q <= '0;
+      mshr_rdrd_collision_q <= '0;
+      miss_req_masked_q <= '0;
+      amo_req_q <= '0;
+      stores_inflight_q <= '0;
+    end else begin
+      state_q <= state_d;
+      cnt_q <= cnt_d;
+      enable_q <= enable_d;
+      flush_ack_q <= flush_ack_d;
+      mshr_vld_q <= mshr_vld_d;
+      mshr_vld_q1 <= mshr_vld_q;
+      mshr_q <= mshr_d;
+      mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
+      miss_req_masked_q <= miss_req_masked_d;
+      amo_req_q <= amo_req_d;
+      stores_inflight_q <= stores_inflight_d;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+  read_tid :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
+  else $fatal(1, "[l1 dcache missunit] TID of load response doesn't match");
+
+  read_ports :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0)
+  else $fatal(1, "[l1 dcache missunit] only last port can issue write requests");
+
+  write_port :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1])
+  else $fatal(1, "[l1 dcache missunit] last port can only issue write requests");
+
+  initial begin
+    // reject invalid parameterizations
+    assert (NumPorts >= 2)
+    else
+      $fatal(
+          1, "[l1 dcache missunit] at least two ports are required (one read port, one write port)"
+      );
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache_missunit
diff --git a/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv b/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv
new file mode 100644
index 0000000..8e9c39d
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv
@@ -0,0 +1,635 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: Michael Schaffner , ETH Zurich +// Date: 13.09.2018 +// Description: coalescing write buffer for WT dcache +// +// A couple of notes: +// +// 1) the write buffer behaves as a fully-associative cache, and is therefore coalescing. +// this cache is used by the cache readout logic to forward data to the load unit. +// +// each byte can be in the following states (valid/dirty/txblock): +// +// 0/0/0: invalid -> free entry in the buffer +// 1/1/0: valid and dirty, Byte is hence not part of TX in-flight +// 1/0/1: valid and not dirty, Byte is part of a TX in-flight +// 1/1/1: valid and, part of tx and dirty. this means that the byte has been +// overwritten while in TX and needs to be retransmitted once the write of that byte returns. +// 1/0/0: this would represent a clean state, but is never reached in the wbuffer in the current implementation. +// this is because when a TX returns, and the byte is in state [1/0/1], it is written to cache if needed and +// its state is immediately cleared to 0/x/x. +// +// this state is used to distinguish between bytes that have been written and not +// yet sent to the memory subsystem, and bytes that are part of a transaction. +// +// 2) further, each word in the write buffer has a cache states (checked, hit_oh) +// +// checked == 0: unknown cache state +// checked == 1: cache state has been looked up, valid way is stored in "hit_oh" +// +// cache invalidations/refills affecting a particular word will clear its word state to 0, +// so another lookup has to be done. note that these lookups are triggered as soon as there is +// a valid word with checked == 0 in the write buffer. +// +// 3) returning write ACKs trigger a cache update if the word is present in the cache, and evict that +// word from the write buffer. if the word is not allocated to the cache, it is just evicted from the write buffer. +// if the word cache state is VOID, the pipeline is stalled until it is clear whether that word is in the cache or not. 
+//
+// 4) we handle NC writes using the writebuffer circuitry. upon an NC request, the writebuffer will first be drained.
+//    then, only the NC word is written into the write buffer and no further write requests are acknowledged until that
+//    word has been evicted from the write buffer.
+
+
+module wt_dcache_wbuffer  // write buffer: merges core writes per word, sends dirty bytes to the miss unit, updates the cache on return
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+
+    input logic cache_en_i,  // writes are treated as NC if disabled
+    output logic empty_o,  // asserted if no data is present in write buffer
+    output logic not_ni_o,  // asserted if no ni data is present in write buffer
+    // core request ports
+    input dcache_req_i_t req_port_i,
+    output dcache_req_o_t req_port_o,
+    // interface to miss handler
+    input logic miss_ack_i,
+    output logic [riscv::PLEN-1:0] miss_paddr_o,
+    output logic miss_req_o,
+    output logic miss_we_o,  // always 1 here
+    output riscv::xlen_t miss_wdata_o,
+    output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,
+    output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,  // unused here (set to 0)
+    output logic miss_nc_o,  // request to I/O space
+    output logic [2:0] miss_size_o,  // transfer size: {1'b0, toSize64/toSize32 of the dirty byte mask}
+    output logic [CACHE_ID_WIDTH-1:0] miss_id_o,  // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
+    // write responses from memory
+    input logic miss_rtrn_vld_i,
+    input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i,  // transaction ID to clear
+    // cache read interface
+    output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o,  // tag in - comes one cycle later
+    output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
+    output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
+    output logic rd_req_o,  // read the word at offset off_i[:3] in all ways
+    output logic rd_tag_only_o,  // set to 1 here as we do not have to read the data arrays
+    input logic rd_ack_i,
+    input riscv::xlen_t rd_data_i,  // unused
+    input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,  // unused
+    input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
+    // cacheline writes
+    input logic wr_cl_vld_i,
+    input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
+    // cache word write interface
+    output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
+    input logic wr_ack_i,
+    output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
+    output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
+    output riscv::xlen_t wr_data_o,
+    output logic [(riscv::XLEN/8)-1:0] wr_data_be_o,
+    output logic [DCACHE_USER_WIDTH-1:0] wr_user_o,
+    // to forwarding logic and miss unit
+    output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
+    output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o,  // used to check for address collisions with read operations
+    output logic [DCACHE_MAX_TX-1:0] tx_vld_o
+);
+
+  tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;  // per-TX-slot status: vld, ptr (buffer entry) and be (bytes sent)
+  wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
+  logic [DCACHE_WBUF_DEPTH-1:0] valid;  // entry holds at least one valid byte
+  logic [DCACHE_WBUF_DEPTH-1:0] dirty;  // entry has bytes ready for transmission (see bdirty)
+  logic [DCACHE_WBUF_DEPTH-1:0] tocheck;  // entry is valid but its cache hit status is not known yet
+  logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
+  //logic     [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty;
+  logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty;
+
+  logic [$clog2(DCACHE_WBUF_DEPTH)-1:0]
+      next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
+  logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
+
+  logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off;
+  logic [(riscv::XLEN/8)-1:0] tx_be;
+  logic [riscv::PLEN-1:0] wr_paddr, rd_paddr, extract_tag;
+  logic [DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q;
+  logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q;
+  logic check_en_d, check_en_q, check_en_q1;
+  logic full, dirty_rd_en, rdy;
+  logic rtrn_empty, evict;
+  logic [DCACHE_WBUF_DEPTH-1:0] ni_pending_d, ni_pending_q;
+  logic wbuffer_wren;  // NOTE(review): only written in p_buffer, never read inside this module
+  logic free_tx_slots;
+
+  logic wr_cl_vld_q, wr_cl_vld_d;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d;
+
+  logic [riscv::PLEN-1:0] debug_paddr[DCACHE_WBUF_DEPTH-1:0];
+
+  wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux;
+
+  ///////////////////////////////////////////////////////
+  // misc
+  ///////////////////////////////////////////////////////
+  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag;
+  logic is_nc_miss;
+  logic is_ni;
+  assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH];
+  assign is_nc_miss = !config_pkg::is_inside_cacheable_regions(
+      CVA6Cfg,
+      {
+        {64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}}
+      }
+  );
+  assign miss_nc_o = !cache_en_i || is_nc_miss;
+  // Non-idempotent if request goes to NI region
+  assign is_ni = config_pkg::is_inside_nonidempotent_regions(
+      CVA6Cfg,
+      {
+        {64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}},
+        req_port_i.address_tag,
+        {DCACHE_INDEX_WIDTH{1'b0}}
+      }
+  );
+
+  assign miss_we_o = 1'b1;
+  assign miss_vld_bits_o = '0;
+  assign wbuffer_data_o = wbuffer_q;
+
+  for (genvar k = 0; k < DCACHE_MAX_TX; k++) begin : gen_tx_vld
+    assign tx_vld_o[k] = tx_stat_q[k].vld;
+    assign tx_paddr_o[k] = {
+      {riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[tx_stat_q[k].ptr].wtag << riscv::XLEN_ALIGN_BYTES
+    };
+  end
+
+  ///////////////////////////////////////////////////////
+  // openpiton does not understand byte enable sigs
+  // need to convert to the four cases:
+  // 00: byte
+  // 01: halfword
+  // 10: word
+  // 11: dword
+  // non-contiguous writes need to be serialized!
+  // e.g. merged dwords with BE like this: 8'b01001100
+  ///////////////////////////////////////////////////////
+
+  // get byte offset
+  lzc #(
+      .WIDTH(riscv::XLEN / 8)
+  ) i_vld_bdirty (
+      .in_i   (bdirty[dirty_ptr]),
+      .cnt_o  (bdirty_off),
+      .empty_o()
+  );
+
+  // add the offset to the physical base address of this buffer entry
+  assign miss_paddr_o = {wbuffer_dirty_mux.wtag, bdirty_off};
+  assign miss_id_o = tx_id;
+
+  // is there any dirty word to be transmitted, and is there a free TX slot?
+  assign miss_req_o = (|dirty) && free_tx_slots;
+
+  // get size of aligned words, and the corresponding byte enables
+  // note: openpiton can only handle aligned offsets + size, and hence
+  // we have to split unaligned data into multiple transfers (see toSize64)
+  // e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000
+  if (riscv::IS_XLEN64) begin : gen_size_64b
+    assign miss_size_o = {1'b0, toSize64(bdirty[dirty_ptr])};
+  end else begin : gen_size_32b
+    assign miss_size_o = {1'b0, toSize32(bdirty[dirty_ptr])};
+  end
+
+  // replicate transfers shorter than a dword
+  assign miss_wdata_o = riscv::IS_XLEN64 ? repData64(
+      wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
+  ) : repData32(
+      wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
+  );
+  if (ariane_pkg::DATA_USER_EN) begin
+    assign miss_wuser_o = riscv::IS_XLEN64 ? repData64(
+        wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
+    ) : repData32(
+        wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
+    );
+  end else begin
+    assign miss_wuser_o = '0;
+  end
+
+  assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8(
+      bdirty_off, miss_size_o[1:0]
+  ) : to_byte_enable4(
+      bdirty_off, miss_size_o[1:0]
+  );
+
+  ///////////////////////////////////////////////////////
+  // TX status registers and ID counters
+  ///////////////////////////////////////////////////////
+
+  // TODO: make this fall through if timing permits it
+  fifo_v3 #(
+      .FALL_THROUGH(1'b0),
+      .DATA_WIDTH  ($clog2(DCACHE_MAX_TX)),  // NOTE(review): data_i/data_o below are CACHE_ID_WIDTH wide - confirm both widths agree
+      .DEPTH       (DCACHE_MAX_TX)
+  ) i_rtrn_id_fifo (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (),
+      .empty_o   (rtrn_empty),
+      .usage_o   (),
+      .data_i    (miss_rtrn_id_i),
+      .push_i    (miss_rtrn_vld_i),
+      .data_o    (rtrn_id),
+      .pop_i     (evict)
+  );
+
+  always_comb begin : p_tx_stat
+    tx_stat_d = tx_stat_q;
+    evict = 1'b0;
+    wr_req_o = '0;
+
+    // retire the returned TX once the hit status of its buffer entry is known (checked)
+    if ((!rtrn_empty) && wbuffer_q[rtrn_ptr].checked) begin
+      // check if data is clean and can be written, otherwise skip
+      // check if CL is present, otherwise skip
+      if ((|wr_data_be_o) && (|wbuffer_q[rtrn_ptr].hit_oh)) begin
+        wr_req_o = wbuffer_q[rtrn_ptr].hit_oh;
+        if (wr_ack_i) begin
+          evict = 1'b1;
+          tx_stat_d[rtrn_id].vld = 1'b0;
+        end
+      end else begin
+        evict = 1'b1;
+        tx_stat_d[rtrn_id].vld = 1'b0;
+      end
+    end
+
+    // allocate a new entry (placed last, so it wins if tx_id == rtrn_id; see the tx_status assertion)
+    if (dirty_rd_en) begin
+      tx_stat_d[tx_id].vld = 1'b1;
+      tx_stat_d[tx_id].ptr = dirty_ptr;
+      tx_stat_d[tx_id].be = tx_be;
+    end
+  end
+
+  assign free_tx_slots = |(~tx_vld_o);
+
+  // arbitrate among the currently unused TX slots; idx_o is the ID of the next transaction (miss_id_o)
+  rr_arb_tree #(
+      .NumIn    (DCACHE_MAX_TX),
+      .LockIn   (1'b1),
+      .DataWidth(1)
+  ) i_tx_id_rr (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (~tx_vld_o),
+      .gnt_o  (),
+      .data_i ('0),
+      .gnt_i  (dirty_rd_en),
+      .req_o  (),
+      .data_o (),
+      .idx_o  (tx_id)
+  );
+
+  ///////////////////////////////////////////////////////
+  // cache readout & update
+  ///////////////////////////////////////////////////////
+
+  assign extract_tag = rd_paddr >> DCACHE_INDEX_WIDTH;
+  assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0];
+
+  // trigger TAG readout in cache
+  assign rd_tag_only_o = 1'b1;
+  assign rd_paddr = {
+    {riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_check_mux.wtag << riscv::XLEN_ALIGN_BYTES
+  };
+  assign rd_req_o = |tocheck;
+  assign rd_tag_o = rd_tag_q;  //delay by one cycle
+  assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
+  assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
+  assign check_en_d = rd_req_o & rd_ack_i;
+
+  // cache update port
+  assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
+  // if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache
+  // when the TX returns
+  assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
+  assign wr_paddr = {
+    {riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[rtrn_ptr].wtag << riscv::XLEN_ALIGN_BYTES
+  };
+  assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
+  assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
+  assign wr_data_o = wbuffer_q[rtrn_ptr].data;
+  assign wr_user_o = wbuffer_q[rtrn_ptr].user;
+
+
+  ///////////////////////////////////////////////////////
+  // readout of status bits, index calculation
+  ///////////////////////////////////////////////////////
+
+  logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp;
+
+  assign wr_cl_vld_d = wr_cl_vld_i;
+  assign wr_cl_idx_d = wr_cl_idx_i;
+
+  for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_flags
+    // only for debug, will be pruned
+    if (CVA6Cfg.DebugEn) begin
+      assign debug_paddr[k] = {
+        {riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[k].wtag << riscv::XLEN_ALIGN_BYTES
+      };
+    end
+
+    // dirty bytes that are ready for transmission.
+    // note that we cannot retransmit a word that is already in-flight
+    // since the multiple transactions might overtake each other in the memory system!
+    assign bdirty[k] = (|wbuffer_q[k].txblock) ? '0 : wbuffer_q[k].dirty & wbuffer_q[k].valid;
+
+
+    assign dirty[k] = |bdirty[k];
+    assign valid[k] = |wbuffer_q[k].valid;
+    assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]});
+
+    // checks if an invalidation/cache refill hits a particular word
+    // note: an invalidation can hit multiple words!
+    // need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal...
+    assign wtag_comp[k] = wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES-1:DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES];
+    assign inval_hit[k] = (wr_cl_vld_d & valid[k] & (wtag_comp[k] == wr_cl_idx_d)) |
+                          (wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q));
+
+    // these words have to be looked up in the cache
+    assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
+  end
+
+  assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr;  // merge into the matching entry, else take a free one
+  assign rdy = (|wbuffer_hit_oh) | (~full);
+
+  // next free entry in the buffer
+  lzc #(
+      .WIDTH(DCACHE_WBUF_DEPTH)
+  ) i_vld_lzc (
+      .in_i   (~valid),
+      .cnt_o  (next_ptr),
+      .empty_o(full)
+  );
+
+  // get index of hit
+  lzc #(
+      .WIDTH(DCACHE_WBUF_DEPTH)
+  ) i_hit_lzc (
+      .in_i   (wbuffer_hit_oh),
+      .cnt_o  (hit_ptr),
+      .empty_o()
+  );
+
+  // next dirty word to serve
+  rr_arb_tree #(
+      .NumIn   (DCACHE_WBUF_DEPTH),
+      .LockIn  (1'b1),
+      .DataType(wbuffer_t)
+  ) i_dirty_rr (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (dirty),
+      .gnt_o  (),
+      .data_i (wbuffer_q),
+      .gnt_i  (dirty_rd_en),
+      .req_o  (),
+      .data_o (wbuffer_dirty_mux),
+      .idx_o  (dirty_ptr)
+  );
+
+  // next word to lookup in the cache
+  rr_arb_tree #(
+      .NumIn   (DCACHE_WBUF_DEPTH),
+      .DataType(wbuffer_t)
+  ) i_clean_rr (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (tocheck),
+      .gnt_o  (),
+      .data_i (wbuffer_q),
+      .gnt_i  (check_en_d),
+      .req_o  (),
+      .data_o (wbuffer_check_mux),
+      .idx_o  (check_ptr_d)
+  );
+
+  ///////////////////////////////////////////////////////
+  // update logic
+  ///////////////////////////////////////////////////////
+
+  assign req_port_o.data_rvalid = '0;
+  assign req_port_o.data_rdata = '0;
+  assign req_port_o.data_ruser = '0;
+  assign req_port_o.data_rid = '0;
+
+  assign rd_hit_oh_d = rd_hit_oh_i;
+
+  logic ni_inside, ni_conflict;
+  assign ni_inside = |ni_pending_q;
+  assign ni_conflict = CVA6Cfg.NonIdemPotenceEn && is_ni && ni_inside;
+  assign not_ni_o = !ni_inside;
+  assign empty_o = !(|valid);
+
+  // TODO: rewrite and separate into MUXES and write strobe logic
+  always_comb begin : p_buffer  // NOTE: the update steps below are order dependent, later assignments override earlier ones
+    wbuffer_d = wbuffer_q;
+    ni_pending_d = ni_pending_q;
+    dirty_rd_en = 1'b0;
+    req_port_o.data_gnt = 1'b0;
+    wbuffer_wren = 1'b0;
+
+    // TAG lookup returns, mark corresponding word
+    if (check_en_q1) begin
+      if (|wbuffer_q[check_ptr_q1].valid) begin
+        wbuffer_d[check_ptr_q1].checked = 1'b1;
+        wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q;
+      end
+    end
+
+    // if an invalidation or cache line refill comes in and hits on the write buffer,
+    // we have to discard our knowledge of the corresponding cacheline state
+    for (int k = 0; k < DCACHE_WBUF_DEPTH; k++) begin
+      if (inval_hit[k]) begin
+        wbuffer_d[k].checked = 1'b0;
+      end
+    end
+
+    // once TX write response came back, we can clear the TX block. if it was not dirty, we
+    // can completely evict it - otherwise we have to leave it there for retransmission
+    if (evict) begin
+      for (int k = 0; k < (riscv::XLEN / 8); k++) begin
+        if (tx_stat_q[rtrn_id].be[k]) begin
+          wbuffer_d[rtrn_ptr].txblock[k] = 1'b0;
+          if (!wbuffer_q[rtrn_ptr].dirty[k]) begin
+            wbuffer_d[rtrn_ptr].valid[k] = 1'b0;
+
+            // NOTE: this is not strictly needed, but makes it much
+            // easier to debug, since no invalid data remains in the buffer
+            // wbuffer_d[rtrn_ptr].data[k*8 +:8] = '0;
+          end
+        end
+      end
+      // if all bytes are evicted, clear the cache status flag
+      if (wbuffer_d[rtrn_ptr].valid == 0) begin
+        wbuffer_d[rtrn_ptr].checked = 1'b0;
+        ni_pending_d[rtrn_ptr] = 1'b0;
+      end
+    end
+
+    // mark bytes sent out to the memory system
+    if (miss_req_o && miss_ack_i) begin
+      dirty_rd_en = 1'b1;
+      for (int k = 0; k < (riscv::XLEN / 8); k++) begin
+        if (tx_be[k]) begin
+          wbuffer_d[dirty_ptr].dirty[k] = 1'b0;
+          wbuffer_d[dirty_ptr].txblock[k] = 1'b1;
+        end
+      end
+    end
+
+    // write new word into the buffer
+    if (req_port_i.data_req && rdy) begin
+      // in case we have an NI address, need to drain the buffer first
+      // in case we are serving an NI address, we block until it is written to memory
+      if (!ni_conflict) begin  //empty of NI operations
+        wbuffer_wren = 1'b1;
+
+        req_port_o.data_gnt = 1'b1;
+        ni_pending_d[wr_ptr] = is_ni;
+
+        wbuffer_d[wr_ptr].checked = 1'b0;
+        wbuffer_d[wr_ptr].wtag = {
+          req_port_i.address_tag,
+          req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]
+        };
+
+        // mark bytes as dirty
+        for (int k = 0; k < (riscv::XLEN / 8); k++) begin
+          if (req_port_i.data_be[k]) begin
+            wbuffer_d[wr_ptr].valid[k] = 1'b1;
+            wbuffer_d[wr_ptr].dirty[k] = 1'b1;
+            wbuffer_d[wr_ptr].data[k*8+:8] = req_port_i.data_wdata[k*8+:8];
+            if (ariane_pkg::DATA_USER_EN) begin
+              wbuffer_d[wr_ptr].user[k*8+:8] = req_port_i.data_wuser[k*8+:8];
+            end else begin
+              wbuffer_d[wr_ptr].user[k*8+:8] = '0;
+            end
+          end
+        end
+      end
+    end
+  end
+
+
+  ///////////////////////////////////////////////////////
+  // ff's
+  ///////////////////////////////////////////////////////
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      wbuffer_q <= '{default: '0};
+      tx_stat_q <= '{default: '0};
+      ni_pending_q <= '0;
+      check_ptr_q <= '0;
+      check_ptr_q1 <= '0;
+      check_en_q <= '0;
+      check_en_q1 <= '0;
+      rd_tag_q <= '0;
+      rd_hit_oh_q <= '0;
+      wr_cl_vld_q <= '0;
+      wr_cl_idx_q <= '0;
+    end else begin
+      wbuffer_q <= wbuffer_d;
+      tx_stat_q <= tx_stat_d;
+      ni_pending_q <= ni_pending_d;
+      check_ptr_q <= check_ptr_d;
+      check_ptr_q1 <= check_ptr_q;  // check_ptr_d delayed by two registers
+      check_en_q <= check_en_d;
+      check_en_q1 <= check_en_q;  // check_en_d delayed by two registers, consumed in p_buffer together with rd_hit_oh_q
+      rd_tag_q <= rd_tag_d;
+      rd_hit_oh_q <= rd_hit_oh_d;  // rd_hit_oh_i delayed by one register
+      wr_cl_vld_q <= wr_cl_vld_d;
+      wr_cl_idx_q <= wr_cl_idx_d;
+    end
+  end
+
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+  hot1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> $onehot0(
+      wbuffer_hit_oh
+  ))
+  else $fatal(1, "[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1");
+
+  tx_status :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) evict && miss_ack_i && miss_req_o |-> (tx_id != rtrn_id))
+  else $fatal(1, "[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");
+
+  tx_valid0 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> tx_stat_q[rtrn_id].vld)
+  else $fatal(1, "[l1 dcache wbuffer] evicting invalid transaction slot");
+
+  tx_valid1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid)
+  else $fatal(1, "[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");
+
+  write_full :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((!full) || (|wbuffer_hit_oh)))
+  else $fatal(1, "[l1 dcache wbuffer] cannot write if full or no hit");
+
+  unused0 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.tag_valid)
+  else $fatal(1, "[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted");
+
+  unused1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.kill_req)
+  else $fatal(1, "[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");
+
+  for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_assert1
+    for (genvar j = 0; j < (riscv::XLEN / 8); j++) begin : gen_assert2
+      byteStates :  // every byte must be in one of the listed {valid, dirty, txblock} combinations
+      assert property (
+        @(posedge clk_i) disable iff (!rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} )
+      else
+        $fatal(
+            1,
+            "[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
+            j,
+            k,
+            wbuffer_q[k].valid[j],
+            wbuffer_q[k].dirty[j],
+            wbuffer_q[k].txblock[j]
+        );
+    end
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache_wbuffer
diff --git a/test/type_param/core/commit_stage.sv b/test/type_param/core/commit_stage.sv
new file mode 100644
index 0000000..8f168be
--- /dev/null
+++ b/test/type_param/core/commit_stage.sv
@@ -0,0 +1,298 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 15.04.2017
+// Description: Commits to the architectural state resulting from the scoreboard.
+
+
+module commit_stage
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic halt_i,  // request to halt the core
+    input logic flush_dcache_i,  // request to flush dcache -> also flush the pipeline
+    output exception_t exception_o,  // take exception to controller
+    output logic dirty_fp_state_o,  // mark the F state as dirty
+    input logic single_step_i,  // we are in single step debug mode
+    // from scoreboard
+    input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,  // the instruction we want to commit
+    output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o,  // acknowledge that we are indeed committing
+    // to register file
+    output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o,  // register file write address
+    output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o,  // register file write data
+    output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o,  // register file write enable
+    output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o,  // floating point register enable
+    // Atomic memory operations
+    input amo_resp_t amo_resp_i,  // result of AMO operation
+    // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
+    output logic [riscv::VLEN-1:0] pc_o,
+    // to/from CSR file
+    output fu_op csr_op_o,  // decoded CSR operation
+    output riscv::xlen_t csr_wdata_o,  // data to write to CSR
+    input riscv::xlen_t csr_rdata_i,  // data to read from CSR
+    input exception_t csr_exception_i,  // exception or interrupt occurred in CSR stage (the same as commit)
+    output logic csr_write_fflags_o,  // write the fflags CSR
+    // commit signals to ex
+    output logic commit_lsu_o,  // commit the pending store
+    input logic commit_lsu_ready_i,  // commit buffer of LSU is ready
+    output logic [TRANS_ID_BITS-1:0] commit_tran_id_o,  // transaction id of first commit port
+    output logic amo_valid_commit_o,  // valid AMO in commit stage
+    input logic no_st_pending_i,  // there is no store pending
+    output logic commit_csr_o,  // commit the pending CSR instruction
+    output logic fence_i_o,  // flush I$ and pipeline
+    output logic fence_o,  // flush D$ and pipeline
+    output logic flush_commit_o,  // request a pipeline flush
+    output logic sfence_vma_o  // flush TLBs and pipeline
+);
+
+  // ila_0 i_ila_commit (
+  //     .clk(clk_i), // input wire clk
+  //     .probe0(commit_instr_i[0].pc), // input wire [63:0]  probe0
+  //     .probe1(commit_instr_i[1].pc), // input wire [63:0]  probe1
+  //     .probe2(commit_instr_i[0].valid), // input wire [0:0]  probe2
+  //     .probe3(commit_instr_i[1].valid), // input wire [0:0]  probe3
+  //     .probe4(commit_ack_o[0]), // input wire [0:0]  probe4
+  //     .probe5(commit_ack_o[0]), // input wire [0:0]  probe5
+  //     .probe6(1'b0), // input wire [0:0]  probe6
+  //     .probe7(1'b0), // input wire [0:0]  probe7
+  //     .probe8(1'b0), // input wire [0:0]  probe8
+  //     .probe9(1'b0) // input wire [0:0]  probe9
+  // );
+
+  for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr
+    assign waddr_o[i] = commit_instr_i[i].rd[4:0];
+  end
+
+  assign pc_o = commit_instr_i[0].pc;
+  // Dirty the FP state if we are committing anything related to the FPU
+  always_comb begin : dirty_fp_state
+    dirty_fp_state_o = 1'b0;
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
+          commit_instr_i[i].op
+      )));
+      // Check if we issued a vector floating-point instruction to the accelerator
+      dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp;  // NOTE(review): not qualified by commit_ack_o[i], unlike the term above - confirm intended
+    end
+  end
+
+  assign commit_tran_id_o = commit_instr_i[0].trans_id;
+
+  logic instr_0_is_amo;
+  assign instr_0_is_amo = is_amo(commit_instr_i[0].op);
+  // -------------------
+  // Commit Instruction
+  // -------------------
+  // write register file or commit instruction in LSU or CSR Buffer
+  always_comb begin : commit  // NOTE: order dependent, the later special cases override commit_ack_o[0] / we_gpr_o[0] set earlier
+    // default assignments
+    commit_ack_o[0] = 1'b0;
+
+    amo_valid_commit_o = 1'b0;
+
+    we_gpr_o[0] = 1'b0;
+    we_fpr_o = '{default: 1'b0};
+    commit_lsu_o = 1'b0;
+    commit_csr_o = 1'b0;
+    // amos will commit on port 0
+    wdata_o[0] = (CVA6Cfg.RVA && amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
+    csr_op_o = ADD;  // this corresponds to a CSR NOP
+    csr_wdata_o = {riscv::XLEN{1'b0}};
+    fence_i_o = 1'b0;
+    fence_o = 1'b0;
+    sfence_vma_o = 1'b0;
+    csr_write_fflags_o = 1'b0;
+    flush_commit_o = 1'b0;
+
+    // we will not commit the instruction if we took an exception
+    // and we do not commit the instruction if we requested a halt
+    if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin
+      // by default acknowledge the commit and enable the write of the destination register;
+      // the special cases below may take back the acknowledge or the write enable
+      commit_ack_o[0] = 1'b1;
+      if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin
+        we_fpr_o[0] = 1'b1;
+      end else begin
+        we_gpr_o[0] = 1'b1;
+      end
+      // check whether the instruction we retire was a store
+      if ((!CVA6Cfg.RVA && commit_instr_i[0].fu == STORE) || (CVA6Cfg.RVA && commit_instr_i[0].fu == STORE && !instr_0_is_amo)) begin
+        // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
+        if (commit_lsu_ready_i) begin
+          commit_ack_o[0] = 1'b1;  // already 1'b1 from the default a few lines above
+          commit_lsu_o = 1'b1;
+          // stall in case the store buffer is not able to accept anymore instructions
+        end else begin
+          commit_ack_o[0] = 1'b0;
+        end
+      end
+      // ---------
+      // FPU Flags
+      // ---------
+      if (CVA6Cfg.FpPresent) begin
+        if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
+          // write the CSR with potential exception flags from retiring floating point instruction
+          csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
+          csr_write_fflags_o = 1'b1;
+          commit_ack_o[0] = 1'b1;
+        end
+      end
+      // ---------
+      // CSR Logic
+      // ---------
+      // check whether the instruction we retire was a CSR instruction and it did not
+      // throw an exception
+      if (commit_instr_i[0].fu == CSR) begin
+        // write the CSR file
+        csr_op_o = commit_instr_i[0].op;
+        csr_wdata_o = commit_instr_i[0].result;
+        if (!csr_exception_i.valid) begin
+          commit_csr_o = 1'b1;
+          wdata_o[0] = csr_rdata_i;
+          commit_ack_o[0] = 1'b1;
+        end else begin
+          commit_ack_o[0] = 1'b0;
+          we_gpr_o[0] = 1'b0;
+        end
+      end
+      // ------------------
+      // SFENCE.VMA Logic
+      // ------------------
+      // sfence.vma is idempotent so we can safely re-execute it after returning
+      // from interrupt service routine
+      // check if this instruction was a SFENCE_VMA
+      if (CVA6Cfg.RVS && commit_instr_i[0].op == SFENCE_VMA) begin
+        // no store pending so we can flush the TLBs and pipeline
+        sfence_vma_o = no_st_pending_i;
+        // wait for the store buffer to drain until flushing the pipeline
+        commit_ack_o[0] = no_st_pending_i;
+      end
+      // ------------------
+      // FENCE.I Logic
+      // ------------------
+      // fence.i is idempotent so we can safely re-execute it after returning
+      // from interrupt service routine
+      // Fence synchronizes data and instruction streams. That means that we need to flush the private icache
+      // and the private dcache. This is the most expensive instruction.
+      if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && DCACHE_TYPE == int'(config_pkg::WB) && commit_instr_i[0].fu != STORE)) begin
+        commit_ack_o[0] = no_st_pending_i;
+        // tell the controller to flush the I$
+        fence_i_o = no_st_pending_i;
+      end
+      // ------------------
+      // FENCE Logic
+      // ------------------
+      // fence is idempotent so we can safely re-execute it after returning
+      // from interrupt service routine
+      if (commit_instr_i[0].op == FENCE) begin
+        commit_ack_o[0] = no_st_pending_i;
+        // tell the controller to flush the D$
+        fence_o = no_st_pending_i;
+      end
+      // ------------------
+      // AMO
+      // ------------------
+      if (CVA6Cfg.RVA && instr_0_is_amo) begin
+        // AMO finished
+        commit_ack_o[0] = amo_resp_i.ack;
+        // flush the pipeline
+        flush_commit_o = amo_resp_i.ack;
+        amo_valid_commit_o = 1'b1;
+        we_gpr_o[0] = amo_resp_i.ack;
+      end
+    end
+
+    if (CVA6Cfg.NrCommitPorts > 1) begin
+
+      commit_ack_o[1] = 1'b0;
+      we_gpr_o[1] = 1'b0;
+      wdata_o[1] = commit_instr_i[1].result;
+
+      // -----------------
+      // Commit Port 2 (index 1)
+      // -----------------
+      // check if the second instruction can be committed as well and the first wasn't a CSR instruction
+      // also if we are in single step mode don't retire the second instruction
+      if (commit_ack_o[0] && commit_instr_i[1].valid
+                                && !halt_i
+                                && !(commit_instr_i[0].fu inside {CSR})
+                                && !flush_dcache_i
+                                && !instr_0_is_amo
+                                && !single_step_i) begin
+        // only if the first instruction didn't throw an exception and this instruction won't throw an exception
+        // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
+        if (!exception_o.valid && !commit_instr_i[1].ex.valid
+                                       && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin
+
+          if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op)) we_fpr_o[1] = 1'b1;
+          else we_gpr_o[1] = 1'b1;
+
+          commit_ack_o[1] = 1'b1;
+
+          // additionally check if we are retiring an FPU instruction because we need to make sure that we write all
+          // exception flags
+          if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
+            if (csr_write_fflags_o)  // port 0 already writes fflags: OR the flags of both instructions
+              csr_wdata_o = {
+                {riscv::XLEN - 5{1'b0}},
+                (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])
+              };
+            else csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]};
+
+            csr_write_fflags_o = 1'b1;
+          end
+        end
+      end
+    end
+  end
+
+  // -----------------------------
+  // Exception & Interrupt Logic
+  // -----------------------------
+  // here we know for sure that we are taking the exception
+  always_comb begin : exception_handling
+    // Multiple simultaneous interrupts and traps at the same privilege level are handled in the following decreasing
+    // priority order: external interrupts, software interrupts, timer interrupts, then finally any synchronous traps. (1.10 p.30)
+    // interrupts are correctly prioritized in the CSR reg file, exceptions are prioritized here
+    exception_o.valid = 1'b0;
+    exception_o.cause = '0;
+    exception_o.tval = '0;
+    // we need a valid instruction in the commit stage
+    if (commit_instr_i[0].valid) begin
+      // ------------------------
+      // check for CSR exception
+      // ------------------------
+      if (csr_exception_i.valid) begin
+        exception_o = csr_exception_i;
+        // if no earlier exception happened the commit instruction will still contain
+        // the instruction bits from the ID stage. If an earlier exception happened we don't care
+        // as we will overwrite it anyway in the next if-block below
+        exception_o.tval = commit_instr_i[0].ex.tval;
+      end
+      // ------------------------
+      // Earlier Exceptions
+      // ------------------------
+      // but we give precedence to exceptions which happened earlier e.g.: instruction page
+      // faults for example
+      if (commit_instr_i[0].ex.valid) begin
+        exception_o = commit_instr_i[0].ex;
+      end
+    end
+    // Don't take any exceptions if:
+    // - we halted the processor
+    if (halt_i) begin
+      exception_o.valid = 1'b0;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/compressed_decoder.sv b/test/type_param/core/compressed_decoder.sv
new file mode 100644
index 0000000..c218a83
--- /dev/null
+++ b/test/type_param/core/compressed_decoder.sv
@@ -0,0 +1,935 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License. //
+//
+// Author: Florian Zaruba - zarubaf@iis.ee.ethz.ch
+// Engineer: Sven Stucki - svstucki@student.ethz.ch
+//
+// Design Name: Compressed instruction decoder
+// Project Name: zero-riscy
+// Language: SystemVerilog
+//
+// Description: Decodes RISC-V compressed instructions into their RV32
+// equivalent. This module is fully combinatorial.
+// Expands a 16-bit RVC encoding found on instr_i into the equivalent 32-bit instruction on instr_o.
+// Purely combinational; optional expansions are gated by CVA6Cfg (FpPresent, RVZCB, RVB) and riscv::IS_XLEN64.
+module compressed_decoder #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic [31:0] instr_i,          // instruction word to decode; instr_i[1:0] selects the quadrant
+    output logic [31:0] instr_o,          // expanded instruction; equals instr_i when not compressed or illegal
+    output logic        illegal_instr_o,  // high for reserved encodings or extensions disabled in CVA6Cfg
+    output logic        is_compressed_o   // high unless instr_i[1:0] hits the "normal instruction" default arm
+);
+
+  // -------------------
+  // Compressed Decoder
+  // -------------------
+  always_comb begin
+    illegal_instr_o = 1'b0;
+    instr_o         = '0;
+    is_compressed_o = 1'b1;
+    instr_o         = instr_i;
+
+    // I: | imm[11:0] | rs1 | funct3 | rd | opcode |
+    // S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode |
+    unique case (instr_i[1:0])
+      // C0
+      riscv::OpcodeC0: begin
+        unique case (instr_i[15:13])
+          riscv::OpcodeC0Addi4spn: begin
+            // c.addi4spn -> addi rd', x2, imm
+            instr_o = {
+              2'b0,
+              instr_i[10:7],
+              instr_i[12:11],
+              instr_i[5],
+              instr_i[6],
+              2'b00,
+              5'h02,
+              3'b000,
+              2'b01,
+              instr_i[4:2],
+              riscv::OpcodeOpImm
+            };
+            if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
+          end
+
+          riscv::OpcodeC0Fld: begin
+            if (CVA6Cfg.FpPresent) begin
+              // c.fld -> fld rd', imm(rs1')
+              // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
+              instr_o = {
+                4'b0,
+                instr_i[6:5],
+                instr_i[12:10],
+                3'b000,
+                2'b01,
+                instr_i[9:7],
+                3'b011,
+                2'b01,
+                instr_i[4:2],
+                riscv::OpcodeLoadFp
+              };
+            end else begin
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC0Lw: begin
+            // c.lw -> lw rd', imm(rs1')
+            instr_o = {
+              5'b0,
+              instr_i[5],
+              instr_i[12:10],
+              instr_i[6],
+              2'b00,
+              2'b01,
+              instr_i[9:7],
+              3'b010,
+              2'b01,
+              instr_i[4:2],
+              riscv::OpcodeLoad
+            };
+          end
+
+          riscv::OpcodeC0Ld: begin
+            // RV64
+            // c.ld -> ld rd', imm(rs1')
+            // RV32
+            // c.flw -> flw fprd', imm(rs1')
+            if (riscv::IS_XLEN64) begin
+              // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
+              instr_o = {
+                4'b0,
+                instr_i[6:5],
+                instr_i[12:10],
+                3'b000,
+                2'b01,
+                instr_i[9:7],
+                3'b011,
+                2'b01,
+                instr_i[4:2],
+                riscv::OpcodeLoad
+              };
+            end else begin
+              if (CVA6Cfg.FpPresent) begin
+                // CFLW: | funct3 (change to LW) | imm[5:3] | rs1' | imm[2|6] | rd' | C0 |
+                instr_o = {
+                  5'b0,
+                  instr_i[5],
+                  instr_i[12:10],
+                  instr_i[6],
+                  2'b00,
+                  2'b01,
+                  instr_i[9:7],
+                  3'b010,
+                  2'b01,
+                  instr_i[4:2],
+                  riscv::OpcodeLoadFp
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end
+          end
+
+          riscv::OpcodeC0Zcb: begin
+            if (CVA6Cfg.RVZCB) begin
+              unique case (instr_i[12:10])
+                3'b000: begin
+                  // c.lbu -> lbu rd', uimm(rs1')
+                  instr_o = {
+                    10'b0,
+                    instr_i[5],
+                    instr_i[6],
+                    2'b01,
+                    instr_i[9:7],
+                    3'b100,
+                    2'b01,
+                    instr_i[4:2],
+                    riscv::OpcodeLoad
+                  };
+                end
+
+                3'b001: begin
+                  if (instr_i[6]) begin
+                    // c.lh -> lh rd', uimm(rs1')
+                    instr_o = {
+                      10'b0,
+                      instr_i[5],
+                      1'b0,
+                      2'b01,
+                      instr_i[9:7],
+                      3'b001,
+                      2'b01,
+                      instr_i[4:2],
+                      riscv::OpcodeLoad
+                    };
+                  end else begin
+                    // c.lhu -> lhu rd', uimm(rs1')
+                    instr_o = {
+                      10'b0,
+                      instr_i[5],
+                      1'b0,
+                      2'b01,
+                      instr_i[9:7],
+                      3'b101,
+                      2'b01,
+                      instr_i[4:2],
+                      riscv::OpcodeLoad
+                    };
+                  end
+                end
+
+                3'b010: begin
+                  // c.sb -> sb rs2', uimm(rs1')
+                  instr_o = {
+                    7'b0,
+                    2'b01,
+                    instr_i[4:2],
+                    2'b01,
+                    instr_i[9:7],
+                    3'b000,
+                    3'b0,
+                    instr_i[5],
+                    instr_i[6],
+                    riscv::OpcodeStore
+                  };
+                end
+
+                3'b011: begin
+                  // c.sh -> sh rs2', uimm(rs1')
+                  instr_o = {
+                    7'b0,
+                    2'b01,
+                    instr_i[4:2],
+                    2'b01,
+                    instr_i[9:7],
+                    3'b001,
+                    3'b0,
+                    instr_i[5],
+                    1'b0,
+                    riscv::OpcodeStore
+                  };
+                end
+
+                default: begin
+                  illegal_instr_o = 1'b1;
+                end
+              endcase
+
+            end else begin
+              instr_o = instr_i;
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC0Fsd: begin
+            if (CVA6Cfg.FpPresent) begin
+              // c.fsd -> fsd rs2', imm(rs1')
+              instr_o = {
+                4'b0,
+                instr_i[6:5],
+                instr_i[12],
+                2'b01,
+                instr_i[4:2],
+                2'b01,
+                instr_i[9:7],
+                3'b011,
+                instr_i[11:10],
+                3'b000,
+                riscv::OpcodeStoreFp
+              };
+            end else begin
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC0Sw: begin
+            // c.sw -> sw rs2', imm(rs1')
+            instr_o = {
+              5'b0,
+              instr_i[5],
+              instr_i[12],
+              2'b01,
+              instr_i[4:2],
+              2'b01,
+              instr_i[9:7],
+              3'b010,
+              instr_i[11:10],
+              instr_i[6],
+              2'b00,
+              riscv::OpcodeStore
+            };
+          end
+
+          riscv::OpcodeC0Sd: begin
+            // RV64
+            // c.sd -> sd rs2', imm(rs1')
+            // RV32
+            // c.fsw -> fsw fprs2', imm(rs1')
+            if (riscv::IS_XLEN64) begin
+              instr_o = {
+                4'b0,
+                instr_i[6:5],
+                instr_i[12],
+                2'b01,
+                instr_i[4:2],
+                2'b01,
+                instr_i[9:7],
+                3'b011,
+                instr_i[11:10],
+                3'b000,
+                riscv::OpcodeStore
+              };
+            end else begin
+              if (CVA6Cfg.FpPresent) begin
+                instr_o = {
+                  5'b0,
+                  instr_i[5],
+                  instr_i[12],
+                  2'b01,
+                  instr_i[4:2],
+                  2'b01,
+                  instr_i[9:7],
+                  3'b010,
+                  instr_i[11:10],
+                  instr_i[6],
+                  2'b00,
+                  riscv::OpcodeStoreFp
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end
+          end
+
+          default: begin
+            illegal_instr_o = 1'b1;
+          end
+        endcase
+      end
+
+      // C1
+      riscv::OpcodeC1: begin
+        unique case (instr_i[15:13])
+          riscv::OpcodeC1Addi: begin
+            // c.addi -> addi rd, rd, nzimm
+            // c.nop -> addi 0, 0, 0
+            instr_o = {
+              {6{instr_i[12]}},
+              instr_i[12],
+              instr_i[6:2],
+              instr_i[11:7],
+              3'b0,
+              instr_i[11:7],
+              riscv::OpcodeOpImm
+            };
+          end
+
+
+          riscv::OpcodeC1Addiw: begin  // or riscv::OpcodeC1Jal for RV32IC
+            if (riscv::IS_XLEN64) begin
+              // c.addiw -> addiw rd, rd, nzimm for RV64IC
+              if (instr_i[11:7] != 5'h0) begin  // only valid if the destination is not r0
+                instr_o = {
+                  {6{instr_i[12]}},
+                  instr_i[12],
+                  instr_i[6:2],
+                  instr_i[11:7],
+                  3'b0,
+                  instr_i[11:7],
+                  riscv::OpcodeOpImm32
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end else begin
+              // c.jal -> jal x1, imm for RV32IC only
+              instr_o = {
+                instr_i[12],
+                instr_i[8],
+                instr_i[10:9],
+                instr_i[6],
+                instr_i[7],
+                instr_i[2],
+                instr_i[11],
+                instr_i[5:3],
+                {9{instr_i[12]}},
+                5'b1,
+                riscv::OpcodeJal
+              };
+
+
+
+            end
+          end
+
+          riscv::OpcodeC1Li: begin
+            // c.li -> addi rd, x0, nzimm
+            instr_o = {
+              {6{instr_i[12]}},
+              instr_i[12],
+              instr_i[6:2],
+              5'b0,
+              3'b0,
+              instr_i[11:7],
+              riscv::OpcodeOpImm
+            };
+          end
+
+          riscv::OpcodeC1LuiAddi16sp: begin
+            // c.lui -> lui rd, imm
+            instr_o = {{15{instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui};
+
+            if (instr_i[11:7] == 5'h02) begin
+              // c.addi16sp -> addi x2, x2, nzimm
+              instr_o = {
+                {3{instr_i[12]}},
+                instr_i[4:3],
+                instr_i[5],
+                instr_i[2],
+                instr_i[6],
+                4'b0,
+                5'h02,
+                3'b000,
+                5'h02,
+                riscv::OpcodeOpImm
+              };
+            end
+
+            if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
+          end
+
+          riscv::OpcodeC1MiscAlu: begin
+            unique case (instr_i[11:10])
+              2'b00, 2'b01: begin
+                // 00: c.srli -> srli rd, rd, shamt
+                // 01: c.srai -> srai rd, rd, shamt
+                instr_o = {
+                  1'b0,
+                  instr_i[10],
+                  4'b0,
+                  instr_i[12],
+                  instr_i[6:2],
+                  2'b01,
+                  instr_i[9:7],
+                  3'b101,
+                  2'b01,
+                  instr_i[9:7],
+                  riscv::OpcodeOpImm
+                };
+              end
+
+              2'b10: begin
+                // c.andi -> andi rd, rd, imm
+                instr_o = {
+                  {6{instr_i[12]}},
+                  instr_i[12],
+                  instr_i[6:2],
+                  2'b01,
+                  instr_i[9:7],
+                  3'b111,
+                  2'b01,
+                  instr_i[9:7],
+                  riscv::OpcodeOpImm
+                };
+              end
+
+              2'b11: begin
+                unique case ({
+                  instr_i[12], instr_i[6:5]
+                })
+                  3'b000: begin
+                    // c.sub -> sub rd', rd', rs2'
+                    instr_o = {
+                      2'b01,
+                      5'b0,
+                      2'b01,
+                      instr_i[4:2],
+                      2'b01,
+                      instr_i[9:7],
+                      3'b000,
+                      2'b01,
+                      instr_i[9:7],
+                      riscv::OpcodeOp
+                    };
+                  end
+
+                  3'b001: begin
+                    // c.xor -> xor rd', rd', rs2'
+                    instr_o = {
+                      7'b0,
+                      2'b01,
+                      instr_i[4:2],
+                      2'b01,
+                      instr_i[9:7],
+                      3'b100,
+                      2'b01,
+                      instr_i[9:7],
+                      riscv::OpcodeOp
+                    };
+                  end
+
+                  3'b010: begin
+                    // c.or -> or rd', rd', rs2'
+                    instr_o = {
+                      7'b0,
+                      2'b01,
+                      instr_i[4:2],
+                      2'b01,
+                      instr_i[9:7],
+                      3'b110,
+                      2'b01,
+                      instr_i[9:7],
+                      riscv::OpcodeOp
+                    };
+                  end
+
+                  3'b011: begin
+                    // c.and -> and rd', rd', rs2'
+                    instr_o = {
+                      7'b0,
+                      2'b01,
+                      instr_i[4:2],
+                      2'b01,
+                      instr_i[9:7],
+                      3'b111,
+                      2'b01,
+                      instr_i[9:7],
+                      riscv::OpcodeOp
+                    };
+                  end
+
+                  3'b100: begin
+                    if (riscv::IS_XLEN64) begin
+                      // c.subw -> subw rd', rd', rs2'
+                      instr_o = {
+                        2'b01,
+                        5'b0,
+                        2'b01,
+                        instr_i[4:2],
+                        2'b01,
+                        instr_i[9:7],
+                        3'b000,
+                        2'b01,
+                        instr_i[9:7],
+                        riscv::OpcodeOp32
+                      };
+                    end else begin
+                      illegal_instr_o = 1'b1;
+                    end
+                  end
+
+                  3'b101: begin
+                    if (riscv::IS_XLEN64) begin
+                      // c.addw -> addw rd', rd', rs2'
+                      instr_o = {
+                        2'b00,
+                        5'b0,
+                        2'b01,
+                        instr_i[4:2],
+                        2'b01,
+                        instr_i[9:7],
+                        3'b000,
+                        2'b01,
+                        instr_i[9:7],
+                        riscv::OpcodeOp32
+                      };
+                    end else begin
+                      illegal_instr_o = 1'b1;
+                    end
+                  end
+
+                  3'b110: begin
+                    if (CVA6Cfg.RVZCB) begin
+                      // c.mul -> mul rd', rd', rs2'
+                      instr_o = {
+                        6'b0,
+                        1'b1,
+                        2'b01,
+                        instr_i[4:2],
+                        2'b01,
+                        instr_i[9:7],
+                        3'b000,
+                        2'b01,
+                        instr_i[9:7],
+                        riscv::OpcodeOp
+                      };
+                    end else begin
+                      instr_o = instr_i;
+                      illegal_instr_o = 1'b1;
+                    end
+                  end
+
+                  3'b111: begin
+                    if (CVA6Cfg.RVZCB) begin
+                      // Zcb unary ops, selected by instr_i[4:2]; without RVZCB the whole slot is illegal (see else)
+                      unique case (instr_i[4:2])
+                        3'b000: begin
+                          // c.zext.b -> andi rd', rd', 0xff
+                          instr_o = {
+                            4'b0,
+                            8'hFF,
+                            2'b01,
+                            instr_i[9:7],
+                            3'b111,
+                            2'b01,
+                            instr_i[9:7],
+                            riscv::OpcodeOpImm
+                          };
+                        end
+
+                        3'b001: begin
+                          if (CVA6Cfg.RVB) begin
+                            // c.sext.b -> sext.b rd', rd'
+                            instr_o = {
+                              7'h30,
+                              5'h4,
+                              2'b01,
+                              instr_i[9:7],
+                              3'b001,
+                              2'b01,
+                              instr_i[9:7],
+                              riscv::OpcodeOpImm
+                            };
+                          end else illegal_instr_o = 1'b1;
+                        end
+
+                        3'b010: begin
+                          if (CVA6Cfg.RVB) begin
+                            // c.zext.h -> zext.h rd', rd'
+                            if (riscv::IS_XLEN64) begin
+                              instr_o = {
+                                7'h4,
+                                5'h0,
+                                2'b01,
+                                instr_i[9:7],
+                                3'b100,
+                                2'b01,
+                                instr_i[9:7],
+                                riscv::OpcodeOp32
+                              };
+                            end else begin
+                              instr_o = {
+                                7'h4,
+                                5'h0,
+                                2'b01,
+                                instr_i[9:7],
+                                3'b100,
+                                2'b01,
+                                instr_i[9:7],
+                                riscv::OpcodeOp
+                              };
+                            end
+                          end else illegal_instr_o = 1'b1;
+                        end
+
+                        3'b011: begin
+                          if (CVA6Cfg.RVB) begin
+                            // c.sext.h -> sext.h rd', rd'
+                            instr_o = {
+                              7'h30,
+                              5'h5,
+                              2'b01,
+                              instr_i[9:7],
+                              3'b001,
+                              2'b01,
+                              instr_i[9:7],
+                              riscv::OpcodeOpImm
+                            };
+                          end else illegal_instr_o = 1'b1;
+                        end
+
+                        3'b100: begin
+                          if (CVA6Cfg.RVB) begin
+                            // c.zext.w -> add.uw
+                            if (riscv::IS_XLEN64) begin
+                              instr_o = {
+                                7'h4,
+                                5'h0,
+                                2'b01,
+                                instr_i[9:7],
+                                3'b000,
+                                2'b01,
+                                instr_i[9:7],
+                                riscv::OpcodeOp32
+                              };
+                            end else begin
+                              illegal_instr_o = 1'b1;
+                            end
+                          end else illegal_instr_o = 1'b1;
+                        end
+
+                        3'b101: begin
+                          // c.not -> xori rd', rd', -1
+                          instr_o = {
+                            12'hFFF,
+                            2'b01,
+                            instr_i[9:7],
+                            3'b100,
+                            2'b01,
+                            instr_i[9:7],
+                            riscv::OpcodeOpImm
+                          };
+                        end
+
+                        default: begin
+                          instr_o = instr_i;
+                          illegal_instr_o = 1;
+                        end
+                      endcase
+                    end else illegal_instr_o = 1'b1;
+                  end
+                endcase
+              end
+            endcase
+          end
+
+          riscv::OpcodeC1J: begin
+            // 101: c.j -> jal x0, imm
+            instr_o = {
+              instr_i[12],
+              instr_i[8],
+              instr_i[10:9],
+              instr_i[6],
+              instr_i[7],
+              instr_i[2],
+              instr_i[11],
+              instr_i[5:3],
+              {9{instr_i[12]}},
+              4'b0,
+              ~instr_i[15],
+              riscv::OpcodeJal
+            };
+          end
+
+          riscv::OpcodeC1Beqz, riscv::OpcodeC1Bnez: begin
+            // 0: c.beqz -> beq rs1', x0, imm
+            // 1: c.bnez -> bne rs1', x0, imm
+            instr_o = {
+              {4{instr_i[12]}},
+              instr_i[6:5],
+              instr_i[2],
+              5'b0,
+              2'b01,
+              instr_i[9:7],
+              2'b00,
+              instr_i[13],
+              instr_i[11:10],
+              instr_i[4:3],
+              instr_i[12],
+              riscv::OpcodeBranch
+            };
+          end
+        endcase
+      end
+
+      // C2
+      riscv::OpcodeC2: begin
+        unique case (instr_i[15:13])
+          riscv::OpcodeC2Slli: begin
+            // c.slli -> slli rd, rd, shamt
+            instr_o = {
+              6'b0,
+              instr_i[12],
+              instr_i[6:2],
+              instr_i[11:7],
+              3'b001,
+              instr_i[11:7],
+              riscv::OpcodeOpImm
+            };
+          end
+
+          riscv::OpcodeC2Fldsp: begin
+            if (CVA6Cfg.FpPresent) begin
+              // c.fldsp -> fld rd, imm(x2)
+              instr_o = {
+                3'b0,
+                instr_i[4:2],
+                instr_i[12],
+                instr_i[6:5],
+                3'b000,
+                5'h02,
+                3'b011,
+                instr_i[11:7],
+                riscv::OpcodeLoadFp
+              };
+            end else begin
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC2Lwsp: begin
+            // c.lwsp -> lw rd, imm(x2)
+            instr_o = {
+              4'b0,
+              instr_i[3:2],
+              instr_i[12],
+              instr_i[6:4],
+              2'b00,
+              5'h02,
+              3'b010,
+              instr_i[11:7],
+              riscv::OpcodeLoad
+            };
+            if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+          end
+
+          riscv::OpcodeC2Ldsp: begin
+            // RV64
+            // c.ldsp -> ld rd, imm(x2)
+            // RV32
+            // c.flwsp -> flw fprd, imm(x2)
+            if (riscv::IS_XLEN64) begin
+              instr_o = {
+                3'b0,
+                instr_i[4:2],
+                instr_i[12],
+                instr_i[6:5],
+                3'b000,
+                5'h02,
+                3'b011,
+                instr_i[11:7],
+                riscv::OpcodeLoad
+              };
+              if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+            end else begin
+              if (CVA6Cfg.FpPresent) begin
+                instr_o = {
+                  4'b0,
+                  instr_i[3:2],
+                  instr_i[12],
+                  instr_i[6:4],
+                  2'b00,
+                  5'h02,
+                  3'b010,
+                  instr_i[11:7],
+                  riscv::OpcodeLoadFp
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end
+          end
+
+          riscv::OpcodeC2JalrMvAdd: begin
+            if (instr_i[12] == 1'b0) begin
+              // c.mv -> add rd/rs1, x0, rs2
+              instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOp};
+
+              if (instr_i[6:2] == 5'b0) begin
+                // c.jr -> jalr x0, rd/rs1, 0
+                instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, riscv::OpcodeJalr};
+                // rs1 != 0
+                illegal_instr_o = (instr_i[11:7] != '0) ? 1'b0 : 1'b1;
+              end
+            end else begin
+              // c.add -> add rd, rd, rs2
+              instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOp};
+
+              if (instr_i[6:2] == 5'b0) begin
+                if (instr_i[11:7] == 5'b0) begin
+                  // c.ebreak -> ebreak
+                  instr_o = {32'h00_10_00_73};
+                end else begin
+                  // c.jalr -> jalr x1, rs1, 0
+                  instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, riscv::OpcodeJalr};
+                end
+              end
+            end
+          end
+
+          riscv::OpcodeC2Fsdsp: begin
+            if (CVA6Cfg.FpPresent) begin
+              // c.fsdsp -> fsd rs2, imm(x2)
+              instr_o = {
+                3'b0,
+                instr_i[9:7],
+                instr_i[12],
+                instr_i[6:2],
+                5'h02,
+                3'b011,
+                instr_i[11:10],
+                3'b000,
+                riscv::OpcodeStoreFp
+              };
+            end else begin
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC2Swsp: begin
+            // c.swsp -> sw rs2, imm(x2)
+            instr_o = {
+              4'b0,
+              instr_i[8:7],
+              instr_i[12],
+              instr_i[6:2],
+              5'h02,
+              3'b010,
+              instr_i[11:9],
+              2'b00,
+              riscv::OpcodeStore
+            };
+          end
+
+          riscv::OpcodeC2Sdsp: begin
+            // RV64
+            // c.sdsp -> sd rs2, imm(x2)
+            // RV32
+            // c.fswsp -> fsw fprs2, imm(x2)
+            if (riscv::IS_XLEN64) begin
+              instr_o = {
+                3'b0,
+                instr_i[9:7],
+                instr_i[12],
+                instr_i[6:2],
+                5'h02,
+                3'b011,
+                instr_i[11:10],
+                3'b000,
+                riscv::OpcodeStore
+              };
+            end else begin
+              if (CVA6Cfg.FpPresent) begin
+                instr_o = {
+                  4'b0,
+                  instr_i[8:7],
+                  instr_i[12],
+                  instr_i[6:2],
+                  5'h02,
+                  3'b010,
+                  instr_i[11:9],
+                  2'b00,
+                  riscv::OpcodeStoreFp
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end
+          end
+
+          default: begin
+            illegal_instr_o = 1'b1;
+          end
+        endcase
+      end
+
+      // normal instruction
+      default: is_compressed_o = 1'b0;
+    endcase
+
+    // If the encoding was flagged illegal, discard any partial expansion and output the offending instruction unchanged
+    if (illegal_instr_o) begin
+      instr_o = instr_i;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/controller.sv b/test/type_param/core/controller.sv
new file mode 100644
index 0000000..c2db321
--- /dev/null
+++ b/test/type_param/core/controller.sv
@@ -0,0 +1,194 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.05.2017
+// Description: Flush controller
+// Merges mispredict, fence, fence.i, sfence.vma, CSR/accelerator/commit flush and exception/return requests
+// into the per-stage flush strobes, the set-PC request and the halt signal; flush_dcache_o is registered.
+module controller
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic        clk_i,
+    input  logic        rst_ni,
+    output logic        set_pc_commit_o,         // Set PC on PC Gen
+    output logic        flush_if_o,              // Flush the IF stage
+    output logic        flush_unissued_instr_o,  // Flush un-issued instructions of the scoreboard
+    output logic        flush_id_o,              // Flush ID stage
+    output logic        flush_ex_o,              // Flush EX stage
+    output logic        flush_bp_o,              // Flush branch predictors
+    output logic        flush_icache_o,          // Flush ICache
+    output logic        flush_dcache_o,          // Flush DCache
+    input  logic        flush_dcache_ack_i,      // Acknowledge the whole DCache Flush
+    output logic        flush_tlb_o,             // Flush TLBs
+
+    input  logic        halt_csr_i,              // Halt request from CSR (WFI instruction)
+    input  logic        halt_acc_i,              // Halt request from accelerator dispatcher
+    output logic        halt_o,                  // Halt signal to commit stage
+    input  logic        eret_i,                  // Return from exception
+    input  logic        ex_valid_i,              // We got an exception, flush the pipeline
+    input  logic        set_debug_pc_i,          // set the debug pc from CSR
+    input  bp_resolve_t resolved_branch_i,       // We got a resolved branch, check if we need to flush the front-end
+    input  logic        flush_csr_i,             // We got an instruction which altered the CSR, flush the pipeline
+    input  logic        fence_i_i,               // fence.i in
+    input  logic        fence_i,                 // fence in
+    input  logic        sfence_vma_i,            // We got an instruction to flush the TLBs and pipeline
+    input  logic        flush_commit_i,          // Flush request from commit stage
+    input  logic        flush_acc_i              // Flush request from accelerator
+);
+
+  // active fence - high if we are currently flushing the dcache
+  logic fence_active_d, fence_active_q;
+  logic flush_dcache;
+
+  // ------------
+  // Flush CTRL
+  // ------------
+  always_comb begin : flush_ctrl
+    fence_active_d         = fence_active_q;
+    set_pc_commit_o        = 1'b0;
+    flush_if_o             = 1'b0;
+    flush_unissued_instr_o = 1'b0;
+    flush_id_o             = 1'b0;
+    flush_ex_o             = 1'b0;
+    flush_dcache           = 1'b0;
+    flush_icache_o         = 1'b0;
+    flush_tlb_o            = 1'b0;
+    flush_bp_o             = 1'b0;
+    // ------------
+    // Mis-predict
+    // ------------
+    // flush on mispredict
+    if (resolved_branch_i.is_mispredict) begin
+      // flush only un-issued instructions
+      flush_unissued_instr_o = 1'b1;
+      // and if stage
+      flush_if_o             = 1'b1;
+    end
+
+    // ---------------------------------
+    // FENCE
+    // ---------------------------------
+    if (fence_i) begin
+      // this can be seen as a CSR instruction with side-effect
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+      // the D$ flush is only requested for a write-back cache;
+      // it is skipped when the cache is write-through
+      if (DCACHE_TYPE == int'(config_pkg::WB)) begin
+        flush_dcache   = 1'b1;
+        fence_active_d = 1'b1;
+      end
+    end
+
+    // ---------------------------------
+    // FENCE.I
+    // ---------------------------------
+    if (fence_i_i) begin
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+      flush_icache_o         = 1'b1;
+      // the D$ flush is only requested for a write-back cache;
+      // it is skipped when the cache is write-through
+      if (DCACHE_TYPE == int'(config_pkg::WB)) begin
+        flush_dcache   = 1'b1;
+        fence_active_d = 1'b1;
+      end
+    end
+
+    // the acknowledge handshake below only exists for a write-back cache;
+    // it is skipped when the cache is write-through
+    if (DCACHE_TYPE == int'(config_pkg::WB)) begin
+      // wait for the acknowledge here
+      if (flush_dcache_ack_i && fence_active_q) begin
+        fence_active_d = 1'b0;
+        // keep the flush dcache signal high as long as we didn't get the acknowledge from the cache
+      end else if (fence_active_q) begin
+        flush_dcache = 1'b1;
+      end
+    end
+    // ---------------------------------
+    // SFENCE.VMA
+    // ---------------------------------
+    if (CVA6Cfg.RVS && sfence_vma_i) begin
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+
+      flush_tlb_o            = 1'b1;
+    end
+
+    // Set PC to commit stage and flush pipeline
+    if (flush_csr_i || flush_acc_i) begin
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+    end else if (CVA6Cfg.RVA && flush_commit_i) begin
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+    end
+
+    // ---------------------------------
+    // 1. Exception
+    // 2. Return from exception
+    // ---------------------------------
+    if (ex_valid_i || eret_i || (CVA6Cfg.DebugEn && set_debug_pc_i)) begin
+      // don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
+      // for the PC Gen stage but instead tells it to take the PC we gave it
+      set_pc_commit_o        = 1'b0;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+      // this potentially reduces performance, but is needed
+      // to suppress speculative fetches to virtual memory from
+      // machine mode. TODO: remove when PMA checkers have been
+      // added to the system
+      flush_bp_o             = 1'b1;
+    end
+  end
+
+  // ----------------------
+  // Halt Logic
+  // ----------------------
+  always_comb begin
+    // halt on a CSR or accelerator request, or while a write-back D$ fence flush is still active
+    halt_o = halt_csr_i || halt_acc_i || (DCACHE_TYPE == int'(config_pkg::WB) && fence_active_q);
+  end
+
+  // ----------------------
+  // Registers
+  // ----------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      fence_active_q <= 1'b0;
+      flush_dcache_o <= 1'b0;
+    end else begin
+      fence_active_q <= fence_active_d;
+      // register on the flush signal, this signal might be critical
+      flush_dcache_o <= flush_dcache;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/csr_buffer.sv b/test/type_param/core/csr_buffer.sv
new file mode 100644
index 0000000..57be04d
--- /dev/null
+++ b/test/type_param/core/csr_buffer.sv
@@ -0,0 +1,76 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 05.05.2017
+// Description: Buffer to hold CSR address, this acts like a functional unit
+// to the scoreboard.
+// Single-entry buffer: latches fu_data_i.operand_b[11:0] as the CSR address when csr_valid_i is high and
+// presents it on csr_addr_o; the valid bit clears on commit (with no new request) or on flush_i.
+module csr_buffer
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic               clk_i,         // Clock
+    input  logic               rst_ni,        // Asynchronous reset active low
+    input  logic               flush_i,
+
+    input  fu_data_t           fu_data_i,
+
+    output logic               csr_ready_o,   // FU is ready e.g. not busy
+    input  logic               csr_valid_i,   // Input is valid
+    output riscv::xlen_t       csr_result_o,
+    input  logic               csr_commit_i,  // commit the pending CSR OP
+    // to CSR file
+    output logic         [11:0] csr_addr_o    // CSR address to commit stage
+);
+  // this is a single entry store buffer for the address of the CSR
+  // which we are going to need in the commit stage
+  struct packed {
+    logic [11:0] csr_address;
+    logic        valid;
+  }
+      csr_reg_n, csr_reg_q;
+
+  // control logic, scoreboard signals
+  assign csr_result_o = fu_data_i.operand_a;
+  assign csr_addr_o   = csr_reg_q.csr_address;
+
+  // write logic
+  always_comb begin : write
+    csr_reg_n   = csr_reg_q;
+    // by default we are ready
+    csr_ready_o = 1'b1;
+    // if we hold a valid uncommitted CSR request, or are just receiving one, and no commit arrives, we are not ready
+    if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i) csr_ready_o = 1'b0;
+    // if we got a valid from the scoreboard
+    // store the CSR address
+    if (csr_valid_i) begin
+      csr_reg_n.csr_address = fu_data_i.operand_b[11:0];
+      csr_reg_n.valid       = 1'b1;
+    end
+    // if we get a commit and no new valid instruction -> clear the valid bit
+    if (csr_commit_i && ~csr_valid_i) begin
+      csr_reg_n.valid = 1'b0;
+    end
+    // clear the buffer if we flushed (last assignment, so a flush overrides a new request in the same cycle)
+    if (flush_i) csr_reg_n.valid = 1'b0;
+  end
+  // sequential process
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      csr_reg_q <= '{default: 0};
+    end else begin
+      csr_reg_q <= csr_reg_n;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/csr_regfile.sv b/test/type_param/core/csr_regfile.sv
new file mode 100644
index 0000000..88c1644
--- /dev/null
+++ b/test/type_param/core/csr_regfile.sv
@@ -0,0
+1,1646 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 05.05.2017 +// Description: CSR Register File as specified by RISC-V + + +module csr_regfile + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int AsidWidth = 1, + parameter int unsigned MHPMCounterNum = 6 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic time_irq_i, // Timer threw a interrupt + // send a flush request out if a CSR with a side effect has changed (e.g. 
written) + output logic flush_o, + output logic halt_csr_o, // halt requested + // commit acknowledge + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // Commit acknowledged a instruction -> increase instret CSR + // Core and Cluster ID + input logic[riscv::VLEN-1:0] boot_addr_i, // Address from which to start booting, mtvec is set to the same address + input logic[riscv::XLEN-1:0] hart_id_i, // Hart id in a multicore environment (reflected in a CSR) + // we are taking an exception + input exception_t ex_i, // We've got an exception from the commit stage, take it + + input fu_op csr_op_i, // Operation to perform on the CSR file + input logic [11:0] csr_addr_i, // Address of the register to read/write + input logic [riscv::XLEN-1:0] csr_wdata_i, // Write data in + output logic [riscv::XLEN-1:0] csr_rdata_o, // Read data out + input logic dirty_fp_state_i, // Mark the FP sate as dirty + input logic csr_write_fflags_i, // Write fflags register e.g.: we are retiring a floating point instruction + input logic dirty_v_state_i, // Mark the V state as dirty + input logic [riscv::VLEN-1:0] pc_i, // PC of instruction accessing the CSR + output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege + // level or to write a read-only register also + // raises illegal instruction exceptions. 
+ // Interrupts/Exceptions + output logic [riscv::VLEN-1:0] epc_o, // Output the exception PC to PC Gen, the correct CSR (mepc, sepc) is set accordingly + output logic eret_o, // Return from exception, set the PC of epc_o + output logic [riscv::VLEN-1:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec) + output riscv::priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in + // FP Imprecise exceptions + input logic [4:0] acc_fflags_ex_i, // Imprecise FP exception from the accelerator (fcsr.fflags format) + input logic acc_fflags_ex_valid_i, // An FP exception from the accelerator occurred + // FPU + output riscv::xs_t fs_o, // Floating point extension status + output logic [4:0] fflags_o, // Floating-Point Accured Exceptions + output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode + output logic [6:0] fprec_o, // Floating-Point Precision Control + // Vector extension + output riscv::xs_t vs_o, // Vector extension status + // Decoder + output irq_ctrl_t irq_ctrl_o, // interrupt management to id stage + // MMU + output logic en_translation_o, // enable VA translation + output logic en_ld_st_translation_o, // enable VA translation for load and stores + output riscv::priv_lvl_t ld_st_priv_lvl_o, // Privilege level at which load and stores should happen + output logic sum_o, + output logic mxr_o, + output logic [riscv::PPNW-1:0] satp_ppn_o, + output logic [AsidWidth-1:0] asid_o, + // external interrupts + input logic [1:0] irq_i, // external interrupt in + input logic ipi_i, // inter processor interrupt -> connected to machine mode sw + input logic debug_req_i, // debug request in + output logic set_debug_pc_o, + // Virtualization Support + output logic tvm_o, // trap virtual memory + output logic tw_o, // timeout wait + output logic tsr_o, // trap sret + output logic debug_mode_o, // we are in debug mode -> that will change some decoding + output logic single_step_o, // we are in single-step mode + // 
Caches + output logic icache_en_o, // L1 ICache Enable + output logic dcache_en_o, // L1 DCache Enable + // Accelerator + output logic acc_cons_en_o, // Accelerator memory consistent mode + // Performance Counter + output logic [11:0] perf_addr_o, // read/write address to performance counter module + output logic [riscv::XLEN-1:0] perf_data_o, // write data to performance counter module + input logic [riscv::XLEN-1:0] perf_data_i, // read data from performance counter module + output logic perf_we_o, + // PMPs + output riscv::pmpcfg_t [15:0] pmpcfg_o, // PMP configuration containing pmpcfg for max 16 PMPs + output logic [15:0][riscv::PLEN-3:0] pmpaddr_o, // PMP addresses + output logic [31:0] mcountinhibit_o +); + // internal signal to keep track of access exceptions + logic read_access_exception, update_access_exception, privilege_violation; + logic csr_we, csr_read; + riscv::xlen_t csr_wdata, csr_rdata; + riscv::priv_lvl_t trap_to_priv_lvl; + // register for enabling load store address translation, this is critical, hence the register + logic en_ld_st_translation_d, en_ld_st_translation_q; + logic mprv; + logic mret; // return from M-mode exception + logic sret; // return from S-mode exception + logic dret; // return from debug mode + // CSR write causes us to mark the FPU state as dirty + logic dirty_fp_state_csr; + riscv::mstatus_rv_t mstatus_q, mstatus_d; + riscv::xlen_t mstatus_extended; + riscv::satp_t satp_q, satp_d; + riscv::dcsr_t dcsr_q, dcsr_d; + riscv::csr_t csr_addr; + // privilege level register + riscv::priv_lvl_t priv_lvl_d, priv_lvl_q; + // we are in debug + logic debug_mode_q, debug_mode_d; + logic mtvec_rst_load_q; // used to determine whether we came out of reset + + riscv::xlen_t dpc_q, dpc_d; + riscv::xlen_t dscratch0_q, dscratch0_d; + riscv::xlen_t dscratch1_q, dscratch1_d; + riscv::xlen_t mtvec_q, mtvec_d; + riscv::xlen_t medeleg_q, medeleg_d; + riscv::xlen_t mideleg_q, mideleg_d; + riscv::xlen_t mip_q, mip_d; + riscv::xlen_t mie_q, mie_d; 
+ riscv::xlen_t mcounteren_q, mcounteren_d; + riscv::xlen_t mscratch_q, mscratch_d; + riscv::xlen_t mepc_q, mepc_d; + riscv::xlen_t mcause_q, mcause_d; + riscv::xlen_t mtval_q, mtval_d; + logic fiom_d, fiom_q; + + riscv::xlen_t stvec_q, stvec_d; + riscv::xlen_t scounteren_q, scounteren_d; + riscv::xlen_t sscratch_q, sscratch_d; + riscv::xlen_t sepc_q, sepc_d; + riscv::xlen_t scause_q, scause_d; + riscv::xlen_t stval_q, stval_d; + riscv::xlen_t dcache_q, dcache_d; + riscv::xlen_t icache_q, icache_d; + riscv::xlen_t acc_cons_q, acc_cons_d; + + logic wfi_d, wfi_q; + + logic [63:0] cycle_q, cycle_d; + logic [63:0] instret_q, instret_d; + + riscv::pmpcfg_t [15:0] pmpcfg_q, pmpcfg_d; + logic [15:0][riscv::PLEN-3:0] pmpaddr_q, pmpaddr_d; + logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q; + logic [3:0] index; + + localparam riscv::xlen_t IsaCode = (riscv::XLEN'(CVA6Cfg.RVA) << 0) // A - Atomic Instructions extension + | (riscv::XLEN'(CVA6Cfg.RVB) << 1) // C - Bitmanip extension + | (riscv::XLEN'(CVA6Cfg.RVC) << 2) // C - Compressed extension + | (riscv::XLEN'(CVA6Cfg.RVD) << 3) // D - Double precision floating-point extension + | (riscv::XLEN'(CVA6Cfg.RVF) << 5) // F - Single precision floating-point extension + | (riscv::XLEN'(1) << 8) // I - RV32I/64I/128I base ISA + | (riscv::XLEN'(1) << 12) // M - Integer Multiply/Divide extension + | (riscv::XLEN'(0) << 13) // N - User level interrupts supported + | (riscv::XLEN'(CVA6Cfg.RVS) << 18) // S - Supervisor mode implemented + | (riscv::XLEN'(CVA6Cfg.RVU) << 20) // U - User mode implemented + | (riscv::XLEN'(CVA6Cfg.RVV) << 21) // V - Vector extension + | (riscv::XLEN'(CVA6Cfg.NSX) << 23) // X - Non-standard extensions present + | ((riscv::XLEN == 64 ? 
2 : 1) << riscv::XLEN - 2); // MXL + + assign pmpcfg_o = pmpcfg_q[15:0]; + assign pmpaddr_o = pmpaddr_q; + + riscv::fcsr_t fcsr_q, fcsr_d; + // ---------------- + // Assignments + // ---------------- + assign csr_addr = riscv::csr_t'(csr_addr_i); + assign fs_o = mstatus_q.fs; + assign vs_o = mstatus_q.vs; + // ---------------- + // CSR Read logic + // ---------------- + assign mstatus_extended = riscv::IS_XLEN64 ? mstatus_q[riscv::XLEN-1:0] : + {mstatus_q.sd, mstatus_q.wpri3[7:0], mstatus_q[22:0]}; + + + always_comb begin : csr_read_process + // a read access exception can only occur if we attempt to read a CSR which does not exist + read_access_exception = 1'b0; + csr_rdata = '0; + perf_addr_o = csr_addr.address[11:0]; + index = '0; + + if (csr_read) begin + unique case (csr_addr.address) + riscv::CSR_FFLAGS: begin + if (CVA6Cfg.FpPresent) begin + csr_rdata = {{riscv::XLEN - 5{1'b0}}, fcsr_q.fflags}; + end else begin + read_access_exception = 1'b1; + end + end + riscv::CSR_FRM: begin + if (CVA6Cfg.FpPresent) begin + csr_rdata = {{riscv::XLEN - 3{1'b0}}, fcsr_q.frm}; + end else begin + read_access_exception = 1'b1; + end + end + riscv::CSR_FCSR: begin + if (CVA6Cfg.FpPresent) begin + csr_rdata = {{riscv::XLEN - 8{1'b0}}, fcsr_q.frm, fcsr_q.fflags}; + end else begin + read_access_exception = 1'b1; + end + end + // non-standard extension + riscv::CSR_FTRAN: begin + if (CVA6Cfg.FpPresent) begin + csr_rdata = {{riscv::XLEN - 7{1'b0}}, fcsr_q.fprec}; + end else begin + read_access_exception = 1'b1; + end + end + // debug registers + riscv::CSR_DCSR: + if (CVA6Cfg.DebugEn) csr_rdata = {{riscv::XLEN - 32{1'b0}}, dcsr_q}; + else read_access_exception = 1'b1; + riscv::CSR_DPC: + if (CVA6Cfg.DebugEn) csr_rdata = dpc_q; + else read_access_exception = 1'b1; + riscv::CSR_DSCRATCH0: + if (CVA6Cfg.DebugEn) csr_rdata = dscratch0_q; + else read_access_exception = 1'b1; + riscv::CSR_DSCRATCH1: + if (CVA6Cfg.DebugEn) csr_rdata = dscratch1_q; + else read_access_exception = 1'b1; + // 
trigger module registers + riscv::CSR_TSELECT: read_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA1: read_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA2: read_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA3: read_access_exception = 1'b1; // not implemented + // supervisor registers + riscv::CSR_SSTATUS: begin + if (CVA6Cfg.RVS) + csr_rdata = mstatus_extended & ariane_pkg::SMODE_STATUS_READ_MASK[riscv::XLEN-1:0]; + else read_access_exception = 1'b1; + end + riscv::CSR_SIE: + if (CVA6Cfg.RVS) csr_rdata = mie_q & mideleg_q; + else read_access_exception = 1'b1; + riscv::CSR_SIP: + if (CVA6Cfg.RVS) csr_rdata = mip_q & mideleg_q; + else read_access_exception = 1'b1; + riscv::CSR_STVEC: + if (CVA6Cfg.RVS) csr_rdata = stvec_q; + else read_access_exception = 1'b1; + riscv::CSR_SCOUNTEREN: + if (CVA6Cfg.RVS) csr_rdata = scounteren_q; + else read_access_exception = 1'b1; + riscv::CSR_SSCRATCH: + if (CVA6Cfg.RVS) csr_rdata = sscratch_q; + else read_access_exception = 1'b1; + riscv::CSR_SEPC: + if (CVA6Cfg.RVS) csr_rdata = sepc_q; + else read_access_exception = 1'b1; + riscv::CSR_SCAUSE: + if (CVA6Cfg.RVS) csr_rdata = scause_q; + else read_access_exception = 1'b1; + riscv::CSR_STVAL: + if (CVA6Cfg.RVS) csr_rdata = stval_q; + else read_access_exception = 1'b1; + riscv::CSR_SATP: begin + if (CVA6Cfg.RVS) begin + // intercept reads to SATP if in S-Mode and TVM is enabled + if (priv_lvl_o == riscv::PRIV_LVL_S && mstatus_q.tvm) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = satp_q; + end + end else begin + read_access_exception = 1'b1; + end + end + // machine mode registers + riscv::CSR_MSTATUS: csr_rdata = mstatus_extended; + riscv::CSR_MSTATUSH: + if (riscv::XLEN == 32) csr_rdata = '0; + else read_access_exception = 1'b1; + riscv::CSR_MISA: csr_rdata = IsaCode; + riscv::CSR_MEDELEG: + if (CVA6Cfg.RVS) csr_rdata = medeleg_q; + else read_access_exception = 1'b1; + riscv::CSR_MIDELEG: + if (CVA6Cfg.RVS) csr_rdata 
= mideleg_q; + else read_access_exception = 1'b1; + riscv::CSR_MIE: csr_rdata = mie_q; + riscv::CSR_MTVEC: csr_rdata = mtvec_q; + riscv::CSR_MCOUNTEREN: csr_rdata = mcounteren_q; + riscv::CSR_MSCRATCH: csr_rdata = mscratch_q; + riscv::CSR_MEPC: csr_rdata = mepc_q; + riscv::CSR_MCAUSE: csr_rdata = mcause_q; + riscv::CSR_MTVAL: csr_rdata = mtval_q; + riscv::CSR_MIP: csr_rdata = mip_q; + riscv::CSR_MENVCFG: csr_rdata = '0 | fiom_q; + riscv::CSR_MENVCFGH: begin + if (riscv::XLEN == 32) csr_rdata = '0; + else read_access_exception = 1'b1; + end + riscv::CSR_MVENDORID: csr_rdata = OPENHWGROUP_MVENDORID; + riscv::CSR_MARCHID: csr_rdata = ARIANE_MARCHID; + riscv::CSR_MIMPID: csr_rdata = '0; // not implemented + riscv::CSR_MHARTID: csr_rdata = hart_id_i; + riscv::CSR_MCONFIGPTR: csr_rdata = '0; // not implemented + riscv::CSR_MCOUNTINHIBIT: + csr_rdata = {{(riscv::XLEN - (MHPMCounterNum + 3)) {1'b0}}, mcountinhibit_q}; + // Counters and Timers + riscv::CSR_MCYCLE: csr_rdata = cycle_q[riscv::XLEN-1:0]; + riscv::CSR_MCYCLEH: + if (riscv::XLEN == 32) csr_rdata = cycle_q[63:32]; + else read_access_exception = 1'b1; + riscv::CSR_MINSTRET: csr_rdata = instret_q[riscv::XLEN-1:0]; + riscv::CSR_MINSTRETH: + if (riscv::XLEN == 32) csr_rdata = instret_q[63:32]; + else read_access_exception = 1'b1; + riscv::CSR_CYCLE: csr_rdata = cycle_q[riscv::XLEN-1:0]; + riscv::CSR_CYCLEH: + if (riscv::XLEN == 32) csr_rdata = cycle_q[63:32]; + else read_access_exception = 1'b1; + riscv::CSR_INSTRET: csr_rdata = instret_q[riscv::XLEN-1:0]; + riscv::CSR_INSTRETH: + if (riscv::XLEN == 32) csr_rdata = instret_q[63:32]; + else read_access_exception = 1'b1; + //Event Selector + riscv::CSR_MHPM_EVENT_3, + riscv::CSR_MHPM_EVENT_4, + riscv::CSR_MHPM_EVENT_5, + riscv::CSR_MHPM_EVENT_6, + riscv::CSR_MHPM_EVENT_7, + riscv::CSR_MHPM_EVENT_8, + riscv::CSR_MHPM_EVENT_9, + riscv::CSR_MHPM_EVENT_10, + riscv::CSR_MHPM_EVENT_11, + riscv::CSR_MHPM_EVENT_12, + riscv::CSR_MHPM_EVENT_13, + riscv::CSR_MHPM_EVENT_14, + 
riscv::CSR_MHPM_EVENT_15, + riscv::CSR_MHPM_EVENT_16, + riscv::CSR_MHPM_EVENT_17, + riscv::CSR_MHPM_EVENT_18, + riscv::CSR_MHPM_EVENT_19, + riscv::CSR_MHPM_EVENT_20, + riscv::CSR_MHPM_EVENT_21, + riscv::CSR_MHPM_EVENT_22, + riscv::CSR_MHPM_EVENT_23, + riscv::CSR_MHPM_EVENT_24, + riscv::CSR_MHPM_EVENT_25, + riscv::CSR_MHPM_EVENT_26, + riscv::CSR_MHPM_EVENT_27, + riscv::CSR_MHPM_EVENT_28, + riscv::CSR_MHPM_EVENT_29, + riscv::CSR_MHPM_EVENT_30, + riscv::CSR_MHPM_EVENT_31 : + csr_rdata = perf_data_i; + + riscv::CSR_MHPM_COUNTER_3, + riscv::CSR_MHPM_COUNTER_4, + riscv::CSR_MHPM_COUNTER_5, + riscv::CSR_MHPM_COUNTER_6, + riscv::CSR_MHPM_COUNTER_7, + riscv::CSR_MHPM_COUNTER_8, + riscv::CSR_MHPM_COUNTER_9, + riscv::CSR_MHPM_COUNTER_10, + riscv::CSR_MHPM_COUNTER_11, + riscv::CSR_MHPM_COUNTER_12, + riscv::CSR_MHPM_COUNTER_13, + riscv::CSR_MHPM_COUNTER_14, + riscv::CSR_MHPM_COUNTER_15, + riscv::CSR_MHPM_COUNTER_16, + riscv::CSR_MHPM_COUNTER_17, + riscv::CSR_MHPM_COUNTER_18, + riscv::CSR_MHPM_COUNTER_19, + riscv::CSR_MHPM_COUNTER_20, + riscv::CSR_MHPM_COUNTER_21, + riscv::CSR_MHPM_COUNTER_22, + riscv::CSR_MHPM_COUNTER_23, + riscv::CSR_MHPM_COUNTER_24, + riscv::CSR_MHPM_COUNTER_25, + riscv::CSR_MHPM_COUNTER_26, + riscv::CSR_MHPM_COUNTER_27, + riscv::CSR_MHPM_COUNTER_28, + riscv::CSR_MHPM_COUNTER_29, + riscv::CSR_MHPM_COUNTER_30, + riscv::CSR_MHPM_COUNTER_31 : + csr_rdata = perf_data_i; + + riscv::CSR_MHPM_COUNTER_3H, + riscv::CSR_MHPM_COUNTER_4H, + riscv::CSR_MHPM_COUNTER_5H, + riscv::CSR_MHPM_COUNTER_6H, + riscv::CSR_MHPM_COUNTER_7H, + riscv::CSR_MHPM_COUNTER_8H, + riscv::CSR_MHPM_COUNTER_9H, + riscv::CSR_MHPM_COUNTER_10H, + riscv::CSR_MHPM_COUNTER_11H, + riscv::CSR_MHPM_COUNTER_12H, + riscv::CSR_MHPM_COUNTER_13H, + riscv::CSR_MHPM_COUNTER_14H, + riscv::CSR_MHPM_COUNTER_15H, + riscv::CSR_MHPM_COUNTER_16H, + riscv::CSR_MHPM_COUNTER_17H, + riscv::CSR_MHPM_COUNTER_18H, + riscv::CSR_MHPM_COUNTER_19H, + riscv::CSR_MHPM_COUNTER_20H, + riscv::CSR_MHPM_COUNTER_21H, + 
riscv::CSR_MHPM_COUNTER_22H, + riscv::CSR_MHPM_COUNTER_23H, + riscv::CSR_MHPM_COUNTER_24H, + riscv::CSR_MHPM_COUNTER_25H, + riscv::CSR_MHPM_COUNTER_26H, + riscv::CSR_MHPM_COUNTER_27H, + riscv::CSR_MHPM_COUNTER_28H, + riscv::CSR_MHPM_COUNTER_29H, + riscv::CSR_MHPM_COUNTER_30H, + riscv::CSR_MHPM_COUNTER_31H : + if (riscv::XLEN == 32) csr_rdata = perf_data_i; + else read_access_exception = 1'b1; + + // Performance counters (User Mode - R/O Shadows) + riscv::CSR_HPM_COUNTER_3, + riscv::CSR_HPM_COUNTER_4, + riscv::CSR_HPM_COUNTER_5, + riscv::CSR_HPM_COUNTER_6, + riscv::CSR_HPM_COUNTER_7, + riscv::CSR_HPM_COUNTER_8, + riscv::CSR_HPM_COUNTER_9, + riscv::CSR_HPM_COUNTER_10, + riscv::CSR_HPM_COUNTER_11, + riscv::CSR_HPM_COUNTER_12, + riscv::CSR_HPM_COUNTER_13, + riscv::CSR_HPM_COUNTER_14, + riscv::CSR_HPM_COUNTER_15, + riscv::CSR_HPM_COUNTER_16, + riscv::CSR_HPM_COUNTER_17, + riscv::CSR_HPM_COUNTER_18, + riscv::CSR_HPM_COUNTER_19, + riscv::CSR_HPM_COUNTER_20, + riscv::CSR_HPM_COUNTER_21, + riscv::CSR_HPM_COUNTER_22, + riscv::CSR_HPM_COUNTER_23, + riscv::CSR_HPM_COUNTER_24, + riscv::CSR_HPM_COUNTER_25, + riscv::CSR_HPM_COUNTER_26, + riscv::CSR_HPM_COUNTER_27, + riscv::CSR_HPM_COUNTER_28, + riscv::CSR_HPM_COUNTER_29, + riscv::CSR_HPM_COUNTER_30, + riscv::CSR_HPM_COUNTER_31 : + csr_rdata = perf_data_i; + + riscv::CSR_HPM_COUNTER_3H, + riscv::CSR_HPM_COUNTER_4H, + riscv::CSR_HPM_COUNTER_5H, + riscv::CSR_HPM_COUNTER_6H, + riscv::CSR_HPM_COUNTER_7H, + riscv::CSR_HPM_COUNTER_8H, + riscv::CSR_HPM_COUNTER_9H, + riscv::CSR_HPM_COUNTER_10H, + riscv::CSR_HPM_COUNTER_11H, + riscv::CSR_HPM_COUNTER_12H, + riscv::CSR_HPM_COUNTER_13H, + riscv::CSR_HPM_COUNTER_14H, + riscv::CSR_HPM_COUNTER_15H, + riscv::CSR_HPM_COUNTER_16H, + riscv::CSR_HPM_COUNTER_17H, + riscv::CSR_HPM_COUNTER_18H, + riscv::CSR_HPM_COUNTER_19H, + riscv::CSR_HPM_COUNTER_20H, + riscv::CSR_HPM_COUNTER_21H, + riscv::CSR_HPM_COUNTER_22H, + riscv::CSR_HPM_COUNTER_23H, + riscv::CSR_HPM_COUNTER_24H, + riscv::CSR_HPM_COUNTER_25H, + 
riscv::CSR_HPM_COUNTER_26H, + riscv::CSR_HPM_COUNTER_27H, + riscv::CSR_HPM_COUNTER_28H, + riscv::CSR_HPM_COUNTER_29H, + riscv::CSR_HPM_COUNTER_30H, + riscv::CSR_HPM_COUNTER_31H : + if (riscv::XLEN == 32) csr_rdata = perf_data_i; + else read_access_exception = 1'b1; + + // custom (non RISC-V) cache control + riscv::CSR_DCACHE: csr_rdata = dcache_q; + riscv::CSR_ICACHE: csr_rdata = icache_q; + // custom (non RISC-V) accelerator memory consistency mode + riscv::CSR_ACC_CONS: begin + if (CVA6Cfg.EnableAccelerator) begin + csr_rdata = acc_cons_q; + end else begin + read_access_exception = 1'b1; + end + end + // PMPs + riscv::CSR_PMPCFG0: csr_rdata = pmpcfg_q[riscv::XLEN/8-1:0]; + riscv::CSR_PMPCFG1: + if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[7:4]; + else read_access_exception = 1'b1; + riscv::CSR_PMPCFG2: csr_rdata = pmpcfg_q[8+:riscv::XLEN/8]; + riscv::CSR_PMPCFG3: + if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[15:12]; + else read_access_exception = 1'b1; + // PMPADDR + riscv::CSR_PMPADDR0, + riscv::CSR_PMPADDR1, + riscv::CSR_PMPADDR2, + riscv::CSR_PMPADDR3, + riscv::CSR_PMPADDR4, + riscv::CSR_PMPADDR5, + riscv::CSR_PMPADDR6, + riscv::CSR_PMPADDR7, + riscv::CSR_PMPADDR8, + riscv::CSR_PMPADDR9, + riscv::CSR_PMPADDR10, + riscv::CSR_PMPADDR11, + riscv::CSR_PMPADDR12, + riscv::CSR_PMPADDR13, + riscv::CSR_PMPADDR14, + riscv::CSR_PMPADDR15: begin + // index is specified by the last byte in the address + index = csr_addr.csr_decode.address[3:0]; + // Important: we only support granularity 8 bytes (G=1) + // -> last bit of pmpaddr must be set 0/1 based on the mode: + // NA4, NAPOT: 1 + // TOR, OFF: 0 + if (pmpcfg_q[index].addr_mode[1] == 1'b1) csr_rdata = pmpaddr_q[index][riscv::PLEN-3:0]; + else csr_rdata = {pmpaddr_q[index][riscv::PLEN-3:1], 1'b0}; + end + default: read_access_exception = 1'b1; + endcase + end + end + // --------------------------- + // CSR Write and update logic + // --------------------------- + riscv::xlen_t mask; + always_comb begin : csr_update + 
automatic riscv::satp_t satp; + automatic logic [63:0] instret; + + + satp = satp_q; + instret = instret_q; + + mcountinhibit_d = mcountinhibit_q; + + // -------------------- + // Counters + // -------------------- + cycle_d = cycle_q; + instret_d = instret_q; + if (!debug_mode_q) begin + // increase instruction retired counter + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + if (commit_ack_i[i] && !ex_i.valid && !mcountinhibit_q[2]) instret++; + end + instret_d = instret; + // increment the cycle count + if (!mcountinhibit_q[0]) cycle_d = cycle_q + 1'b1; + else cycle_d = cycle_q; + end + + eret_o = 1'b0; + flush_o = 1'b0; + update_access_exception = 1'b0; + + set_debug_pc_o = 1'b0; + + perf_we_o = 1'b0; + perf_data_o = 'b0; + + fcsr_d = fcsr_q; + + priv_lvl_d = priv_lvl_q; + debug_mode_d = debug_mode_q; + dcsr_d = dcsr_q; + dpc_d = dpc_q; + dscratch0_d = dscratch0_q; + dscratch1_d = dscratch1_q; + mstatus_d = mstatus_q; + + // check whether we come out of reset + // this is a workaround. some tools have issues + // having boot_addr_i in the asynchronous + // reset assignment to mtvec_d, even though + // boot_addr_i will be assigned a constant + // on the top-level. 
+ if (mtvec_rst_load_q) begin + mtvec_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, boot_addr_i} + 'h40; + end else begin + mtvec_d = mtvec_q; + end + + medeleg_d = medeleg_q; + mideleg_d = mideleg_q; + mip_d = mip_q; + mie_d = mie_q; + mepc_d = mepc_q; + mcause_d = mcause_q; + mcounteren_d = mcounteren_q; + mscratch_d = mscratch_q; + mtval_d = mtval_q; + fiom_d = fiom_q; + dcache_d = dcache_q; + icache_d = icache_q; + acc_cons_d = acc_cons_q; + + sepc_d = sepc_q; + scause_d = scause_q; + stvec_d = stvec_q; + scounteren_d = scounteren_q; + sscratch_d = sscratch_q; + stval_d = stval_q; + satp_d = satp_q; + + en_ld_st_translation_d = en_ld_st_translation_q; + dirty_fp_state_csr = 1'b0; + + pmpcfg_d = pmpcfg_q; + pmpaddr_d = pmpaddr_q; + + // check for correct access rights and that we are writing + if (csr_we) begin + unique case (csr_addr.address) + // Floating-Point + riscv::CSR_FFLAGS: begin + if (CVA6Cfg.FpPresent) begin + dirty_fp_state_csr = 1'b1; + fcsr_d.fflags = csr_wdata[4:0]; + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_FRM: begin + if (CVA6Cfg.FpPresent) begin + dirty_fp_state_csr = 1'b1; + fcsr_d.frm = csr_wdata[2:0]; + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_FCSR: begin + if (CVA6Cfg.FpPresent) begin + dirty_fp_state_csr = 1'b1; + fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_FTRAN: begin + if (CVA6Cfg.FpPresent) begin + dirty_fp_state_csr = 1'b1; + fcsr_d.fprec = csr_wdata[6:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + // debug CSR + riscv::CSR_DCSR: begin + if (CVA6Cfg.DebugEn) begin + dcsr_d = csr_wdata[31:0]; + // 
debug is implemented + dcsr_d.xdebugver = 4'h4; + // currently not supported + dcsr_d.nmip = 1'b0; + dcsr_d.stopcount = 1'b0; + dcsr_d.stoptime = 1'b0; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_DPC: + if (CVA6Cfg.DebugEn) dpc_d = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_DSCRATCH0: + if (CVA6Cfg.DebugEn) dscratch0_d = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_DSCRATCH1: + if (CVA6Cfg.DebugEn) dscratch1_d = csr_wdata; + else update_access_exception = 1'b1; + // trigger module CSRs + riscv::CSR_TSELECT: update_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA1: update_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA2: update_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA3: update_access_exception = 1'b1; // not implemented + // sstatus is a subset of mstatus - mask it accordingly + riscv::CSR_SSTATUS: begin + if (CVA6Cfg.RVS) begin + mask = ariane_pkg::SMODE_STATUS_WRITE_MASK[riscv::XLEN-1:0]; + mstatus_d = (mstatus_q & ~{{64-riscv::XLEN{1'b0}}, mask}) | {{64-riscv::XLEN{1'b0}}, (csr_wdata & mask)}; + // hardwire to zero if floating point extension is not present + if (!CVA6Cfg.FpPresent) begin + mstatus_d.fs = riscv::Off; + end + // hardwire to zero if vector extension is not present + if (!CVA6Cfg.RVV) begin + mstatus_d.vs = riscv::Off; + end + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + // even machine mode interrupts can be visible and set-able to supervisor + // if the corresponding bit in mideleg is set + riscv::CSR_SIE: begin + if (CVA6Cfg.RVS) begin + // the mideleg makes sure only delegate-able register (and therefore also only implemented registers) are written + mie_d = (mie_q & ~mideleg_q) | (csr_wdata & mideleg_q); + end else begin + update_access_exception = 1'b1; + end + end + + riscv::CSR_SIP: begin + if (CVA6Cfg.RVS) begin + // only the supervisor software 
interrupt is write-able, iff delegated + mask = riscv::MIP_SSIP & mideleg_q; + mip_d = (mip_q & ~mask) | (csr_wdata & mask); + end else begin + update_access_exception = 1'b1; + end + end + + riscv::CSR_STVEC: + if (CVA6Cfg.RVS) stvec_d = {csr_wdata[riscv::XLEN-1:2], 1'b0, csr_wdata[0]}; + else update_access_exception = 1'b1; + riscv::CSR_SCOUNTEREN: + if (CVA6Cfg.RVS) scounteren_d = {{riscv::XLEN - 32{1'b0}}, csr_wdata[31:0]}; + else update_access_exception = 1'b1; + riscv::CSR_SSCRATCH: + if (CVA6Cfg.RVS) sscratch_d = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_SEPC: + if (CVA6Cfg.RVS) sepc_d = {csr_wdata[riscv::XLEN-1:1], 1'b0}; + else update_access_exception = 1'b1; + riscv::CSR_SCAUSE: + if (CVA6Cfg.RVS) scause_d = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_STVAL: + if (CVA6Cfg.RVS) stval_d = csr_wdata; + else update_access_exception = 1'b1; + // supervisor address translation and protection + riscv::CSR_SATP: begin + if (CVA6Cfg.RVS) begin + // intercept SATP writes if in S-Mode and TVM is enabled + if (priv_lvl_o == riscv::PRIV_LVL_S && mstatus_q.tvm) update_access_exception = 1'b1; + else begin + satp = riscv::satp_t'(csr_wdata); + // only make ASID_LEN - 1 bit stick, that way software can figure out how many ASID bits are supported + satp.asid = satp.asid & {{(riscv::ASIDW - AsidWidth) {1'b0}}, {AsidWidth{1'b1}}}; + // only update if we actually support this mode + if (riscv::vm_mode_t'(satp.mode) == riscv::ModeOff || + riscv::vm_mode_t'(satp.mode) == riscv::MODE_SV) + satp_d = satp; + end + // changing the mode can have side-effects on address translation (e.g.: other instructions), re-fetch + // the next instruction by executing a flush + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + + riscv::CSR_MSTATUS: begin + mstatus_d = {{64 - riscv::XLEN{1'b0}}, csr_wdata}; + mstatus_d.xs = riscv::Off; + if (!CVA6Cfg.FpPresent) begin + mstatus_d.fs = riscv::Off; + end + if (!CVA6Cfg.RVV) 
begin + mstatus_d.vs = riscv::Off; + end + mstatus_d.wpri3 = 9'b0; + mstatus_d.wpri1 = 1'b0; + mstatus_d.wpri2 = 1'b0; + mstatus_d.wpri0 = 1'b0; + mstatus_d.ube = 1'b0; // CVA6 is little-endian + // this register has side-effects on other registers, flush the pipeline + flush_o = 1'b1; + end + riscv::CSR_MSTATUSH: if (riscv::XLEN != 32) update_access_exception = 1'b1; + // MISA is WARL (Write Any Value, Reads Legal Value) + riscv::CSR_MISA: ; + // machine exception delegation register + // 0 - 15 exceptions supported + riscv::CSR_MEDELEG: begin + if (CVA6Cfg.RVS) begin + mask = (1 << riscv::INSTR_ADDR_MISALIGNED) | + (1 << riscv::BREAKPOINT) | + (1 << riscv::ENV_CALL_UMODE) | + (1 << riscv::INSTR_PAGE_FAULT) | + (1 << riscv::LOAD_PAGE_FAULT) | + (1 << riscv::STORE_PAGE_FAULT); + medeleg_d = (medeleg_q & ~mask) | (csr_wdata & mask); + end else begin + update_access_exception = 1'b1; + end + end + // machine interrupt delegation register + // we do not support user interrupt delegation + riscv::CSR_MIDELEG: begin + if (CVA6Cfg.RVS) begin + mask = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP; + mideleg_d = (mideleg_q & ~mask) | (csr_wdata & mask); + end else begin + update_access_exception = 1'b1; + end + end + // mask the register so that unsupported interrupts can never be set + riscv::CSR_MIE: begin + mask = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP | riscv::MIP_MSIP | riscv::MIP_MTIP | riscv::MIP_MEIP; + mie_d = (mie_q & ~mask) | (csr_wdata & mask); // we only support supervisor and M-mode interrupts + end + + riscv::CSR_MTVEC: begin + mtvec_d = {csr_wdata[riscv::XLEN-1:2], 1'b0, csr_wdata[0]}; + // we are in vector mode, this implementation requires the additional + // alignment constraint of 64 * 4 bytes + if (csr_wdata[0]) mtvec_d = {csr_wdata[riscv::XLEN-1:8], 7'b0, csr_wdata[0]}; + end + riscv::CSR_MCOUNTEREN: mcounteren_d = {{riscv::XLEN - 32{1'b0}}, csr_wdata[31:0]}; + + riscv::CSR_MSCRATCH: mscratch_d = csr_wdata; + riscv::CSR_MEPC: mepc_d 
= {csr_wdata[riscv::XLEN-1:1], 1'b0}; + riscv::CSR_MCAUSE: mcause_d = csr_wdata; + riscv::CSR_MTVAL: mtval_d = csr_wdata; + riscv::CSR_MIP: begin + mask = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP; + mip_d = (mip_q & ~mask) | (csr_wdata & mask); + end + riscv::CSR_MENVCFG: if (CVA6Cfg.RVS) fiom_d = csr_wdata[0]; + riscv::CSR_MENVCFGH: begin + if (riscv::XLEN != 32) update_access_exception = 1'b1; + end + riscv::CSR_MCOUNTINHIBIT: + mcountinhibit_d = {csr_wdata[MHPMCounterNum+2:2], 1'b0, csr_wdata[0]}; + // performance counters + riscv::CSR_MCYCLE: cycle_d[riscv::XLEN-1:0] = csr_wdata; + riscv::CSR_MCYCLEH: + if (riscv::XLEN == 32) cycle_d[63:32] = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_MINSTRET: instret_d[riscv::XLEN-1:0] = csr_wdata; + riscv::CSR_MINSTRETH: + if (riscv::XLEN == 32) instret_d[63:32] = csr_wdata; + else update_access_exception = 1'b1; + //Event Selector + riscv::CSR_MHPM_EVENT_3, + riscv::CSR_MHPM_EVENT_4, + riscv::CSR_MHPM_EVENT_5, + riscv::CSR_MHPM_EVENT_6, + riscv::CSR_MHPM_EVENT_7, + riscv::CSR_MHPM_EVENT_8, + riscv::CSR_MHPM_EVENT_9, + riscv::CSR_MHPM_EVENT_10, + riscv::CSR_MHPM_EVENT_11, + riscv::CSR_MHPM_EVENT_12, + riscv::CSR_MHPM_EVENT_13, + riscv::CSR_MHPM_EVENT_14, + riscv::CSR_MHPM_EVENT_15, + riscv::CSR_MHPM_EVENT_16, + riscv::CSR_MHPM_EVENT_17, + riscv::CSR_MHPM_EVENT_18, + riscv::CSR_MHPM_EVENT_19, + riscv::CSR_MHPM_EVENT_20, + riscv::CSR_MHPM_EVENT_21, + riscv::CSR_MHPM_EVENT_22, + riscv::CSR_MHPM_EVENT_23, + riscv::CSR_MHPM_EVENT_24, + riscv::CSR_MHPM_EVENT_25, + riscv::CSR_MHPM_EVENT_26, + riscv::CSR_MHPM_EVENT_27, + riscv::CSR_MHPM_EVENT_28, + riscv::CSR_MHPM_EVENT_29, + riscv::CSR_MHPM_EVENT_30, + riscv::CSR_MHPM_EVENT_31 : begin + perf_we_o = 1'b1; + perf_data_o = csr_wdata; + end + + riscv::CSR_MHPM_COUNTER_3, + riscv::CSR_MHPM_COUNTER_4, + riscv::CSR_MHPM_COUNTER_5, + riscv::CSR_MHPM_COUNTER_6, + riscv::CSR_MHPM_COUNTER_7, + riscv::CSR_MHPM_COUNTER_8, + riscv::CSR_MHPM_COUNTER_9, + 
riscv::CSR_MHPM_COUNTER_10, + riscv::CSR_MHPM_COUNTER_11, + riscv::CSR_MHPM_COUNTER_12, + riscv::CSR_MHPM_COUNTER_13, + riscv::CSR_MHPM_COUNTER_14, + riscv::CSR_MHPM_COUNTER_15, + riscv::CSR_MHPM_COUNTER_16, + riscv::CSR_MHPM_COUNTER_17, + riscv::CSR_MHPM_COUNTER_18, + riscv::CSR_MHPM_COUNTER_19, + riscv::CSR_MHPM_COUNTER_20, + riscv::CSR_MHPM_COUNTER_21, + riscv::CSR_MHPM_COUNTER_22, + riscv::CSR_MHPM_COUNTER_23, + riscv::CSR_MHPM_COUNTER_24, + riscv::CSR_MHPM_COUNTER_25, + riscv::CSR_MHPM_COUNTER_26, + riscv::CSR_MHPM_COUNTER_27, + riscv::CSR_MHPM_COUNTER_28, + riscv::CSR_MHPM_COUNTER_29, + riscv::CSR_MHPM_COUNTER_30, + riscv::CSR_MHPM_COUNTER_31 : begin + perf_we_o = 1'b1; + perf_data_o = csr_wdata; + end + + riscv::CSR_MHPM_COUNTER_3H, + riscv::CSR_MHPM_COUNTER_4H, + riscv::CSR_MHPM_COUNTER_5H, + riscv::CSR_MHPM_COUNTER_6H, + riscv::CSR_MHPM_COUNTER_7H, + riscv::CSR_MHPM_COUNTER_8H, + riscv::CSR_MHPM_COUNTER_9H, + riscv::CSR_MHPM_COUNTER_10H, + riscv::CSR_MHPM_COUNTER_11H, + riscv::CSR_MHPM_COUNTER_12H, + riscv::CSR_MHPM_COUNTER_13H, + riscv::CSR_MHPM_COUNTER_14H, + riscv::CSR_MHPM_COUNTER_15H, + riscv::CSR_MHPM_COUNTER_16H, + riscv::CSR_MHPM_COUNTER_17H, + riscv::CSR_MHPM_COUNTER_18H, + riscv::CSR_MHPM_COUNTER_19H, + riscv::CSR_MHPM_COUNTER_20H, + riscv::CSR_MHPM_COUNTER_21H, + riscv::CSR_MHPM_COUNTER_22H, + riscv::CSR_MHPM_COUNTER_23H, + riscv::CSR_MHPM_COUNTER_24H, + riscv::CSR_MHPM_COUNTER_25H, + riscv::CSR_MHPM_COUNTER_26H, + riscv::CSR_MHPM_COUNTER_27H, + riscv::CSR_MHPM_COUNTER_28H, + riscv::CSR_MHPM_COUNTER_29H, + riscv::CSR_MHPM_COUNTER_30H, + riscv::CSR_MHPM_COUNTER_31H : begin + perf_we_o = 1'b1; + if (riscv::XLEN == 32) perf_data_o = csr_wdata; + else update_access_exception = 1'b1; + end + + riscv::CSR_DCACHE: dcache_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + riscv::CSR_ICACHE: icache_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + riscv::CSR_ACC_CONS: begin + if (CVA6Cfg.EnableAccelerator) begin + acc_cons_d = 
{{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + end else begin + update_access_exception = 1'b1; + end + end + // PMP locked logic + // 1. refuse to update any locked entry + // 2. also refuse to update the entry below a locked TOR entry + // Note that writes to pmpcfg below a locked TOR entry are valid + riscv::CSR_PMPCFG0: + for (int i = 0; i < (riscv::XLEN / 8); i++) + if (!pmpcfg_q[i].locked) pmpcfg_d[i] = csr_wdata[i*8+:8]; + riscv::CSR_PMPCFG1: begin + if (riscv::XLEN == 32) begin + for (int i = 0; i < 4; i++) + if (!pmpcfg_q[i+4].locked) pmpcfg_d[i+4] = csr_wdata[i*8+:8]; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_PMPCFG2: + for (int i = 0; i < (riscv::XLEN / 8); i++) + if (!pmpcfg_q[i+8].locked) pmpcfg_d[i+8] = csr_wdata[i*8+:8]; + riscv::CSR_PMPCFG3: begin + if (riscv::XLEN == 32) begin + for (int i = 0; i < 4; i++) + if (!pmpcfg_q[i+12].locked) pmpcfg_d[i+12] = csr_wdata[i*8+:8]; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_PMPADDR0, + riscv::CSR_PMPADDR1, + riscv::CSR_PMPADDR2, + riscv::CSR_PMPADDR3, + riscv::CSR_PMPADDR4, + riscv::CSR_PMPADDR5, + riscv::CSR_PMPADDR6, + riscv::CSR_PMPADDR7, + riscv::CSR_PMPADDR8, + riscv::CSR_PMPADDR9, + riscv::CSR_PMPADDR10, + riscv::CSR_PMPADDR11, + riscv::CSR_PMPADDR12, + riscv::CSR_PMPADDR13, + riscv::CSR_PMPADDR14, + riscv::CSR_PMPADDR15: begin + // index is specified by the last byte in the address + automatic logic [3:0] index = csr_addr.csr_decode.address[3:0]; + // ignore the write if this entry is locked, or if the entry above is locked AND in TOR mode (a TOR entry uses the pmpaddr below it as its base); the last entry (15) has no entry above, so index+1 is not read for it + if (!pmpcfg_q[index].locked && !((index != 4'hF) && pmpcfg_q[index+1].locked && pmpcfg_q[index+1].addr_mode == riscv::TOR)) begin + pmpaddr_d[index] = csr_wdata[riscv::PLEN-3:0]; + end + end + default: update_access_exception = 1'b1; + endcase + end + + mstatus_d.sxl = riscv::XLEN_64; + mstatus_d.uxl = riscv::XLEN_64; + + // mark the floating point extension register as dirty + if (CVA6Cfg.FpPresent && (dirty_fp_state_csr || dirty_fp_state_i)) begin +
mstatus_d.fs = riscv::Dirty; + end + // mark the vector extension register as dirty + if (CVA6Cfg.RVV && dirty_v_state_i) begin + mstatus_d.vs = riscv::Dirty; + end + // hardwired extension registers: SD summarizes dirty state in any of XS, FS or VS + mstatus_d.sd = (mstatus_q.xs == riscv::Dirty) | (mstatus_q.fs == riscv::Dirty) | (mstatus_q.vs == riscv::Dirty); + + // reserve PMPCFG bits 5 and 6 (hardwire to 0) + for (int i = 0; i < CVA6Cfg.NrPMPEntries; i++) pmpcfg_d[i].reserved = 2'b0; + + // write the floating point status register + if (CVA6Cfg.FpPresent && csr_write_fflags_i) begin + fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags; + end + + // ---------------------------- + // Accelerator FP imprecise exceptions + // ---------------------------- + + // Update fflags as soon as a FP exception occurs in the accelerator + // The exception is imprecise, and the fcsr.fflags update always happens immediately + if (CVA6Cfg.EnableAccelerator) begin + fcsr_d.fflags |= acc_fflags_ex_valid_i ? acc_fflags_ex_i : 5'b0; + end + + // --------------------- + // External Interrupts + // --------------------- + // Machine Mode External Interrupt Pending + mip_d[riscv::IRQ_M_EXT] = irq_i[0]; + // Machine software interrupt + mip_d[riscv::IRQ_M_SOFT] = ipi_i; + // Timer interrupt pending, coming from platform timer + mip_d[riscv::IRQ_M_TIMER] = time_irq_i; + + // ----------------------- + // Manage Exception Stack + // ----------------------- + // update exception CSRs + // we got an exception update cause, pc and stval register + trap_to_priv_lvl = riscv::PRIV_LVL_M; + // Exception is taken and we are not in debug mode + // exceptions in debug mode don't update any fields + if ((CVA6Cfg.DebugEn && !debug_mode_q && ex_i.cause != riscv::DEBUG_REQUEST && ex_i.valid) || (!CVA6Cfg.DebugEn && ex_i.valid)) begin + // do not flush, flush is reserved for CSR writes with side effects + flush_o = 1'b0; + // figure out where to trap to + // a m-mode trap might be delegated if we are taking it in S mode + // first figure out if this was an exception or an interrupt
e.g.: look at bit (XLEN-1) + // the cause register can only be $clog2(riscv::XLEN) bits long (as we only support XLEN exceptions) + if (CVA6Cfg.RVS && ((ex_i.cause[riscv::XLEN-1] && mideleg_q[ex_i.cause[$clog2( + riscv::XLEN + )-1:0]]) || (~ex_i.cause[riscv::XLEN-1] && medeleg_q[ex_i.cause[$clog2( + riscv::XLEN + )-1:0]]))) begin + // traps never transition from a more-privileged mode to a less privileged mode + // so if we are already in M mode, stay there + if (priv_lvl_o == riscv::PRIV_LVL_M) trap_to_priv_lvl = riscv::PRIV_LVL_M; + else trap_to_priv_lvl = riscv::PRIV_LVL_S; + end + + // trap to supervisor mode + if (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S) begin + // update sstatus + mstatus_d.sie = 1'b0; + mstatus_d.spie = mstatus_q.sie; + // this can either be user or supervisor mode + mstatus_d.spp = priv_lvl_q[0]; + // set cause + scause_d = ex_i.cause; + // set epc + sepc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + // set mtval or stval + stval_d = (ariane_pkg::ZERO_TVAL + && (ex_i.cause inside { + riscv::ILLEGAL_INSTR, + riscv::BREAKPOINT, + riscv::ENV_CALL_UMODE, + riscv::ENV_CALL_SMODE, + riscv::ENV_CALL_MMODE + } || ex_i.cause[riscv::XLEN-1])) ? '0 : ex_i.tval; + // trap to machine mode + end else begin + // update mstatus + mstatus_d.mie = 1'b0; + mstatus_d.mpie = mstatus_q.mie; + // save the previous privilege mode + mstatus_d.mpp = priv_lvl_q; + mcause_d = ex_i.cause; + // set epc + mepc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + // set mtval or stval + mtval_d = (ariane_pkg::ZERO_TVAL + && (ex_i.cause inside { + riscv::ILLEGAL_INSTR, + riscv::BREAKPOINT, + riscv::ENV_CALL_UMODE, + riscv::ENV_CALL_SMODE, + riscv::ENV_CALL_MMODE + } || ex_i.cause[riscv::XLEN-1])) ? '0 : ex_i.tval; + end + + priv_lvl_d = trap_to_priv_lvl; + end + + // ------------------------------ + // Debug + // ------------------------------ + // Explains why Debug Mode was entered. 
+ // When there are multiple reasons to enter Debug Mode in a single cycle, hardware should set cause to the cause with the highest priority. + // 1: An ebreak instruction was executed. (priority 3) + // 2: The Trigger Module caused a breakpoint exception. (priority 4) + // 3: The debugger requested entry to Debug Mode. (priority 2) + // 4: The hart single stepped because step was set. (priority 1) + // we are currently not in debug mode and could potentially enter + if (!debug_mode_q) begin + dcsr_d.prv = priv_lvl_o; + // trigger module fired + + // caused by a breakpoint + if (CVA6Cfg.DebugEn && ex_i.valid && ex_i.cause == riscv::BREAKPOINT) begin + dcsr_d.prv = priv_lvl_o; + // check that we actually want to enter debug depending on the privilege level we are currently in + unique case (priv_lvl_o) + riscv::PRIV_LVL_M: begin + debug_mode_d = dcsr_q.ebreakm; + set_debug_pc_o = dcsr_q.ebreakm; + end + riscv::PRIV_LVL_S: begin + if (CVA6Cfg.RVS) begin + debug_mode_d = dcsr_q.ebreaks; + set_debug_pc_o = dcsr_q.ebreaks; + end + end + riscv::PRIV_LVL_U: begin + if (CVA6Cfg.RVU) begin + debug_mode_d = dcsr_q.ebreaku; + set_debug_pc_o = dcsr_q.ebreaku; + end + end + default: ; + endcase + // save PC of next this instruction e.g.: the next one to be executed + dpc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + dcsr_d.cause = ariane_pkg::CauseBreakpoint; + end + + // we've got a debug request + if (CVA6Cfg.DebugEn && ex_i.valid && ex_i.cause == riscv::DEBUG_REQUEST) begin + dcsr_d.prv = priv_lvl_o; + // save the PC + dpc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + // enter debug mode + debug_mode_d = 1'b1; + // jump to the base address + set_debug_pc_o = 1'b1; + // save the cause as external debug request + dcsr_d.cause = ariane_pkg::CauseRequest; + end + + // single step enable and we just retired an instruction + if (CVA6Cfg.DebugEn && dcsr_q.step && commit_ack_i[0]) begin + dcsr_d.prv = priv_lvl_o; + // valid CTRL flow change + if 
(commit_instr_i[0].fu == CTRL_FLOW) begin + // we saved the correct target address during execute + dpc_d = { + {riscv::XLEN - riscv::VLEN{commit_instr_i[0].bp.predict_address[riscv::VLEN-1]}}, + commit_instr_i[0].bp.predict_address + }; + // exception valid + end else if (ex_i.valid) begin + dpc_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, trap_vector_base_o}; + // return from environment + end else if (eret_o) begin + dpc_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, epc_o}; + // consecutive PC + end else begin + dpc_d = { + {riscv::XLEN - riscv::VLEN{commit_instr_i[0].pc[riscv::VLEN-1]}}, + commit_instr_i[0].pc + (commit_instr_i[0].is_compressed ? 'h2 : 'h4) + }; + end + debug_mode_d = 1'b1; + set_debug_pc_o = 1'b1; + dcsr_d.cause = ariane_pkg::CauseSingleStep; + end + end + // go in halt-state again when we encounter an exception + if (CVA6Cfg.DebugEn && debug_mode_q && ex_i.valid && ex_i.cause == riscv::BREAKPOINT) begin + set_debug_pc_o = 1'b1; + end + + // ------------------------------ + // MPRV - Modify Privilege Level + // ------------------------------ + // Set the address translation at which the load and stores should occur + // we can use the previous values since changing the address translation will always involve a pipeline flush + if (ariane_pkg::MMU_PRESENT && mprv && CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == riscv::MODE_SV && (mstatus_q.mpp != riscv::PRIV_LVL_M)) + en_ld_st_translation_d = 1'b1; + else // otherwise we go with the regular settings + en_ld_st_translation_d = en_translation_o; + + ld_st_priv_lvl_o = (mprv) ? 
mstatus_q.mpp : priv_lvl_o; + en_ld_st_translation_o = en_ld_st_translation_q; + // ------------------------------ + // Return from Environment + // ------------------------------ + // When executing an xRET instruction, supposing xPP holds the value y, xIE is set to xPIE; the privilege + // mode is changed to y; xPIE is set to 1; and xPP is set to U + if (mret) begin + // return from exception, IF doesn't care from where we are returning + eret_o = 1'b1; + // return to the previous privilege level and restore all enable flags + // get the previous machine interrupt enable flag + mstatus_d.mie = mstatus_q.mpie; + // restore the previous privilege level + priv_lvl_d = mstatus_q.mpp; + // set mpp to user mode + mstatus_d.mpp = riscv::PRIV_LVL_U; + // set mpie to 1 + mstatus_d.mpie = 1'b1; + end + + if (CVA6Cfg.RVS && sret) begin + // return from exception, IF doesn't care from where we are returning + eret_o = 1'b1; + // return the previous supervisor interrupt enable flag + mstatus_d.sie = mstatus_q.spie; + // restore the previous privilege level + priv_lvl_d = riscv::priv_lvl_t'({1'b0, mstatus_q.spp}); + // set spp to user mode + mstatus_d.spp = 1'b0; + // set spie to 1 + mstatus_d.spie = 1'b1; + end + + // return from debug mode + if (CVA6Cfg.DebugEn && dret) begin + // return from exception, IF doesn't care from where we are returning + eret_o = 1'b1; + // restore the previous privilege level + priv_lvl_d = riscv::priv_lvl_t'(dcsr_q.prv); + // actually return from debug mode + debug_mode_d = 1'b0; + end + end + + // --------------------------- + // CSR OP Select Logic + // --------------------------- + always_comb begin : csr_op_logic + csr_wdata = csr_wdata_i; + csr_we = 1'b1; + csr_read = 1'b1; + mret = 1'b0; + sret = 1'b0; + dret = 1'b0; + + unique case (csr_op_i) + CSR_WRITE: csr_wdata = csr_wdata_i; + CSR_SET: csr_wdata = csr_wdata_i | csr_rdata; + CSR_CLEAR: csr_wdata = (~csr_wdata_i) & csr_rdata; + CSR_READ: csr_we = 1'b0; + MRET: begin + // the return 
should not have any write or read side-effects + csr_we = 1'b0; + csr_read = 1'b0; + mret = 1'b1; // signal a return from machine mode + end + default: begin + if (CVA6Cfg.RVS && csr_op_i == SRET) begin + // the return should not have any write or read side-effects + csr_we = 1'b0; + csr_read = 1'b0; + sret = 1'b1; // signal a return from supervisor mode + end else if (CVA6Cfg.DebugEn && csr_op_i == DRET) begin + // the return should not have any write or read side-effects + csr_we = 1'b0; + csr_read = 1'b0; + dret = 1'b1; // signal a return from debug mode + end else begin + csr_we = 1'b0; + csr_read = 1'b0; + end + end + endcase + // if we are violating our privileges do not update the architectural state + if (privilege_violation) begin + csr_we = 1'b0; + csr_read = 1'b0; + end + end + + assign irq_ctrl_o.mie = mie_q; + assign irq_ctrl_o.mip = mip_q; + assign irq_ctrl_o.sie = mstatus_q.sie; + assign irq_ctrl_o.mideleg = mideleg_q; + assign irq_ctrl_o.global_enable = (~debug_mode_q) + // interrupts are enabled during single step or we are not stepping + // No need to check interrupts during single step if we don't support DEBUG mode + & (~CVA6Cfg.DebugEn | (~dcsr_q.step | dcsr_q.stepie)) + & ((mstatus_q.mie & (priv_lvl_o == riscv::PRIV_LVL_M)) + | (priv_lvl_o != riscv::PRIV_LVL_M)); + + always_comb begin : privilege_check + // ----------------- + // Privilege Check + // ----------------- + privilege_violation = 1'b0; + // if we are reading or writing, check for the correct privilege level this has + // precedence over interrupts + if (csr_op_i inside {CSR_WRITE, CSR_SET, CSR_CLEAR, CSR_READ}) begin + if ((riscv::priv_lvl_t'(priv_lvl_o & csr_addr.csr_decode.priv_lvl) != csr_addr.csr_decode.priv_lvl)) begin + privilege_violation = 1'b1; + end + // check access to debug mode only CSRs + if ((!CVA6Cfg.DebugEn && csr_addr_i[11:4] == 8'h7b) || (CVA6Cfg.DebugEn && csr_addr_i[11:4] == 8'h7b && !debug_mode_q)) begin + privilege_violation = 1'b1; + end + // check
counter-enabled counter CSR accesses + // counter address range is C00 to C1F + if (csr_addr_i inside {[riscv::CSR_CYCLE : riscv::CSR_HPM_COUNTER_31]}) begin + if (priv_lvl_o == riscv::PRIV_LVL_S && CVA6Cfg.RVS) begin + privilege_violation = ~mcounteren_q[csr_addr_i[4:0]]; + end else if (priv_lvl_o == riscv::PRIV_LVL_U && CVA6Cfg.RVU) begin + privilege_violation = ~mcounteren_q[csr_addr_i[4:0]] | ~scounteren_q[csr_addr_i[4:0]]; + end else if (priv_lvl_o == riscv::PRIV_LVL_M) begin + privilege_violation = 1'b0; + end + end + end + end + // ---------------------- + // CSR Exception Control + // ---------------------- + always_comb begin : exception_ctrl + csr_exception_o = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0}; + // ---------------------------------- + // Illegal Access (decode exception) + // ---------------------------------- + // we got an exception in one of the processes above + // throw an illegal instruction exception + if (update_access_exception || read_access_exception) begin + csr_exception_o.cause = riscv::ILLEGAL_INSTR; + // we don't set the tval field as this will be set by the commit stage + // this spares the extra wiring from commit to CSR and back to commit + csr_exception_o.valid = 1'b1; + end + + if (privilege_violation) begin + csr_exception_o.cause = riscv::ILLEGAL_INSTR; + csr_exception_o.valid = 1'b1; + end + end + + // ------------------- + // Wait for Interrupt + // ------------------- + always_comb begin : wfi_ctrl + // wait for interrupt register + wfi_d = wfi_q; + // if there is any (enabled) interrupt pending un-stall the core + // also un-stall if we want to enter debug mode + if (|(mip_q & mie_q) || (CVA6Cfg.DebugEn && debug_req_i) || irq_i[1]) begin + wfi_d = 1'b0; + // or alternatively if there is no exception pending and we are not in debug mode wait here + // for the interrupt + end else if (((CVA6Cfg.DebugEn && !debug_mode_q) && csr_op_i == WFI && !ex_i.valid) || (!CVA6Cfg.DebugEn && csr_op_i == WFI && !ex_i.valid)) 
begin + wfi_d = 1'b1; + end + end + + // output assignments dependent on privilege mode: trap_vector_base_o and epc_o + always_comb begin : priv_output + trap_vector_base_o = {mtvec_q[riscv::VLEN-1:2], 2'b0}; + // trap is taken to supervisor mode: output stvec + if (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S) begin + trap_vector_base_o = {stvec_q[riscv::VLEN-1:2], 2'b0}; + end + + // if we are in debug mode jump to a specific address + if (CVA6Cfg.DebugEn && debug_mode_q) begin + trap_vector_base_o = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.ExceptionAddress[riscv::VLEN-1:0]; + end + + // check if we are in vectored mode, if yes then do BASE + 4 * cause. We + // are imposing an additional alignment-constraint of 64 * 4 bytes since + // we want to spare the costly addition. Furthermore check to which + // privilege level we are jumping and whether the vectored mode is + // activated for _that_ privilege level. + if (ex_i.cause[riscv::XLEN-1] && + ((((CVA6Cfg.RVS || CVA6Cfg.RVU) && trap_to_priv_lvl == riscv::PRIV_LVL_M && mtvec_q[0]) || (!CVA6Cfg.RVS && !CVA6Cfg.RVU && mtvec_q[0])) + || (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S && stvec_q[0]))) begin + trap_vector_base_o[7:2] = ex_i.cause[5:0]; + end + + epc_o = mepc_q[riscv::VLEN-1:0]; + // we are returning from supervisor mode, so take the sepc register + if (CVA6Cfg.RVS && sret) begin + epc_o = sepc_q[riscv::VLEN-1:0]; + end + // we are returning from debug mode, so take the dpc register + if (CVA6Cfg.DebugEn && dret) begin + epc_o = dpc_q[riscv::VLEN-1:0]; + end + end + + // ------------------- + // Output Assignments + // ------------------- + always_comb begin + // When the SEIP bit is read with a CSRRW, CSRRS, or CSRRC instruction, the value + // returned in the rd destination register contains the logical-OR of the software-writable + // bit and the interrupt signal from the interrupt controller.
+ csr_rdata_o = csr_rdata; + + unique case (csr_addr.address) + riscv::CSR_MIP: + csr_rdata_o = csr_rdata | ({{riscv::XLEN - 1{1'b0}}, irq_i[1]} << riscv::IRQ_S_EXT); + // in supervisor mode we also need to check whether we delegated this bit + riscv::CSR_SIP: begin + if (CVA6Cfg.RVS) begin + csr_rdata_o = csr_rdata + | ({{riscv::XLEN-1{1'b0}}, (irq_i[1] & mideleg_q[riscv::IRQ_S_EXT])} << riscv::IRQ_S_EXT); + end + end + default: ; + endcase + end + + // in debug mode we execute with privilege level M + assign priv_lvl_o = (CVA6Cfg.DebugEn && debug_mode_q) ? riscv::PRIV_LVL_M : priv_lvl_q; + // FPU outputs + assign fflags_o = fcsr_q.fflags; + assign frm_o = fcsr_q.frm; + assign fprec_o = fcsr_q.fprec; + // MMU outputs + assign satp_ppn_o = satp_q.ppn; + assign asid_o = satp_q.asid[AsidWidth-1:0]; + assign sum_o = mstatus_q.sum; + // we support bare memory addressing and SV39 + assign en_translation_o = ((CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == riscv::MODE_SV) && + priv_lvl_o != riscv::PRIV_LVL_M) + ? 1'b1 + : 1'b0; + assign mxr_o = mstatus_q.mxr; + assign tvm_o = mstatus_q.tvm; + assign tw_o = mstatus_q.tw; + assign tsr_o = mstatus_q.tsr; + assign halt_csr_o = wfi_q; +`ifdef PITON_ARIANE + assign icache_en_o = icache_q[0]; +`else + assign icache_en_o = icache_q[0] & (~debug_mode_q); +`endif + assign dcache_en_o = dcache_q[0]; + assign acc_cons_en_o = CVA6Cfg.EnableAccelerator ? acc_cons_q[0] : 1'b0; + + // determine if mprv needs to be considered if in debug mode + assign mprv = (CVA6Cfg.DebugEn && debug_mode_q && !dcsr_q.mprven) ? 
1'b0 : mstatus_q.mprv; + assign debug_mode_o = debug_mode_q; + assign single_step_o = dcsr_q.step; + assign mcountinhibit_o = {{29 - MHPMCounterNum{1'b0}}, mcountinhibit_q}; + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + priv_lvl_q <= riscv::PRIV_LVL_M; + // floating-point registers + fcsr_q <= '0; + // debug signals + debug_mode_q <= 1'b0; + if (CVA6Cfg.DebugEn) begin + dcsr_q <= '0; + dcsr_q.prv <= riscv::PRIV_LVL_M; + dcsr_q.xdebugver <= 4'h4; + dpc_q <= '0; + dscratch0_q <= {riscv::XLEN{1'b0}}; + dscratch1_q <= {riscv::XLEN{1'b0}}; + end + // machine mode registers + mstatus_q <= 64'b0; + // set to boot address + direct mode + 4 byte offset which is the initial trap + mtvec_rst_load_q <= 1'b1; + mtvec_q <= '0; + mip_q <= {riscv::XLEN{1'b0}}; + mie_q <= {riscv::XLEN{1'b0}}; + mepc_q <= {riscv::XLEN{1'b0}}; + mcause_q <= {riscv::XLEN{1'b0}}; + mcounteren_q <= {riscv::XLEN{1'b0}}; + mscratch_q <= {riscv::XLEN{1'b0}}; + mtval_q <= {riscv::XLEN{1'b0}}; + fiom_q <= '0; + dcache_q <= {{riscv::XLEN - 1{1'b0}}, 1'b1}; + icache_q <= {{riscv::XLEN - 1{1'b0}}, 1'b1}; + mcountinhibit_q <= '0; + acc_cons_q <= {{riscv::XLEN - 1{1'b0}}, CVA6Cfg.EnableAccelerator}; + // supervisor mode registers + if (CVA6Cfg.RVS) begin + medeleg_q <= {riscv::XLEN{1'b0}}; + mideleg_q <= {riscv::XLEN{1'b0}}; + sepc_q <= {riscv::XLEN{1'b0}}; + scause_q <= {riscv::XLEN{1'b0}}; + stvec_q <= {riscv::XLEN{1'b0}}; + scounteren_q <= {riscv::XLEN{1'b0}}; + sscratch_q <= {riscv::XLEN{1'b0}}; + stval_q <= {riscv::XLEN{1'b0}}; + satp_q <= {riscv::XLEN{1'b0}}; + end + // timer and counters + cycle_q <= 64'b0; + instret_q <= 64'b0; + // aux registers + en_ld_st_translation_q <= 1'b0; + // wait for interrupt + wfi_q <= 1'b0; + // pmp + for (int i = 0; i < 16; i++) begin + if (i < CVA6Cfg.NrPMPEntries) begin + pmpcfg_q[i] <= riscv::pmpcfg_t'(CVA6Cfg.PMPCfgRstVal[i]); + pmpaddr_q[i] <= CVA6Cfg.PMPAddrRstVal[i][riscv::PLEN-3:0]; + end else begin + 
pmpcfg_q[i] <= '0; + pmpaddr_q[i] <= '0; + end + end + end else begin + priv_lvl_q <= priv_lvl_d; + // floating-point registers + fcsr_q <= fcsr_d; + // debug signals + if (CVA6Cfg.DebugEn) begin + debug_mode_q <= debug_mode_d; + dcsr_q <= dcsr_d; + dpc_q <= dpc_d; + dscratch0_q <= dscratch0_d; + dscratch1_q <= dscratch1_d; + end + // machine mode registers + mstatus_q <= mstatus_d; + mtvec_rst_load_q <= 1'b0; + mtvec_q <= mtvec_d; + mip_q <= mip_d; + mie_q <= mie_d; + mepc_q <= mepc_d; + mcause_q <= mcause_d; + mcounteren_q <= mcounteren_d; + mscratch_q <= mscratch_d; + mtval_q <= mtval_d; + fiom_q <= fiom_d; + dcache_q <= dcache_d; + icache_q <= icache_d; + mcountinhibit_q <= mcountinhibit_d; + acc_cons_q <= acc_cons_d; + // supervisor mode registers + if (CVA6Cfg.RVS) begin + medeleg_q <= medeleg_d; + mideleg_q <= mideleg_d; + sepc_q <= sepc_d; + scause_q <= scause_d; + stvec_q <= stvec_d; + scounteren_q <= scounteren_d; + sscratch_q <= sscratch_d; + stval_q <= stval_d; + satp_q <= satp_d; + end + // timer and counters + cycle_q <= cycle_d; + instret_q <= instret_d; + // aux registers + en_ld_st_translation_q <= en_ld_st_translation_d; + // wait for interrupt + wfi_q <= wfi_d; + // pmp + for (int i = 0; i < 16; i++) begin + if (i < CVA6Cfg.NrPMPEntries) begin + // We only support >=8-byte granularity, NA4 is disabled + if(!CVA6Cfg.PMPEntryReadOnly[i] && pmpcfg_d[i].addr_mode != riscv::NA4 && !(pmpcfg_d[i].access_type.r == '0 && pmpcfg_d[i].access_type.w == '1)) begin + pmpcfg_q[i] <= pmpcfg_d[i]; + end else begin + pmpcfg_q[i] <= pmpcfg_q[i]; + end + if (!CVA6Cfg.PMPEntryReadOnly[i]) begin + pmpaddr_q[i] <= pmpaddr_d[i]; + end else begin + pmpaddr_q[i] <= pmpaddr_q[i]; + end + end else begin + pmpcfg_q[i] <= '0; + pmpaddr_q[i] <= '0; + end + end + end + end + + //------------- + // Assertions + //------------- + //pragma translate_off + // check that eret and ex are never valid together + assert property (@(posedge clk_i) disable iff (!rst_ni !== '0) !(eret_o && 
ex_i.valid)) + else begin + $error("eret and exception should never be valid at the same time"); + $stop(); + end + //pragma translate_on +endmodule diff --git a/test/type_param/core/cva6.sv b/test/type_param/core/cva6.sv new file mode 100644 index 0000000..4b9ccf0 --- /dev/null +++ b/test/type_param/core/cva6.sv @@ -0,0 +1,1401 @@ +// Copyright 2017-2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.03.2017 +// Description: CVA6 Top-level module + + +module cva6 + import ariane_pkg::*; +#( + // CVA6 config + parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg, + parameter bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace), + // RVFI + parameter type rvfi_probes_t = struct packed { + logic [TRANS_ID_BITS-1:0] issue_pointer; + logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer; + logic flush_unissued_instr; + logic decoded_instr_valid; + logic flush; + logic decoded_instr_ack; + logic issue_instr_ack; + logic fetch_entry_valid; + logic [31:0] instruction; + logic is_compressed; + riscv::xlen_t rs1_forwarding; + riscv::xlen_t rs2_forwarding; + scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr; + exception_t ex_commit; + riscv::priv_lvl_t priv_lvl; + lsu_ctrl_t lsu_ctrl; + logic [((CVA6Cfg.CvxifEn || CVA6Cfg.RVV) ? 
5 : 4)-1:0][riscv::XLEN-1:0] wbdata; + logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack; + logic [riscv::PLEN-1:0] mem_paddr; + logic debug_mode; + logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata; + }, + + // AXI types + parameter type axi_ar_chan_t = struct packed { + logic [CVA6Cfg.AxiIdWidth-1:0] id; + logic [CVA6Cfg.AxiAddrWidth-1:0] addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type axi_aw_chan_t = struct packed { + logic [CVA6Cfg.AxiIdWidth-1:0] id; + logic [CVA6Cfg.AxiAddrWidth-1:0] addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + axi_pkg::atop_t atop; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type axi_w_chan_t = struct packed { + logic [CVA6Cfg.AxiDataWidth-1:0] data; + logic [(CVA6Cfg.AxiDataWidth/8)-1:0] strb; + logic last; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type b_chan_t = struct packed { + logic [CVA6Cfg.AxiIdWidth-1:0] id; + axi_pkg::resp_t resp; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type r_chan_t = struct packed { + logic [CVA6Cfg.AxiIdWidth-1:0] id; + logic [CVA6Cfg.AxiDataWidth-1:0] data; + axi_pkg::resp_t resp; + logic last; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type noc_req_t = struct packed { + axi_aw_chan_t aw; + logic aw_valid; + axi_w_chan_t w; + logic w_valid; + logic b_ready; + axi_ar_chan_t ar; + logic ar_valid; + logic r_ready; + }, + parameter type noc_resp_t = struct packed { + logic aw_ready; + logic ar_ready; + logic w_ready; + logic b_valid; + b_chan_t b; + logic r_valid; + r_chan_t r; + }, + // + parameter type acc_cfg_t = logic, + parameter acc_cfg_t AccCfg = '0, + parameter type cvxif_req_t = 
cvxif_pkg::cvxif_req_t, + parameter type cvxif_resp_t = cvxif_pkg::cvxif_resp_t +) ( + input logic clk_i, + input logic rst_ni, + // Core ID, Cluster ID and boot address are considered more or less static + input logic [riscv::VLEN-1:0] boot_addr_i, // reset boot address + input logic [riscv::XLEN-1:0] hart_id_i, // hart id in a multicore environment (reflected in a CSR) + // Interrupt inputs + input logic [1:0] irq_i, // level sensitive IR lines, mip & sip (async) + input logic ipi_i, // inter-processor interrupts (async) + // Timer facilities + input logic time_irq_i, // timer interrupt in (async) + input logic debug_req_i, // debug request (async) + // RISC-V formal interface port (`rvfi`): + // Can be left open when formal tracing is not needed. + output rvfi_probes_t rvfi_probes_o, + output cvxif_req_t cvxif_req_o, + input cvxif_resp_t cvxif_resp_i, + // memory side + output noc_req_t noc_req_o, + input noc_resp_t noc_resp_i +); + + // ------------------------------------------ + // CVA6 configuration + // ------------------------------------------ + // Extended config + localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn; + localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn; + localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8; + localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec; // Are non-standard extensions present? + localparam int unsigned FLen = RVD ? 64 : // D ext. + RVF ? 32 : // F ext. + CVA6Cfg.XF16 ? 16 : // Xf16 ext. + CVA6Cfg.XF16ALT ? 16 : // Xf16alt ext. + CVA6Cfg.XF8 ? 8 : // Xf8 ext. 
+ 1; // Unused in case of no FP + + // Transprecision floating-point extensions configuration + localparam bit RVFVec = RVF & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled + localparam bit XF16Vec = CVA6Cfg.XF16 & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled + localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled + localparam bit XF8Vec = CVA6Cfg.XF8 & CVA6Cfg.XFVec & FLen>8; // FP8 vectors available if vectors and larger fmt enabled + + localparam bit EnableAccelerator = CVA6Cfg.RVV; // Currently only used by V extension (Ara) + localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4; + + localparam NrRgprPorts = 2; + + localparam bit NonIdemPotenceEn = CVA6Cfg.NrNonIdempotentRules && CVA6Cfg.NonIdempotentLength; // Currently only used by V extension (Ara) + + localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = { + CVA6Cfg.NrCommitPorts, + CVA6Cfg.AxiAddrWidth, + CVA6Cfg.AxiDataWidth, + CVA6Cfg.AxiIdWidth, + CVA6Cfg.AxiUserWidth, + CVA6Cfg.NrLoadBufEntries, + CVA6Cfg.FpuEn, + CVA6Cfg.XF16, + CVA6Cfg.XF16ALT, + CVA6Cfg.XF8, + CVA6Cfg.RVA, + CVA6Cfg.RVB, + CVA6Cfg.RVV, + CVA6Cfg.RVC, + CVA6Cfg.RVZCB, + CVA6Cfg.XFVec, + CVA6Cfg.CvxifEn, + CVA6Cfg.ZiCondExtEn, + // Extended + bit'(RVF), + bit'(RVD), + bit'(FpPresent), + bit'(NSX), + unsigned'(FLen), + bit'(RVFVec), + bit'(XF16Vec), + bit'(XF16ALTVec), + bit'(XF8Vec), + unsigned'(NrRgprPorts), + unsigned'(NrWbPorts), + bit'(EnableAccelerator), + CVA6Cfg.RVS, + CVA6Cfg.RVU, + CVA6Cfg.HaltAddress, + CVA6Cfg.ExceptionAddress, + CVA6Cfg.RASDepth, + CVA6Cfg.BTBEntries, + CVA6Cfg.BHTEntries, + CVA6Cfg.DmBaseAddress, + CVA6Cfg.NrPMPEntries, + CVA6Cfg.PMPCfgRstVal, + CVA6Cfg.PMPAddrRstVal, + CVA6Cfg.PMPEntryReadOnly, + CVA6Cfg.NOCType, + CVA6Cfg.NrNonIdempotentRules, + CVA6Cfg.NonIdempotentAddrBase, + CVA6Cfg.NonIdempotentLength, + 
CVA6Cfg.NrExecuteRegionRules, + CVA6Cfg.ExecuteRegionAddrBase, + CVA6Cfg.ExecuteRegionLength, + CVA6Cfg.NrCachedRegionRules, + CVA6Cfg.CachedRegionAddrBase, + CVA6Cfg.CachedRegionLength, + CVA6Cfg.MaxOutstandingStores, + CVA6Cfg.DebugEn, + NonIdemPotenceEn, + CVA6Cfg.AxiBurstWriteEn + }; + + + // ------------------------------------------ + // Global Signals + // Signals connecting more than one module + // ------------------------------------------ + riscv::priv_lvl_t priv_lvl; + exception_t ex_commit; // exception from commit stage + bp_resolve_t resolved_branch; + logic [ riscv::VLEN-1:0] pc_commit; + logic eret; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] commit_ack; + + localparam NumPorts = 4; + cvxif_pkg::cvxif_req_t cvxif_req; + cvxif_pkg::cvxif_resp_t cvxif_resp; + + // -------------- + // PCGEN <-> CSR + // -------------- + logic [riscv::VLEN-1:0] trap_vector_base_commit_pcgen; + logic [riscv::VLEN-1:0] epc_commit_pcgen; + // -------------- + // IF <-> ID + // -------------- + fetch_entry_t fetch_entry_if_id; + logic fetch_valid_if_id; + logic fetch_ready_id_if; + + // -------------- + // ID <-> ISSUE + // -------------- + scoreboard_entry_t issue_entry_id_issue; + logic issue_entry_valid_id_issue; + logic is_ctrl_fow_id_issue; + logic issue_instr_issue_id; + + // -------------- + // ISSUE <-> EX + // -------------- + logic [riscv::VLEN-1:0] rs1_forwarding_id_ex; // unregistered version of fu_data_o.operanda + logic [riscv::VLEN-1:0] rs2_forwarding_id_ex; // unregistered version of fu_data_o.operandb + + fu_data_t fu_data_id_ex; + logic [riscv::VLEN-1:0] pc_id_ex; + logic is_compressed_instr_id_ex; + // fixed latency units + logic flu_ready_ex_id; + logic [TRANS_ID_BITS-1:0] flu_trans_id_ex_id; + logic flu_valid_ex_id; + riscv::xlen_t flu_result_ex_id; + exception_t flu_exception_ex_id; + // ALU + logic alu_valid_id_ex; + // Branches and Jumps + logic branch_valid_id_ex; + + branchpredict_sbe_t branch_predict_id_ex; + logic resolve_branch_ex_id; + // LSU + 
logic lsu_valid_id_ex; + logic lsu_ready_ex_id; + + logic [TRANS_ID_BITS-1:0] load_trans_id_ex_id; + riscv::xlen_t load_result_ex_id; + logic load_valid_ex_id; + exception_t load_exception_ex_id; + + riscv::xlen_t store_result_ex_id; + logic [TRANS_ID_BITS-1:0] store_trans_id_ex_id; + logic store_valid_ex_id; + exception_t store_exception_ex_id; + // MULT + logic mult_valid_id_ex; + // FPU + logic fpu_ready_ex_id; + logic fpu_valid_id_ex; + logic [1:0] fpu_fmt_id_ex; + logic [2:0] fpu_rm_id_ex; + logic [TRANS_ID_BITS-1:0] fpu_trans_id_ex_id; + riscv::xlen_t fpu_result_ex_id; + logic fpu_valid_ex_id; + exception_t fpu_exception_ex_id; + // Accelerator + logic stall_acc_id; + scoreboard_entry_t issue_instr_id_acc; + logic issue_instr_hs_id_acc; + logic [TRANS_ID_BITS-1:0] acc_trans_id_ex_id; + riscv::xlen_t acc_result_ex_id; + logic acc_valid_ex_id; + exception_t acc_exception_ex_id; + logic halt_acc_ctrl; + logic [4:0] acc_resp_fflags; + logic acc_resp_fflags_valid; + // CSR + logic csr_valid_id_ex; + // CVXIF + logic [TRANS_ID_BITS-1:0] x_trans_id_ex_id; + riscv::xlen_t x_result_ex_id; + logic x_valid_ex_id; + exception_t x_exception_ex_id; + logic x_we_ex_id; + logic x_issue_valid_id_ex; + logic x_issue_ready_ex_id; + logic [31:0] x_off_instr_id_ex; + // -------------- + // EX <-> COMMIT + // -------------- + // CSR Commit + logic csr_commit_commit_ex; + logic dirty_fp_state; + logic dirty_v_state; + // LSU Commit + logic lsu_commit_commit_ex; + logic lsu_commit_ready_ex_commit; + logic [TRANS_ID_BITS-1:0] lsu_commit_trans_id; + logic stall_st_pending_ex; + logic no_st_pending_ex; + logic no_st_pending_commit; + logic amo_valid_commit; + // ACCEL Commit + logic acc_valid_acc_ex; + // -------------- + // ID <-> COMMIT + // -------------- + scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0] commit_instr_id_commit; + // -------------- + // RVFI + // -------------- + logic [TRANS_ID_BITS-1:0] rvfi_issue_pointer; + logic 
[CVA6ExtendCfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] rvfi_commit_pointer; + // -------------- + // COMMIT <-> ID + // -------------- + logic [CVA6ExtendCfg.NrCommitPorts-1:0][4:0] waddr_commit_id; + logic [CVA6ExtendCfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_commit_id; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] we_gpr_commit_id; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] we_fpr_commit_id; + // -------------- + // CSR <-> * + // -------------- + logic [4:0] fflags_csr_commit; + riscv::xs_t fs; + logic [2:0] frm_csr_id_issue_ex; + logic [6:0] fprec_csr_ex; + riscv::xs_t vs; + logic enable_translation_csr_ex; + logic en_ld_st_translation_csr_ex; + riscv::priv_lvl_t ld_st_priv_lvl_csr_ex; + logic sum_csr_ex; + logic mxr_csr_ex; + logic [riscv::PPNW-1:0] satp_ppn_csr_ex; + logic [ASID_WIDTH-1:0] asid_csr_ex; + logic [11:0] csr_addr_ex_csr; + fu_op csr_op_commit_csr; + riscv::xlen_t csr_wdata_commit_csr; + riscv::xlen_t csr_rdata_csr_commit; + exception_t csr_exception_csr_commit; + logic tvm_csr_id; + logic tw_csr_id; + logic tsr_csr_id; + irq_ctrl_t irq_ctrl_csr_id; + logic dcache_en_csr_nbdcache; + logic csr_write_fflags_commit_cs; + logic icache_en_csr; + logic acc_cons_en_csr; + logic debug_mode; + logic single_step_csr_commit; + riscv::pmpcfg_t [15:0] pmpcfg; + logic [15:0][riscv::PLEN-3:0] pmpaddr; + logic [31:0] mcountinhibit_csr_perf; + // ---------------------------- + // Performance Counters <-> * + // ---------------------------- + logic [11:0] addr_csr_perf; + riscv::xlen_t data_csr_perf, data_perf_csr; + logic we_csr_perf; + + logic icache_flush_ctrl_cache; + logic itlb_miss_ex_perf; + logic dtlb_miss_ex_perf; + logic dcache_miss_cache_perf; + logic icache_miss_cache_perf; + logic [ NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits; + logic stall_issue; + // -------------- + // CTRL <-> * + // -------------- + logic set_pc_ctrl_pcgen; + logic flush_csr_ctrl; + logic flush_unissued_instr_ctrl_id; + logic flush_ctrl_if; + logic flush_ctrl_id; + logic 
flush_ctrl_ex; + logic flush_ctrl_bp; + logic flush_tlb_ctrl_ex; + logic fence_i_commit_controller; + logic fence_commit_controller; + logic sfence_vma_commit_controller; + logic halt_ctrl; + logic halt_csr_ctrl; + logic dcache_flush_ctrl_cache; + logic dcache_flush_ack_cache_ctrl; + logic set_debug_pc; + logic flush_commit; + logic flush_acc; + + icache_areq_t icache_areq_ex_cache; + icache_arsp_t icache_areq_cache_ex; + icache_dreq_t icache_dreq_if_cache; + icache_drsp_t icache_dreq_cache_if; + + amo_req_t amo_req; + amo_resp_t amo_resp; + logic sb_full; + + // ---------------- + // DCache <-> * + // ---------------- + dcache_req_i_t [ 2:0] dcache_req_ports_ex_cache; + dcache_req_o_t [ 2:0] dcache_req_ports_cache_ex; + dcache_req_i_t [ 1:0] dcache_req_ports_acc_cache; + dcache_req_o_t [ 1:0] dcache_req_ports_cache_acc; + logic dcache_commit_wbuffer_empty; + logic dcache_commit_wbuffer_not_ni; + + //RVFI + lsu_ctrl_t rvfi_lsu_ctrl; + logic [riscv::PLEN-1:0] rvfi_mem_paddr; + logic rvfi_is_compressed; + rvfi_probes_t rvfi_probes; + + + // Accelerator port + logic [ 63:0] inval_addr; + logic inval_valid; + logic inval_ready; + + // -------------- + // Frontend + // -------------- + frontend #( + .CVA6Cfg(CVA6ExtendCfg) + ) i_frontend ( + .flush_i (flush_ctrl_if), // not entirely correct + .flush_bp_i (1'b0), + .halt_i (halt_ctrl), + .debug_mode_i (debug_mode), + .boot_addr_i (boot_addr_i[riscv::VLEN-1:0]), + .icache_dreq_i (icache_dreq_cache_if), + .icache_dreq_o (icache_dreq_if_cache), + .resolved_branch_i (resolved_branch), + .pc_commit_i (pc_commit), + .set_pc_commit_i (set_pc_ctrl_pcgen), + .set_debug_pc_i (set_debug_pc), + .epc_i (epc_commit_pcgen), + .eret_i (eret), + .trap_vector_base_i (trap_vector_base_commit_pcgen), + .ex_valid_i (ex_commit.valid), + .fetch_entry_o (fetch_entry_if_id), + .fetch_entry_valid_o(fetch_valid_if_id), + .fetch_entry_ready_i(fetch_ready_id_if), + .* + ); + + // --------- + // ID + // --------- + id_stage #( + 
.CVA6Cfg(CVA6ExtendCfg) + ) id_stage_i ( + .clk_i, + .rst_ni, + .flush_i(flush_ctrl_if), + .debug_req_i, + + .fetch_entry_i (fetch_entry_if_id), + .fetch_entry_valid_i(fetch_valid_if_id), + .fetch_entry_ready_o(fetch_ready_id_if), + + .issue_entry_o (issue_entry_id_issue), + .issue_entry_valid_o(issue_entry_valid_id_issue), + .is_ctrl_flow_o (is_ctrl_fow_id_issue), + .issue_instr_ack_i (issue_instr_issue_id), + + .rvfi_is_compressed_o(rvfi_is_compressed), + + .priv_lvl_i (priv_lvl), + .fs_i (fs), + .frm_i (frm_csr_id_issue_ex), + .vs_i (vs), + .irq_i (irq_i), + .irq_ctrl_i (irq_ctrl_csr_id), + .debug_mode_i(debug_mode), + .tvm_i (tvm_csr_id), + .tw_i (tw_csr_id), + .tsr_i (tsr_csr_id) + ); + + logic [NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_ex_id; + logic [NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_ex_id; + exception_t [NrWbPorts-1:0] ex_ex_ex_id; // exception from execute, ex_stage to id_stage + logic [NrWbPorts-1:0] wt_valid_ex_id; + + if (CVA6ExtendCfg.CvxifEn) begin + assign trans_id_ex_id = { + x_trans_id_ex_id, + flu_trans_id_ex_id, + load_trans_id_ex_id, + store_trans_id_ex_id, + fpu_trans_id_ex_id + }; + assign wbdata_ex_id = { + x_result_ex_id, flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id + }; + assign ex_ex_ex_id = { + x_exception_ex_id, + flu_exception_ex_id, + load_exception_ex_id, + store_exception_ex_id, + fpu_exception_ex_id + }; + assign wt_valid_ex_id = { + x_valid_ex_id, flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id + }; + end else if (CVA6ExtendCfg.EnableAccelerator) begin + assign trans_id_ex_id = { + flu_trans_id_ex_id, + load_trans_id_ex_id, + store_trans_id_ex_id, + fpu_trans_id_ex_id, + acc_trans_id_ex_id + }; + assign wbdata_ex_id = { + flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id, acc_result_ex_id + }; + assign ex_ex_ex_id = { + flu_exception_ex_id, + load_exception_ex_id, + store_exception_ex_id, + fpu_exception_ex_id, + acc_exception_ex_id + }; + assign 
wt_valid_ex_id = { + flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id, acc_valid_ex_id + }; + end else begin + assign trans_id_ex_id = { + flu_trans_id_ex_id, load_trans_id_ex_id, store_trans_id_ex_id, fpu_trans_id_ex_id + }; + assign wbdata_ex_id = { + flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id + }; + assign ex_ex_ex_id = { + flu_exception_ex_id, load_exception_ex_id, store_exception_ex_id, fpu_exception_ex_id + }; + assign wt_valid_ex_id = {flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id}; + end + + if (CVA6ExtendCfg.CvxifEn && CVA6ExtendCfg.EnableAccelerator) begin : gen_err_xif_and_acc + $error("X-interface and accelerator port cannot be enabled at the same time."); + end + + // --------- + // Issue + // --------- + issue_stage #( + .CVA6Cfg(CVA6ExtendCfg) + ) issue_stage_i ( + .clk_i, + .rst_ni, + .sb_full_o (sb_full), + .flush_unissued_instr_i(flush_unissued_instr_ctrl_id), + .flush_i (flush_ctrl_id), + .stall_i (stall_acc_id), + // ID Stage + .decoded_instr_i (issue_entry_id_issue), + .decoded_instr_valid_i (issue_entry_valid_id_issue), + .is_ctrl_flow_i (is_ctrl_fow_id_issue), + .decoded_instr_ack_o (issue_instr_issue_id), + // Functional Units + .rs1_forwarding_o (rs1_forwarding_id_ex), + .rs2_forwarding_o (rs2_forwarding_id_ex), + .fu_data_o (fu_data_id_ex), + .pc_o (pc_id_ex), + .is_compressed_instr_o (is_compressed_instr_id_ex), + // fixed latency unit ready + .flu_ready_i (flu_ready_ex_id), + // ALU + .alu_valid_o (alu_valid_id_ex), + // Branches and Jumps + .branch_valid_o (branch_valid_id_ex), // branch is valid + .branch_predict_o (branch_predict_id_ex), // branch predict to ex + .resolve_branch_i (resolve_branch_ex_id), // in order to resolve the branch + // LSU + .lsu_ready_i (lsu_ready_ex_id), + .lsu_valid_o (lsu_valid_id_ex), + // Multiplier + .mult_valid_o (mult_valid_id_ex), + // FPU + .fpu_ready_i (fpu_ready_ex_id), + .fpu_valid_o (fpu_valid_id_ex), + .fpu_fmt_o 
(fpu_fmt_id_ex), + .fpu_rm_o (fpu_rm_id_ex), + // CSR + .csr_valid_o (csr_valid_id_ex), + // CVXIF + .x_issue_valid_o (x_issue_valid_id_ex), + .x_issue_ready_i (x_issue_ready_ex_id), + .x_off_instr_o (x_off_instr_id_ex), + // Accelerator + .issue_instr_o (issue_instr_id_acc), + .issue_instr_hs_o (issue_instr_hs_id_acc), + // Commit + .resolved_branch_i (resolved_branch), + .trans_id_i (trans_id_ex_id), + .wbdata_i (wbdata_ex_id), + .ex_ex_i (ex_ex_ex_id), + .wt_valid_i (wt_valid_ex_id), + .x_we_i (x_we_ex_id), + + .waddr_i (waddr_commit_id), + .wdata_i (wdata_commit_id), + .we_gpr_i (we_gpr_commit_id), + .we_fpr_i (we_fpr_commit_id), + .commit_instr_o (commit_instr_id_commit), + .commit_ack_i (commit_ack), + // Performance Counters + .stall_issue_o (stall_issue), + //RVFI + .rvfi_issue_pointer_o (rvfi_issue_pointer), + .rvfi_commit_pointer_o(rvfi_commit_pointer), + .* + ); + + // --------- + // EX + // --------- + ex_stage #( + .CVA6Cfg (CVA6ExtendCfg), + .ASID_WIDTH(ASID_WIDTH) + ) ex_stage_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .debug_mode_i (debug_mode), + .flush_i (flush_ctrl_ex), + .rs1_forwarding_i (rs1_forwarding_id_ex), + .rs2_forwarding_i (rs2_forwarding_id_ex), + .fu_data_i (fu_data_id_ex), + .pc_i (pc_id_ex), + .is_compressed_instr_i(is_compressed_instr_id_ex), + // fixed latency units + .flu_result_o (flu_result_ex_id), + .flu_trans_id_o (flu_trans_id_ex_id), + .flu_valid_o (flu_valid_ex_id), + .flu_exception_o (flu_exception_ex_id), + .flu_ready_o (flu_ready_ex_id), + // ALU + .alu_valid_i (alu_valid_id_ex), + // Branches and Jumps + .branch_valid_i (branch_valid_id_ex), + .branch_predict_i (branch_predict_id_ex), // branch predict to ex + .resolved_branch_o (resolved_branch), + .resolve_branch_o (resolve_branch_ex_id), + // CSR + .csr_valid_i (csr_valid_id_ex), + .csr_addr_o (csr_addr_ex_csr), + .csr_commit_i (csr_commit_commit_ex), // from commit + // MULT + .mult_valid_i (mult_valid_id_ex), + // LSU + .lsu_ready_o (lsu_ready_ex_id), + 
.lsu_valid_i (lsu_valid_id_ex), + + .load_result_o (load_result_ex_id), + .load_trans_id_o (load_trans_id_ex_id), + .load_valid_o (load_valid_ex_id), + .load_exception_o(load_exception_ex_id), + + .store_result_o (store_result_ex_id), + .store_trans_id_o (store_trans_id_ex_id), + .store_valid_o (store_valid_ex_id), + .store_exception_o(store_exception_ex_id), + + .lsu_commit_i (lsu_commit_commit_ex), // from commit + .lsu_commit_ready_o (lsu_commit_ready_ex_commit), // to commit + .commit_tran_id_i (lsu_commit_trans_id), // from commit + .stall_st_pending_i (stall_st_pending_ex), + .no_st_pending_o (no_st_pending_ex), + // FPU + .fpu_ready_o (fpu_ready_ex_id), + .fpu_valid_i (fpu_valid_id_ex), + .fpu_fmt_i (fpu_fmt_id_ex), + .fpu_rm_i (fpu_rm_id_ex), + .fpu_frm_i (frm_csr_id_issue_ex), + .fpu_prec_i (fprec_csr_ex), + .fpu_trans_id_o (fpu_trans_id_ex_id), + .fpu_result_o (fpu_result_ex_id), + .fpu_valid_o (fpu_valid_ex_id), + .fpu_exception_o (fpu_exception_ex_id), + .amo_valid_commit_i (amo_valid_commit), + .amo_req_o (amo_req), + .amo_resp_i (amo_resp), + // CoreV-X-Interface + .x_valid_i (x_issue_valid_id_ex), + .x_ready_o (x_issue_ready_ex_id), + .x_off_instr_i (x_off_instr_id_ex), + .x_trans_id_o (x_trans_id_ex_id), + .x_exception_o (x_exception_ex_id), + .x_result_o (x_result_ex_id), + .x_valid_o (x_valid_ex_id), + .x_we_o (x_we_ex_id), + .cvxif_req_o (cvxif_req), + .cvxif_resp_i (cvxif_resp), + // Accelerator + .acc_valid_i (acc_valid_acc_ex), + // Performance counters + .itlb_miss_o (itlb_miss_ex_perf), + .dtlb_miss_o (dtlb_miss_ex_perf), + // Memory Management + .enable_translation_i (enable_translation_csr_ex), // from CSR + .en_ld_st_translation_i (en_ld_st_translation_csr_ex), + .flush_tlb_i (flush_tlb_ctrl_ex), + .priv_lvl_i (priv_lvl), // from CSR + .ld_st_priv_lvl_i (ld_st_priv_lvl_csr_ex), // from CSR + .sum_i (sum_csr_ex), // from CSR + .mxr_i (mxr_csr_ex), // from CSR + .satp_ppn_i (satp_ppn_csr_ex), // from CSR + .asid_i (asid_csr_ex), // from CSR 
+ .icache_areq_i (icache_areq_cache_ex), + .icache_areq_o (icache_areq_ex_cache), + // DCACHE interfaces + .dcache_req_ports_i (dcache_req_ports_cache_ex), + .dcache_req_ports_o (dcache_req_ports_ex_cache), + .dcache_wbuffer_empty_i (dcache_commit_wbuffer_empty), + .dcache_wbuffer_not_ni_i(dcache_commit_wbuffer_not_ni), + // PMP + .pmpcfg_i (pmpcfg), + .pmpaddr_i (pmpaddr), + //RVFI + .rvfi_lsu_ctrl_o (rvfi_lsu_ctrl), + .rvfi_mem_paddr_o (rvfi_mem_paddr) + ); + + // --------- + // Commit + // --------- + + // we have to make sure that the whole write buffer path is empty before + // used e.g. for fence instructions. + assign no_st_pending_commit = no_st_pending_ex & dcache_commit_wbuffer_empty; + + commit_stage #( + .CVA6Cfg(CVA6ExtendCfg) + ) commit_stage_i ( + .clk_i, + .rst_ni, + .halt_i (halt_ctrl), + .flush_dcache_i (dcache_flush_ctrl_cache), + .exception_o (ex_commit), + .dirty_fp_state_o (dirty_fp_state), + .single_step_i (single_step_csr_commit), + .commit_instr_i (commit_instr_id_commit), + .commit_ack_o (commit_ack), + .no_st_pending_i (no_st_pending_commit), + .waddr_o (waddr_commit_id), + .wdata_o (wdata_commit_id), + .we_gpr_o (we_gpr_commit_id), + .we_fpr_o (we_fpr_commit_id), + .commit_lsu_o (lsu_commit_commit_ex), + .commit_lsu_ready_i(lsu_commit_ready_ex_commit), + .commit_tran_id_o (lsu_commit_trans_id), + .amo_valid_commit_o(amo_valid_commit), + .amo_resp_i (amo_resp), + .commit_csr_o (csr_commit_commit_ex), + .pc_o (pc_commit), + .csr_op_o (csr_op_commit_csr), + .csr_wdata_o (csr_wdata_commit_csr), + .csr_rdata_i (csr_rdata_csr_commit), + .csr_write_fflags_o(csr_write_fflags_commit_cs), + .csr_exception_i (csr_exception_csr_commit), + .fence_i_o (fence_i_commit_controller), + .fence_o (fence_commit_controller), + .sfence_vma_o (sfence_vma_commit_controller), + .flush_commit_o (flush_commit), + .* + ); + + // --------- + // CSR + // --------- + csr_regfile #( + .CVA6Cfg (CVA6ExtendCfg), + .AsidWidth (ASID_WIDTH), + .MHPMCounterNum(MHPMCounterNum) 
+ ) csr_regfile_i ( + .flush_o (flush_csr_ctrl), + .halt_csr_o (halt_csr_ctrl), + .commit_instr_i (commit_instr_id_commit), + .commit_ack_i (commit_ack), + .boot_addr_i (boot_addr_i[riscv::VLEN-1:0]), + .hart_id_i (hart_id_i[riscv::XLEN-1:0]), + .ex_i (ex_commit), + .csr_op_i (csr_op_commit_csr), + .csr_write_fflags_i (csr_write_fflags_commit_cs), + .dirty_fp_state_i (dirty_fp_state), + .dirty_v_state_i (dirty_v_state), + .csr_addr_i (csr_addr_ex_csr), + .csr_wdata_i (csr_wdata_commit_csr), + .csr_rdata_o (csr_rdata_csr_commit), + .pc_i (pc_commit), + .csr_exception_o (csr_exception_csr_commit), + .epc_o (epc_commit_pcgen), + .eret_o (eret), + .set_debug_pc_o (set_debug_pc), + .trap_vector_base_o (trap_vector_base_commit_pcgen), + .priv_lvl_o (priv_lvl), + .acc_fflags_ex_i (acc_resp_fflags), + .acc_fflags_ex_valid_i (acc_resp_fflags_valid), + .fs_o (fs), + .fflags_o (fflags_csr_commit), + .frm_o (frm_csr_id_issue_ex), + .fprec_o (fprec_csr_ex), + .vs_o (vs), + .irq_ctrl_o (irq_ctrl_csr_id), + .ld_st_priv_lvl_o (ld_st_priv_lvl_csr_ex), + .en_translation_o (enable_translation_csr_ex), + .en_ld_st_translation_o(en_ld_st_translation_csr_ex), + .sum_o (sum_csr_ex), + .mxr_o (mxr_csr_ex), + .satp_ppn_o (satp_ppn_csr_ex), + .asid_o (asid_csr_ex), + .tvm_o (tvm_csr_id), + .tw_o (tw_csr_id), + .tsr_o (tsr_csr_id), + .debug_mode_o (debug_mode), + .single_step_o (single_step_csr_commit), + .dcache_en_o (dcache_en_csr_nbdcache), + .icache_en_o (icache_en_csr), + .acc_cons_en_o (acc_cons_en_csr), + .perf_addr_o (addr_csr_perf), + .perf_data_o (data_csr_perf), + .perf_data_i (data_perf_csr), + .perf_we_o (we_csr_perf), + .pmpcfg_o (pmpcfg), + .pmpaddr_o (pmpaddr), + .mcountinhibit_o (mcountinhibit_csr_perf), + .debug_req_i, + .ipi_i, + .irq_i, + .time_irq_i, + .* + ); + + // ------------------------ + // Performance Counters + // ------------------------ + if (PERF_COUNTER_EN) begin : gen_perf_counter + perf_counters #( + .CVA6Cfg (CVA6ExtendCfg), + .NumPorts(NumPorts) + ) 
perf_counters_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .debug_mode_i (debug_mode), + .addr_i (addr_csr_perf), + .we_i (we_csr_perf), + .data_i (data_csr_perf), + .data_o (data_perf_csr), + .commit_instr_i(commit_instr_id_commit), + .commit_ack_i (commit_ack), + + .l1_icache_miss_i (icache_miss_cache_perf), + .l1_dcache_miss_i (dcache_miss_cache_perf), + .itlb_miss_i (itlb_miss_ex_perf), + .dtlb_miss_i (dtlb_miss_ex_perf), + .sb_full_i (sb_full), + .if_empty_i (~fetch_valid_if_id), + .ex_i (ex_commit), + .eret_i (eret), + .resolved_branch_i (resolved_branch), + .branch_exceptions_i(flu_exception_ex_id), + .l1_icache_access_i (icache_dreq_if_cache), + .l1_dcache_access_i (dcache_req_ports_ex_cache), + .miss_vld_bits_i (miss_vld_bits), + .i_tlb_flush_i (flush_tlb_ctrl_ex), + .stall_issue_i (stall_issue), + .mcountinhibit_i (mcountinhibit_csr_perf) + ); + end : gen_perf_counter + else begin : gen_no_perf_counter + assign data_perf_csr = '0; + end : gen_no_perf_counter + + // ------------ + // Controller + // ------------ + controller #( + .CVA6Cfg(CVA6ExtendCfg) + ) controller_i ( + // flush ports + .set_pc_commit_o (set_pc_ctrl_pcgen), + .flush_unissued_instr_o(flush_unissued_instr_ctrl_id), + .flush_if_o (flush_ctrl_if), + .flush_id_o (flush_ctrl_id), + .flush_ex_o (flush_ctrl_ex), + .flush_bp_o (flush_ctrl_bp), + .flush_tlb_o (flush_tlb_ctrl_ex), + .flush_dcache_o (dcache_flush_ctrl_cache), + .flush_dcache_ack_i (dcache_flush_ack_cache_ctrl), + + .halt_csr_i (halt_csr_ctrl), + .halt_acc_i (halt_acc_ctrl), + .halt_o (halt_ctrl), + // control ports + .eret_i (eret), + .ex_valid_i (ex_commit.valid), + .set_debug_pc_i (set_debug_pc), + .flush_csr_i (flush_csr_ctrl), + .resolved_branch_i(resolved_branch), + .fence_i_i (fence_i_commit_controller), + .fence_i (fence_commit_controller), + .sfence_vma_i (sfence_vma_commit_controller), + .flush_commit_i (flush_commit), + .flush_acc_i (flush_acc), + + .flush_icache_o(icache_flush_ctrl_cache), + .* + ); + + // 
------------------- + // Cache Subsystem + // ------------------- + + // Acc dispatcher and store buffer share a dcache request port. + // Store buffer always has priority access over acc dipsatcher. + dcache_req_i_t [NumPorts-1:0] dcache_req_to_cache; + dcache_req_o_t [NumPorts-1:0] dcache_req_from_cache; + + // D$ request + assign dcache_req_to_cache[0] = dcache_req_ports_ex_cache[0]; + assign dcache_req_to_cache[1] = dcache_req_ports_ex_cache[1]; + assign dcache_req_to_cache[2] = dcache_req_ports_acc_cache[0]; + assign dcache_req_to_cache[3] = dcache_req_ports_ex_cache[2].data_req ? dcache_req_ports_ex_cache [2] : + dcache_req_ports_acc_cache[1]; + + // D$ response + assign dcache_req_ports_cache_ex[0] = dcache_req_from_cache[0]; + assign dcache_req_ports_cache_ex[1] = dcache_req_from_cache[1]; + assign dcache_req_ports_cache_acc[0] = dcache_req_from_cache[2]; + always_comb begin : gen_dcache_req_store_data_gnt + dcache_req_ports_cache_ex[2] = dcache_req_from_cache[3]; + dcache_req_ports_cache_acc[1] = dcache_req_from_cache[3]; + + // Set gnt signal + dcache_req_ports_cache_ex[2].data_gnt &= dcache_req_ports_ex_cache[2].data_req; + dcache_req_ports_cache_acc[1].data_gnt &= !dcache_req_ports_ex_cache[2].data_req; + end + + if (DCACHE_TYPE == int'(config_pkg::WT)) begin : gen_cache_wt + // this is a cache subsystem that is compatible with OpenPiton + wt_cache_subsystem #( + .CVA6Cfg (CVA6ExtendCfg), + .NumPorts (NumPorts), + .noc_req_t (noc_req_t), + .noc_resp_t(noc_resp_t) + ) i_cache_subsystem ( + // to D$ + .clk_i (clk_i), + .rst_ni (rst_ni), + // I$ + .icache_en_i (icache_en_csr), + .icache_flush_i (icache_flush_ctrl_cache), + .icache_miss_o (icache_miss_cache_perf), + .icache_areq_i (icache_areq_ex_cache), + .icache_areq_o (icache_areq_cache_ex), + .icache_dreq_i (icache_dreq_if_cache), + .icache_dreq_o (icache_dreq_cache_if), + // D$ + .dcache_enable_i (dcache_en_csr_nbdcache), + .dcache_flush_i (dcache_flush_ctrl_cache), + 
.dcache_flush_ack_o(dcache_flush_ack_cache_ctrl), + // to commit stage + .dcache_amo_req_i (amo_req), + .dcache_amo_resp_o (amo_resp), + // from PTW, Load Unit and Store Unit + .dcache_miss_o (dcache_miss_cache_perf), + .miss_vld_bits_o (miss_vld_bits), + .dcache_req_ports_i(dcache_req_to_cache), + .dcache_req_ports_o(dcache_req_from_cache), + // write buffer status + .wbuffer_empty_o (dcache_commit_wbuffer_empty), + .wbuffer_not_ni_o (dcache_commit_wbuffer_not_ni), + // memory side + .noc_req_o (noc_req_o), + .noc_resp_i (noc_resp_i), + .inval_addr_i (inval_addr), + .inval_valid_i (inval_valid), + .inval_ready_o (inval_ready) + ); + end else if (DCACHE_TYPE == int'(config_pkg::HPDCACHE)) begin : gen_cache_hpd + cva6_hpdcache_subsystem #( + .CVA6Cfg (CVA6ExtendCfg), + .NumPorts (NumPorts), + .noc_req_t (noc_req_t), + .noc_resp_t(noc_resp_t), + .cmo_req_t (logic /*FIXME*/), + .cmo_rsp_t (logic /*FIXME*/) + ) i_cache_subsystem ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + .icache_en_i (icache_en_csr), + .icache_flush_i(icache_flush_ctrl_cache), + .icache_miss_o (icache_miss_cache_perf), + .icache_areq_i (icache_areq_ex_cache), + .icache_areq_o (icache_areq_cache_ex), + .icache_dreq_i (icache_dreq_if_cache), + .icache_dreq_o (icache_dreq_cache_if), + + .dcache_enable_i (dcache_en_csr_nbdcache), + .dcache_flush_i (dcache_flush_ctrl_cache), + .dcache_flush_ack_o(dcache_flush_ack_cache_ctrl), + .dcache_miss_o (dcache_miss_cache_perf), + + .dcache_amo_req_i (amo_req), + .dcache_amo_resp_o(amo_resp), + + .dcache_cmo_req_i ('0 /*FIXME*/), + .dcache_cmo_resp_o( /*FIXME*/), + + .dcache_req_ports_i(dcache_req_to_cache), + .dcache_req_ports_o(dcache_req_from_cache), + + .wbuffer_empty_o (dcache_commit_wbuffer_empty), + .wbuffer_not_ni_o(dcache_commit_wbuffer_not_ni), + + .hwpf_base_set_i ('0 /*FIXME*/), + .hwpf_base_i ('0 /*FIXME*/), + .hwpf_base_o ( /*FIXME*/), + .hwpf_param_set_i ('0 /*FIXME*/), + .hwpf_param_i ('0 /*FIXME*/), + .hwpf_param_o ( /*FIXME*/), + 
.hwpf_throttle_set_i('0 /*FIXME*/), + .hwpf_throttle_i ('0 /*FIXME*/), + .hwpf_throttle_o ( /*FIXME*/), + .hwpf_status_o ( /*FIXME*/), + + .noc_req_o (noc_req_o), + .noc_resp_i(noc_resp_i) + ); + assign inval_ready = 1'b1; + end else begin : gen_cache_wb + std_cache_subsystem #( + // note: this only works with one cacheable region + // not as important since this cache subsystem is about to be + // deprecated + .CVA6Cfg (CVA6ExtendCfg), + .NumPorts (NumPorts), + .axi_ar_chan_t(axi_ar_chan_t), + .axi_aw_chan_t(axi_aw_chan_t), + .axi_w_chan_t (axi_w_chan_t), + .axi_req_t (noc_req_t), + .axi_rsp_t (noc_resp_t) + ) i_cache_subsystem ( + // to D$ + .clk_i (clk_i), + .rst_ni (rst_ni), + .priv_lvl_i (priv_lvl), + // I$ + .icache_en_i (icache_en_csr), + .icache_flush_i (icache_flush_ctrl_cache), + .icache_miss_o (icache_miss_cache_perf), + .icache_areq_i (icache_areq_ex_cache), + .icache_areq_o (icache_areq_cache_ex), + .icache_dreq_i (icache_dreq_if_cache), + .icache_dreq_o (icache_dreq_cache_if), + // D$ + .dcache_enable_i (dcache_en_csr_nbdcache), + .dcache_flush_i (dcache_flush_ctrl_cache), + .dcache_flush_ack_o(dcache_flush_ack_cache_ctrl), + // to commit stage + .amo_req_i (amo_req), + .amo_resp_o (amo_resp), + .dcache_miss_o (dcache_miss_cache_perf), + // this is statically set to 1 as the std_cache does not have a wbuffer + .wbuffer_empty_o (dcache_commit_wbuffer_empty), + // from PTW, Load Unit and Store Unit + .dcache_req_ports_i(dcache_req_to_cache), + .dcache_req_ports_o(dcache_req_from_cache), + // memory side + .axi_req_o (noc_req_o), + .axi_resp_i (noc_resp_i) + ); + assign dcache_commit_wbuffer_not_ni = 1'b1; + assign inval_ready = 1'b1; + end + + // ---------------- + // Accelerator + // ---------------- + + if (CVA6ExtendCfg.EnableAccelerator) begin : gen_accelerator + acc_dispatcher #( + .CVA6Cfg (CVA6ExtendCfg), + .acc_cfg_t (acc_cfg_t), + .AccCfg (AccCfg), + .acc_req_t (cvxif_req_t), + .acc_resp_t(cvxif_resp_t) + ) i_acc_dispatcher ( + .clk_i (clk_i), 
+ .rst_ni (rst_ni), + .flush_unissued_instr_i(flush_unissued_instr_ctrl_id), + .flush_ex_i (flush_ctrl_ex), + .flush_pipeline_o (flush_acc), + .acc_cons_en_i (acc_cons_en_csr), + .acc_fflags_valid_o (acc_resp_fflags_valid), + .acc_fflags_o (acc_resp_fflags), + .ld_st_priv_lvl_i (ld_st_priv_lvl_csr_ex), + .sum_i (sum_csr_ex), + .pmpcfg_i (pmpcfg), + .pmpaddr_i (pmpaddr), + .fcsr_frm_i (frm_csr_id_issue_ex), + .dirty_v_state_o (dirty_v_state), + .issue_instr_i (issue_instr_id_acc), + .issue_instr_hs_i (issue_instr_hs_id_acc), + .issue_stall_o (stall_acc_id), + .fu_data_i (fu_data_id_ex), + .commit_instr_i (commit_instr_id_commit), + .commit_st_barrier_i (fence_i_commit_controller | fence_commit_controller), + .acc_trans_id_o (acc_trans_id_ex_id), + .acc_result_o (acc_result_ex_id), + .acc_valid_o (acc_valid_ex_id), + .acc_exception_o (acc_exception_ex_id), + .acc_valid_ex_o (acc_valid_acc_ex), + .commit_ack_i (commit_ack), + .acc_stall_st_pending_o(stall_st_pending_ex), + .acc_no_st_pending_i (no_st_pending_commit), + .dcache_req_ports_i (dcache_req_ports_ex_cache), + .ctrl_halt_o (halt_acc_ctrl), + .acc_dcache_req_ports_o(dcache_req_ports_acc_cache), + .acc_dcache_req_ports_i(dcache_req_ports_cache_acc), + .inval_ready_i (inval_ready), + .inval_valid_o (inval_valid), + .inval_addr_o (inval_addr), + .acc_req_o (cvxif_req_o), + .acc_resp_i (cvxif_resp_i) + ); + end : gen_accelerator + else begin : gen_no_accelerator + assign acc_trans_id_ex_id = '0; + assign acc_result_ex_id = '0; + assign acc_valid_ex_id = '0; + assign acc_exception_ex_id = '0; + assign acc_resp_fflags = '0; + assign acc_resp_fflags_valid = '0; + assign stall_acc_id = '0; + assign dirty_v_state = '0; + assign acc_valid_acc_ex = '0; + assign halt_acc_ctrl = '0; + assign stall_st_pending_ex = '0; + assign flush_acc = '0; + + // D$ connection is unused + assign dcache_req_ports_acc_cache = '0; + + // No invalidation interface + assign inval_valid = '0; + assign inval_addr = '0; + + // Feed through cvxif 
+ assign cvxif_req_o = cvxif_req; + assign cvxif_resp = cvxif_resp_i; + end : gen_no_accelerator + + // ------------------- + // Parameter Check + // ------------------- + // pragma translate_off +`ifndef VERILATOR + initial config_pkg::check_cfg(CVA6Cfg); +`endif + // pragma translate_on + + // ------------------- + // Instruction Tracer + // ------------------- + + //pragma translate_off +`ifdef PITON_ARIANE + localparam PC_QUEUE_DEPTH = 16; + + logic piton_pc_vld; + logic [ riscv::VLEN-1:0] piton_pc; + logic [CVA6ExtendCfg.NrCommitPorts-1:0][riscv::VLEN-1:0] pc_data; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] pc_pop, pc_empty; + + for (genvar i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin : gen_pc_fifo + fifo_v3 #( + .DATA_WIDTH(64), + .DEPTH(PC_QUEUE_DEPTH) + ) i_pc_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i ('0), + .testmode_i('0), + .full_o (), + .empty_o (pc_empty[i]), + .usage_o (), + .data_i (commit_instr_id_commit[i].pc), + .push_i (commit_ack[i] & ~commit_instr_id_commit[i].ex.valid), + .data_o (pc_data[i]), + .pop_i (pc_pop[i]) + ); + end + + rr_arb_tree #( + .NumIn(CVA6ExtendCfg.NrCommitPorts), + .DataWidth(64) + ) i_rr_arb_tree ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (~pc_empty), + .gnt_o (pc_pop), + .data_i (pc_data), + .gnt_i (piton_pc_vld), + .req_o (piton_pc_vld), + .data_o (piton_pc), + .idx_o () + ); +`endif // PITON_ARIANE + +`ifndef VERILATOR + instr_tracer_if tracer_if (clk_i); + // assign instruction tracer interface + // control signals + assign tracer_if.rstn = rst_ni; + assign tracer_if.flush_unissued = flush_unissued_instr_ctrl_id; + assign tracer_if.flush = flush_ctrl_ex; + // fetch + assign tracer_if.instruction = id_stage_i.fetch_entry_i.instruction; + assign tracer_if.fetch_valid = id_stage_i.fetch_entry_valid_i; + assign tracer_if.fetch_ack = id_stage_i.fetch_entry_ready_o; + // Issue + assign tracer_if.issue_ack = issue_stage_i.i_scoreboard.issue_ack_i; + assign 
tracer_if.issue_sbe = issue_stage_i.i_scoreboard.issue_instr_o; + // write-back + assign tracer_if.waddr = waddr_commit_id; + assign tracer_if.wdata = wdata_commit_id; + assign tracer_if.we_gpr = we_gpr_commit_id; + assign tracer_if.we_fpr = we_fpr_commit_id; + // commit + assign tracer_if.commit_instr = commit_instr_id_commit; + assign tracer_if.commit_ack = commit_ack; + // branch predict + assign tracer_if.resolve_branch = resolved_branch; + // address translation + // stores + assign tracer_if.st_valid = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.valid_i; + assign tracer_if.st_paddr = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.paddr_i; + // loads + assign tracer_if.ld_valid = ex_stage_i.lsu_i.i_load_unit.req_port_o.tag_valid; + assign tracer_if.ld_kill = ex_stage_i.lsu_i.i_load_unit.req_port_o.kill_req; + assign tracer_if.ld_paddr = ex_stage_i.lsu_i.i_load_unit.paddr_i; + // exceptions + assign tracer_if.exception = commit_stage_i.exception_o; + // assign current privilege level + assign tracer_if.priv_lvl = priv_lvl; + assign tracer_if.debug_mode = debug_mode; + + instr_tracer instr_tracer_i ( + .tracer_if(tracer_if), + .hart_id_i + ); + + // mock tracer for Verilator, to be used with spike-dasm +`else + + int f; + logic [63:0] cycles; + + initial begin + string fn; + $sformat(fn, "trace_hart_%0.0f.dasm", hart_id_i); + f = $fopen(fn, "w"); + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + cycles <= 0; + end else begin + byte mode = ""; + if (CVA6Cfg.DebugEn && debug_mode) mode = "D"; + else begin + case (priv_lvl) + riscv::PRIV_LVL_M: mode = "M"; + riscv::PRIV_LVL_S: if (CVA6Cfg.RVS) mode = "S"; + riscv::PRIV_LVL_U: mode = "U"; + default: ; // Do nothing + endcase + end + for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin + if (commit_ack[i] && !commit_instr_id_commit[i].ex.valid) begin + $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, mode, + 
commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]); + end else if (commit_ack[i] && commit_instr_id_commit[i].ex.valid) begin + if (commit_instr_id_commit[i].ex.cause == 2) begin + $fwrite(f, "Exception Cause: Illegal Instructions, DASM(%h) PC=%h\n", + commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].pc); + end else begin + if (CVA6Cfg.DebugEn && debug_mode) begin + $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, + mode, commit_instr_id_commit[i].ex.tval[31:0], + commit_instr_id_commit[i].ex.tval[31:0]); + end else begin + $fwrite(f, "Exception Cause: %5d, DASM(%h) PC=%h\n", + commit_instr_id_commit[i].ex.cause, commit_instr_id_commit[i].ex.tval[31:0], + commit_instr_id_commit[i].pc); + end + end + end + end + cycles <= cycles + 1; + end + end + + final begin + $fclose(f); + end +`endif // VERILATOR + //pragma translate_on + + + if (IsRVFI) begin + + cva6_rvfi_probes #( + .CVA6Cfg (CVA6ExtendCfg), + .rvfi_probes_t(rvfi_probes_t) + ) i_cva6_rvfi_combi ( + + .flush_i (flush_ctrl_if), + .issue_instr_ack_i (issue_instr_issue_id), + .fetch_entry_valid_i(fetch_valid_if_id), + .instruction_i (fetch_entry_if_id.instruction), + .is_compressed_i (rvfi_is_compressed), + + .issue_pointer_i (rvfi_issue_pointer), + .commit_pointer_i(rvfi_commit_pointer), + + .flush_unissued_instr_i(flush_unissued_instr_ctrl_id), + .decoded_instr_valid_i (issue_entry_valid_id_issue), + .decoded_instr_ack_i (issue_instr_issue_id), + + .rs1_forwarding_i(rs1_forwarding_id_ex), + .rs2_forwarding_i(rs2_forwarding_id_ex), + + .commit_instr_i(commit_instr_id_commit), + .ex_commit_i (ex_commit), + .priv_lvl_i (priv_lvl), + + .lsu_ctrl_i (rvfi_lsu_ctrl), + .wbdata_i (wbdata_ex_id), + .commit_ack_i(commit_ack), + .mem_paddr_i (rvfi_mem_paddr), + .debug_mode_i(debug_mode), + .wdata_i (wdata_commit_id), + + .rvfi_probes_o(rvfi_probes_o) + + ); + + end //IsRVFI + +endmodule // ariane diff --git 
a/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv b/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv new file mode 100644 index 0000000..20ffb1e --- /dev/null +++ b/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv @@ -0,0 +1,31 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Author: Nils Wistoff + +// Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's +// first pass decoder. The stub ties every output to zero and raises $error if it is ever instantiated. + +module cva6_accel_first_pass_decoder + import ariane_pkg::*; +( + input logic [31:0] instruction_i, // instruction from IF + input riscv::xs_t fs_i, // floating point extension status + input riscv::xs_t vs_i, // vector extension status + output logic is_accel_o, // is an accelerator instruction + output scoreboard_entry_t instruction_o, // predecoded instruction + output logic illegal_instr_o, // is an illegal instruction + output logic is_control_flow_instr_o // is a control flow instruction +); + // Tie every output to zero: the stub never flags an instruction as accelerator, illegal or control flow. + assign is_accel_o = 1'b0; + assign instruction_o = '0; + assign illegal_instr_o = 1'b0; + assign is_control_flow_instr_o = 1'b0; + // Module-scope $error is an elaboration system task, so instantiating this stub is reported as an error. + $error("cva6_accel_first_pass_decoder: instantiated non-functional module stub.\ + Please replace this with your accelerator's first pass decoder \ + (or unset ENABLE_ACCELERATOR)."); + +endmodule : cva6_accel_first_pass_decoder diff --git a/test/type_param/core/cva6_rvfi.sv b/test/type_param/core/cva6_rvfi.sv new file mode 100644 index 0000000..972a50a --- /dev/null +++ b/test/type_param/core/cva6_rvfi.sv @@ -0,0 +1,294 @@ +// Copyright 2024 Thales DIS France SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Yannick Casamatta - Thales +// Date: 09/01/2024 + +// cva6_rvfi: unpacks rvfi_probes_i, keeps per-instruction issue-time data, and drives one rvfi_instr_t record per commit port on rvfi_o. +module cva6_rvfi + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type rvfi_instr_t = logic, + parameter type rvfi_probes_t = logic +) ( + + input logic clk_i, + input logic rst_ni, + + input rvfi_probes_t rvfi_probes_i, + output rvfi_instr_t [CVA6Cfg.NrCommitPorts-1:0] rvfi_o + +); + + // ------------------------------------------ + // CVA6 configuration + // ------------------------------------------ + // Extended config: feature flags derived from CVA6Cfg and folded into CVA6ExtendCfg below + localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn; + localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn; + localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8; + localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec; // Are non-standard extensions present? + localparam int unsigned FLen = RVD ? 64 : // D ext. + RVF ? 32 : // F ext. + CVA6Cfg.XF16 ? 16 : // Xf16 ext. + CVA6Cfg.XF16ALT ? 16 : // Xf16alt ext. + CVA6Cfg.XF8 ? 8 : // Xf8 ext.
+ 1; // Unused in case of no FP + + // Transprecision floating-point extensions configuration + localparam bit RVFVec = RVF & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled + localparam bit XF16Vec = CVA6Cfg.XF16 & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled + localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled + localparam bit XF8Vec = CVA6Cfg.XF8 & CVA6Cfg.XFVec & FLen>8; // FP8 vectors available if vectors and larger fmt enabled + + localparam bit EnableAccelerator = CVA6Cfg.RVV; // Currently only used by V extension (Ara) + localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4; + + localparam NrRgprPorts = 2; + + localparam bit NonIdemPotenceEn = CVA6Cfg.NrNonIdempotentRules && CVA6Cfg.NonIdempotentLength; // Currently only used by V extension (Ara) + + localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = { + CVA6Cfg.NrCommitPorts, + CVA6Cfg.AxiAddrWidth, + CVA6Cfg.AxiDataWidth, + CVA6Cfg.AxiIdWidth, + CVA6Cfg.AxiUserWidth, + CVA6Cfg.NrLoadBufEntries, + CVA6Cfg.FpuEn, + CVA6Cfg.XF16, + CVA6Cfg.XF16ALT, + CVA6Cfg.XF8, + CVA6Cfg.RVA, + CVA6Cfg.RVB, + CVA6Cfg.RVV, + CVA6Cfg.RVC, + CVA6Cfg.RVZCB, + CVA6Cfg.XFVec, + CVA6Cfg.CvxifEn, + CVA6Cfg.ZiCondExtEn, + // Extended (NOTE(review): unnamed list, so entries presumably must match the field order of config_pkg::cva6_cfg_t -- confirm against the package) + bit'(RVF), + bit'(RVD), + bit'(FpPresent), + bit'(NSX), + unsigned'(FLen), + bit'(RVFVec), + bit'(XF16Vec), + bit'(XF16ALTVec), + bit'(XF8Vec), + unsigned'(NrRgprPorts), + unsigned'(NrWbPorts), + bit'(EnableAccelerator), + CVA6Cfg.RVS, + CVA6Cfg.RVU, + CVA6Cfg.HaltAddress, + CVA6Cfg.ExceptionAddress, + CVA6Cfg.RASDepth, + CVA6Cfg.BTBEntries, + CVA6Cfg.BHTEntries, + CVA6Cfg.DmBaseAddress, + CVA6Cfg.NrPMPEntries, + CVA6Cfg.PMPCfgRstVal, + CVA6Cfg.PMPAddrRstVal, + CVA6Cfg.PMPEntryReadOnly, + CVA6Cfg.NOCType, + CVA6Cfg.NrNonIdempotentRules, + CVA6Cfg.NonIdempotentAddrBase, + CVA6Cfg.NonIdempotentLength, +
CVA6Cfg.NrExecuteRegionRules, + CVA6Cfg.ExecuteRegionAddrBase, + CVA6Cfg.ExecuteRegionLength, + CVA6Cfg.NrCachedRegionRules, + CVA6Cfg.CachedRegionAddrBase, + CVA6Cfg.CachedRegionLength, + CVA6Cfg.MaxOutstandingStores, + CVA6Cfg.DebugEn, + NonIdemPotenceEn, + CVA6Cfg.AxiBurstWriteEn + }; + + logic flush; + logic issue_instr_ack; + logic fetch_entry_valid; + logic [ 31:0] instruction; + logic is_compressed; + + logic [ TRANS_ID_BITS-1:0] issue_pointer; + logic [CVA6ExtendCfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer; + + logic flush_unissued_instr; + logic decoded_instr_valid; + logic decoded_instr_ack; + + riscv::xlen_t rs1_forwarding; + riscv::xlen_t rs2_forwarding; + + scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0] commit_instr; + exception_t ex_commit; + riscv::priv_lvl_t priv_lvl; + + lsu_ctrl_t lsu_ctrl; + logic [ CVA6ExtendCfg.NrWbPorts-1:0][ riscv::XLEN-1:0] wbdata; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] commit_ack; + logic [ riscv::PLEN-1:0] mem_paddr; + logic debug_mode; + logic [CVA6ExtendCfg.NrCommitPorts-1:0][ riscv::XLEN-1:0] wdata; + + logic [ riscv::VLEN-1:0] lsu_addr; + logic [ (riscv::XLEN/8)-1:0] lsu_rmask; + logic [ (riscv::XLEN/8)-1:0] lsu_wmask; + logic [ TRANS_ID_BITS-1:0] lsu_addr_trans_id; + + assign flush = rvfi_probes_i.flush; + assign issue_instr_ack = rvfi_probes_i.issue_instr_ack; + assign fetch_entry_valid = rvfi_probes_i.fetch_entry_valid; + assign instruction = rvfi_probes_i.instruction; + assign is_compressed = rvfi_probes_i.is_compressed; + + assign issue_pointer = rvfi_probes_i.issue_pointer; + assign commit_pointer = rvfi_probes_i.commit_pointer; + + assign flush_unissued_instr = rvfi_probes_i.flush_unissued_instr; + assign decoded_instr_valid = rvfi_probes_i.decoded_instr_valid; + assign decoded_instr_ack = rvfi_probes_i.decoded_instr_ack; + + assign rs1_forwarding = rvfi_probes_i.rs1_forwarding; + assign rs2_forwarding = rvfi_probes_i.rs2_forwarding; + + assign commit_instr = rvfi_probes_i.commit_instr; +
assign ex_commit = rvfi_probes_i.ex_commit; + assign priv_lvl = rvfi_probes_i.priv_lvl; + + assign lsu_ctrl = rvfi_probes_i.lsu_ctrl; + assign wbdata = rvfi_probes_i.wbdata; + assign commit_ack = rvfi_probes_i.commit_ack; + assign mem_paddr = rvfi_probes_i.mem_paddr; + assign debug_mode = rvfi_probes_i.debug_mode; + assign wdata = rvfi_probes_i.wdata; + + assign lsu_addr = lsu_ctrl.vaddr; + assign lsu_rmask = lsu_ctrl.fu == LOAD ? lsu_ctrl.be : '0; + assign lsu_wmask = lsu_ctrl.fu == STORE ? lsu_ctrl.be : '0; + assign lsu_addr_trans_id = lsu_ctrl.trans_id; + + + // ID STAGE: single register holding the fetched instruction word (compressed ones zero-extended from 16 bits) until issue acknowledges it or a flush clears it + + typedef struct packed { + logic valid; + logic [31:0] instr; + } issue_struct_t; + issue_struct_t issue_n, issue_q; + + always_comb begin + issue_n = issue_q; + + if (issue_instr_ack) issue_n.valid = 1'b0; + + if ((!issue_q.valid || issue_instr_ack) && fetch_entry_valid) begin + issue_n.valid = 1'b1; + issue_n.instr = (is_compressed) ? {{16{1'b0}}, instruction[15:0]} : instruction; + end + + if (flush) issue_n.valid = 1'b0; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + issue_q <= '0; + end else begin + issue_q <= issue_n; + end + end + + // ISSUE STAGE: per-entry side storage (NR_SB_ENTRIES deep) written at issue_pointer and read back through commit_pointer + + // this is the FIFO struct of the issue queue (source operands, LSU address/masks/write data, instruction word) + typedef struct packed { + riscv::xlen_t rs1_rdata; + riscv::xlen_t rs2_rdata; + logic [riscv::VLEN-1:0] lsu_addr; + logic [(riscv::XLEN/8)-1:0] lsu_rmask; + logic [(riscv::XLEN/8)-1:0] lsu_wmask; + riscv::xlen_t lsu_wdata; + logic [31:0] instr; + } sb_mem_t; + sb_mem_t [NR_SB_ENTRIES-1:0] mem_q, mem_n; + + always_comb begin : issue_fifo + mem_n = mem_q; + // On an accepted, non-flushed issue, initialise the entry at issue_pointer with the operands and the latched instruction word + if (decoded_instr_valid && decoded_instr_ack && !flush_unissued_instr) begin + mem_n[issue_pointer] = '{ + rs1_rdata: rs1_forwarding, + rs2_rdata: rs2_forwarding, + lsu_addr: '0, + lsu_rmask: '0, + lsu_wmask: '0, + lsu_wdata: '0, + instr: issue_q.instr + }; + end + // Attach the LSU address and byte mask (plus store data) to the entry indexed by the LSU transaction id + if (lsu_rmask != 0) begin + mem_n[lsu_addr_trans_id].lsu_addr = lsu_addr; + mem_n[lsu_addr_trans_id].lsu_rmask = lsu_rmask; + end else if
(lsu_wmask != 0) begin + mem_n[lsu_addr_trans_id].lsu_addr = lsu_addr; + mem_n[lsu_addr_trans_id].lsu_wmask = lsu_wmask; + mem_n[lsu_addr_trans_id].lsu_wdata = wbdata[1]; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : regs + if (!rst_ni) begin + mem_q <= '{default: sb_mem_t'(0)}; + end else begin + mem_q <= mem_n; + end + end + + //---------------------------------------------------------------------------------------------------------- + // PACK: build one RVFI record per commit port from the commit-stage signals and the stored issue-time data + //---------------------------------------------------------------------------------------------------------- + + always_comb begin + for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin + logic exception; + exception = commit_instr[i].valid && ex_commit.valid; + rvfi_o[i].valid = (commit_ack[i] && !ex_commit.valid) || + (exception && (ex_commit.cause == riscv::ENV_CALL_MMODE || + ex_commit.cause == riscv::ENV_CALL_SMODE || + ex_commit.cause == riscv::ENV_CALL_UMODE)); + rvfi_o[i].insn = mem_q[commit_pointer[i]].instr; + // when trap, the instruction is not executed + rvfi_o[i].trap = exception; + rvfi_o[i].cause = ex_commit.cause; + rvfi_o[i].mode = (CVA6ExtendCfg.DebugEn && debug_mode) ? 2'b10 : priv_lvl; + rvfi_o[i].ixl = riscv::XLEN == 64 ? 2 : 1; + rvfi_o[i].rs1_addr = commit_instr[i].rs1[4:0]; + rvfi_o[i].rs2_addr = commit_instr[i].rs2[4:0]; + rvfi_o[i].rd_addr = commit_instr[i].rd[4:0]; + rvfi_o[i].rd_wdata = (CVA6ExtendCfg.FpPresent && is_rd_fpr(commit_instr[i].op)) ? + commit_instr[i].result : wdata[i]; + rvfi_o[i].pc_rdata = commit_instr[i].pc; + rvfi_o[i].mem_addr = mem_q[commit_pointer[i]].lsu_addr; + // So far, only write paddr is reported.
TODO: read paddr + rvfi_o[i].mem_paddr = mem_paddr; + rvfi_o[i].mem_wmask = mem_q[commit_pointer[i]].lsu_wmask; + rvfi_o[i].mem_wdata = mem_q[commit_pointer[i]].lsu_wdata; + rvfi_o[i].mem_rmask = mem_q[commit_pointer[i]].lsu_rmask; + rvfi_o[i].mem_rdata = commit_instr[i].result; + rvfi_o[i].rs1_rdata = mem_q[commit_pointer[i]].rs1_rdata; + rvfi_o[i].rs2_rdata = mem_q[commit_pointer[i]].rs2_rdata; + end + end + + +endmodule diff --git a/test/type_param/core/cva6_rvfi_probes.sv b/test/type_param/core/cva6_rvfi_probes.sv new file mode 100644 index 0000000..81d2c5d --- /dev/null +++ b/test/type_param/core/cva6_rvfi_probes.sv @@ -0,0 +1,81 @@ +// Copyright 2024 Thales DIS France SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Yannick Casamatta - Thales +// Date: 09/01/2024 + + +module cva6_rvfi_probes // packs every probe input below into the single rvfi_probes_o output (member name = port name without the _i suffix) + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type rvfi_probes_t = logic // placeholder default; the body writes named members, so presumably every instance overrides this with a struct type - TODO confirm +) ( + + input logic flush_i, + input logic issue_instr_ack_i, + input logic fetch_entry_valid_i, + input logic [31:0] instruction_i, + input logic is_compressed_i, + + input logic [TRANS_ID_BITS-1:0] issue_pointer_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer_i, + + input logic flush_unissued_instr_i, + input logic decoded_instr_valid_i, + input logic decoded_instr_ack_i, + + input riscv::xlen_t rs1_forwarding_i, + input riscv::xlen_t rs2_forwarding_i, + + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, + input exception_t ex_commit_i, + input riscv::priv_lvl_t priv_lvl_i, + + input lsu_ctrl_t lsu_ctrl_i, + input logic [ CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] 
commit_ack_i, + input logic [ riscv::PLEN-1:0] mem_paddr_i, + input logic debug_mode_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i, + output rvfi_probes_t rvfi_probes_o +); + + always_comb begin + rvfi_probes_o = '0; /* zero the whole output first so any member of rvfi_probes_t not assigned below reads as 0 */ + + rvfi_probes_o.flush = flush_i; + rvfi_probes_o.issue_instr_ack = issue_instr_ack_i; + rvfi_probes_o.fetch_entry_valid = fetch_entry_valid_i; + rvfi_probes_o.instruction = instruction_i; + rvfi_probes_o.is_compressed = is_compressed_i; + + rvfi_probes_o.issue_pointer = issue_pointer_i; + rvfi_probes_o.commit_pointer = commit_pointer_i; + + rvfi_probes_o.flush_unissued_instr = flush_unissued_instr_i; + rvfi_probes_o.decoded_instr_valid = decoded_instr_valid_i; + rvfi_probes_o.decoded_instr_ack = decoded_instr_ack_i; + + rvfi_probes_o.rs1_forwarding = rs1_forwarding_i; + rvfi_probes_o.rs2_forwarding = rs2_forwarding_i; + + rvfi_probes_o.commit_instr = commit_instr_i; + rvfi_probes_o.ex_commit = ex_commit_i; + rvfi_probes_o.priv_lvl = priv_lvl_i; + + rvfi_probes_o.lsu_ctrl = lsu_ctrl_i; + rvfi_probes_o.wbdata = wbdata_i; + rvfi_probes_o.commit_ack = commit_ack_i; + rvfi_probes_o.mem_paddr = mem_paddr_i; + rvfi_probes_o.debug_mode = debug_mode_i; + rvfi_probes_o.wdata = wdata_i; + + end + + +endmodule diff --git a/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv b/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv new file mode 100644 index 0000000..08e801c --- /dev/null +++ b/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv @@ -0,0 +1,155 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com) +// Example coprocessor adds rs1,rs2(,rs3) together and gives back the result to the CPU via the CoreV-X-Interface. +// The result is delayed: it is only flagged valid once a 4-bit counter equals the 4 least significant bits of the sum. + +module cvxif_example_coprocessor + import cvxif_pkg::*; + import cvxif_instr_pkg::*; +( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input cvxif_req_t cvxif_req_i, + output cvxif_resp_t cvxif_resp_o +); + + //Compressed interface + logic x_compressed_valid_i; + logic x_compressed_ready_o; + x_compressed_req_t x_compressed_req_i; + x_compressed_resp_t x_compressed_resp_o; + //Issue interface + logic x_issue_valid_i; + logic x_issue_ready_o; + x_issue_req_t x_issue_req_i; + x_issue_resp_t x_issue_resp_o; + //Commit interface + logic x_commit_valid_i; + x_commit_t x_commit_i; + //Memory interface + logic x_mem_valid_o; + logic x_mem_ready_i; + x_mem_req_t x_mem_req_o; + x_mem_resp_t x_mem_resp_i; + //Memory result interface + logic x_mem_result_valid_i; + x_mem_result_t x_mem_result_i; + //Result interface + logic x_result_valid_o; + logic x_result_ready_i; + x_result_t x_result_o; + + assign x_compressed_valid_i = cvxif_req_i.x_compressed_valid; // unpack the request struct into per-interface signals + assign x_compressed_req_i = cvxif_req_i.x_compressed_req; + assign x_issue_valid_i = cvxif_req_i.x_issue_valid; + assign x_issue_req_i = cvxif_req_i.x_issue_req; + assign x_commit_valid_i = cvxif_req_i.x_commit_valid; + assign x_commit_i = cvxif_req_i.x_commit; + assign x_mem_ready_i = cvxif_req_i.x_mem_ready; + assign x_mem_resp_i = cvxif_req_i.x_mem_resp; + assign x_mem_result_valid_i = cvxif_req_i.x_mem_result_valid; + assign x_mem_result_i = cvxif_req_i.x_mem_result; + assign x_result_ready_i = cvxif_req_i.x_result_ready; + + assign cvxif_resp_o.x_compressed_ready = 
x_compressed_ready_o; /* pack the per-interface outputs into the response struct */ + assign cvxif_resp_o.x_compressed_resp = x_compressed_resp_o; + assign cvxif_resp_o.x_issue_ready = x_issue_ready_o; + assign cvxif_resp_o.x_issue_resp = x_issue_resp_o; + assign cvxif_resp_o.x_mem_valid = x_mem_valid_o; /* NOTE(review): x_mem_valid_o and x_mem_req_o have no driver in this module */ + assign cvxif_resp_o.x_mem_req = x_mem_req_o; + assign cvxif_resp_o.x_result_valid = x_result_valid_o; + assign cvxif_resp_o.x_result = x_result_o; + + //Compressed interface: tied off (never ready, never accepts) + assign x_compressed_ready_o = '0; + assign x_compressed_resp_o.instr = '0; + assign x_compressed_resp_o.accept = '0; + + instr_decoder #( // matches the issued instruction against the CoproInstr table and returns the issue response + .NbInstr (cvxif_instr_pkg::NbInstr), + .CoproInstr(cvxif_instr_pkg::CoproInstr) + ) instr_decoder_i ( + .clk_i (clk_i), + .x_issue_req_i (x_issue_req_i), + .x_issue_resp_o(x_issue_resp_o) + ); + + typedef struct packed { // FIFO entry: an issue request together with the decoder's response to it + x_issue_req_t req; + x_issue_resp_t resp; + } x_issue_t; + + logic fifo_full, fifo_empty; + logic x_issue_ready_q; + logic instr_push, instr_pop; + x_issue_t req_i; + x_issue_t req_o; + + + + assign instr_push = x_issue_resp_o.accept ? 
1 : 0; /* push whenever the decoder accepts; NOTE(review): not qualified by x_issue_valid_i - confirm intended */ + assign instr_pop = (x_commit_i.x_commit_kill && x_commit_valid_i) || x_result_valid_o; /* pop on a killed commit or once the result is flagged valid */ + assign x_issue_ready_q = ~fifo_full; // ready while the FIFO is not full; registered below, + // so x_issue_ready_o reflects fifo_full one clock later + assign req_i.req = x_issue_req_i; + assign req_i.resp = x_issue_resp_o; + + always_ff @(posedge clk_i or negedge rst_ni) begin : regs // x_issue_ready_o resets to 1 and then follows x_issue_ready_q + if (!rst_ni) begin + x_issue_ready_o <= 1; + end else begin + x_issue_ready_o <= x_issue_ready_q; + end + end + + fifo_v3 #( + .FALL_THROUGH(1), //data_o ready and pop in the same cycle + .DATA_WIDTH (64), // presumably ignored because dtype is overridden below - TODO confirm against fifo_v3 + .DEPTH (8), + .dtype (x_issue_t) + ) fifo_commit_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (fifo_full), + .empty_o (fifo_empty), + .usage_o (), + .data_i (req_i), + .push_i (instr_push), + .data_o (req_o), + .pop_i (instr_pop) + ); + + logic [3:0] c; // counter value compared against the result's 4 LSBs below + counter #( + .WIDTH(4) + ) counter_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (~x_commit_i.x_commit_kill && x_commit_valid_i), // cleared on a valid, non-killed commit + .en_i (1'b1), + .load_i (), // load/down/d inputs are left unconnected + .down_i (), + .d_i (), + .q_o (c), + .overflow_o() + ); + + always_comb begin + x_result_o.data = req_o.req.rs[0] + req_o.req.rs[1] + (X_NUM_RS == 3 ? req_o.req.rs[2] : 0); // sum of the source operands; rs[2] only when X_NUM_RS == 3 + x_result_valid_o = (c == x_result_o.data[3:0]) && ~fifo_empty ? 1 : 0; // valid once the counter equals the sum's 4 LSBs and the FIFO is not empty + x_result_o.id = req_o.req.id; + x_result_o.rd = req_o.req.instr[11:7]; // bits [11:7] of the instruction (the rd field in standard RISC-V encodings) + x_result_o.we = req_o.resp.writeback & x_result_valid_o; + x_result_o.exc = 0; + x_result_o.exccode = 0; + end + +endmodule diff --git a/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv b/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv new file mode 100644 index 0000000..035cb04 --- /dev/null +++ b/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv @@ -0,0 +1,47 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com) + +package cvxif_instr_pkg; // instruction table of the example coprocessor + + typedef struct packed { + logic [31:0] instr; // expected value of the masked instruction + logic [31:0] mask; // instruction bits that are compared: an entry matches when (mask & instruction) == instr + cvxif_pkg::x_issue_resp_t resp; // issue response returned on a match + } copro_issue_resp_t; + + // The 2 RISC-V instructions accepted by the coprocessor + parameter int unsigned NbInstr = 2; + parameter copro_issue_resp_t CoproInstr[NbInstr] = '{ + '{ + instr: 32'b00000_00_00000_00000_0_00_00000_0101011, // custom1 opcode + mask: 32'b00000_00_00000_00000_0_00_00000_1111111, // only the 7-bit opcode [6:0] is compared + resp : '{ + accept : 1'b1, + writeback : 1'b0, + dualwrite : 1'b0, + dualread : 1'b0, + loadstore : 1'b0, + exc : 1'b0 + } + }, + '{ + instr: 32'b00000_00_00000_00000_0_00_00000_1011011, // custom2 opcode + mask: 32'b00000_00_00000_00000_0_00_00000_1111111, // only the 7-bit opcode [6:0] is compared + resp : '{ + accept : 1'b1, + writeback : 1'b1, // the only field that differs from the custom1 response + dualwrite : 1'b0, + dualread : 1'b0, + loadstore : 1'b0, + exc : 1'b0 + } + } + }; + +endpackage diff --git a/test/type_param/core/cvxif_example/instr_decoder.sv b/test/type_param/core/cvxif_example/instr_decoder.sv new file mode 100644 index 0000000..0cf1bdf --- /dev/null +++ b/test/type_param/core/cvxif_example/instr_decoder.sv @@ -0,0 +1,49 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com) + +module instr_decoder // looks up the issued instruction in CoproInstr and drives the matching issue response (all zero when nothing matches) + import cvxif_pkg::*; +#( + parameter int NbInstr = 1, // number of entries in CoproInstr + parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0} +) ( + input logic clk_i, // only used to clock the assertion at the end of the module + input x_issue_req_t x_issue_req_i, + output x_issue_resp_t x_issue_resp_o +); + + logic [NbInstr-1:0] sel; // sel[i] is set when table entry i matches the issued instruction + + for (genvar i = 0; i < NbInstr; i++) begin : gen_predecoder_selector + assign sel[i] = ((CoproInstr[i].mask & x_issue_req_i.instr) == CoproInstr[i].instr); + end + + always_comb begin // response defaults to all zero, then is overwritten by the matching entry + x_issue_resp_o.accept = '0; + x_issue_resp_o.writeback = '0; + x_issue_resp_o.dualwrite = '0; + x_issue_resp_o.dualread = '0; + x_issue_resp_o.loadstore = '0; + x_issue_resp_o.exc = '0; + for (int unsigned i = 0; i < NbInstr; i++) begin // entries are applied in index order, so the highest matching index wins + if (sel[i]) begin + x_issue_resp_o.accept = CoproInstr[i].resp.accept; + x_issue_resp_o.writeback = CoproInstr[i].resp.writeback; + x_issue_resp_o.dualwrite = CoproInstr[i].resp.dualwrite; + x_issue_resp_o.dualread = CoproInstr[i].resp.dualread; + x_issue_resp_o.loadstore = CoproInstr[i].resp.loadstore; + x_issue_resp_o.exc = CoproInstr[i].resp.exc; + end + end + end + + assert property (@(posedge clk_i) $onehot0(sel)) // at most one table entry may match; otherwise only a warning is raised + else $warning("This offloaded instruction is valid for multiple coprocessor instructions !"); + +endmodule diff --git a/test/type_param/core/cvxif_fu.sv b/test/type_param/core/cvxif_fu.sv new file mode 100644 index 0000000..fb0058b --- /dev/null +++ b/test/type_param/core/cvxif_fu.sv @@ -0,0 +1,112 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Guillaume CHAUVON (guillaume.chauvon@thalesgroup.com) + +// Functional Unit for the logic of the CoreV-X-Interface: offloads an instruction to the coprocessor and returns its result (or an illegal-instruction exception) + + +module cvxif_fu + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input fu_data_t fu_data_i, + input riscv::priv_lvl_t priv_lvl_i, + //from issue + input logic x_valid_i, + output logic x_ready_o, + input logic [ 31:0] x_off_instr_i, + //to writeback + output logic [TRANS_ID_BITS-1:0] x_trans_id_o, + output exception_t x_exception_o, + output riscv::xlen_t x_result_o, + output logic x_valid_o, + output logic x_we_o, + //to coprocessor + output cvxif_pkg::cvxif_req_t cvxif_req_o, + input cvxif_pkg::cvxif_resp_t cvxif_resp_i +); + localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS; // NOTE(review): sizes rs_valid, while the generate-if below tests cvxif_pkg::X_NUM_RS - confirm both are always equal + + logic illegal_n, illegal_q; + logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q; + logic [31:0] illegal_instr_n, illegal_instr_q; + logic [X_NUM_RS-1:0] rs_valid; + + if (cvxif_pkg::X_NUM_RS == 3) begin : gen_third_operand + assign rs_valid = 3'b111; + end else begin : gen_no_third_operand + assign rs_valid = 2'b11; + end + + always_comb begin // request defaults to all zero with x_result_ready held high + cvxif_req_o = '0; + cvxif_req_o.x_result_ready = 1'b1; + x_ready_o = cvxif_resp_i.x_issue_ready; + if (x_valid_i) begin // issue and commit are driven in the same cycle; the commit never kills + cvxif_req_o.x_issue_valid = x_valid_i; + cvxif_req_o.x_issue_req.instr = x_off_instr_i; + cvxif_req_o.x_issue_req.mode = priv_lvl_i; + cvxif_req_o.x_issue_req.id = fu_data_i.trans_id; + cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a; + cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b; + if (cvxif_pkg::X_NUM_RS == 3) begin + cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm; // third operand is taken from the imm field + end + cvxif_req_o.x_issue_req.rs_valid = rs_valid; + cvxif_req_o.x_commit_valid = x_valid_i; + cvxif_req_o.x_commit.id = fu_data_i.trans_id; + cvxif_req_o.x_commit.x_commit_kill = 
1'b0; + end + end + + always_comb begin /* tracks a refused issue (illegal instruction) and forwards the coprocessor result to writeback */ + illegal_n = illegal_q; + illegal_id_n = illegal_id_q; + illegal_instr_n = illegal_instr_q; + if (~cvxif_resp_i.x_issue_resp.accept && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready && ~illegal_n) begin /* issue was valid and ready but not accepted: remember its id and instruction */ + illegal_n = 1'b1; + illegal_id_n = cvxif_req_o.x_issue_req.id; + illegal_instr_n = cvxif_req_o.x_issue_req.instr; + end + x_valid_o = cvxif_resp_i.x_result_valid; //Read result only when CVXIF is enabled + x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0; + x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0; + x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0; // exccode zero-extended to XLEN bits (presumably exccode is 6 bits wide - TODO confirm) + x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0; + x_exception_o.tval = '0; + x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0; + if (illegal_n) begin // illegal_n may have been set just above, so this can fire in the same cycle + if (~x_valid_o) begin // report only when the coprocessor has no valid result this cycle + x_trans_id_o = illegal_id_n; + x_result_o = '0; + x_valid_o = 1'b1; + x_exception_o.cause = riscv::ILLEGAL_INSTR; + x_exception_o.valid = 1'b1; + x_exception_o.tval = illegal_instr_n; + x_we_o = '0; + illegal_n = '0; // Reset flag for illegal instr. illegal_id and illegal instr values are a don't care, no need to reset them. + end + end + end + + always_ff @(posedge clk_i, negedge rst_ni) begin // registers holding the pending illegal-instruction record + if (~rst_ni) begin + illegal_q <= 1'b0; + illegal_id_q <= '0; + illegal_instr_q <= '0; + end else begin + illegal_q <= illegal_n; + illegal_id_q <= illegal_id_n; + illegal_instr_q <= illegal_instr_n; + end + end + +endmodule diff --git a/test/type_param/core/decoder.sv b/test/type_param/core/decoder.sv new file mode 100644 index 0000000..eead0c4 --- /dev/null +++ b/test/type_param/core/decoder.sv @@ -0,0 +1,1397 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// File: decoder.sv +// Author: Florian Zaruba +// Date: 8.4.2017 +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// Description: Decodes the instruction coming from IF into a scoreboard entry +// and flags whether it changes the control flow +// + +module decoder + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic debug_req_i, // external debug request + input logic [riscv::VLEN-1:0] pc_i, // PC from IF + input logic is_compressed_i, // is a compressed instruction + input logic [15:0] compressed_instr_i, // compressed form of instruction + input logic is_illegal_i, // illegal compressed instruction + input logic [31:0] instruction_i, // instruction from IF + input branchpredict_sbe_t branch_predict_i, + input exception_t ex_i, // if an exception occurred in IF + input logic [1:0] irq_i, // external interrupt + input irq_ctrl_t irq_ctrl_i, // interrupt control and status information from CSRs + // From CSR + input riscv::priv_lvl_t priv_lvl_i, // current privilege level + input logic debug_mode_i, // we are in debug mode + input riscv::xs_t fs_i, // floating point extension status + input logic [2:0] frm_i, // floating-point dynamic rounding mode + input riscv::xs_t vs_i, // vector extension status + input logic tvm_i, // trap virtual memory + input logic tw_i, // timeout wait + input logic tsr_i, // trap sret + output scoreboard_entry_t instruction_o, // scoreboard entry to scoreboard + output logic 
is_control_flow_instr_o // this instruction will change the control flow +); + logic illegal_instr; + logic illegal_instr_bm; + logic illegal_instr_zic; + logic illegal_instr_non_bm; + // this instruction is an environment call (ecall), it is handled like an exception + logic ecall; + // this instruction is a software break-point + logic ebreak; + // this instruction needs floating-point rounding-mode verification + logic check_fprm; + riscv::instruction_t instr; + assign instr = riscv::instruction_t'(instruction_i); + // -------------------- + // Immediate select + // -------------------- + enum logic [3:0] {NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM, RS3} imm_select; + + riscv::xlen_t imm_i_type; + riscv::xlen_t imm_s_type; + riscv::xlen_t imm_sb_type; + riscv::xlen_t imm_u_type; + riscv::xlen_t imm_uj_type; + riscv::xlen_t imm_bi_type; + + // --------------------------------------- + // Accelerator instructions' first-pass decoder + // --------------------------------------- + logic is_accel; + scoreboard_entry_t acc_instruction; + logic acc_illegal_instr; + logic acc_is_control_flow_instr; + + if (CVA6Cfg.EnableAccelerator) begin : gen_accel_decoder + // This module is responsible for a light-weight decoding of accelerator instructions, + // identifying them, but also whether they read/write scalar registers. + // Accelerators are supposed to define this module. 
+ cva6_accel_first_pass_decoder i_accel_decoder ( + .instruction_i(instruction_i), + .fs_i(fs_i), + .vs_i(vs_i), + .is_accel_o(is_accel), + .instruction_o(acc_instruction), + .illegal_instr_o(acc_illegal_instr), + .is_control_flow_instr_o(acc_is_control_flow_instr) + ); + end : gen_accel_decoder + else begin + assign is_accel = 1'b0; + assign acc_instruction = '0; + assign acc_illegal_instr = 1'b1; // this should never propagate + assign acc_is_control_flow_instr = 1'b0; + end + + always_comb begin : decoder + + imm_select = NOIMM; + is_control_flow_instr_o = 1'b0; + illegal_instr = 1'b0; + illegal_instr_non_bm = 1'b0; + illegal_instr_bm = 1'b0; + illegal_instr_zic = 1'b0; + instruction_o.pc = pc_i; + instruction_o.trans_id = '0; + instruction_o.fu = NONE; + instruction_o.op = ariane_pkg::ADD; + instruction_o.rs1 = '0; + instruction_o.rs2 = '0; + instruction_o.rd = '0; + instruction_o.use_pc = 1'b0; + instruction_o.is_compressed = is_compressed_i; + instruction_o.use_zimm = 1'b0; + instruction_o.bp = branch_predict_i; + instruction_o.vfp = 1'b0; + ecall = 1'b0; + ebreak = 1'b0; + check_fprm = 1'b0; + + if (~ex_i.valid) begin + case (instr.rtype.opcode) + riscv::OpcodeSystem: begin + instruction_o.fu = CSR; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rs2[4:0] = instr.rtype.rs2; //TODO: needs to be checked if better way is available + instruction_o.rd[4:0] = instr.itype.rd; + + unique case (instr.itype.funct3) + 3'b000: begin + // check if the RD and and RS1 fields are zero, this may be reset for the SENCE.VMA instruction + if (instr.itype.rs1 != '0 || instr.itype.rd != '0) illegal_instr = 1'b1; + // decode the immiediate field + case (instr.itype.imm) + // ECALL -> inject exception + 12'b0: ecall = 1'b1; + // EBREAK -> inject exception + 12'b1: ebreak = 1'b1; + // SRET + 12'b1_0000_0010: begin + if (CVA6Cfg.RVS) begin + instruction_o.op = ariane_pkg::SRET; + // check privilege level, SRET can only be executed in S and M mode + // we'll just decode an 
illegal instruction if we are in the wrong privilege level + if (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U) begin + illegal_instr = 1'b1; + // do not change privilege level if this is an illegal instruction + instruction_o.op = ariane_pkg::ADD; + end + // if we are in S-Mode and Trap SRET (tsr) is set -> trap on illegal instruction + if (priv_lvl_i == riscv::PRIV_LVL_S && tsr_i) begin + illegal_instr = 1'b1; + // do not change privilege level if this is an illegal instruction + instruction_o.op = ariane_pkg::ADD; + end + end else begin + illegal_instr = 1'b1; + instruction_o.op = ariane_pkg::ADD; + end + end + // MRET + 12'b11_0000_0010: begin + instruction_o.op = ariane_pkg::MRET; + // check privilege level, MRET can only be executed in M mode + // otherwise we decode an illegal instruction + if ((CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S) || (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U)) + illegal_instr = 1'b1; + end + // DRET + 12'b111_1011_0010: begin + instruction_o.op = ariane_pkg::DRET; + if (CVA6Cfg.DebugEn) begin + // check that we are in debug mode when executing this instruction + illegal_instr = (!debug_mode_i) ? 
1'b1 : illegal_instr; + end else begin + illegal_instr = 1'b1; + end + end + // WFI + 12'b1_0000_0101: begin + instruction_o.op = ariane_pkg::WFI; + // if timeout wait is set, trap on an illegal instruction in S Mode + // (after 0 cycles timeout) + if (CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S && tw_i) begin + illegal_instr = 1'b1; + instruction_o.op = ariane_pkg::ADD; + end + // we don't support U mode interrupts so WFI is illegal in this context + if (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U) begin + illegal_instr = 1'b1; + instruction_o.op = ariane_pkg::ADD; + end + end + // SFENCE.VMA + default: begin + if (instr.instr[31:25] == 7'b1001) begin + // check privilege level, SFENCE.VMA can only be executed in M/S mode + // otherwise decode an illegal instruction + illegal_instr = (((CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S) || ((!CVA6Cfg.RVS && !CVA6Cfg.RVU) || priv_lvl_i == riscv::PRIV_LVL_M)) && instr.itype.rd == '0) ? 1'b0 : 1'b1; + instruction_o.op = ariane_pkg::SFENCE_VMA; + // check TVM flag and intercept SFENCE.VMA call if necessary + if (CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S && tvm_i) + illegal_instr = 1'b1; + end else begin + illegal_instr = 1'b1; + end + end + endcase + end + // atomically swaps values in the CSR and integer register + 3'b001: begin // CSRRW + imm_select = IIMM; + instruction_o.op = ariane_pkg::CSR_WRITE; + end + // atomically set values in the CSR and write back to rd + 3'b010: begin // CSRRS + imm_select = IIMM; + // this is just a read + if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ; + else instruction_o.op = ariane_pkg::CSR_SET; + end + // atomically clear values in the CSR and write back to rd + 3'b011: begin // CSRRC + imm_select = IIMM; + // this is just a read + if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ; + else instruction_o.op = ariane_pkg::CSR_CLEAR; + end + // use zimm and iimm + 3'b101: begin // CSRRWI + instruction_o.rs1[4:0] = instr.itype.rs1; + 
imm_select = IIMM; + instruction_o.use_zimm = 1'b1; + instruction_o.op = ariane_pkg::CSR_WRITE; + end + 3'b110: begin // CSRRSI + instruction_o.rs1[4:0] = instr.itype.rs1; + imm_select = IIMM; + instruction_o.use_zimm = 1'b1; + // this is just a read + if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ; + else instruction_o.op = ariane_pkg::CSR_SET; + end + 3'b111: begin // CSRRCI + instruction_o.rs1[4:0] = instr.itype.rs1; + imm_select = IIMM; + instruction_o.use_zimm = 1'b1; + // this is just a read + if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ; + else instruction_o.op = ariane_pkg::CSR_CLEAR; + end + default: illegal_instr = 1'b1; + endcase + end + // Memory ordering instructions + riscv::OpcodeMiscMem: begin + instruction_o.fu = CSR; + instruction_o.rs1 = '0; + instruction_o.rs2 = '0; + instruction_o.rd = '0; + + case (instr.stype.funct3) + // FENCE + // Currently implemented as a whole DCache flush boldly ignoring other things + 3'b000: instruction_o.op = ariane_pkg::FENCE; + // FENCE.I + 3'b001: instruction_o.op = ariane_pkg::FENCE_I; + + default: illegal_instr = 1'b1; + endcase + end + + // -------------------------- + // Reg-Reg Operations + // -------------------------- + riscv::OpcodeOp: begin + // -------------------------------------------- + // Vectorial Floating-Point Reg-Reg Operations + // -------------------------------------------- + if (instr.rvftype.funct2 == 2'b10) begin // Prefix 10 for all Xfvec ops + // only generate decoder if FP extensions are enabled (static) + if (CVA6Cfg.FpPresent && CVA6Cfg.XFVec && fs_i != riscv::Off) begin + automatic logic allow_replication; // control honoring of replication flag + + instruction_o.fu = FPU_VEC; // Same unit, but sets 'vectorial' signal + instruction_o.rs1[4:0] = instr.rvftype.rs1; + instruction_o.rs2[4:0] = instr.rvftype.rs2; + instruction_o.rd[4:0] = instr.rvftype.rd; + check_fprm = 1'b1; + allow_replication = 1'b1; + // decode vectorial FP instruction + 
unique case (instr.rvftype.vecfltop) + 5'b00001: begin + instruction_o.op = ariane_pkg::FADD; // vfadd.vfmt - Vectorial FP Addition + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2[4:0] = instr.rvftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00010: begin + instruction_o.op = ariane_pkg::FSUB; // vfsub.vfmt - Vectorial FP Subtraction + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2[4:0] = instr.rvftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00011: + instruction_o.op = ariane_pkg::FMUL; // vfmul.vfmt - Vectorial FP Multiplication + 5'b00100: + instruction_o.op = ariane_pkg::FDIV; // vfdiv.vfmt - Vectorial FP Division + 5'b00101: begin + instruction_o.op = ariane_pkg::VFMIN; // vfmin.vfmt - Vectorial FP Minimum + check_fprm = 1'b0; // rounding mode irrelevant + end + 5'b00110: begin + instruction_o.op = ariane_pkg::VFMAX; // vfmax.vfmt - Vectorial FP Maximum + check_fprm = 1'b0; // rounding mode irrelevant + end + 5'b00111: begin + instruction_o.op = ariane_pkg::FSQRT; // vfsqrt.vfmt - Vectorial FP Square Root + allow_replication = 1'b0; // only one operand + if (instr.rvftype.rs2 != 5'b00000) illegal_instr = 1'b1; // rs2 must be 0 + end + 5'b01000: begin + instruction_o.op = ariane_pkg::FMADD; // vfmac.vfmt - Vectorial FP Multiply-Accumulate + imm_select = SIMM; // rd into result field (upper bits don't matter) + end + 5'b01001: begin + instruction_o.op = ariane_pkg::FMSUB; // vfmre.vfmt - Vectorial FP Multiply-Reduce + imm_select = SIMM; // rd into result field (upper bits don't matter) + end + 5'b01100: begin + unique case (instr.rvftype.rs2) inside // operation encoded in rs2, `inside` for matching ? 
+ 5'b00000: begin + instruction_o.rs2[4:0] = instr.rvftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + if (instr.rvftype.repl) + instruction_o.op = ariane_pkg::FMV_X2F; // vfmv.vfmt.x - GPR to FPR Move + else instruction_o.op = ariane_pkg::FMV_F2X; // vfmv.x.vfmt - FPR to GPR Move + check_fprm = 1'b0; // no rounding for moves + end + 5'b00001: begin + instruction_o.op = ariane_pkg::FCLASS; // vfclass.vfmt - Vectorial FP Classify + check_fprm = 1'b0; // no rounding for classification + allow_replication = 1'b0; // R must not be set + end + 5'b00010: + instruction_o.op = ariane_pkg::FCVT_F2I; // vfcvt.x.vfmt - Vectorial FP to Int Conversion + 5'b00011: + instruction_o.op = ariane_pkg::FCVT_I2F; // vfcvt.vfmt.x - Vectorial Int to FP Conversion + 5'b001??: begin + instruction_o.op = ariane_pkg::FCVT_F2F; // vfcvt.vfmt.vfmt - Vectorial FP to FP Conversion + instruction_o.rs2[4:0] = instr.rvftype.rd; // set rs2 = rd as target vector for conversion + imm_select = IIMM; // rs2 holds part of the intruction + // TODO CHECK R bit for valid fmt combinations + // determine source format + unique case (instr.rvftype.rs2[21:20]) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~CVA6Cfg.RVFVec) illegal_instr = 1'b1; + 2'b01: if (~CVA6Cfg.XF16ALTVec) illegal_instr = 1'b1; + 2'b10: if (~CVA6Cfg.XF16Vec) illegal_instr = 1'b1; + 2'b11: if (~CVA6Cfg.XF8Vec) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + default: illegal_instr = 1'b1; + endcase + end + 5'b01101: begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = ariane_pkg::VFSGNJ; // vfsgnj.vfmt - Vectorial FP Sign Injection + end + 5'b01110: begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = ariane_pkg::VFSGNJN; // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection + end + 5'b01111: begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = ariane_pkg::VFSGNJX; // 
vfsgnjx.vfmt - Vectorial FP XORed Sign Injection + end + 5'b10000: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFEQ; // vfeq.vfmt - Vectorial FP Equality + end + 5'b10001: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFNE; // vfne.vfmt - Vectorial FP Non-Equality + end + 5'b10010: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFLT; // vfle.vfmt - Vectorial FP Less Than + end + 5'b10011: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFGE; // vfge.vfmt - Vectorial FP Greater or Equal + end + 5'b10100: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFLE; // vfle.vfmt - Vectorial FP Less or Equal + end + 5'b10101: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFGT; // vfgt.vfmt - Vectorial FP Greater Than + end + 5'b11000: begin + instruction_o.op = ariane_pkg::VFCPKAB_S; // vfcpka/b.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, lowest 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~CVA6Cfg.RVF) + illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: begin + if (~CVA6Cfg.RVFVec) + illegal_instr = 1'b1; // destination vector not supported + if (instr.rvftype.repl) + illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32 + end + 2'b01: begin + if (~CVA6Cfg.XF16ALTVec) + illegal_instr = 1'b1; // destination vector not supported + end + 2'b10: begin + if (~CVA6Cfg.XF16Vec) + illegal_instr = 1'b1; // destination vector not supported + end + 2'b11: begin + if (~CVA6Cfg.XF8Vec) + illegal_instr = 1'b1; // destination vector not supported + end + default: illegal_instr = 1'b1; + 
endcase + end + 5'b11001: begin + instruction_o.op = ariane_pkg::VFCPKCD_S; // vfcpkc/d.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, second 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~CVA6Cfg.RVF) + illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32 + 2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT + 2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16 + 2'b11: begin + if (~CVA6Cfg.XF8Vec) + illegal_instr = 1'b1; // destination vector not supported + end + default: illegal_instr = 1'b1; + endcase + end + 5'b11010: begin + instruction_o.op = ariane_pkg::VFCPKAB_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, lowest 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~CVA6Cfg.RVD) + illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: begin + if (~CVA6Cfg.RVFVec) + illegal_instr = 1'b1; // destination vector not supported + if (instr.rvftype.repl) + illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32 + end + 2'b01: begin + if (~CVA6Cfg.XF16ALTVec) + illegal_instr = 1'b1; // destination vector not supported + end + 2'b10: begin + if (~CVA6Cfg.XF16Vec) + illegal_instr = 1'b1; // destination vector not supported + end + 2'b11: begin + if (~CVA6Cfg.XF8Vec) + illegal_instr = 1'b1; // destination vector not supported + end + default: illegal_instr = 1'b1; + endcase + end + 5'b11011: begin + instruction_o.op = ariane_pkg::VFCPKCD_D; // vfcpkc/d.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, second 
4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~CVA6Cfg.RVD) + illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32 + 2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT + 2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16 + 2'b11: begin + if (~CVA6Cfg.XF8Vec) + illegal_instr = 1'b1; // destination vector not supported + end + default: illegal_instr = 1'b1; + endcase + end + default: illegal_instr = 1'b1; + endcase + + // check format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~CVA6Cfg.RVFVec) illegal_instr = 1'b1; + 2'b01: if (~CVA6Cfg.XF16ALTVec) illegal_instr = 1'b1; + 2'b10: if (~CVA6Cfg.XF16Vec) illegal_instr = 1'b1; + 2'b11: if (~CVA6Cfg.XF8Vec) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check disallowed replication + if (~allow_replication & instr.rvftype.repl) illegal_instr = 1'b1; + + // check rounding mode + if (check_fprm) begin + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000 : 3'b100]: ; //legal rounding modes + default: illegal_instr = 1'b1; + endcase + end + + end else begin // No vectorial FP enabled (static) + illegal_instr = 1'b1; + end + + // --------------------------- + // Integer Reg-Reg Operations + // --------------------------- + end else begin + if (CVA6Cfg.RVB) begin + instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001 || ((instr.rtype.funct7 == 7'b000_0101) && !(instr.rtype.funct3[14]))) ? MULT : ALU; + end else begin + instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? 
MULT : ALU; + end + instruction_o.rs1[4:0] = instr.rtype.rs1; + instruction_o.rs2[4:0] = instr.rtype.rs2; + instruction_o.rd[4:0] = instr.rtype.rd; + + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + {7'b000_0000, 3'b000} : instruction_o.op = ariane_pkg::ADD; // Add + {7'b010_0000, 3'b000} : instruction_o.op = ariane_pkg::SUB; // Sub + {7'b000_0000, 3'b010} : instruction_o.op = ariane_pkg::SLTS; // Set Lower Than + { + 7'b000_0000, 3'b011 + } : + instruction_o.op = ariane_pkg::SLTU; // Set Lower Than Unsigned + {7'b000_0000, 3'b100} : instruction_o.op = ariane_pkg::XORL; // Xor + {7'b000_0000, 3'b110} : instruction_o.op = ariane_pkg::ORL; // Or + {7'b000_0000, 3'b111} : instruction_o.op = ariane_pkg::ANDL; // And + {7'b000_0000, 3'b001} : instruction_o.op = ariane_pkg::SLL; // Shift Left Logical + {7'b000_0000, 3'b101} : instruction_o.op = ariane_pkg::SRL; // Shift Right Logical + {7'b010_0000, 3'b101} : instruction_o.op = ariane_pkg::SRA; // Shift Right Arithmetic + // Multiplications + {7'b000_0001, 3'b000} : instruction_o.op = ariane_pkg::MUL; + {7'b000_0001, 3'b001} : instruction_o.op = ariane_pkg::MULH; + {7'b000_0001, 3'b010} : instruction_o.op = ariane_pkg::MULHSU; + {7'b000_0001, 3'b011} : instruction_o.op = ariane_pkg::MULHU; + {7'b000_0001, 3'b100} : instruction_o.op = ariane_pkg::DIV; + {7'b000_0001, 3'b101} : instruction_o.op = ariane_pkg::DIVU; + {7'b000_0001, 3'b110} : instruction_o.op = ariane_pkg::REM; + {7'b000_0001, 3'b111} : instruction_o.op = ariane_pkg::REMU; + default: begin + illegal_instr_non_bm = 1'b1; + end + endcase + if (CVA6Cfg.RVB) begin + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + //Logical with Negate + {7'b010_0000, 3'b111} : instruction_o.op = ariane_pkg::ANDN; // Andn + {7'b010_0000, 3'b110} : instruction_o.op = ariane_pkg::ORN; // Orn + {7'b010_0000, 3'b100} : instruction_o.op = ariane_pkg::XNOR; // Xnor + //Shift and Add (Bitmanip) + {7'b001_0000, 3'b010} : instruction_o.op = 
ariane_pkg::SH1ADD; // Sh1add + {7'b001_0000, 3'b100} : instruction_o.op = ariane_pkg::SH2ADD; // Sh2add + {7'b001_0000, 3'b110} : instruction_o.op = ariane_pkg::SH3ADD; // Sh3add + // Integer maximum/minimum + {7'b000_0101, 3'b110} : instruction_o.op = ariane_pkg::MAX; // max + {7'b000_0101, 3'b111} : instruction_o.op = ariane_pkg::MAXU; // maxu + {7'b000_0101, 3'b100} : instruction_o.op = ariane_pkg::MIN; // min + {7'b000_0101, 3'b101} : instruction_o.op = ariane_pkg::MINU; // minu + // Single bit instructions + {7'b010_0100, 3'b001} : instruction_o.op = ariane_pkg::BCLR; // bclr + {7'b010_0100, 3'b101} : instruction_o.op = ariane_pkg::BEXT; // bext + {7'b011_0100, 3'b001} : instruction_o.op = ariane_pkg::BINV; // binv + {7'b001_0100, 3'b001} : instruction_o.op = ariane_pkg::BSET; // bset + // Carry-Less-Multiplication (clmul, clmulh, clmulr) + {7'b000_0101, 3'b001} : instruction_o.op = ariane_pkg::CLMUL; // clmul + {7'b000_0101, 3'b011} : instruction_o.op = ariane_pkg::CLMULH; // clmulh + {7'b000_0101, 3'b010} : instruction_o.op = ariane_pkg::CLMULR; // clmulr + // Bitwise Shifting + {7'b011_0000, 3'b001} : instruction_o.op = ariane_pkg::ROL; // rol + {7'b011_0000, 3'b101} : instruction_o.op = ariane_pkg::ROR; // ror + // Zero Extend Op + {7'b000_0100, 3'b100} : instruction_o.op = ariane_pkg::ZEXTH; + default: begin + illegal_instr_bm = 1'b1; + end + endcase + end + if (CVA6Cfg.ZiCondExtEn) begin + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + //Conditional move + {7'b000_0111, 3'b101} : instruction_o.op = ariane_pkg::CZERO_EQZ; // czero.eqz + {7'b000_0111, 3'b111} : instruction_o.op = ariane_pkg::CZERO_NEZ; // czero.nez + default: begin + illegal_instr_zic = 1'b1; + end + endcase + end + //VCS coverage on + unique case ({ + CVA6Cfg.RVB, CVA6Cfg.ZiCondExtEn + }) + 2'b00: illegal_instr = illegal_instr_non_bm; + 2'b01: illegal_instr = illegal_instr_non_bm & illegal_instr_zic; + 2'b10: illegal_instr = illegal_instr_non_bm & illegal_instr_bm; + 
2'b11: illegal_instr = illegal_instr_non_bm & illegal_instr_bm & illegal_instr_zic; + endcase + end + end + + // -------------------------- + // 32bit Reg-Reg Operations + // -------------------------- + riscv::OpcodeOp32: begin + instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? MULT : ALU; + instruction_o.rs1[4:0] = instr.rtype.rs1; + instruction_o.rs2[4:0] = instr.rtype.rs2; + instruction_o.rd[4:0] = instr.rtype.rd; + if (riscv::IS_XLEN64) begin + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + {7'b000_0000, 3'b000} : instruction_o.op = ariane_pkg::ADDW; // addw + {7'b010_0000, 3'b000} : instruction_o.op = ariane_pkg::SUBW; // subw + {7'b000_0000, 3'b001} : instruction_o.op = ariane_pkg::SLLW; // sllw + {7'b000_0000, 3'b101} : instruction_o.op = ariane_pkg::SRLW; // srlw + {7'b010_0000, 3'b101} : instruction_o.op = ariane_pkg::SRAW; // sraw + // Multiplications + {7'b000_0001, 3'b000} : instruction_o.op = ariane_pkg::MULW; + {7'b000_0001, 3'b100} : instruction_o.op = ariane_pkg::DIVW; + {7'b000_0001, 3'b101} : instruction_o.op = ariane_pkg::DIVUW; + {7'b000_0001, 3'b110} : instruction_o.op = ariane_pkg::REMW; + {7'b000_0001, 3'b111} : instruction_o.op = ariane_pkg::REMUW; + default: illegal_instr_non_bm = 1'b1; + endcase + if (CVA6Cfg.RVB) begin + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + // Shift with Add (Unsigned Word) + {7'b001_0000, 3'b010}: instruction_o.op = ariane_pkg::SH1ADDUW; // sh1add.uw + {7'b001_0000, 3'b100}: instruction_o.op = ariane_pkg::SH2ADDUW; // sh2add.uw + {7'b001_0000, 3'b110}: instruction_o.op = ariane_pkg::SH3ADDUW; // sh3add.uw + // Unsigned word Op's + {7'b000_0100, 3'b000}: instruction_o.op = ariane_pkg::ADDUW; // add.uw + // Bitwise Shifting + {7'b011_0000, 3'b001}: instruction_o.op = ariane_pkg::ROLW; // rolw + {7'b011_0000, 3'b101}: instruction_o.op = ariane_pkg::RORW; // rorw + default: illegal_instr_bm = 1'b1; + endcase + illegal_instr = illegal_instr_non_bm & illegal_instr_bm; + end 
else begin + illegal_instr = illegal_instr_non_bm; + end + end else illegal_instr = 1'b1; + end + // -------------------------------- + // Reg-Immediate Operations + // -------------------------------- + riscv::OpcodeOpImm: begin + instruction_o.fu = ALU; + imm_select = IIMM; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rd[4:0] = instr.itype.rd; + unique case (instr.itype.funct3) + 3'b000: instruction_o.op = ariane_pkg::ADD; // Add Immediate + 3'b010: instruction_o.op = ariane_pkg::SLTS; // Set to one if Lower Than Immediate + 3'b011: + instruction_o.op = ariane_pkg::SLTU; // Set to one if Lower Than Immediate Unsigned + 3'b100: instruction_o.op = ariane_pkg::XORL; // Exclusive Or with Immediate + 3'b110: instruction_o.op = ariane_pkg::ORL; // Or with Immediate + 3'b111: instruction_o.op = ariane_pkg::ANDL; // And with Immediate + + 3'b001: begin + instruction_o.op = ariane_pkg::SLL; // Shift Left Logical by Immediate + if (instr.instr[31:26] != 6'b0) illegal_instr_non_bm = 1'b1; + if (instr.instr[25] != 1'b0 && riscv::XLEN == 32) illegal_instr_non_bm = 1'b1; + end + + 3'b101: begin + if (instr.instr[31:26] == 6'b0) + instruction_o.op = ariane_pkg::SRL; // Shift Right Logical by Immediate + else if (instr.instr[31:26] == 6'b010_000) + instruction_o.op = ariane_pkg::SRA; // Shift Right Arithmetically by Immediate + else illegal_instr_non_bm = 1'b1; + if (instr.instr[25] != 1'b0 && riscv::XLEN == 32) illegal_instr_non_bm = 1'b1; + end + endcase + if (CVA6Cfg.RVB) begin + unique case (instr.itype.funct3) + 3'b001: begin + if (instr.instr[31:25] == 7'b0110000) begin + if (instr.instr[22:20] == 3'b100) instruction_o.op = ariane_pkg::SEXTB; + else if (instr.instr[22:20] == 3'b101) instruction_o.op = ariane_pkg::SEXTH; + else if (instr.instr[22:20] == 3'b010) instruction_o.op = ariane_pkg::CPOP; + else if (instr.instr[22:20] == 3'b000) instruction_o.op = ariane_pkg::CLZ; + else if (instr.instr[22:20] == 3'b001) instruction_o.op = ariane_pkg::CTZ; + end else 
if (instr.instr[31:26] == 6'b010010) instruction_o.op = ariane_pkg::BCLRI; + else if (instr.instr[31:26] == 6'b011010) instruction_o.op = ariane_pkg::BINVI; + else if (instr.instr[31:26] == 6'b001010) instruction_o.op = ariane_pkg::BSETI; + else illegal_instr_bm = 1'b1; + end + 3'b101: begin + if (instr.instr[31:20] == 12'b001010000111) instruction_o.op = ariane_pkg::ORCB; + else if (riscv::IS_XLEN64 && instr.instr[31:20] == 12'b011010111000) + instruction_o.op = ariane_pkg::REV8; + else if (instr.instr[31:20] == 12'b011010011000) + instruction_o.op = ariane_pkg::REV8; + else if (instr.instr[31:26] == 6'b010_010) instruction_o.op = ariane_pkg::BEXTI; + else if (instr.instr[31:26] == 6'b011_000) instruction_o.op = ariane_pkg::RORI; + else illegal_instr_bm = 1'b1; + end + default: illegal_instr_bm = 1'b1; + endcase + illegal_instr = illegal_instr_non_bm & illegal_instr_bm; + end else begin + illegal_instr = illegal_instr_non_bm; + end + end + + // -------------------------------- + // 32 bit Reg-Immediate Operations + // -------------------------------- + riscv::OpcodeOpImm32: begin + instruction_o.fu = ALU; + imm_select = IIMM; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rd[4:0] = instr.itype.rd; + if (riscv::IS_XLEN64) begin + unique case (instr.itype.funct3) + 3'b000: instruction_o.op = ariane_pkg::ADDW; // Add Immediate + 3'b001: begin + instruction_o.op = ariane_pkg::SLLW; // Shift Left Logical by Immediate + if (instr.instr[31:25] != 7'b0) illegal_instr_non_bm = 1'b1; + end + 3'b101: begin + if (instr.instr[31:25] == 7'b0) + instruction_o.op = ariane_pkg::SRLW; // Shift Right Logical by Immediate + else if (instr.instr[31:25] == 7'b010_0000) + instruction_o.op = ariane_pkg::SRAW; // Shift Right Arithmetically by Immediate + else illegal_instr_non_bm = 1'b1; + end + default: illegal_instr_non_bm = 1'b1; + endcase + if (CVA6Cfg.RVB) begin + unique case (instr.itype.funct3) + 3'b001: begin + if (instr.instr[31:25] == 7'b0110000) begin + if 
(instr.instr[21:20] == 2'b10) instruction_o.op = ariane_pkg::CPOPW; + else if (instr.instr[21:20] == 2'b00) instruction_o.op = ariane_pkg::CLZW; + else if (instr.instr[21:20] == 2'b01) instruction_o.op = ariane_pkg::CTZW; + else illegal_instr_bm = 1'b1; + end else if (instr.instr[31:26] == 6'b000010) begin + instruction_o.op = ariane_pkg::SLLIUW; // Shift Left Logic by Immediate (Unsigned Word) + end else illegal_instr_bm = 1'b1; + end + 3'b101: begin + if (instr.instr[31:25] == 7'b011_0000) instruction_o.op = ariane_pkg::RORIW; + else illegal_instr_bm = 1'b1; + end + default: illegal_instr_bm = 1'b1; + endcase + illegal_instr = illegal_instr_non_bm & illegal_instr_bm; + end else begin + illegal_instr = illegal_instr_non_bm; + end + + end else illegal_instr = 1'b1; + end + // -------------------------------- + // LSU + // -------------------------------- + riscv::OpcodeStore: begin + instruction_o.fu = STORE; + imm_select = SIMM; + instruction_o.rs1[4:0] = instr.stype.rs1; + instruction_o.rs2[4:0] = instr.stype.rs2; + // determine store size + unique case (instr.stype.funct3) + 3'b000: instruction_o.op = ariane_pkg::SB; + 3'b001: instruction_o.op = ariane_pkg::SH; + 3'b010: instruction_o.op = ariane_pkg::SW; + 3'b011: + if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::SD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + + riscv::OpcodeLoad: begin + instruction_o.fu = LOAD; + imm_select = IIMM; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rd[4:0] = instr.itype.rd; + // determine load size and signed type + unique case (instr.itype.funct3) + 3'b000: instruction_o.op = ariane_pkg::LB; + 3'b001: instruction_o.op = ariane_pkg::LH; + 3'b010: instruction_o.op = ariane_pkg::LW; + 3'b100: instruction_o.op = ariane_pkg::LBU; + 3'b101: instruction_o.op = ariane_pkg::LHU; + 3'b110: + if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::LWU; + else illegal_instr = 1'b1; + 3'b011: + if (riscv::XLEN == 64) instruction_o.op = 
ariane_pkg::LD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + + // -------------------------------- + // Floating-Point Load/store + // -------------------------------- + riscv::OpcodeStoreFp: begin + if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = STORE; + imm_select = SIMM; + instruction_o.rs1[4:0] = instr.stype.rs1; + instruction_o.rs2[4:0] = instr.stype.rs2; + // determine store size + unique case (instr.stype.funct3) + // Only process instruction if corresponding extension is active (static) + 3'b000: + if (CVA6Cfg.XF8) instruction_o.op = ariane_pkg::FSB; + else illegal_instr = 1'b1; + 3'b001: + if (CVA6Cfg.XF16 | CVA6Cfg.XF16ALT) instruction_o.op = ariane_pkg::FSH; + else illegal_instr = 1'b1; + 3'b010: + if (CVA6Cfg.RVF) instruction_o.op = ariane_pkg::FSW; + else illegal_instr = 1'b1; + 3'b011: + if (CVA6Cfg.RVD) instruction_o.op = ariane_pkg::FSD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else illegal_instr = 1'b1; + end + + riscv::OpcodeLoadFp: begin + if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = LOAD; + imm_select = IIMM; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rd[4:0] = instr.itype.rd; + // determine load size + unique case (instr.itype.funct3) + // Only process instruction if corresponding extension is active (static) + 3'b000: + if (CVA6Cfg.XF8) instruction_o.op = ariane_pkg::FLB; + else illegal_instr = 1'b1; + 3'b001: + if (CVA6Cfg.XF16 | CVA6Cfg.XF16ALT) instruction_o.op = ariane_pkg::FLH; + else illegal_instr = 1'b1; + 3'b010: + if (CVA6Cfg.RVF) instruction_o.op = ariane_pkg::FLW; + else illegal_instr = 1'b1; + 3'b011: + if (CVA6Cfg.RVD) instruction_o.op = ariane_pkg::FLD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else illegal_instr = 1'b1; + end + + 
// ---------------------------------- + // Floating-Point Reg-Reg Operations + // ---------------------------------- + riscv::OpcodeMadd, riscv::OpcodeMsub, riscv::OpcodeNmsub, riscv::OpcodeNmadd: begin + if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = FPU; + instruction_o.rs1[4:0] = instr.r4type.rs1; + instruction_o.rs2[4:0] = instr.r4type.rs2; + instruction_o.rd[4:0] = instr.r4type.rd; + imm_select = RS3; // rs3 into result field + check_fprm = 1'b1; + // select the correct fused operation + unique case (instr.r4type.opcode) + default: instruction_o.op = ariane_pkg::FMADD; // fmadd.fmt - FP Fused multiply-add + riscv::OpcodeMsub: + instruction_o.op = ariane_pkg::FMSUB; // fmsub.fmt - FP Fused multiply-subtract + riscv::OpcodeNmsub: + instruction_o.op = ariane_pkg::FNMSUB; // fnmsub.fmt - FP Negated fused multiply-subtract + riscv::OpcodeNmadd: + instruction_o.op = ariane_pkg::FNMADD; // fnmadd.fmt - FP Negated fused multiply-add + endcase + + // determine fp format + unique case (instr.r4type.funct2) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~CVA6Cfg.RVF) illegal_instr = 1'b1; + 2'b01: if (~CVA6Cfg.RVD) illegal_instr = 1'b1; + 2'b10: if (~CVA6Cfg.XF16 & ~CVA6Cfg.XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~CVA6Cfg.XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check rounding mode + if (check_fprm) begin + unique case (instr.rftype.rm) inside + [3'b000 : 3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101 + if (~CVA6Cfg.XF16ALT || instr.rftype.fmt != 2'b10) illegal_instr = 1'b1; + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000 : 3'b100]: ; //legal rounding modes + default: illegal_instr = 1'b1; + endcase + end + 3'b111: begin + // rounding mode from frm csr + unique case (frm_i) inside + [3'b000 : 3'b100]: ; //legal 
rounding modes + default: illegal_instr = 1'b1; + endcase + end + default: illegal_instr = 1'b1; + endcase + end + end else begin + illegal_instr = 1'b1; + end + end + + riscv::OpcodeOpFp: begin + if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = FPU; + instruction_o.rs1[4:0] = instr.rftype.rs1; + instruction_o.rs2[4:0] = instr.rftype.rs2; + instruction_o.rd[4:0] = instr.rftype.rd; + check_fprm = 1'b1; + // decode FP instruction + unique case (instr.rftype.funct5) + 5'b00000: begin + instruction_o.op = ariane_pkg::FADD; // fadd.fmt - FP Addition + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2[4:0] = instr.rftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00001: begin + instruction_o.op = ariane_pkg::FSUB; // fsub.fmt - FP Subtraction + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2[4:0] = instr.rftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00010: instruction_o.op = ariane_pkg::FMUL; // fmul.fmt - FP Multiplication + 5'b00011: instruction_o.op = ariane_pkg::FDIV; // fdiv.fmt - FP Division + 5'b01011: begin + instruction_o.op = ariane_pkg::FSQRT; // fsqrt.fmt - FP Square Root + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + 5'b00100: begin + instruction_o.op = ariane_pkg::FSGNJ; // fsgn{j[n]/jx}.fmt - FP Sign Injection + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (CVA6Cfg.XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000 : 3'b010], [3'b100 : 3'b110]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000 : 3'b010]})) illegal_instr = 1'b1; + end + end + 5'b00101: begin + instruction_o.op = ariane_pkg::FMIN_MAX; // fmin/fmax.fmt - FP Minimum / Maximum + check_fprm = 1'b0; // instruction 
encoded in rm, do the check here + if (CVA6Cfg.XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000 : 3'b001], [3'b100 : 3'b101]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000 : 3'b001]})) illegal_instr = 1'b1; + end + end + 5'b01000: begin + instruction_o.op = ariane_pkg::FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion + instruction_o.rs2[4:0] = instr.rvftype.rs1; // tie rs2 to rs1 to be safe (vectors use rs2) + imm_select = IIMM; // rs2 holds part of the instruction + if (|instr.rftype.rs2[24:23]) + illegal_instr = 1'b1; // bits [22:20] used, other bits must be 0 + // check source format + unique case (instr.rftype.rs2[22:20]) + // Only process instruction if corresponding extension is active (static) + 3'b000: if (~CVA6Cfg.RVF) illegal_instr = 1'b1; + 3'b001: if (~CVA6Cfg.RVD) illegal_instr = 1'b1; + 3'b010: if (~CVA6Cfg.XF16) illegal_instr = 1'b1; + 3'b110: if (~CVA6Cfg.XF16ALT) illegal_instr = 1'b1; + 3'b011: if (~CVA6Cfg.XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + 5'b10100: begin + instruction_o.op = ariane_pkg::FCMP; // feq/flt/fle.fmt - FP Comparisons + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (CVA6Cfg.XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000 : 3'b010], [3'b100 : 3'b110]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000 : 3'b010]})) illegal_instr = 1'b1; + end + end + 5'b11000: begin + instruction_o.op = ariane_pkg::FCVT_F2I; // fcvt.ifmt.fmt - FP to Int Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if (|instr.rftype.rs2[24:22]) + illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end + 5'b11010: begin + instruction_o.op = ariane_pkg::FCVT_I2F; // fcvt.fmt.ifmt - Int to FP Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if 
(|instr.rftype.rs2[24:22]) + illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end + 5'b11100: begin + instruction_o.rs2[4:0] = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm == 3'b000 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b100)) // FP16ALT has separate encoding + instruction_o.op = ariane_pkg::FMV_F2X; // fmv.ifmt.fmt - FPR to GPR Move + else if (instr.rftype.rm == 3'b001 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b101)) // FP16ALT has separate encoding + instruction_o.op = ariane_pkg::FCLASS; // fclass.fmt - FP Classify + else illegal_instr = 1'b1; + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + 5'b11110: begin + instruction_o.op = ariane_pkg::FMV_X2F; // fmv.fmt.ifmt - GPR to FPR Move + instruction_o.rs2[4:0] = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (!(instr.rftype.rm == 3'b000 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b100))) + illegal_instr = 1'b1; + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + default: illegal_instr = 1'b1; + endcase + + // check format + unique case (instr.rftype.fmt) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~CVA6Cfg.RVF) illegal_instr = 1'b1; + 2'b01: if (~CVA6Cfg.RVD) illegal_instr = 1'b1; + 2'b10: if (~CVA6Cfg.XF16 & ~CVA6Cfg.XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~CVA6Cfg.XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check rounding mode + if (check_fprm) begin + unique case (instr.rftype.rm) inside + [3'b000 : 3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101 + if (~CVA6Cfg.XF16ALT || instr.rftype.fmt != 2'b10) illegal_instr = 1'b1; + unique case (frm_i) inside // actual 
rounding mode from frm csr + [3'b000 : 3'b100]: ; //legal rounding modes + default: illegal_instr = 1'b1; + endcase + end + 3'b111: begin + // rounding mode from frm csr + unique case (frm_i) inside + [3'b000 : 3'b100]: ; //legal rounding modes + default: illegal_instr = 1'b1; + endcase + end + default: illegal_instr = 1'b1; + endcase + end + end else begin + illegal_instr = 1'b1; + end + end + + // ---------------------------------- + // Atomic Operations + // ---------------------------------- + riscv::OpcodeAmo: begin + // we are going to use the load unit for AMOs + instruction_o.fu = STORE; + instruction_o.rs1[4:0] = instr.atype.rs1; + instruction_o.rs2[4:0] = instr.atype.rs2; + instruction_o.rd[4:0] = instr.atype.rd; + // TODO(zarubaf): Ordering + // words + if (CVA6Cfg.RVA && instr.stype.funct3 == 3'h2) begin + unique case (instr.instr[31:27]) + 5'h0: instruction_o.op = ariane_pkg::AMO_ADDW; + 5'h1: instruction_o.op = ariane_pkg::AMO_SWAPW; + 5'h2: begin + instruction_o.op = ariane_pkg::AMO_LRW; + if (instr.atype.rs2 != 0) illegal_instr = 1'b1; + end + 5'h3: instruction_o.op = ariane_pkg::AMO_SCW; + 5'h4: instruction_o.op = ariane_pkg::AMO_XORW; + 5'h8: instruction_o.op = ariane_pkg::AMO_ORW; + 5'hC: instruction_o.op = ariane_pkg::AMO_ANDW; + 5'h10: instruction_o.op = ariane_pkg::AMO_MINW; + 5'h14: instruction_o.op = ariane_pkg::AMO_MAXW; + 5'h18: instruction_o.op = ariane_pkg::AMO_MINWU; + 5'h1C: instruction_o.op = ariane_pkg::AMO_MAXWU; + default: illegal_instr = 1'b1; + endcase + // double words + end else if (riscv::IS_XLEN64 && CVA6Cfg.RVA && instr.stype.funct3 == 3'h3) begin + unique case (instr.instr[31:27]) + 5'h0: instruction_o.op = ariane_pkg::AMO_ADDD; + 5'h1: instruction_o.op = ariane_pkg::AMO_SWAPD; + 5'h2: begin + instruction_o.op = ariane_pkg::AMO_LRD; + if (instr.atype.rs2 != 0) illegal_instr = 1'b1; + end + 5'h3: instruction_o.op = ariane_pkg::AMO_SCD; + 5'h4: instruction_o.op = ariane_pkg::AMO_XORD; + 5'h8: instruction_o.op = 
ariane_pkg::AMO_ORD; + 5'hC: instruction_o.op = ariane_pkg::AMO_ANDD; + 5'h10: instruction_o.op = ariane_pkg::AMO_MIND; + 5'h14: instruction_o.op = ariane_pkg::AMO_MAXD; + 5'h18: instruction_o.op = ariane_pkg::AMO_MINDU; + 5'h1C: instruction_o.op = ariane_pkg::AMO_MAXDU; + default: illegal_instr = 1'b1; + endcase + end else begin + illegal_instr = 1'b1; + end + end + + // -------------------------------- + // Control Flow Instructions + // -------------------------------- + riscv::OpcodeBranch: begin + imm_select = SBIMM; + instruction_o.fu = CTRL_FLOW; + instruction_o.rs1[4:0] = instr.stype.rs1; + instruction_o.rs2[4:0] = instr.stype.rs2; + + is_control_flow_instr_o = 1'b1; + + case (instr.stype.funct3) + 3'b000: instruction_o.op = ariane_pkg::EQ; + 3'b001: instruction_o.op = ariane_pkg::NE; + 3'b100: instruction_o.op = ariane_pkg::LTS; + 3'b101: instruction_o.op = ariane_pkg::GES; + 3'b110: instruction_o.op = ariane_pkg::LTU; + 3'b111: instruction_o.op = ariane_pkg::GEU; + default: begin + is_control_flow_instr_o = 1'b0; + illegal_instr = 1'b1; + end + endcase + end + // Jump and link register + riscv::OpcodeJalr: begin + instruction_o.fu = CTRL_FLOW; + instruction_o.op = ariane_pkg::JALR; + instruction_o.rs1[4:0] = instr.itype.rs1; + imm_select = IIMM; + instruction_o.rd[4:0] = instr.itype.rd; + is_control_flow_instr_o = 1'b1; + // invalid jump and link register -> reserved for vector encoding + if (instr.itype.funct3 != 3'b0) illegal_instr = 1'b1; + end + // Jump and link + riscv::OpcodeJal: begin + instruction_o.fu = CTRL_FLOW; + imm_select = JIMM; + instruction_o.rd[4:0] = instr.utype.rd; + is_control_flow_instr_o = 1'b1; + end + + riscv::OpcodeAuipc: begin + instruction_o.fu = ALU; + imm_select = UIMM; + instruction_o.use_pc = 1'b1; + instruction_o.rd[4:0] = instr.utype.rd; + end + + riscv::OpcodeLui: begin + imm_select = UIMM; + instruction_o.fu = ALU; + instruction_o.rd[4:0] = instr.utype.rd; + end + + default: illegal_instr = 1'b1; + endcase + end + if 
(CVA6Cfg.CvxifEn) begin + if (is_illegal_i || illegal_instr) begin + instruction_o.fu = CVXIF; + instruction_o.rs1[4:0] = instr.r4type.rs1; + instruction_o.rs2[4:0] = instr.r4type.rs2; + instruction_o.rd[4:0] = instr.r4type.rd; + instruction_o.op = ariane_pkg::OFFLOAD; + imm_select = RS3; + end + end + + // Accelerator instructions. + // These can overwrite the previous decoding entirely. + if (CVA6Cfg.EnableAccelerator) begin // only generate decoder if accelerators are enabled (static) + if (is_accel) begin + instruction_o.fu = acc_instruction.fu; + instruction_o.vfp = acc_instruction.vfp; + instruction_o.rs1 = acc_instruction.rs1; + instruction_o.rs2 = acc_instruction.rs2; + instruction_o.rd = acc_instruction.rd; + instruction_o.op = acc_instruction.op; + illegal_instr = acc_illegal_instr; + is_control_flow_instr_o = acc_is_control_flow_instr; + end + end + end + + // -------------------------------- + // Sign extend immediate + // -------------------------------- + always_comb begin : sign_extend + imm_i_type = {{riscv::XLEN - 12{instruction_i[31]}}, instruction_i[31:20]}; // I-type: instruction_i[31:20], sign-extended from bit 31 to XLEN + imm_s_type = {{riscv::XLEN - 12{instruction_i[31]}}, instruction_i[31:25], instruction_i[11:7]}; // S-type: immediate split across [31:25] and [11:7], sign-extended to XLEN + imm_sb_type = { + {riscv::XLEN - 13{instruction_i[31]}}, + instruction_i[31], + instruction_i[7], + instruction_i[30:25], + instruction_i[11:8], + 1'b0 + }; // SB-type (branch): 13-bit offset with bit 0 forced to zero, sign-extended to XLEN + imm_u_type = { + {riscv::XLEN - 32{instruction_i[31]}}, instruction_i[31:12], 12'b0 + }; // U-type (LUI, AUIPC): upper 20 bits, low 12 bits zero, sign-extended to riscv::XLEN bits + imm_uj_type = { + {riscv::XLEN - 20{instruction_i[31]}}, + instruction_i[19:12], + instruction_i[20], + instruction_i[30:21], + 1'b0 + }; // UJ-type (JAL): offset from [19:12], [20], [30:21] with bit 0 forced to zero, sign-extended from bit 31 + imm_bi_type = {{riscv::XLEN - 5{instruction_i[24]}}, instruction_i[24:20]}; // 5-bit field instruction_i[24:20], sign-extended from bit 24; not selected by the case below + + // NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM, RS3 + // select immediate: drives instruction_o.result and instruction_o.use_imm from imm_select + case (imm_select) + IIMM: begin + instruction_o.result = imm_i_type; + instruction_o.use_imm = 1'b1; + end + SIMM: begin + instruction_o.result = imm_s_type; + instruction_o.use_imm = 1'b1; + end + SBIMM: begin + 
instruction_o.result = imm_sb_type; + instruction_o.use_imm = 1'b1; + end + UIMM: begin + instruction_o.result = imm_u_type; + instruction_o.use_imm = 1'b1; + end + JIMM: begin + instruction_o.result = imm_uj_type; + instruction_o.use_imm = 1'b1; + end + RS3: begin + // result holds the zero-extended 5-bit register index instr.r4type.rs3 (third operand); use_imm stays low + instruction_o.result = {{riscv::XLEN - 5{1'b0}}, instr.r4type.rs3}; + instruction_o.use_imm = 1'b0; + end + default: begin // any other imm_select value: zero result, no immediate + instruction_o.result = {riscv::XLEN{1'b0}}; + instruction_o.use_imm = 1'b0; + end + endcase + + if (CVA6Cfg.EnableAccelerator) begin // when is_accel is set, the accelerator decode overrides result and use_imm + if (is_accel) begin + instruction_o.result = acc_instruction.result; + instruction_o.use_imm = acc_instruction.use_imm; + end + end + end + + // --------------------- + // Exception handling + // --------------------- + riscv::xlen_t interrupt_cause; + + // this instruction has already executed if the exception is valid + assign instruction_o.valid = instruction_o.ex.valid; + + always_comb begin : exception_handling + interrupt_cause = '0; + instruction_o.ex = ex_i; + // look if we didn't already get an exception in any previous + // stage - we should not overwrite it as we retain order regarding the exception + if (~ex_i.valid) begin + // if we didn't already get an exception save the instruction here as we may need it + // in the commit stage if we got a access exception to one of the CSR registers + instruction_o.ex.tval = (is_compressed_i) ? 
{{riscv::XLEN-16{1'b0}}, compressed_instr_i} : {{riscv::XLEN-32{1'b0}}, instruction_i}; + // instructions which will throw an exception are marked as valid + // e.g.: they can be committed anytime and do not need to wait for any functional unit + // check here if we decoded an invalid instruction or if the compressed decoder already decoded + // a invalid instruction + if (illegal_instr || is_illegal_i) begin + if (!CVA6Cfg.CvxifEn) instruction_o.ex.valid = 1'b1; + // we decoded an illegal exception here + instruction_o.ex.cause = riscv::ILLEGAL_INSTR; + // we got an ecall, set the correct cause depending on the current privilege level + end else if (ecall) begin + // this exception is valid + instruction_o.ex.valid = 1'b1; + // depending on the privilege mode, set the appropriate cause + if (priv_lvl_i == riscv::PRIV_LVL_S && CVA6Cfg.RVS) begin + instruction_o.ex.cause = riscv::ENV_CALL_SMODE; + end else if (priv_lvl_i == riscv::PRIV_LVL_U && CVA6Cfg.RVU) begin + instruction_o.ex.cause = riscv::ENV_CALL_UMODE; + end else if (priv_lvl_i == riscv::PRIV_LVL_M) begin + instruction_o.ex.cause = riscv::ENV_CALL_MMODE; + end + end else if (ebreak) begin + // this exception is valid + instruction_o.ex.valid = 1'b1; + // set breakpoint cause + instruction_o.ex.cause = riscv::BREAKPOINT; + end + // ----------------- + // Interrupt Control + // ----------------- + // we decode an interrupt the same as an exception, hence it will be taken if the instruction did not + // throw any previous exception. 
+ // we have three interrupt sources: external interrupts, software interrupts, timer interrupts (order of precedence) + // for two privilege levels: Supervisor and Machine Mode + // Supervisor Timer Interrupt + if (irq_ctrl_i.mie[riscv::IRQ_S_TIMER] && irq_ctrl_i.mip[riscv::IRQ_S_TIMER]) begin + interrupt_cause = riscv::S_TIMER_INTERRUPT; + end + // Supervisor Software Interrupt + if (irq_ctrl_i.mie[riscv::IRQ_S_SOFT] && irq_ctrl_i.mip[riscv::IRQ_S_SOFT]) begin + interrupt_cause = riscv::S_SW_INTERRUPT; + end + // Supervisor External Interrupt + // The logical-OR of the software-writable bit and the signal from the external interrupt controller is + // used to generate external interrupts to the supervisor + if (irq_ctrl_i.mie[riscv::IRQ_S_EXT] && (irq_ctrl_i.mip[riscv::IRQ_S_EXT] | irq_i[ariane_pkg::SupervisorIrq])) begin + interrupt_cause = riscv::S_EXT_INTERRUPT; + end + // Machine Timer Interrupt + if (irq_ctrl_i.mip[riscv::IRQ_M_TIMER] && irq_ctrl_i.mie[riscv::IRQ_M_TIMER]) begin + interrupt_cause = riscv::M_TIMER_INTERRUPT; + end + // Machine Mode Software Interrupt + if (irq_ctrl_i.mip[riscv::IRQ_M_SOFT] && irq_ctrl_i.mie[riscv::IRQ_M_SOFT]) begin + interrupt_cause = riscv::M_SW_INTERRUPT; + end + // Machine Mode External Interrupt + if (irq_ctrl_i.mip[riscv::IRQ_M_EXT] && irq_ctrl_i.mie[riscv::IRQ_M_EXT]) begin + interrupt_cause = riscv::M_EXT_INTERRUPT; + end + + if (interrupt_cause[riscv::XLEN-1] && irq_ctrl_i.global_enable) begin + // However, if bit i in mideleg is set, interrupts are considered to be globally enabled if the hart’s current privilege + // mode equals the delegated privilege mode (S or U) and that mode’s interrupt enable bit + // (SIE or UIE in mstatus) is set, or if the current privilege mode is less than the delegated privilege mode. 
+ if (irq_ctrl_i.mideleg[interrupt_cause[$clog2(riscv::XLEN)-1:0]]) begin + if ((CVA6Cfg.RVS && irq_ctrl_i.sie && priv_lvl_i == riscv::PRIV_LVL_S) || (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U)) begin + instruction_o.ex.valid = 1'b1; + instruction_o.ex.cause = interrupt_cause; + end + end else begin + instruction_o.ex.valid = 1'b1; + instruction_o.ex.cause = interrupt_cause; + end + end + end + + // a debug request has precendece over everything else + if (CVA6Cfg.DebugEn && debug_req_i && !debug_mode_i) begin + instruction_o.ex.valid = 1'b1; + instruction_o.ex.cause = riscv::DEBUG_REQUEST; + end + end +endmodule diff --git a/test/type_param/core/ex_stage.sv b/test/type_param/core/ex_stage.sv new file mode 100644 index 0000000..978e2a3 --- /dev/null +++ b/test/type_param/core/ex_stage.sv @@ -0,0 +1,413 @@ + +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.04.2017
+// Description: Instantiation of all functional units residing in the execute stage
+
+
+module ex_stage
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned ASID_WIDTH = 1
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,
+    input logic debug_mode_i,
+
+    input logic [riscv::VLEN-1:0] rs1_forwarding_i,
+    input logic [riscv::VLEN-1:0] rs2_forwarding_i,
+    input fu_data_t fu_data_i,
+    input logic [riscv::VLEN-1:0] pc_i,  // PC of current instruction
+    input logic is_compressed_instr_i,  // we need to know if this was a compressed instruction
+    // in order to calculate the next PC on a mis-predict
+    // Fixed latency unit(s)
+    output riscv::xlen_t flu_result_o,
+    output logic [TRANS_ID_BITS-1:0] flu_trans_id_o,  // ID of scoreboard entry at which to write back
+    output exception_t flu_exception_o,
+    output logic flu_ready_o,  // FLU is ready
+    output logic flu_valid_o,  // FLU result is valid
+    // Branches and Jumps
+    // ALU 1
+    input logic alu_valid_i,  // fu_data_i carries a valid ALU operation
+    // Branch Unit
+    input logic branch_valid_i,  // we are using the branch unit
+    input branchpredict_sbe_t branch_predict_i,
+    output bp_resolve_t resolved_branch_o,  // the branch engine uses the write back from the ALU
+    output logic resolve_branch_o,  // to ID signaling that we resolved the branch
+    // CSR
+    input logic csr_valid_i,
+    output logic [11:0] csr_addr_o,
+    input logic csr_commit_i,
+    // MULT
+    input logic mult_valid_i,  // fu_data_i carries a valid multiplier operation
+    // LSU
+    output logic lsu_ready_o,  // FU is ready
+    input logic lsu_valid_i,  // Input is valid
+
+    output logic load_valid_o,
+    output riscv::xlen_t load_result_o,
+    output logic [TRANS_ID_BITS-1:0] load_trans_id_o,
+    output exception_t load_exception_o,
+    output logic store_valid_o,
+    output riscv::xlen_t store_result_o,
+    output logic [TRANS_ID_BITS-1:0] store_trans_id_o,
+    output exception_t store_exception_o,
+
+    input logic lsu_commit_i,
+    output logic lsu_commit_ready_o,  // commit queue is ready to accept another commit request
+    input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
+    input logic stall_st_pending_i,
+    output logic no_st_pending_o,
+    input logic amo_valid_commit_i,
+    // FPU
+    output logic fpu_ready_o,  // FU is ready
+    input logic fpu_valid_i,  // fu_data_i carries a valid FPU operation
+    input logic [1:0] fpu_fmt_i,  // FP format
+    input logic [2:0] fpu_rm_i,  // FP rm
+    input logic [2:0] fpu_frm_i,  // FP frm csr
+    input logic [6:0] fpu_prec_i,  // FP precision control
+    output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
+    output riscv::xlen_t fpu_result_o,
+    output logic fpu_valid_o,
+    output exception_t fpu_exception_o,
+    // CoreV-X-Interface
+    input logic x_valid_i,
+    output logic x_ready_o,
+    input logic [31:0] x_off_instr_i,
+    output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
+    output exception_t x_exception_o,
+    output riscv::xlen_t x_result_o,
+    output logic x_valid_o,
+    output logic x_we_o,
+    output cvxif_pkg::cvxif_req_t cvxif_req_o,
+    input cvxif_pkg::cvxif_resp_t cvxif_resp_i,
+    input logic acc_valid_i,  // accelerator op valid; here it only feeds the branch unit's fu_valid_i
+    // Memory Management
+    input logic enable_translation_i,
+    input logic en_ld_st_translation_i,
+    input logic flush_tlb_i,
+
+    input riscv::priv_lvl_t priv_lvl_i,
+    input riscv::priv_lvl_t ld_st_priv_lvl_i,
+    input logic sum_i,
+    input logic mxr_i,
+    input logic [riscv::PPNW-1:0] satp_ppn_i,
+    input logic [ASID_WIDTH-1:0] asid_i,
+    // icache translation requests
+    input icache_arsp_t icache_areq_i,
+    output icache_areq_t icache_areq_o,
+
+    // interface to dcache
+    input dcache_req_o_t [2:0] dcache_req_ports_i,
+    output dcache_req_i_t [2:0] dcache_req_ports_o,
+    input logic dcache_wbuffer_empty_i,
+    input logic dcache_wbuffer_not_ni_i,
+    output amo_req_t amo_req_o,  // request to cache subsystem
+    input amo_resp_t amo_resp_i,  // response from cache subsystem
+    // Performance counters
+    output logic itlb_miss_o,
+    output logic dtlb_miss_o,
+    // PMPs
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
+
+    // RVFI
+    output lsu_ctrl_t rvfi_lsu_ctrl_o,
+    output [riscv::PLEN-1:0] rvfi_mem_paddr_o
+);
+
+  // -------------------------
+  // Fixed Latency Units
+  // -------------------------
+  // all fixed latency units share a single issue port and a single write
+  // port into the scoreboard. At the moment those are:
+  // 1. ALU - all operations are single cycle
+  // 2. Branch unit: operation is single cycle, the ALU is needed
+  //    for comparison
+  // 3. CSR: This is a small buffer which saves the address of the CSR.
+  //    The value is then re-fetched once the instruction retires. The buffer
+  //    is only a single entry deep, hence this operation will block all
+  //    other operations once this buffer is full. This should not be a major
+  //    concern though as CSRs are infrequent.
+  // 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle.
+  //    The issue logic will take care of not issuing
+  //    another instruction if it will collide on the
+  //    output port. Divisions are arbitrary in length
+  //    they will simply block the issue of all other
+  //    instructions.
+
+
+  logic current_instruction_is_sfence_vma;
+  // These two registers store the rs1 and rs2 parameters in case of `SFENCE_VMA`
+  // instruction to be used for TLB flush in the next clock cycle.
+  logic [ASID_WIDTH-1:0] asid_to_be_flushed;
+  logic [riscv::VLEN-1:0] vaddr_to_be_flushed;
+
+  // from ALU to branch unit
+  logic alu_branch_res;  // branch comparison result
+  riscv::xlen_t alu_result, csr_result, mult_result;
+  logic [riscv::VLEN-1:0] branch_result;
+  logic csr_ready, mult_ready;
+  logic [TRANS_ID_BITS-1:0] mult_trans_id;
+  logic mult_valid;
+
+  // 1. ALU (combinatorial)
+  // data silence operation
+  fu_data_t alu_data;
+  assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0;
+
+  alu #(
+      .CVA6Cfg(CVA6Cfg)
+  ) alu_i (
+      .clk_i,
+      .rst_ni,
+      .fu_data_i       (alu_data),
+      .result_o        (alu_result),
+      .alu_branch_res_o(alu_branch_res)
+  );
+
+  // 2. Branch Unit (combinatorial)
+  // we don't silence the branch unit as this is already critical and we do
+  // not want to add another layer of logic
+  branch_unit #(
+      .CVA6Cfg(CVA6Cfg)
+  ) branch_unit_i (
+      .clk_i,
+      .rst_ni,
+      .debug_mode_i,
+      .fu_data_i,
+      .pc_i,
+      .is_compressed_instr_i,
+      // any functional unit is valid, check that there is no accidental mis-predict
+      .fu_valid_i(alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i),
+      .branch_valid_i,
+      .branch_comp_res_i(alu_branch_res),
+      .branch_result_o(branch_result),
+      .branch_predict_i,
+      .resolved_branch_o,
+      .resolve_branch_o,
+      .branch_exception_o(flu_exception_o)
+  );
+
+  // 3. CSR (sequential)
+  csr_buffer #(
+      .CVA6Cfg(CVA6Cfg)
+  ) csr_buffer_i (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .fu_data_i,
+      .csr_valid_i,
+      .csr_ready_o (csr_ready),
+      .csr_result_o(csr_result),
+      .csr_commit_i,
+      .csr_addr_o
+  );
+
+  assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid;
+
+  // result MUX
+  always_comb begin
+    // Branch result as default case
+    flu_result_o = {{riscv::XLEN - riscv::VLEN{1'b0}}, branch_result};
+    flu_trans_id_o = fu_data_i.trans_id;
+    // ALU result
+    if (alu_valid_i) begin
+      flu_result_o = alu_result;
+      // CSR result
+    end else if (csr_valid_i) begin
+      flu_result_o = csr_result;
+    end else if (mult_valid) begin
+      flu_result_o = mult_result;
+      flu_trans_id_o = mult_trans_id;
+    end
+  end
+
+  // ready flags for FLU
+  always_comb begin
+    flu_ready_o = csr_ready & mult_ready;
+  end
+
+  // 4. Multiplication (Sequential)
+  fu_data_t mult_data;
+  // input silencing of multiplier
+  assign mult_data = mult_valid_i ? fu_data_i : '0;
+
+  mult #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_mult (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .mult_valid_i,
+      .fu_data_i      (mult_data),
+      .result_o       (mult_result),
+      .mult_valid_o   (mult_valid),
+      .mult_ready_o   (mult_ready),
+      .mult_trans_id_o(mult_trans_id)
+  );
+
+  // ----------------
+  // FPU
+  // ----------------
+  generate
+    if (CVA6Cfg.FpPresent) begin : fpu_gen
+      fu_data_t fpu_data;
+      assign fpu_data = fpu_valid_i ? fu_data_i : '0;
+
+      fpu_wrap #(
+          .CVA6Cfg(CVA6Cfg)
+      ) fpu_i (
+          .clk_i,
+          .rst_ni,
+          .flush_i,
+          .fpu_valid_i,
+          .fpu_ready_o,
+          .fu_data_i(fpu_data),
+          .fpu_fmt_i,
+          .fpu_rm_i,
+          .fpu_frm_i,
+          .fpu_prec_i,
+          .fpu_trans_id_o,
+          .result_o (fpu_result_o),
+          .fpu_valid_o,
+          .fpu_exception_o
+      );
+    end else begin : no_fpu_gen
+      assign fpu_ready_o = '0;
+      assign fpu_trans_id_o = '0;
+      assign fpu_result_o = '0;
+      assign fpu_valid_o = '0;
+      assign fpu_exception_o = '0;
+    end
+  endgenerate
+
+  // ----------------
+  // Load-Store Unit
+  // ----------------
+  fu_data_t lsu_data;
+
+  assign lsu_data = lsu_valid_i ? fu_data_i : '0;
+
+  load_store_unit #(
+      .CVA6Cfg   (CVA6Cfg),
+      .ASID_WIDTH(ASID_WIDTH)
+  ) lsu_i (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .stall_st_pending_i,
+      .no_st_pending_o,
+      .fu_data_i            (lsu_data),
+      .lsu_ready_o,
+      .lsu_valid_i,
+      .load_trans_id_o,
+      .load_result_o,
+      .load_valid_o,
+      .load_exception_o,
+      .store_trans_id_o,
+      .store_result_o,
+      .store_valid_o,
+      .store_exception_o,
+      .commit_i             (lsu_commit_i),
+      .commit_ready_o       (lsu_commit_ready_o),
+      .commit_tran_id_i,
+      .enable_translation_i,
+      .en_ld_st_translation_i,
+      .icache_areq_i,
+      .icache_areq_o,
+      .priv_lvl_i,
+      .ld_st_priv_lvl_i,
+      .sum_i,
+      .mxr_i,
+      .satp_ppn_i,
+      .asid_i,
+      .asid_to_be_flushed_i (asid_to_be_flushed),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed),
+      .flush_tlb_i,
+      .itlb_miss_o,
+      .dtlb_miss_o,
+      .dcache_req_ports_i,
+      .dcache_req_ports_o,
+      .dcache_wbuffer_empty_i,
+      .dcache_wbuffer_not_ni_i,
+      .amo_valid_commit_i,
+      .amo_req_o,
+      .amo_resp_i,
+      .pmpcfg_i,
+      .pmpaddr_i,
+      .rvfi_lsu_ctrl_o,
+      .rvfi_mem_paddr_o
+  );
+
+  if (CVA6Cfg.CvxifEn) begin : gen_cvxif
+    fu_data_t cvxif_data;
+    assign cvxif_data = x_valid_i ? fu_data_i : '0;
+    cvxif_fu #(
+        .CVA6Cfg(CVA6Cfg)
+    ) cvxif_fu_i (
+        .clk_i,
+        .rst_ni,
+        .fu_data_i (cvxif_data),  // silenced operands, as for ALU/MULT/FPU/LSU (was raw fu_data_i)
+        .priv_lvl_i(ld_st_priv_lvl_i),
+        .x_valid_i,
+        .x_ready_o,
+        .x_off_instr_i,
+        .x_trans_id_o,
+        .x_exception_o,
+        .x_result_o,
+        .x_valid_o,
+        .x_we_o,
+        .cvxif_req_o,
+        .cvxif_resp_i
+    );
+  end else begin : gen_no_cvxif
+    assign cvxif_req_o = '0;
+    assign x_trans_id_o = '0;
+    assign x_exception_o = '0;
+    assign x_result_o = '0;
+    assign {x_valid_o, x_ready_o, x_we_o} = '0;  // tie off every X-interface status output
+  end
+
+  if (CVA6Cfg.RVS) begin  // SFENCE_VMA operand capture, tied to zero in the else branch
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        current_instruction_is_sfence_vma <= 1'b0;
+      end else begin
+        if (flush_i) begin
+          current_instruction_is_sfence_vma <= 1'b0;
+        end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin
+          current_instruction_is_sfence_vma <= 1'b1;
+        end
+      end
+    end
+
+    // This process stores the rs1 and rs2 parameters of a SFENCE_VMA instruction.
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        asid_to_be_flushed <= '0;
+        vaddr_to_be_flushed <= '0;
+        // if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen
+      end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin
+        vaddr_to_be_flushed <= rs1_forwarding_i;
+        asid_to_be_flushed <= rs2_forwarding_i[ASID_WIDTH-1:0];
+      end
+    end
+  end else begin
+    assign current_instruction_is_sfence_vma = 1'b0;
+    assign asid_to_be_flushed = '0;
+    assign vaddr_to_be_flushed = '0;
+  end
+
+endmodule
diff --git a/test/type_param/core/fpu_wrap.sv b/test/type_param/core/fpu_wrap.sv
new file mode 100644
index 0000000..9219029
--- /dev/null
+++ b/test/type_param/core/fpu_wrap.sv
@@ -0,0 +1,568 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Stefan Mach, ETH Zurich
+// Date: 12.04.2018
+// Description: Wrapper for the floating-point unit
+
+
+module fpu_wrap
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,  // forces the input FSM back to READY and is forwarded to fpnew_top
+    input logic fpu_valid_i,
+    output logic fpu_ready_o,
+    input fu_data_t fu_data_i,  // operand_a/operand_b/imm become FPU operands A/B/C
+
+    input logic [1:0] fpu_fmt_i,  // selects the destination FP format
+    input logic [2:0] fpu_rm_i,  // rounding mode; values outside 000-100 fall back to fpu_frm_i
+    input logic [2:0] fpu_frm_i,  // rounding mode used for vectorial ops and as fallback
+    input logic [6:0] fpu_prec_i,  // not referenced inside this wrapper
+    output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
+    output logic [CVA6Cfg.FLen-1:0] result_o,
+    output logic fpu_valid_o,
+    output exception_t fpu_exception_o  // cause carries the FPU status flags, valid is always 0
+);
+
+  // this is a workaround
+  // otherwise compilation might issue an error if FLEN=0
+  enum logic {
+    READY,
+    STALL
+  }
+      state_q, state_d;
+  if (CVA6Cfg.FpPresent) begin : fpu_gen
+    logic [CVA6Cfg.FLen-1:0] operand_a_i;
+    logic [CVA6Cfg.FLen-1:0] operand_b_i;
+    logic [CVA6Cfg.FLen-1:0] operand_c_i;
+    assign operand_a_i = fu_data_i.operand_a[CVA6Cfg.FLen-1:0];
+    assign operand_b_i = fu_data_i.operand_b[CVA6Cfg.FLen-1:0];
+    assign operand_c_i = fu_data_i.imm[CVA6Cfg.FLen-1:0];
+
+    //-----------------------------------
+    // FPnew config from FPnew package
+    //-----------------------------------
+    localparam OPBITS = fpnew_pkg::OP_BITS;
+    localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
+    localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
+
+    // Features (enabled formats, vectors etc.)
+    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
+        Width: unsigned'(riscv::XLEN),  // parameterized using XLEN
+        EnableVectors: CVA6Cfg.XFVec,
+        EnableNanBox: 1'b1,
+        FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
+        IntFmtMask: {
+          CVA6Cfg.XFVec && CVA6Cfg.XF8,
+          CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
+          1'b1,
+          1'b1
+        }
+    };
+
+    // Implementation (number of registers etc)
+    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
+        PipeRegs: '{  // FP32, FP64, FP16, FP8, FP16alt
+            '{
+                unsigned'(LAT_COMP_FP32),
+                unsigned'(LAT_COMP_FP64),
+                unsigned'(LAT_COMP_FP16),
+                unsigned'(LAT_COMP_FP8),
+                unsigned'(LAT_COMP_FP16ALT)
+            },  // ADDMUL
+            '{default: unsigned'(LAT_DIVSQRT)},  // DIVSQRT
+            '{default: unsigned'(LAT_NONCOMP)},  // NONCOMP
+            '{default: unsigned'(LAT_CONV)}  // CONV
+        },
+        UnitTypes: '{
+            '{default: fpnew_pkg::PARALLEL},  // ADDMUL
+            '{default: fpnew_pkg::MERGED},  // DIVSQRT
+            '{default: fpnew_pkg::PARALLEL},  // NONCOMP
+            '{default: fpnew_pkg::MERGED}  // CONV
+        },
+        PipeConfig: fpnew_pkg::DISTRIBUTED
+    };
+
+    //-------------------------------------------------
+    // Inputs to the FPU and protocol inversion buffer
+    //-------------------------------------------------
+    logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
+    logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
+    logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
+    logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
+    logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
+    logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
+    logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
+    logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
+    logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
+    logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
+
+    logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;
+
+    logic fpu_in_ready, fpu_in_valid;
+    logic fpu_out_ready, fpu_out_valid;
+
+    logic [4:0] fpu_status;
+
+    // FSM to handle protocol inversion
+    logic hold_inputs;
+    logic use_hold;
+
+    //-----------------------------
+    // Translate inputs
+    //-----------------------------
+
+    always_comb begin : input_translation
+
+      automatic logic vec_replication;  // control honoring of replication flag
+      automatic logic replicate_c;  // replicate operand C instead of B (for ADD/SUB)
+      automatic logic check_ah;  // Decide for AH from RM field encoding
+
+      // Default Values
+      operand_a_d = operand_a_i;
+      operand_b_d = operand_b_i;  // immediates come through this port unless used as operand
+      operand_c_d = operand_c_i;  // immediates come through this port unless used as operand
+      fpu_op_d = fpnew_pkg::SGNJ;  // sign injection by default
+      fpu_op_mod_d = 1'b0;
+      fpu_dstfmt_d = fpnew_pkg::FP32;
+      fpu_ifmt_d = fpnew_pkg::INT32;
+      fpu_rm_d = fpu_rm_i;
+      fpu_vec_op_d = fu_data_i.fu == FPU_VEC;
+      fpu_tag_d = fu_data_i.trans_id;
+      vec_replication = fpu_rm_i[0];  // replication bit is sent via rm field
+      replicate_c = 1'b0;
+      check_ah = 1'b0;  // whether set scalar AH encoding from MSB of rm_i
+
+      // Scalar Rounding Modes - some ops encode inside RM but use smaller range
+      if (!(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i;
+
+      // Vectorial ops always consult FRM
+      if (fpu_vec_op_d) fpu_rm_d = fpu_frm_i;
+
+      // Formats
+      unique case (fpu_fmt_i)
+        // FP32
+        2'b00: fpu_dstfmt_d = fpnew_pkg::FP32;
+        // FP64 or FP16ALT (vectorial)
+        2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
+        // FP16 or FP16ALT (scalar)
+        2'b10: begin
+          if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
+          else fpu_dstfmt_d = fpnew_pkg::FP16;
+        end
+        // FP8
+        default: fpu_dstfmt_d = fpnew_pkg::FP8;
+      endcase
+
+      // By default, set src=dst
+      fpu_srcfmt_d = fpu_dstfmt_d;
+
+      // Operations (this can modify the rounding mode field and format!)
+      unique case (fu_data_i.operation)
+        // Addition
+        FADD: begin
+          fpu_op_d = fpnew_pkg::ADD;
+          replicate_c = 1'b1;  // second operand is in C
+        end
+        // Subtraction is modified ADD
+        FSUB: begin
+          fpu_op_d = fpnew_pkg::ADD;
+          fpu_op_mod_d = 1'b1;
+          replicate_c = 1'b1;  // second operand is in C
+        end
+        // Multiplication
+        FMUL: fpu_op_d = fpnew_pkg::MUL;
+        // Division
+        FDIV: fpu_op_d = fpnew_pkg::DIV;
+        // Min/Max - OP is encoded in rm (000-001)
+        FMIN_MAX: begin
+          fpu_op_d = fpnew_pkg::MINMAX;
+          fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
+          check_ah = 1'b1;  // AH has RM MSB encoding
+        end
+        // Square Root
+        FSQRT: fpu_op_d = fpnew_pkg::SQRT;
+        // Fused Multiply Add
+        FMADD: fpu_op_d = fpnew_pkg::FMADD;
+        // Fused Multiply Subtract is modified FMADD
+        FMSUB: begin
+          fpu_op_d = fpnew_pkg::FMADD;
+          fpu_op_mod_d = 1'b1;
+        end
+        // Fused Negated Multiply Subtract
+        FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB;
+        // Fused Negated Multiply Add is modified FNMSUB
+        FNMADD: begin
+          fpu_op_d = fpnew_pkg::FNMSUB;
+          fpu_op_mod_d = 1'b1;
+        end
+        // Float to Int Cast - Op encoded in lowest two imm bits or rm
+        FCVT_F2I: begin
+          fpu_op_d = fpnew_pkg::F2I;
+          // Vectorial Ops encoded in R bit
+          if (fpu_vec_op_d) begin
+            fpu_op_mod_d = fpu_rm_i[0];
+            vec_replication = 1'b0;  // no replication, R bit used for op
+            unique case (fpu_fmt_i)
+              2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
+              2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
+              2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
+            endcase
+            // Scalar casts encoded in imm
+          end else begin
+            fpu_op_mod_d = operand_c_i[0];
+            if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
+            else fpu_ifmt_d = fpnew_pkg::INT32;
+          end
+        end
+        // Int to Float Cast - Op encoded in lowest two imm bits or rm
+        FCVT_I2F: begin
+          fpu_op_d = fpnew_pkg::I2F;
+          // Vectorial Ops encoded in R bit
+          if (fpu_vec_op_d) begin
+            fpu_op_mod_d = fpu_rm_i[0];
+            vec_replication = 1'b0;  // no replication, R bit used for op
+            unique case (fpu_fmt_i)
+              2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
+              2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
+              2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
+            endcase
+            // Scalar casts encoded in imm
+          end else begin
+            fpu_op_mod_d = operand_c_i[0];
+            if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
+            else fpu_ifmt_d = fpnew_pkg::INT32;
+          end
+        end
+        // Float to Float Cast - Source format encoded in lowest two/three imm bits
+        FCVT_F2F: begin
+          fpu_op_d = fpnew_pkg::F2F;
+          // Vectorial ops encoded in lowest two imm bits
+          if (fpu_vec_op_d) begin
+            vec_replication = 1'b0;  // no replication for casts (not needed)
+            unique case (operand_c_i[1:0])
+              2'b00: fpu_srcfmt_d = fpnew_pkg::FP32;
+              2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
+              2'b10: fpu_srcfmt_d = fpnew_pkg::FP16;
+              2'b11: fpu_srcfmt_d = fpnew_pkg::FP8;
+            endcase
+            // Scalar ops encoded in lowest three imm bits
+          end else begin
+            unique case (operand_c_i[2:0])
+              3'b000: fpu_srcfmt_d = fpnew_pkg::FP32;
+              3'b001: fpu_srcfmt_d = fpnew_pkg::FP64;
+              3'b010: fpu_srcfmt_d = fpnew_pkg::FP16;
+              3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
+              3'b011: fpu_srcfmt_d = fpnew_pkg::FP8;
+              default: ;  // Do nothing
+            endcase
+          end
+        end
+        // Scalar Sign Injection - op encoded in rm (000-010)
+        FSGNJ: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
+          check_ah = 1'b1;  // AH has RM MSB encoding
+        end
+        // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
+        FMV_F2X: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = 3'b011;  // passthrough without checking nan-box
+          fpu_op_mod_d = 1'b1;  // no NaN-Boxing
+          check_ah = 1'b1;  // AH has RM MSB encoding
+          vec_replication = 1'b0;  // no replication, we set second operand
+        end
+        // Move from GPR to FPR - mapped to NOP since no recoding
+        FMV_X2F: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = 3'b011;  // passthrough without checking nan-box
+          check_ah = 1'b1;  // AH has RM MSB encoding
+          vec_replication = 1'b0;  // no replication, we set second operand
+        end
+        // Scalar Comparisons - op encoded in rm (000-010)
+        FCMP: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
+          check_ah = 1'b1;  // AH has RM MSB encoding
+        end
+        // Classification
+        FCLASS: begin
+          fpu_op_d = fpnew_pkg::CLASSIFY;
+          fpu_rm_d = {
+            1'b0, fpu_rm_i[1:0]
+          };  // mask out AH encoding bit - CLASS doesn't care anyways
+          check_ah = 1'b1;  // AH has RM MSB encoding
+        end
+        // Vectorial Minimum - set up scalar encoding in rm
+        VFMIN: begin
+          fpu_op_d = fpnew_pkg::MINMAX;
+          fpu_rm_d = 3'b000;  // min
+        end
+        // Vectorial Maximum - set up scalar encoding in rm
+        VFMAX: begin
+          fpu_op_d = fpnew_pkg::MINMAX;
+          fpu_rm_d = 3'b001;  // max
+        end
+        // Vectorial Sign Injection - set up scalar encoding in rm
+        VFSGNJ: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = 3'b000;  // sgnj
+        end
+        // Vectorial Negated Sign Injection - set up scalar encoding in rm
+        VFSGNJN: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = 3'b001;  // sgnjn
+        end
+        // Vectorial Xored Sign Injection - set up scalar encoding in rm
+        VFSGNJX: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = 3'b010;  // sgnjx
+        end
+        // Vectorial Equals - set up scalar encoding in rm
+        VFEQ: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_rm_d = 3'b010;  // eq
+        end
+        // Vectorial Not Equals - set up scalar encoding in rm
+        VFNE: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_op_mod_d = 1'b1;  // invert output
+          fpu_rm_d = 3'b010;  // eq
+        end
+        // Vectorial Less Than - set up scalar encoding in rm
+        VFLT: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_rm_d = 3'b001;  // lt
+        end
+        // Vectorial Greater or Equal - set up scalar encoding in rm
+        VFGE: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_op_mod_d = 1'b1;  // invert output
+          fpu_rm_d = 3'b001;  // lt
+        end
+        // Vectorial Less or Equal - set up scalar encoding in rm
+        VFLE: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_rm_d = 3'b000;  // le
+        end
+        // Vectorial Greater Than - set up scalar encoding in rm
+        VFGT: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_op_mod_d = 1'b1;  // invert output
+          fpu_rm_d = 3'b000;  // le
+        end
+        // Vectorial Convert-and-Pack from FP32, lower 4 entries
+        VFCPKAB_S: begin
+          fpu_op_d = fpnew_pkg::CPKAB;
+          fpu_op_mod_d = fpu_rm_i[0];  // A/B selection from R bit
+          vec_replication = 1'b0;  // no replication, R bit used for op
+          fpu_srcfmt_d = fpnew_pkg::FP32;  // Cast from FP32
+        end
+        // Vectorial Convert-and-Pack from FP32, upper 4 entries
+        VFCPKCD_S: begin
+          fpu_op_d = fpnew_pkg::CPKCD;
+          fpu_op_mod_d = fpu_rm_i[0];  // C/D selection from R bit
+          vec_replication = 1'b0;  // no replication, R bit used for op
+          fpu_srcfmt_d = fpnew_pkg::FP32;  // Cast from FP32
+        end
+        // Vectorial Convert-and-Pack from FP64, lower 4 entries
+        VFCPKAB_D: begin
+          fpu_op_d = fpnew_pkg::CPKAB;
+          fpu_op_mod_d = fpu_rm_i[0];  // A/B selection from R bit
+          vec_replication = 1'b0;  // no replication, R bit used for op
+          fpu_srcfmt_d = fpnew_pkg::FP64;  // Cast from FP64
+        end
+        // Vectorial Convert-and-Pack from FP64, upper 4 entries
+        VFCPKCD_D: begin
+          fpu_op_d = fpnew_pkg::CPKCD;
+          fpu_op_mod_d = fpu_rm_i[0];  // C/D selection from R bit
+          vec_replication = 1'b0;  // no replication, R bit used for op
+          fpu_srcfmt_d = fpnew_pkg::FP64;  // Cast from FP64
+        end
+        // No changes per default
+        default: ;  //nothing
+      endcase
+
+      // Scalar AH encoding fixing
+      if (!fpu_vec_op_d && check_ah) if (fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
+
+      // Replication
+      if (fpu_vec_op_d && vec_replication) begin
+        if (replicate_c) begin
+          unique case (fpu_dstfmt_d)
+            fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
+            fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
+            operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
+            fpnew_pkg::FP8:
+            operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
+            default: ;  // Do nothing
+          endcase  // fpu_dstfmt_d
+        end else begin
+          unique case (fpu_dstfmt_d)
+            fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
+            fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
+            operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
+            fpnew_pkg::FP8:
+            operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
+            default: ;  // Do nothing
+          endcase  // fpu_dstfmt_d
+        end
+      end
+    end
+
+
+    //---------------------------------------------------------
+    // Upstream protocol inversion: InValid depends on InReady
+    //---------------------------------------------------------
+
+    always_comb begin : p_inputFSM
+      // Default Values
+      fpu_ready_o = 1'b0;
+      fpu_in_valid = 1'b0;
+      hold_inputs = 1'b0;  // hold register disabled
+      use_hold = 1'b0;  // inputs go directly to unit
+      state_d = state_q;  // stay in the same state
+
+      // FSM
+      unique case (state_q)
+        // Default state, ready for instructions
+        READY: begin
+          fpu_ready_o = 1'b1;  // Act as if FPU ready
+          fpu_in_valid = fpu_valid_i;  // Forward input valid to FPU
+          // There is a transaction but the FPU can't handle it
+          if (fpu_valid_i & ~fpu_in_ready) begin
+            fpu_ready_o = 1'b0;  // No token given to Issue
+            hold_inputs = 1'b1;  // save inputs to the holding register
+            state_d = STALL;  // stall future incoming requests
+          end
+        end
+        // We're stalling the upstream (ready=0)
+        STALL: begin
+          fpu_in_valid = 1'b1;  // we have data for the FPU
+          use_hold = 1'b1;  // the data comes from the hold reg
+          // Wait until it's consumed
+          if (fpu_in_ready) begin
+            fpu_ready_o = 1'b1;  // Give a token to issue
+            state_d = READY;  // accept future requests
+          end
+        end
+        // Default: emit default values
+        default: ;
+      endcase
+
+      // Flushing will override issue and go back to idle
+      if (flush_i) begin
+        state_d = READY;
+      end
+
+    end
+
+    // Buffer register and FSM state holding
+    always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
+      if (~rst_ni) begin
+        state_q <= READY;
+        operand_a_q <= '0;
+        operand_b_q <= '0;
+        operand_c_q <= '0;
+        fpu_op_q <= '0;
+        fpu_op_mod_q <= '0;
+        fpu_srcfmt_q <= '0;
+        fpu_dstfmt_q <= '0;
+        fpu_ifmt_q <= '0;
+        fpu_rm_q <= '0;
+        fpu_vec_op_q <= '0;
+        fpu_tag_q <= '0;
+      end else begin
+        state_q <= state_d;
+        // Hold register is [TRIGGERED] by FSM
+        if (hold_inputs) begin
+          operand_a_q <= operand_a_d;
+          operand_b_q <= operand_b_d;
+          operand_c_q <= operand_c_d;
+          fpu_op_q <= fpu_op_d;
+          fpu_op_mod_q <= fpu_op_mod_d;
+          fpu_srcfmt_q <= fpu_srcfmt_d;
+          fpu_dstfmt_q <= fpu_dstfmt_d;
+          fpu_ifmt_q <= fpu_ifmt_d;
+          fpu_rm_q <= fpu_rm_d;
+          fpu_vec_op_q <= fpu_vec_op_d;
+          fpu_tag_q <= fpu_tag_d;
+        end
+      end
+    end
+
+    // Select FPU input data: from register if valid data in register, else directly from input
+    assign operand_a = use_hold ? operand_a_q : operand_a_d;
+    assign operand_b = use_hold ? operand_b_q : operand_b_d;
+    assign operand_c = use_hold ? operand_c_q : operand_c_d;
+    assign fpu_op = use_hold ? fpu_op_q : fpu_op_d;
+    assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
+    assign fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d;
+    assign fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d;
+    assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
+    assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d;
+    assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
+    assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d;
+
+    // Consolidate operands
+    logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands;
+
+    assign fpu_operands[0] = operand_a;
+    assign fpu_operands[1] = operand_b;
+    assign fpu_operands[2] = operand_c;
+
+    //---------------
+    // FPU instance
+    //---------------
+
+    fpnew_top #(
+        .Features      (FPU_FEATURES),
+        .Implementation(FPU_IMPLEMENTATION),
+        .TagType       (logic [TRANS_ID_BITS-1:0])
+    ) i_fpnew_bulk (
+        .clk_i,
+        .rst_ni,
+        .operands_i    (fpu_operands),
+        .rnd_mode_i    (fpnew_pkg::roundmode_e'(fpu_rm)),
+        .op_i          (fpnew_pkg::operation_e'(fpu_op)),
+        .op_mod_i      (fpu_op_mod),
+        .src_fmt_i     (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
+        .dst_fmt_i     (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
+        .int_fmt_i     (fpnew_pkg::int_format_e'(fpu_ifmt)),
+        .vectorial_op_i(fpu_vec_op),
+        .tag_i         (fpu_tag),
+        .simd_mask_i   (1'b1),  // NOTE(review): zero-extended if the mask port is wider than 1 bit - confirm for XFVec
+        .in_valid_i    (fpu_in_valid),
+        .in_ready_o    (fpu_in_ready),
+        .flush_i,
+        .result_o,
+        .status_o      (fpu_status),
+        .tag_o         (fpu_trans_id_o),
+        .out_valid_o   (fpu_out_valid),
+        .out_ready_i   (fpu_out_ready),
+        .busy_o        ( /* unused */)
+    );
+
+    // Pack status flags into an XLEN-wide exception cause; tval is ignored in wb and tied to zero, exception is always invalid
+    assign fpu_exception_o.cause = {{(riscv::XLEN - 5) {1'b0}}, fpu_status};
+    assign {fpu_exception_o.tval, fpu_exception_o.valid} = '0;
+
+    // Downstream write port is dedicated to FPU and always ready
+    assign fpu_out_ready = 1'b1;
+
+    // Downstream valid from unit
+    assign fpu_valid_o = fpu_out_valid;
+
+  end
+endmodule
diff --git a/test/type_param/core/frontend/bht.sv b/test/type_param/core/frontend/bht.sv
new file mode 100644
index 0000000..bcfb78c
--- /dev/null
+++ b/test/type_param/core/frontend/bht.sv
@@ -0,0 +1,215 @@
+// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
+// Copyright 2023 - Thales for additionnal conribution.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 2.0 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 +// FPGA optimization: Sebastien Jacq, Thales +// Date: 2023-01-30 + +// branch history table - 2 bit saturation counter + +module bht #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NR_ENTRIES = 1024 +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic debug_mode_i, + input logic [ riscv::VLEN-1:0] vpc_i, + input ariane_pkg::bht_update_t bht_update_i, + // we potentially need INSTR_PER_FETCH predictions/cycle + output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o +); + // the last bit is always zero, we don't need it for indexing + localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2; + // re-shape the branch history table + localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH; + // number of bits needed to index the row + localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); + localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? 
$clog2(ariane_pkg::INSTR_PER_FETCH) : 1; + // number of bits we should use for prediction + localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; + // we are not interested in all bits of the address + unread i_unread (.d_i(|vpc_i)); + + struct packed { + logic valid; + logic [1:0] saturation_counter; + } + bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], + bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0]; + + logic [$clog2(NR_ROWS)-1:0] index, update_pc; + logic [ROW_INDEX_BITS-1:0] update_row_index; + + assign index = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET]; + assign update_pc = bht_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET]; + if (CVA6Cfg.RVC) begin : gen_update_row_index + assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET]; + end else begin + assign update_row_index = '0; + end + + if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht // ASIC TARGET + + logic [1:0] saturation_counter; + // prediction assignment + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output + assign bht_prediction_o[i].valid = bht_q[index][i].valid; + assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1; + end + + always_comb begin : update_bht + bht_d = bht_q; + saturation_counter = bht_q[update_pc][update_row_index].saturation_counter; + + if ((bht_update_i.valid && CVA6Cfg.DebugEn && !debug_mode_i) || (bht_update_i.valid && !CVA6Cfg.DebugEn)) begin + bht_d[update_pc][update_row_index].valid = 1'b1; + + if (saturation_counter == 2'b11) begin + // we can safely decrease it + if (!bht_update_i.taken) + bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1; + // then check if it saturated in the negative regime e.g.: branch not taken + end else if (saturation_counter == 2'b00) begin + // we can safely increase it + if (bht_update_i.taken) + bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1; + end else begin // otherwise we are 
not in any boundaries and can decrease or increase it + if (bht_update_i.taken) + bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1; + else bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1; + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + for (int unsigned i = 0; i < NR_ROWS; i++) begin + for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + bht_q[i][j] <= '0; + end + end + end else begin + // evict all entries + if (flush_i) begin + for (int i = 0; i < NR_ROWS; i++) begin + for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + bht_q[i][j].valid <= 1'b0; + bht_q[i][j].saturation_counter <= 2'b10; + end + end + end else begin + bht_q <= bht_d; + end + end + end + + end else begin : gen_fpga_bht //FPGA TARGETS + + // number of bits par word in the bram + localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t); + logic [ ROW_INDEX_BITS-1:0] row_index; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1; + + ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht; + ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated; + + if (CVA6Cfg.RVC) begin : gen_row_index + assign row_index = vpc_i[ROW_ADDR_BITS+OFFSET-1:OFFSET]; + end else begin + assign row_index = '0; + end + + // ------------------------- + // prediction assignment & update Branch History Table + // ------------------------- + always_comb begin : prediction_update_bht + bht_ram_we = '0; + bht_ram_read_address_0 = '0; + 
bht_ram_read_address_1 = '0; + bht_ram_write_address = '0; + bht_ram_wdata = '0; + bht_updated = '0; + bht = '0; + + for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + if (row_index == i) begin + bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index; + bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2]; + bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1]; + end + end + + if (bht_update_i.valid && !debug_mode_i) begin + for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + if (update_row_index == i) begin + bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc; + bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS+:2]; + + if (bht[i].saturation_counter == 2'b11) begin + // we can safely decrease it + if (!bht_update_i.taken) + bht_updated[i].saturation_counter = bht[i].saturation_counter - 1; + else bht_updated[i].saturation_counter = 2'b11; + // then check if it saturated in the negative regime e.g.: branch not taken + end else if (bht[i].saturation_counter == 2'b00) begin + // we can safely increase it + if (bht_update_i.taken) + bht_updated[i].saturation_counter = bht[i].saturation_counter + 1; + else bht_updated[i].saturation_counter = 2'b00; + end else begin // otherwise we are not in any boundaries and can decrease or increase it + if (bht_update_i.taken) + bht_updated[i].saturation_counter = bht[i].saturation_counter + 1; + else bht_updated[i].saturation_counter = bht[i].saturation_counter - 1; + end + + bht_updated[i].valid = 1'b1; + bht_ram_we[i] = 1'b1; + bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc; + //bht_ram_wdata[(i+1)*BRAM_WORD_BITS-1] = 1'b1; //valid + bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = { + bht_updated[i].valid, bht_updated[i].saturation_counter + }; + + end + end + end + end + + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram + AsyncThreePortRam #( + .ADDR_WIDTH($clog2(NR_ROWS)), + 
.DATA_DEPTH(NR_ROWS), + .DATA_WIDTH(BRAM_WORD_BITS) + ) i_bht_ram ( + .Clk_CI (clk_i), + .WrEn_SI (bht_ram_we[i]), + .WrAddr_DI (bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .WrData_DI (bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]), + .RdAddr_DI_0(bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .RdAddr_DI_1(bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .RdData_DO_0(bht_ram_rdata_0[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]), + .RdData_DO_1(bht_ram_rdata_1[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]) + ); + end + + end +endmodule diff --git a/test/type_param/core/frontend/btb.sv b/test/type_param/core/frontend/btb.sv new file mode 100644 index 0000000..9500f37 --- /dev/null +++ b/test/type_param/core/frontend/btb.sv @@ -0,0 +1,185 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 +// +// Additional contributions by: +// Sebastien Jacq, Thales - sjthales on github.com +// Date: 2022-12-01 +// +// Description: This module is an adaptation of the BTB (Branch Target Buffer) +// module both FPGA and ASIC targets. +// Prediction target address is stored in BRAM on FPGA while for +// original module, target address is stored in D flip-flop. 
+// For FPGA flushing is not supported because the frontend module +// flushing signal is not connected. +// +// branch target buffer +module btb #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int NR_ENTRIES = 8 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the btb + input logic debug_mode_i, + + input logic [riscv::VLEN-1:0] vpc_i, // virtual PC from IF stage + input ariane_pkg::btb_update_t btb_update_i, // update btb with this information + output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb +); + // the last bit is always zero, we don't need it for indexing + localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2; + // re-shape the branch history table + localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH; + // number of bits needed to index the row + localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); + localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? 
$clog2(ariane_pkg::INSTR_PER_FETCH) : 1; + // number of bits we should use for prediction + localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; + // prevent aliasing to degrade performance + localparam ANTIALIAS_BITS = 8; + // number of bits par word in the bram + localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t); + // we are not interested in all bits of the address + unread i_unread (.d_i(|vpc_i)); + + + logic [$clog2(NR_ROWS)-1:0] index, update_pc; + logic [ROW_INDEX_BITS-1:0] update_row_index; + + assign index = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET]; + assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET]; + if (CVA6Cfg.RVC) begin : gen_update_row_index + assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET]; + end else begin + assign update_row_index = '0; + end + + if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction; + + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update; + + // output matching prediction + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output + assign btb_ram_csel_prediction[i] = 1'b1; + assign btb_ram_we_prediction[i] = 1'b0; + assign btb_ram_wdata_prediction = '0; + assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index; + assign btb_prediction_o[i] = 
btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]; + end + + // ------------------------- + // Update Branch Prediction + // ------------------------- + // update on a mis-predict + always_comb begin : update_branch_predict + btb_ram_csel_update = '0; + btb_ram_we_update = '0; + btb_ram_addr_update = '0; + btb_ram_wdata_update = '0; + + if (btb_update_i.valid && !debug_mode_i) begin + for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + if (update_row_index == i) begin + btb_ram_csel_update[i] = 1'b1; + btb_ram_we_update[i] = 1'b1; + btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc; + btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = { + 1'b1, btb_update_i.target_address + }; + end + end + end + end + + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram + SyncDpRam #( + .ADDR_WIDTH($clog2(NR_ROWS)), + .DATA_DEPTH(NR_ROWS), + .DATA_WIDTH(BRAM_WORD_BITS), + .OUT_REGS (0), + .SIM_INIT (1) + ) i_btb_ram ( + .Clk_CI (clk_i), + .Rst_RBI (rst_ni), + //---------------------------- + .CSelA_SI (btb_ram_csel_update[i]), + .WrEnA_SI (btb_ram_we_update[i]), + .AddrA_DI (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]), + .RdDataA_DO(), + //----------------------------- + .CSelB_SI (btb_ram_csel_prediction[i]), + .WrEnB_SI (btb_ram_we_prediction[i]), + .AddrB_DI (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]), + .RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]) + ); + end + + end else begin : gen_asic_btb // ASIC TARGET + + // typedef for all branch target entries + // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects + ariane_pkg::btb_prediction_t + btb_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], + btb_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0]; + + // 
output matching prediction + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output + assign btb_prediction_o[i] = btb_q[index][i]; // workaround + end + + // ------------------------- + // Update Branch Prediction + // ------------------------- + // update on a mis-predict + always_comb begin : update_branch_predict + btb_d = btb_q; + + if (btb_update_i.valid && !debug_mode_i) begin + btb_d[update_pc][update_row_index].valid = 1'b1; + // the target address is simply updated + btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + // Bias the branches to be taken upon first arrival + for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0}; + end else begin + // evict all entries + if (flush_i) begin + for (int i = 0; i < NR_ROWS; i++) begin + for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + btb_q[i][j].valid <= 1'b0; + end + end + end else begin + btb_q <= btb_d; + end + end + end + end +endmodule diff --git a/test/type_param/core/frontend/frontend.sv b/test/type_param/core/frontend/frontend.sv new file mode 100644 index 0000000..8f2f50a --- /dev/null +++ b/test/type_param/core/frontend/frontend.sv @@ -0,0 +1,516 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Description: Ariane Instruction Fetch Frontend +// +// This module interfaces with the instruction cache, handles control +// change request from the back-end and does branch prediction. + +module frontend + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush request for PCGEN + input logic flush_bp_i, // flush branch prediction + input logic halt_i, // halt commit stage + input logic debug_mode_i, + // global input + input logic [riscv::VLEN-1:0] boot_addr_i, + // Set a new PC + // mispredict + input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB + // from commit, when flushing the whole pipeline + input logic set_pc_commit_i, // Take the PC from commit stage + input logic [riscv::VLEN-1:0] pc_commit_i, // PC of instruction in commit stage + // CSR input + input logic [riscv::VLEN-1:0] epc_i, // exception PC which we need to return to + input logic eret_i, // return from exception + input logic [riscv::VLEN-1:0] trap_vector_base_i, // base of trap vector + input logic ex_valid_i, // exception is valid - from commit + input logic set_debug_pc_i, // jump to debug address + // Instruction Fetch + output icache_dreq_t icache_dreq_o, + input icache_drsp_t icache_dreq_i, + // instruction output port -> to processor back-end + output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage + output logic fetch_entry_valid_o, // instruction in IF is valid + input logic fetch_entry_ready_i // ID acknowledged this instruction +); + // Instruction Cache Registers, from I$ + logic [ FETCH_WIDTH-1:0] icache_data_q; + logic icache_valid_q; + ariane_pkg::frontend_exception_t icache_ex_valid_q; + logic [ riscv::VLEN-1:0] icache_vaddr_q; + logic instr_queue_ready; + logic 
[ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed; + // upper-most branch-prediction from last cycle + btb_prediction_t btb_q; + bht_prediction_t bht_q; + // instruction fetch is ready + logic if_ready; + logic [riscv::VLEN-1:0] npc_d, npc_q; // next PC + + // indicates whether we come out of reset (then we need to load boot_addr_i) + logic npc_rst_load_q; + + logic replay; + logic [ riscv::VLEN-1:0] replay_addr; + + // shift amount + logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt; + // address will always be 16 bit aligned, make this explicit here + if (CVA6Cfg.RVC) begin : gen_shamt + assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1]; + end else begin + assign shamt = 1'b0; + end + + // ----------------------- + // Ctrl Flow Speculation + // ----------------------- + // RVI ctrl flow prediction + logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump; + logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm; + // RVC branching + logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call; + logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm; + // re-aligned instruction and address (coming from cache - combinationally) + logic [INSTR_PER_FETCH-1:0][ 31:0] instr; + logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr; + logic [INSTR_PER_FETCH-1:0] instruction_valid; + // BHT, BTB and RAS prediction + bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction; + btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction; + bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted; + btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted; + ras_t ras_predict; + logic [ riscv::VLEN-1:0] vpc_btb; + + // branch-predict update + logic is_mispredict; + logic ras_push, ras_pop; + logic [ riscv::VLEN-1:0] ras_update; + + // Instruction FIFO + logic [ riscv::VLEN-1:0] predict_address; + cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] 
taken_rvi_cf; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf; + + logic serving_unaligned; + // Re-align instructions + instr_realign #( + .CVA6Cfg(CVA6Cfg) + ) i_instr_realign ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (icache_dreq_o.kill_s2), + .valid_i (icache_valid_q), + .serving_unaligned_o(serving_unaligned), + .address_i (icache_vaddr_q), + .data_i (icache_data_q), + .valid_o (instruction_valid), + .addr_o (addr), + .instr_o (instr) + ); + + // -------------------- + // Branch Prediction + // -------------------- + // select the right branch prediction result + // in case we are serving an unaligned instruction in instr[0] we need to take + // the prediction we saved from the previous fetch + if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted + assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2( + INSTR_PER_FETCH + ):1]]; + assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2( + INSTR_PER_FETCH + ):1]]; + + // for all other predictions we can use the generated address to index + // into the branch prediction data structures + for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address + assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]]; + assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]]; + end + end else begin + assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]]; + assign btb_prediction_shifted[0] = (serving_unaligned) ? 
btb_q : btb_prediction[addr[0][1]]; + end + ; + + // for the return address stack it doens't matter as we have the + // address of the call/return already + logic bp_valid; + + logic [INSTR_PER_FETCH-1:0] is_branch; + logic [INSTR_PER_FETCH-1:0] is_call; + logic [INSTR_PER_FETCH-1:0] is_jump; + logic [INSTR_PER_FETCH-1:0] is_return; + logic [INSTR_PER_FETCH-1:0] is_jalr; + + for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin + // branch history table -> BHT + assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]); + // function calls -> RAS + assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]); + // function return -> RAS + assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]); + // unconditional jumps with known target -> immediately resolved + assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]); + // unconditional jumps with unknown target -> BTB + assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]); + end + + // taken/not taken + always_comb begin + taken_rvi_cf = '0; + taken_rvc_cf = '0; + predict_address = '0; + + for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF; + + ras_push = 1'b0; + ras_pop = 1'b0; + ras_update = '0; + + // lower most prediction gets precedence + for (int i = INSTR_PER_FETCH - 1; i >= 0; i--) begin + unique case ({ + is_branch[i], is_return[i], is_jump[i], is_jalr[i] + }) + 4'b0000: ; // regular instruction e.g.: no branch + // unconditional jump to register, we need the BTB to resolve this + 4'b0001: begin + ras_pop = 1'b0; + ras_push = 1'b0; + if (CVA6Cfg.BTBEntries && btb_prediction_shifted[i].valid) begin + predict_address = btb_prediction_shifted[i].target_address; + cf_type[i] = ariane_pkg::JumpR; + end + end + // its an unconditional jump to an immediate + 4'b0010: begin + ras_pop = 1'b0; + ras_push = 1'b0; + taken_rvi_cf[i] = rvi_jump[i]; + taken_rvc_cf[i] = rvc_jump[i]; + 
cf_type[i] = ariane_pkg::Jump; + end + // return + 4'b0100: begin + // make sure to only alter the RAS if we actually consumed the instruction + ras_pop = ras_predict.valid & instr_queue_consumed[i]; + ras_push = 1'b0; + predict_address = ras_predict.ra; + cf_type[i] = ariane_pkg::Return; + end + // branch prediction + 4'b1000: begin + ras_pop = 1'b0; + ras_push = 1'b0; + // if we have a valid dynamic prediction use it + if (bht_prediction_shifted[i].valid) begin + taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken; + taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken; + // otherwise default to static prediction + end else begin + // set if immediate is negative - static prediction + taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1]; + taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1]; + end + if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin + cf_type[i] = ariane_pkg::Branch; + end + end + default: ; + // default: $error("Decoded more than one control flow"); + endcase + // if this instruction, in addition, is a call, save the resulting address + // but only if we actually consumed the address + if (is_call[i]) begin + ras_push = instr_queue_consumed[i]; + ras_update = addr[i] + (rvc_call[i] ? 2 : 4); + end + // calculate the jump target address + if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin + predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]); + end + end + end + // or reduce struct + always_comb begin + bp_valid = 1'b0; + // BP cannot be valid if we have a return instruction and the RAS is not giving a valid address + // Check that we encountered a control flow and that for a return the RAS + // contains a valid prediction. 
+ for (int i = 0; i < INSTR_PER_FETCH; i++) + bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid)); + end + assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict; + + // Cache interface + assign icache_dreq_o.req = instr_queue_ready; + assign if_ready = icache_dreq_i.ready & instr_queue_ready; + // We need to flush the cache pipeline if: + // 1. We mispredicted + // 2. Want to flush the whole processor front-end + // 3. Need to replay an instruction because the fetch-fifo was full + assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay; + // if we have a valid branch-prediction we need to only kill the last cache request + // also if we killed the first stage we also need to kill the second stage (inclusive flush) + assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid; + + // Update Control Flow Predictions + bht_update_t bht_update; + btb_update_t btb_update; + + // assert on branch, deassert when resolved + logic speculative_q, speculative_d; + assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i; + assign icache_dreq_o.spec = speculative_d; + + assign bht_update.valid = resolved_branch_i.valid + & (resolved_branch_i.cf_type == ariane_pkg::Branch); + assign bht_update.pc = resolved_branch_i.pc; + assign bht_update.taken = resolved_branch_i.is_taken; + // only update mispredicted branches e.g. no returns from the RAS + assign btb_update.valid = resolved_branch_i.valid + & resolved_branch_i.is_mispredict + & (resolved_branch_i.cf_type == ariane_pkg::JumpR); + assign btb_update.pc = resolved_branch_i.pc; + assign btb_update.target_address = resolved_branch_i.target_address; + + // ------------------- + // Next PC + // ------------------- + // next PC (NPC) can come from (in order of precedence): + // 0. Default assignment/replay instruction + // 1. Branch Predict taken + // 2. 
Control flow change request (misprediction) + // 3. Return from environment call + // 4. Exception/Interrupt + // 5. Pipeline Flush because of CSR side effects + // Mis-predict handling is a little bit different + // select PC a.k.a PC Gen + always_comb begin : npc_select + automatic logic [riscv::VLEN-1:0] fetch_address; + // check whether we come out of reset + // this is a workaround. some tools have issues + // having boot_addr_i in the asynchronous + // reset assignment to npc_q, even though + // boot_addr_i will be assigned a constant + // on the top-level. + if (npc_rst_load_q) begin + npc_d = boot_addr_i; + fetch_address = boot_addr_i; + end else begin + fetch_address = npc_q; + // keep stable by default + npc_d = npc_q; + end + // 0. Branch Prediction + if (bp_valid) begin + fetch_address = predict_address; + npc_d = predict_address; + end + // 1. Default assignment + if (if_ready) begin + npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4; + end + // 2. Replay instruction fetch + if (replay) begin + npc_d = replay_addr; + end + // 3. Control flow change request + if (is_mispredict) begin + npc_d = resolved_branch_i.target_address; + end + // 4. Return from environment call + if (eret_i) begin + npc_d = epc_i; + end + // 5. Exception/Interrupt + if (ex_valid_i) begin + npc_d = trap_vector_base_i; + end + // 6. Pipeline Flush because of CSR side effects + // On a pipeline flush start fetching from the next address + // of the instruction in the commit stage + // we either came here from a flush request of a CSR instruction or AMO, + // so as CSR or AMO instructions do not exist in a compressed form + // we can unconditionally do PC + 4 here + // or if the commit stage is halted, just take the current pc of the + // instruction in the commit stage + // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage + if (set_pc_commit_i) begin + npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN - 3{1'b0}}, 3'b100}); + end + // 7. 
Debug + // enter debug on a hard-coded base-address + if (CVA6Cfg.DebugEn && set_debug_pc_i) + npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0]; + icache_dreq_o.vaddr = fetch_address; + end + + logic [FETCH_WIDTH-1:0] icache_data; + // re-align the cache line + assign icache_data = icache_dreq_i.data >> {shamt, 4'b0}; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + npc_rst_load_q <= 1'b1; + npc_q <= '0; + speculative_q <= '0; + icache_data_q <= '0; + icache_valid_q <= 1'b0; + icache_vaddr_q <= 'b0; + icache_ex_valid_q <= ariane_pkg::FE_NONE; + btb_q <= '0; + bht_q <= '0; + end else begin + npc_rst_load_q <= 1'b0; + npc_q <= npc_d; + speculative_q <= speculative_d; + icache_valid_q <= icache_dreq_i.valid; + if (icache_dreq_i.valid) begin + icache_data_q <= icache_data; + icache_vaddr_q <= icache_dreq_i.vaddr; + // Map the only three exceptions which can occur in the frontend to a two bit enum + if (ariane_pkg::MMU_PRESENT && icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin + icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT; + end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin + icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT; + end else begin + icache_ex_valid_q <= ariane_pkg::FE_NONE; + end + // save the uppermost prediction + btb_q <= btb_prediction[INSTR_PER_FETCH-1]; + bht_q <= bht_prediction[INSTR_PER_FETCH-1]; + end + end + end + + if (CVA6Cfg.RASDepth == 0) begin + assign ras_predict = '0; + end else begin : ras_gen + ras #( + .CVA6Cfg(CVA6Cfg), + .DEPTH (CVA6Cfg.RASDepth) + ) i_ras ( + .clk_i, + .rst_ni, + .flush_i(flush_bp_i), + .push_i (ras_push), + .pop_i (ras_pop), + .data_i (ras_update), + .data_o (ras_predict) + ); + end + + //For FPGA, BTB is implemented in read synchronous BRAM + //while for ASIC, BTB is implemented in D flip-flop + //and can be read at the same cycle. + assign vpc_btb = (ariane_pkg::FPGA_EN) ? 
icache_dreq_i.vaddr : icache_vaddr_q; + + if (CVA6Cfg.BTBEntries == 0) begin + assign btb_prediction = '0; + end else begin : btb_gen + btb #( + .CVA6Cfg (CVA6Cfg), + .NR_ENTRIES(CVA6Cfg.BTBEntries) + ) i_btb ( + .clk_i, + .rst_ni, + .flush_i (flush_bp_i), + .debug_mode_i, + .vpc_i (vpc_btb), + .btb_update_i (btb_update), + .btb_prediction_o(btb_prediction) + ); + end + + if (CVA6Cfg.BHTEntries == 0) begin + assign bht_prediction = '0; + end else begin : bht_gen + bht #( + .CVA6Cfg (CVA6Cfg), + .NR_ENTRIES(CVA6Cfg.BHTEntries) + ) i_bht ( + .clk_i, + .rst_ni, + .flush_i (flush_bp_i), + .debug_mode_i, + .vpc_i (icache_vaddr_q), + .bht_update_i (bht_update), + .bht_prediction_o(bht_prediction) + ); + end + + // we need to inspect up to INSTR_PER_FETCH instructions for branches + // and jumps + for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan + instr_scan #( + .CVA6Cfg(CVA6Cfg) + ) i_instr_scan ( + .instr_i (instr[i]), + .rvi_return_o(rvi_return[i]), + .rvi_call_o (rvi_call[i]), + .rvi_branch_o(rvi_branch[i]), + .rvi_jalr_o (rvi_jalr[i]), + .rvi_jump_o (rvi_jump[i]), + .rvi_imm_o (rvi_imm[i]), + .rvc_branch_o(rvc_branch[i]), + .rvc_jump_o (rvc_jump[i]), + .rvc_jr_o (rvc_jr[i]), + .rvc_return_o(rvc_return[i]), + .rvc_jalr_o (rvc_jalr[i]), + .rvc_call_o (rvc_call[i]), + .rvc_imm_o (rvc_imm[i]) + ); + end + + instr_queue #( + .CVA6Cfg(CVA6Cfg) + ) i_instr_queue ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .instr_i (instr), // from re-aligner + .addr_i (addr), // from re-aligner + .exception_i (icache_ex_valid_q), // from I$ + .exception_addr_i (icache_vaddr_q), + .predict_address_i (predict_address), + .cf_type_i (cf_type), + .valid_i (instruction_valid), // from re-aligner + .consumed_o (instr_queue_consumed), + .ready_o (instr_queue_ready), + .replay_o (replay), + .replay_addr_o (replay_addr), + .fetch_entry_o (fetch_entry_o), // to back-end + .fetch_entry_valid_o(fetch_entry_valid_o), // to back-end + 
.fetch_entry_ready_i(fetch_entry_ready_i) // to back-end + ); + + // pragma translate_off +`ifndef VERILATOR + initial begin + assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) + else $fatal(1, "[frontend] fetch width != not supported"); + end +`endif + // pragma translate_on +endmodule diff --git a/test/type_param/core/frontend/instr_queue.sv b/test/type_param/core/frontend/instr_queue.sv new file mode 100644 index 0000000..3f95593 --- /dev/null +++ b/test/type_param/core/frontend/instr_queue.sv @@ -0,0 +1,459 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 26.10.2018 + +// Description: Instruction Queue, separates instruction front-end from processor +// back-end. +// +// This is an optimized instruction queue which supports the handling of +// compressed instructions (16 bit instructions). Internally it is organized as +// FETCH_ENTRY x 32 bit queues which are filled in a consecutive manner. Two pointers +// (`idx_is_q` and `idx_ds_q`) point into the fill port and the read port. The read port +// is designed so that it will easily allow for multiple issue implementation. +// The input supports arbitrary power of two instruction fetch widths. 
+// +// The queue supports handling of branch prediction and will take care of +// only saving a valid instruction stream. +// +// Furthermore it contains a replay interface in case the instruction queue +// is already full. As instructions are in general easily replayed this should +// increase the efficiency as I$ misses are potentially hidden. This stands in +// contrast to pessimistic actions (early stalling) or credit based approaches. +// Credit based systems might be difficult to implement with the current system +// as we do not exactly know how much space we are going to need in the fifos +// as each instruction can take either one or two slots. +// +// So the consumed/valid interface degenerates to an `information` interface. If the +// upstream circuit keeps pushing, the queue will discard the information +// and start replaying from the point where it last managed to accept instructions. +// +// The instruction front-end will stop issuing instructions as soon as the +// fifo is full. This will gate the logic if the processor is e.g. halted. +// +// TODO(zarubaf): The instruction queues can be reduced to 16 bit. Potentially +// the replay mechanism gets more complicated as it can be that a 32 bit instruction +// can not be pushed at once. 
+ +module instr_queue + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i, + input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i, + input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i, + output logic ready_o, + output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o, + // we've encountered an exception, at this point the only possible exceptions are page-table faults + input ariane_pkg::frontend_exception_t exception_i, + input logic [riscv::VLEN-1:0] exception_addr_i, + // branch predict + input logic [riscv::VLEN-1:0] predict_address_i, + input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i, + // replay instruction because one of the FIFO was already full + output logic replay_o, + output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction + // to processor backend + output ariane_pkg::fetch_entry_t fetch_entry_o, + output logic fetch_entry_valid_o, + input logic fetch_entry_ready_i +); + + typedef struct packed { + logic [31:0] instr; // instruction word + ariane_pkg::cf_t cf; // branch was taken + ariane_pkg::frontend_exception_t ex; // exception happened + logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception + } instr_data_t; + + logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index; + // instruction queues + logic [ariane_pkg::INSTR_PER_FETCH-1:0][$clog2( +ariane_pkg::FETCH_FIFO_DEPTH +)-1:0] instr_queue_usage; + instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty; + logic instr_overflow; + // address queue + logic 
[$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage; + logic [ riscv::VLEN-1:0] address_out; + logic pop_address; + logic push_address; + logic full_address; + logic empty_address; + logic address_overflow; + // input stream counter + logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q; + // Registers + // output FIFO select, one-hot + logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q; + logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC + logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush + + logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask; + logic branch_empty; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken; + // shift amount, e.g.: instructions we want to retire + logic [ariane_pkg::LOG2_INSTR_PER_FETCH:0] popcount; + logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] shamt; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid; + logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] consumed_extended; + // FIFO mask + logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] fifo_pos_extended; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_pos; + logic [ariane_pkg::INSTR_PER_FETCH*2-1:0][31:0] instr; + ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH*2-1:0] cf; + // replay interface + logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_overflow_fifo; + + assign ready_o = ~(|instr_queue_full) & ~full_address; + + if (ariane_pkg::RVC) begin : gen_multiple_instr_per_fetch_with_C + + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken + assign taken[i] = cf_type_i[i] != ariane_pkg::NoCF; + end + + // calculate a branch mask, e.g.: get the first taken branch + lzc #( + .WIDTH(ariane_pkg::INSTR_PER_FETCH), + .MODE (0) // count trailing zeros + ) i_lzc_branch_index ( + .in_i (taken), // we want to count trailing zeros + .cnt_o (branch_index), // first branch on branch_index + .empty_o(branch_empty) + ); + + + // the first index is for sure valid 
+ // for example (64 bit fetch): + // taken mask: 0 1 1 0 + // leading zero count = 1 + // 0 0 0 1, 1 1 1 << 1 = 0 0 1 1, 1 1 0 + // take the upper 4 bits: 0 0 1 1 + assign branch_mask_extended = {{{ariane_pkg::INSTR_PER_FETCH-1}{1'b0}}, {{ariane_pkg::INSTR_PER_FETCH}{1'b1}}} << branch_index; + assign branch_mask = branch_mask_extended[ariane_pkg::INSTR_PER_FETCH * 2 - 2:ariane_pkg::INSTR_PER_FETCH - 1]; + + // mask with taken branches to get the actual amount of instructions we want to push + assign valid = valid_i & branch_mask; + // rotate right again + assign consumed_extended = {push_instr_fifo, push_instr_fifo} >> idx_is_q; + assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0]; + // count the numbers of valid instructions we've pushed from this package + popcount #( + .INPUT_WIDTH(ariane_pkg::INSTR_PER_FETCH) + ) i_popcount ( + .data_i (push_instr_fifo), + .popcount_o(popcount) + ); + assign shamt = popcount[$bits(shamt)-1:0]; + + // save the shift amount for next cycle + assign idx_is_d = idx_is_q + shamt; + + // ---------------------- + // Input interface + // ---------------------- + // rotate left by the current position + assign fifo_pos_extended = {valid, valid} << idx_is_q; + // we just care about the upper bits + assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH]; + // the fifo_position signal can directly be used to guide the push signal of each FIFO + // make sure it is not full + assign push_instr = fifo_pos & ~instr_queue_full; + + // duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0 + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input + assign instr[i] = instr_i[i]; + assign instr[i+ariane_pkg::INSTR_PER_FETCH] = instr_i[i]; + assign cf[i] = cf_type_i[i]; + assign cf[i+ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i]; + end + + // shift the inputs + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : 
gen_fifo_input_select + /* verilator lint_off WIDTH */ + assign instr_data_in[i].instr = instr[i+idx_is_q]; + assign instr_data_in[i].cf = cf[i+idx_is_q]; + assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet + assign instr_data_in[i].ex_vaddr = exception_addr_i; + /* verilator lint_on WIDTH */ + end + end else begin : gen_multiple_instr_per_fetch_without_C + + assign taken = '0; + assign branch_empty = '0; + assign branch_index = '0; + assign branch_mask_extended = '0; + assign branch_mask = '0; + assign consumed_extended = '0; + assign fifo_pos_extended = '0; + assign fifo_pos = '0; + assign instr = '0; + assign popcount = '0; + assign shamt = '0; + assign valid = '0; + + + assign consumed_o = push_instr_fifo[0]; + // ---------------------- + // Input interface + // ---------------------- + assign push_instr = valid_i & ~instr_queue_full; + + /* verilator lint_off WIDTH */ + assign instr_data_in[0].instr = instr_i[0]; + assign instr_data_in[0].cf = cf_type_i[0]; + assign instr_data_in[0].ex = exception_i; // exceptions hold for the whole fetch packet + assign instr_data_in[0].ex_vaddr = exception_addr_i; + /* verilator lint_on WIDTH */ + end + + // ---------------------- + // Replay Logic + // ---------------------- + // We need to replay a instruction fetch iff: + // 1. One of the instruction data FIFOs was full and we needed it + // (e.g.: we pushed and it was full) + // 2. 
The address/branch predict FIFO was full + // if one of the FIFOs was full we need to replay the faulting instruction + if (ariane_pkg::RVC == 1'b1) begin : gen_instr_overflow_fifo_with_C + assign instr_overflow_fifo = instr_queue_full & fifo_pos; + end else begin : gen_instr_overflow_fifo_without_C + assign instr_overflow_fifo = instr_queue_full & valid_i; + end + assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed + assign address_overflow = full_address & push_address; + assign replay_o = instr_overflow | address_overflow; + + if (ariane_pkg::RVC) begin : gen_replay_addr_o_with_c + // select the address, in the case of an address fifo overflow just + // use the base of this package + // if we successfully pushed some instructions we can output the next instruction + // which we didn't manage to push + assign replay_addr_o = (address_overflow) ? addr_i[0] : addr_i[shamt]; + end else begin : gen_replay_addr_o_without_C + assign replay_addr_o = addr_i[0]; + end + + // ---------------------- + // Downstream interface + // ---------------------- + // as long as there is at least one queue which can take the value we have a valid instruction + assign fetch_entry_valid_o = ~(&instr_queue_empty); + + if (ariane_pkg::RVC) begin : gen_downstream_itf_with_c + always_comb begin + idx_ds_d = idx_ds_q; + + pop_instr = '0; + // assemble fetch entry + fetch_entry_o.instruction = '0; + fetch_entry_o.address = pc_q; + fetch_entry_o.ex.valid = 1'b0; + fetch_entry_o.ex.cause = '0; + + fetch_entry_o.ex.tval = '0; + fetch_entry_o.branch_predict.predict_address = address_out; + fetch_entry_o.branch_predict.cf = ariane_pkg::NoCF; + // output mux select + for (int unsigned i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + if (idx_ds_q[i]) begin + if (instr_data_out[i].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin + fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT; + end else begin + fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT; + end + 
fetch_entry_o.instruction = instr_data_out[i].instr; + fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE; + fetch_entry_o.ex.tval = { + {(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[i].ex_vaddr + }; + fetch_entry_o.branch_predict.cf = instr_data_out[i].cf; + pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i; + end + end + // rotate the pointer left + if (fetch_entry_ready_i) begin + idx_ds_d = { + idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1] + }; + end + end + end else begin : gen_downstream_itf_without_c + always_comb begin + idx_ds_d = '0; + idx_is_d = '0; + fetch_entry_o.instruction = instr_data_out[0].instr; + fetch_entry_o.address = pc_q; + + fetch_entry_o.ex.valid = instr_data_out[0].ex != ariane_pkg::FE_NONE; + if (instr_data_out[0].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin + fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT; + end else begin + fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT; + end + fetch_entry_o.ex.tval = {{64 - riscv::VLEN{1'b0}}, instr_data_out[0].ex_vaddr}; + + fetch_entry_o.branch_predict.predict_address = address_out; + fetch_entry_o.branch_predict.cf = instr_data_out[0].cf; + + pop_instr[0] = fetch_entry_valid_o & fetch_entry_ready_i; + end + end + + // TODO(zarubaf): This needs to change for dual-issue + // if the handshaking is successful and we had a prediction pop one address entry + assign pop_address = ((fetch_entry_o.branch_predict.cf != ariane_pkg::NoCF) & |pop_instr); + + // ---------------------- + // Calculate (Next) PC + // ---------------------- + always_comb begin + pc_d = pc_q; + reset_address_d = flush_i ? 1'b1 : reset_address_q; + + if (fetch_entry_ready_i) begin + // TODO(zarubaf): This needs to change for a dual issue implementation + // advance the PC + if (ariane_pkg::RVC == 1'b1) begin : gen_pc_with_c_extension + pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 
'd2 : 'd4); + end else begin : gen_pc_without_c_extension + pc_d = pc_q + 'd4; + end + end + + if (pop_address) pc_d = address_out; + + // we previously flushed so we need to reset the address + if (valid_i[0] && reset_address_q) begin + // this is the base of the first instruction + pc_d = addr_i[0]; + reset_address_d = 1'b0; + end + end + + // FIFOs + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_instr_fifo + // Make sure we don't save any instructions if we couldn't save the address + assign push_instr_fifo[i] = push_instr[i] & ~address_overflow; + fifo_v3 #( + .DEPTH(ariane_pkg::FETCH_FIFO_DEPTH), + .dtype(instr_data_t) + ) i_fifo_instr_data ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .testmode_i(1'b0), + .full_o (instr_queue_full[i]), + .empty_o (instr_queue_empty[i]), + .usage_o (instr_queue_usage[i]), + .data_i (instr_data_in[i]), + .push_i (push_instr_fifo[i]), + .data_o (instr_data_out[i]), + .pop_i (pop_instr[i]) + ); + end + // or reduce and check whether we are retiring a taken branch (might be that the corresponding) + // fifo is full. 
+ always_comb begin + push_address = 1'b0; + // check if we are pushing a ctrl flow change, if so save the address + for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + push_address |= push_instr[i] & (instr_data_in[i].cf != ariane_pkg::NoCF); + end + end + + fifo_v3 #( + .DEPTH (ariane_pkg::FETCH_FIFO_DEPTH), // TODO(zarubaf): Fork out to separate param + .DATA_WIDTH(riscv::VLEN) + ) i_fifo_address ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .testmode_i(1'b0), + .full_o (full_address), + .empty_o (empty_address), + .usage_o (address_queue_usage), + .data_i (predict_address_i), + .push_i (push_address & ~full_address), + .data_o (address_out), + .pop_i (pop_address) + ); + + unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage})); + unread i_unread_branch_mask (.d_i(|branch_mask_extended)); + unread i_unread_lzc (.d_i(|{branch_empty})); + unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals + unread i_unread_instr_fifo (.d_i(|instr_queue_usage)); + + if (ariane_pkg::RVC) begin : gen_pc_q_with_c + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + idx_ds_q <= 'b1; + idx_is_q <= '0; + pc_q <= '0; + reset_address_q <= 1'b1; + end else begin + pc_q <= pc_d; + reset_address_q <= reset_address_d; + if (flush_i) begin + // one-hot encoded + idx_ds_q <= 'b1; + // binary encoded + idx_is_q <= '0; + reset_address_q <= 1'b1; + end else begin + idx_ds_q <= idx_ds_d; + idx_is_q <= idx_is_d; + end + end + end + end else begin : gen_pc_q_without_C + assign idx_ds_q = '0; + assign idx_is_q = '0; + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + pc_q <= '0; + reset_address_q <= 1'b1; + end else begin + pc_q <= pc_d; + reset_address_q <= reset_address_d; + if (flush_i) begin + reset_address_q <= 1'b1; + end + end + end + end + + // pragma translate_off +`ifndef VERILATOR + replay_address_fifo : + assert property (@(posedge clk_i) disable iff 
(!rst_ni) replay_o |-> !i_fifo_address.push_i) + else $fatal(1, "[instr_queue] Pushing address although replay asserted"); + + output_select_onehot : + assert property (@(posedge clk_i) $onehot0(idx_ds_q)) + else begin + $error("Output select should be one-hot encoded"); + $stop(); + end +`endif + // pragma translate_on +endmodule diff --git a/test/type_param/core/frontend/instr_scan.sv b/test/type_param/core/frontend/instr_scan.sv new file mode 100644 index 0000000..592d5d3 --- /dev/null +++ b/test/type_param/core/frontend/instr_scan.sv @@ -0,0 +1,83 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// ------------------------------ +// Instruction Scanner +// ------------------------------ +module instr_scan #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic [ 31:0] instr_i, // expect aligned instruction, compressed or not + output logic rvi_return_o, + output logic rvi_call_o, + output logic rvi_branch_o, + output logic rvi_jalr_o, + output logic rvi_jump_o, + output logic [riscv::VLEN-1:0] rvi_imm_o, + output logic rvc_branch_o, + output logic rvc_jump_o, + output logic rvc_jr_o, + output logic rvc_return_o, + output logic rvc_jalr_o, + output logic rvc_call_o, + output logic [riscv::VLEN-1:0] rvc_imm_o +); + logic is_rvc; + assign is_rvc = (instr_i[1:0] != 2'b11); + + logic rv32_rvc_jal; + assign rv32_rvc_jal = (riscv::XLEN == 32) & ((instr_i[15:13] == riscv::OpcodeC1Jal) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)); + + logic is_xret; + assign is_xret = logic'(instr_i[31:30] == 2'b00) & logic'(instr_i[28:0] == 29'b10000001000000000000001110011); + + // check that rs1 is either x1 or x5 and that rd is not rs1 + assign rvi_return_o = rvi_jalr_o & ((instr_i[19:15] == 5'd1) | instr_i[19:15] == 5'd5) + & (instr_i[19:15] != instr_i[11:7]); + // Opcode is JAL[R] and destination register is either x1 or x5 + assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & ((instr_i[11:7] == 5'd1) | instr_i[11:7] == 5'd5); + // differentiates between JAL and BRANCH opcode, JALR comes from BHT + assign rvi_imm_o = is_xret ? '0 : (instr_i[3]) ? 
ariane_pkg::uj_imm( + instr_i + ) : ariane_pkg::sb_imm( + instr_i + ); + assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch); + assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr); + assign rvi_jump_o = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret; + + // opcode JAL + assign rvc_jump_o = ((instr_i[15:13] == riscv::OpcodeC1J) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)) | rv32_rvc_jal; + + // always links to register 0 + logic is_jal_r; + assign is_jal_r = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd) + & (instr_i[6:2] == 5'b00000) + & (instr_i[1:0] == riscv::OpcodeC2) + & is_rvc; + assign rvc_jr_o = is_jal_r & ~instr_i[12]; + // always links to register 1 e.g.: it is a jump + assign rvc_jalr_o = is_jal_r & instr_i[12]; + assign rvc_call_o = rvc_jalr_o | rv32_rvc_jal; + + assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez)) + & (instr_i[1:0] == riscv::OpcodeC1) + & is_rvc; + // check that rs1 is x1 or x5 + assign rvc_return_o = ((instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5)) & rvc_jr_o; + + // differentiates between JAL and BRANCH opcode, JALR comes from BHT + assign rvc_imm_o = (instr_i[14]) ? {{56+riscv::VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0} + : {{53+riscv::VLEN-64{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0}; +endmodule diff --git a/test/type_param/core/frontend/ras.sv b/test/type_param/core/frontend/ras.sv new file mode 100644 index 0000000..f092b50 --- /dev/null +++ b/test/type_param/core/frontend/ras.sv @@ -0,0 +1,71 @@ +//Copyright (C) 2018 to present, +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// return address stack +module ras #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned DEPTH = 2 +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic push_i, + input logic pop_i, + input logic [riscv::VLEN-1:0] data_i, + output ariane_pkg::ras_t data_o +); + + ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q; + + assign data_o = stack_q[0]; + + always_comb begin + stack_d = stack_q; + + // push on the stack + if (push_i) begin + stack_d[0].ra = data_i; + // mark the new return address as valid + stack_d[0].valid = 1'b1; + stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0]; + end + + if (pop_i) begin + stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1]; + // we popped the value so invalidate the end of the stack + stack_d[DEPTH-1].valid = 1'b0; + stack_d[DEPTH-1].ra = 'b0; + end + // leave everything untouched and just push the latest value to the + // top of the stack + if (pop_i && push_i) begin + stack_d = stack_q; + stack_d[0].ra = data_i; + stack_d[0].valid = 1'b1; + end + + if (flush_i) begin + stack_d = '0; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + stack_q <= '0; + end else begin + stack_q <= stack_d; + end + end +endmodule diff --git a/test/type_param/core/id_stage.sv b/test/type_param/core/id_stage.sv new file mode 100644 index 0000000..81d1640 --- /dev/null +++ b/test/type_param/core/id_stage.sv @@ -0,0 +1,143 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 15.04.2017 +// Description: Instruction decode, contains the logic for decode, +// issue and read operands. + +module id_stage #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + + input logic flush_i, + input logic debug_req_i, + // from IF + input ariane_pkg::fetch_entry_t fetch_entry_i, + input logic fetch_entry_valid_i, + output logic fetch_entry_ready_o, // acknowledge the instruction (fetch entry) + // to ID + output ariane_pkg::scoreboard_entry_t issue_entry_o, // a decoded instruction + output logic issue_entry_valid_o, // issue entry is valid + output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions + input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions + output logic rvfi_is_compressed_o, + // from CSR file + input riscv::priv_lvl_t priv_lvl_i, // current privilege level + input riscv::xs_t fs_i, // floating point extension status + input logic [2:0] frm_i, // floating-point dynamic rounding mode + input riscv::xs_t vs_i, // vector extension status + input logic [1:0] irq_i, + input ariane_pkg::irq_ctrl_t irq_ctrl_i, + input logic debug_mode_i, // we are in debug mode + input logic tvm_i, + input logic tw_i, + input logic tsr_i +); + // ID/ISSUE register stage + 
typedef struct packed { + logic valid; + ariane_pkg::scoreboard_entry_t sbe; + logic is_ctrl_flow; + } issue_struct_t; + issue_struct_t issue_n, issue_q; + + logic is_control_flow_instr; + ariane_pkg::scoreboard_entry_t decoded_instruction; + + logic is_illegal; + logic [31:0] instruction; + logic is_compressed; + + if (CVA6Cfg.RVC) begin + // --------------------------------------------------------- + // 1. Check if they are compressed and expand in case they are + // --------------------------------------------------------- + compressed_decoder #( + .CVA6Cfg(CVA6Cfg) + ) compressed_decoder_i ( + .instr_i (fetch_entry_i.instruction), + .instr_o (instruction), + .illegal_instr_o(is_illegal), + .is_compressed_o(is_compressed) + ); + end else begin + assign instruction = fetch_entry_i.instruction; + assign is_illegal = '0; + assign is_compressed = '0; + end + + assign rvfi_is_compressed_o = is_compressed; + // --------------------------------------------------------- + // 2. Decode and emit instruction to issue stage + // --------------------------------------------------------- + decoder #( + .CVA6Cfg(CVA6Cfg) + ) decoder_i ( + .debug_req_i, + .irq_ctrl_i, + .irq_i, + .pc_i (fetch_entry_i.address), + .is_compressed_i (is_compressed), + .is_illegal_i (is_illegal), + .instruction_i (instruction), + .compressed_instr_i (fetch_entry_i.instruction[15:0]), + .branch_predict_i (fetch_entry_i.branch_predict), + .ex_i (fetch_entry_i.ex), + .priv_lvl_i (priv_lvl_i), + .debug_mode_i (debug_mode_i), + .fs_i, + .frm_i, + .vs_i, + .tvm_i, + .tw_i, + .tsr_i, + .instruction_o (decoded_instruction), + .is_control_flow_instr_o(is_control_flow_instr) + ); + + // ------------------ + // Pipeline Register + // ------------------ + assign issue_entry_o = issue_q.sbe; + assign issue_entry_valid_o = issue_q.valid; + assign is_ctrl_flow_o = issue_q.is_ctrl_flow; + + always_comb begin + issue_n = issue_q; + fetch_entry_ready_o = 1'b0; + + // Clear the valid flag if issue has acknowledged the 
instruction + if (issue_instr_ack_i) issue_n.valid = 1'b0; + + // if we have a space in the register and the fetch is valid, go get it + // or the issue stage is currently acknowledging an instruction, which means that we will have space + // for a new instruction + if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin + fetch_entry_ready_o = 1'b1; + issue_n = '{1'b1, decoded_instruction, is_control_flow_instr}; + end + + // invalidate the pipeline register on a flush + if (flush_i) issue_n.valid = 1'b0; + end + // ------------------------- + // Registers (ID <-> Issue) + // ------------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + issue_q <= '0; + end else begin + issue_q <= issue_n; + end + end +endmodule diff --git a/test/type_param/core/include/acc_pkg.sv b/test/type_param/core/include/acc_pkg.sv new file mode 100644 index 0000000..bcd3c70 --- /dev/null +++ b/test/type_param/core/include/acc_pkg.sv @@ -0,0 +1,47 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +// Authors: Matheus Cavalcante +// Nils Wistoff + +// Package defining the accelerator interface as used by Ara + CVA6 + +package acc_pkg; + + // ---------------------- + // Accelerator Interface + // ---------------------- + + typedef struct packed { + logic req_valid; + logic resp_ready; + riscv::instruction_t insn; + riscv::xlen_t rs1; + riscv::xlen_t rs2; + fpnew_pkg::roundmode_e frm; + logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id; + logic store_pending; + // Invalidation interface + logic acc_cons_en; + logic inval_ready; + } accelerator_req_t; + + typedef struct packed { + logic req_ready; + logic resp_valid; + riscv::xlen_t result; + logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id; + logic error; + // Metadata + logic store_pending; + logic store_complete; + logic load_complete; + logic [4:0] fflags; + logic fflags_valid; + // Invalidation interface + logic inval_valid; + logic [63:0] inval_addr; + } accelerator_resp_t; + +endpackage diff --git a/test/type_param/core/include/ariane_pkg.sv b/test/type_param/core/include/ariane_pkg.sv new file mode 100644 index 0000000..1616faf --- /dev/null +++ b/test/type_param/core/include/ariane_pkg.sv @@ -0,0 +1,994 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ * + * File: ariane_pkg.sv + * Author: Florian Zaruba + * Date: 8.4.2017 + * + * Description: Contains all the necessary defines for Ariane + * in one package. + */ + +// this is needed to propagate the +// configuration in case Ariane is +// instantiated in OpenPiton +`ifdef PITON_ARIANE +`include "l15.tmp.h" +`endif + +/// This package contains `functions` and global defines for CVA6. +/// *Note*: There are some parameters here as well which will eventually be +/// moved out to favour a fully parameterizable core. +package ariane_pkg; + + // TODO: Slowly move those parameters to the new system. + localparam NR_SB_ENTRIES = cva6_config_pkg::CVA6ConfigNrScoreboardEntries; // number of scoreboard entries + localparam TRANS_ID_BITS = $clog2( + NR_SB_ENTRIES + ); // depending on the number of scoreboard entries we need that many bits + // to uniquely identify the entry in the scoreboard + localparam ASID_WIDTH = (riscv::XLEN == 64) ? 16 : 1; + localparam BITS_SATURATION_COUNTER = 2; + + localparam ISSUE_WIDTH = 1; + + // depth of store-buffers, this needs to be a power of two + localparam logic [2:0] DEPTH_SPEC = 'd4; + + localparam int unsigned DCACHE_TYPE = int'(cva6_config_pkg::CVA6ConfigDcacheType); + // if DCACHE_TYPE = cva6_config_pkg::WT + // we can use a small commit queue since we have a write buffer in the dcache + // we could in principle do without the commit queue in this case, but the timing degrades if we do that due + // to longer paths into the commit stage + // if DCACHE_TYPE = cva6_config_pkg::WB + // allocate more space for the commit buffer to be on the safe side, this needs to be a power of two + localparam logic [2:0] DEPTH_COMMIT = 'd4; + + localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn; // Is FPGA optimization of CV32A6 + + localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration + + // Transprecision float unit + localparam int unsigned LAT_COMP_FP32 = 'd2; + localparam int unsigned LAT_COMP_FP64 =
'd3; + localparam int unsigned LAT_COMP_FP16 = 'd1; + localparam int unsigned LAT_COMP_FP16ALT = 'd1; + localparam int unsigned LAT_COMP_FP8 = 'd1; + localparam int unsigned LAT_DIVSQRT = 'd2; + localparam int unsigned LAT_NONCOMP = 'd1; + localparam int unsigned LAT_CONV = 'd2; + + localparam riscv::xlen_t OPENHWGROUP_MVENDORID = {{riscv::XLEN - 32{1'b0}}, 32'h0602}; + localparam riscv::xlen_t ARIANE_MARCHID = {{riscv::XLEN - 32{1'b0}}, 32'd3}; + + // 32 registers + localparam REG_ADDR_SIZE = 5; + + // Read ports for general purpose register files + localparam NR_RGPR_PORTS = 2; + + // static debug hartinfo + // debug causes + localparam logic [2:0] CauseBreakpoint = 3'h1; + localparam logic [2:0] CauseTrigger = 3'h2; + localparam logic [2:0] CauseRequest = 3'h3; + localparam logic [2:0] CauseSingleStep = 3'h4; + // amount of data count registers implemented + localparam logic [3:0] DataCount = 4'h2; + + // address where data0-15 is shadowed or if shadowed in a CSR + // address of the first CSR used for shadowing the data + localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here + typedef struct packed { + logic [31:24] zero1; + logic [23:20] nscratch; + logic [19:17] zero0; + logic dataaccess; + logic [15:12] datasize; + logic [11:0] dataaddr; + } hartinfo_t; + + localparam hartinfo_t DebugHartInfo = '{ + zero1: '0, + nscratch: 2, // Debug module needs at least two scratch regs + zero0: '0, + dataaccess: 1'b1, // data registers are memory mapped in the debugger + datasize: DataCount, + dataaddr: DataAddr + }; + + // enables a commit log which matches spikes commit log format for easier trace comparison + localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1; + + // ------------- Dangerous ------------- + // if set to zero a flush will not invalidate the cache-lines, in a single core environment + // where coherence is not necessary this can improve performance. 
This needs to be switched on + // when more than one core is in a system + localparam logic INVALIDATE_ON_FLUSH = 1'b1; + +`ifdef SPIKE_TANDEM + // Spike still places 0 in TVAL for ENV_CALL_* exceptions. + // This may eventually go away when Spike starts to handle TVAL for *all* exceptions. + localparam bit ZERO_TVAL = 1'b1; +`else + localparam bit ZERO_TVAL = 1'b0; +`endif + // read mask for SSTATUS over MMSTATUS + localparam logic [63:0] SMODE_STATUS_READ_MASK = riscv::SSTATUS_UIE + | riscv::SSTATUS_SIE + | riscv::SSTATUS_SPIE + | riscv::SSTATUS_SPP + | riscv::SSTATUS_FS + | riscv::SSTATUS_XS + | riscv::SSTATUS_SUM + | riscv::SSTATUS_MXR + | riscv::SSTATUS_UPIE + | riscv::SSTATUS_SPIE + | riscv::SSTATUS_UXL + | riscv::SSTATUS_SD; + + localparam logic [63:0] SMODE_STATUS_WRITE_MASK = riscv::SSTATUS_SIE + | riscv::SSTATUS_SPIE + | riscv::SSTATUS_SPP + | riscv::SSTATUS_FS + | riscv::SSTATUS_SUM + | riscv::SSTATUS_MXR; + // --------------- + // AXI + // --------------- + + localparam FETCH_USER_WIDTH = cva6_config_pkg::CVA6ConfigFetchUserWidth; + localparam DATA_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth; + localparam AXI_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn | cva6_config_pkg::CVA6ConfigFetchUserEn; + localparam AXI_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth; + localparam DATA_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn; + localparam FETCH_USER_EN = cva6_config_pkg::CVA6ConfigFetchUserEn; + + typedef enum logic { + SINGLE_REQ, + CACHE_LINE_REQ + } ad_req_t; + + // --------------- + // Fetch Stage + // --------------- + + // leave as is (fails with >8 entries and wider fetch width) + localparam int unsigned FETCH_FIFO_DEPTH = 4; + localparam int unsigned FETCH_WIDTH = 32; + // maximum instructions we can fetch on one request (we support compressed instructions) + localparam int unsigned INSTR_PER_FETCH = RVC == 1'b1 ? (FETCH_WIDTH / 16) : 1; + localparam int unsigned LOG2_INSTR_PER_FETCH = RVC == 1'b1 ? 
$clog2(INSTR_PER_FETCH) : 1; + + // Only use struct when signals have same direction + // exception + typedef struct packed { + riscv::xlen_t cause; // cause of exception + riscv::xlen_t tval; // additional information of causing exception (e.g.: instruction causing it), + // address of LD/ST fault + logic valid; + } exception_t; + + typedef enum logic [2:0] { + NoCF, // No control flow prediction + Branch, // Branch + Jump, // Jump to address from immediate + JumpR, // Jump to address from registers + Return // Return Address Prediction + } cf_t; + + // branch-predict + // this is the struct we get back from ex stage and we will use it to update + // all the necessary data structures + // bp_resolve_t + typedef struct packed { + logic valid; // prediction with all its values is valid + logic [riscv::VLEN-1:0] pc; // PC of predict or mis-predict + logic [riscv::VLEN-1:0] target_address; // target address at which to jump, or not + logic is_mispredict; // set if this was a mis-predict + logic is_taken; // branch is taken + cf_t cf_type; // Type of control flow change + } bp_resolve_t; + + // branchpredict scoreboard entry + // this is the struct which we will inject into the pipeline to guide the various + // units towards the correct branch decision and resolve + typedef struct packed { + cf_t cf; // type of control flow prediction + logic [riscv::VLEN-1:0] predict_address; // target address at which to jump, or not + } branchpredict_sbe_t; + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] pc; // update at PC + logic [riscv::VLEN-1:0] target_address; + } btb_update_t; + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] target_address; + } btb_prediction_t; + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] ra; + } ras_t; + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] pc; // update at PC + logic taken; + } bht_update_t; + + typedef struct packed { + logic valid; + logic taken; + } 
bht_prediction_t; + + typedef struct packed { + logic valid; + logic [1:0] saturation_counter; + } bht_t; + + typedef enum logic [3:0] { + NONE, // 0 + LOAD, // 1 + STORE, // 2 + ALU, // 3 + CTRL_FLOW, // 4 + MULT, // 5 + CSR, // 6 + FPU, // 7 + FPU_VEC, // 8 + CVXIF, // 9 + ACCEL // 10 + } fu_t; + + localparam EXC_OFF_RST = 8'h80; + + localparam SupervisorIrq = 1; + localparam MachineIrq = 0; + + // All information needed to determine whether we need to associate an interrupt + // with the corresponding instruction or not. + typedef struct packed { + riscv::xlen_t mie; + riscv::xlen_t mip; + riscv::xlen_t mideleg; + logic sie; + logic global_enable; + } irq_ctrl_t; + + // --------------- + // Cache config + // --------------- + + // for usage in OpenPiton we have to propagate the openpiton L15 configuration from l15.h +`ifdef PITON_ARIANE + +`ifndef CONFIG_L1I_CACHELINE_WIDTH + `define CONFIG_L1I_CACHELINE_WIDTH 128 +`endif + +`ifndef CONFIG_L1I_ASSOCIATIVITY + `define CONFIG_L1I_ASSOCIATIVITY 4 +`endif + +`ifndef CONFIG_L1I_SIZE + `define CONFIG_L1I_SIZE 16*1024 +`endif + +`ifndef CONFIG_L1D_CACHELINE_WIDTH + `define CONFIG_L1D_CACHELINE_WIDTH 128 +`endif + +`ifndef CONFIG_L1D_ASSOCIATIVITY + `define CONFIG_L1D_ASSOCIATIVITY 8 +`endif + +`ifndef CONFIG_L1D_SIZE + `define CONFIG_L1D_SIZE 32*1024 +`endif + +`ifndef L15_THREADID_WIDTH + `define L15_THREADID_WIDTH 3 +`endif + + // I$ + localparam int unsigned ICACHE_LINE_WIDTH = `CONFIG_L1I_CACHELINE_WIDTH; + localparam int unsigned ICACHE_SET_ASSOC = `CONFIG_L1I_ASSOCIATIVITY; + localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(`CONFIG_L1I_SIZE / ICACHE_SET_ASSOC); + localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH; + localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 
4 : 128; // in bit + // D$ + localparam int unsigned DCACHE_LINE_WIDTH = `CONFIG_L1D_CACHELINE_WIDTH; + localparam int unsigned DCACHE_SET_ASSOC = `CONFIG_L1D_ASSOCIATIVITY; + localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(`CONFIG_L1D_SIZE / DCACHE_SET_ASSOC); + localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH; + localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : 128; // in bit + localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH; + + localparam int unsigned MEM_TID_WIDTH = `L15_THREADID_WIDTH; +`else + // I$ + localparam int unsigned CONFIG_L1I_SIZE = cva6_config_pkg::CVA6ConfigIcacheByteSize; // in byte + localparam int unsigned ICACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigIcacheSetAssoc; // number of ways + localparam int unsigned ICACHE_INDEX_WIDTH = $clog2( + CONFIG_L1I_SIZE / ICACHE_SET_ASSOC + ); // in bit, contains also offset width + localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH; // in bit + localparam int unsigned ICACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit + localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit + // D$ + localparam int unsigned CONFIG_L1D_SIZE = cva6_config_pkg::CVA6ConfigDcacheByteSize; // in byte + localparam int unsigned DCACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigDcacheSetAssoc; // number of ways + localparam int unsigned DCACHE_INDEX_WIDTH = $clog2( + CONFIG_L1D_SIZE / DCACHE_SET_ASSOC + ); // in bit, contains also offset width + localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH; // in bit + localparam int unsigned DCACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit + localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 
4 : cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit + localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH; + + localparam int unsigned MEM_TID_WIDTH = cva6_config_pkg::CVA6ConfigMemTidWidth; +`endif + + localparam int unsigned DCACHE_TID_WIDTH = cva6_config_pkg::CVA6ConfigDcacheIdWidth; + + localparam int unsigned WT_DCACHE_WBUF_DEPTH = cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth; + + // --------------- + // EX Stage + // --------------- + + typedef enum logic [7:0] { // basic ALU op + ADD, + SUB, + ADDW, + SUBW, + // logic operations + XORL, + ORL, + ANDL, + // shifts + SRA, + SRL, + SLL, + SRLW, + SLLW, + SRAW, + // comparisons + LTS, + LTU, + GES, + GEU, + EQ, + NE, + // jumps + JALR, + BRANCH, + // set lower than operations + SLTS, + SLTU, + // CSR functions + MRET, + SRET, + DRET, + ECALL, + WFI, + FENCE, + FENCE_I, + SFENCE_VMA, + CSR_WRITE, + CSR_READ, + CSR_SET, + CSR_CLEAR, + // LSU functions + LD, + SD, + LW, + LWU, + SW, + LH, + LHU, + SH, + LB, + SB, + LBU, + // Atomic Memory Operations + AMO_LRW, + AMO_LRD, + AMO_SCW, + AMO_SCD, + AMO_SWAPW, + AMO_ADDW, + AMO_ANDW, + AMO_ORW, + AMO_XORW, + AMO_MAXW, + AMO_MAXWU, + AMO_MINW, + AMO_MINWU, + AMO_SWAPD, + AMO_ADDD, + AMO_ANDD, + AMO_ORD, + AMO_XORD, + AMO_MAXD, + AMO_MAXDU, + AMO_MIND, + AMO_MINDU, + // Multiplications + MUL, + MULH, + MULHU, + MULHSU, + MULW, + // Divisions + DIV, + DIVU, + DIVW, + DIVUW, + REM, + REMU, + REMW, + REMUW, + // Floating-Point Load and Store Instructions + FLD, + FLW, + FLH, + FLB, + FSD, + FSW, + FSH, + FSB, + // Floating-Point Computational Instructions + FADD, + FSUB, + FMUL, + FDIV, + FMIN_MAX, + FSQRT, + FMADD, + FMSUB, + FNMSUB, + FNMADD, + // Floating-Point Conversion and Move Instructions + FCVT_F2I, + FCVT_I2F, + FCVT_F2F, + FSGNJ, + FMV_F2X, + FMV_X2F, + // Floating-Point Compare Instructions + FCMP, + // Floating-Point Classify Instruction + FCLASS, + // Vectorial Floating-Point Instructions that don't directly map onto the scalar ones + VFMIN, + 
VFMAX, + VFSGNJ, + VFSGNJN, + VFSGNJX, + VFEQ, + VFNE, + VFLT, + VFGE, + VFLE, + VFGT, + VFCPKAB_S, + VFCPKCD_S, + VFCPKAB_D, + VFCPKCD_D, + // Offload Instructions to be directed into cv_x_if + OFFLOAD, + // Or-Combine and REV8 + ORCB, + REV8, + // Bitwise Rotation + ROL, + ROLW, + ROR, + RORI, + RORIW, + RORW, + // Sign and Zero Extend + SEXTB, + SEXTH, + ZEXTH, + // Count population + CPOP, + CPOPW, + // Count Leading/Trailing Zeros + CLZ, + CLZW, + CTZ, + CTZW, + // Carry less multiplication Op's + CLMUL, + CLMULH, + CLMULR, + // Single bit instructions Op's + BCLR, + BCLRI, + BEXT, + BEXTI, + BINV, + BINVI, + BSET, + BSETI, + // Integer minimum/maximum + MAX, + MAXU, + MIN, + MINU, + // Shift with Add Unsigned Word and Unsigned Word Op's (Bitmanip) + SH1ADDUW, + SH2ADDUW, + SH3ADDUW, + ADDUW, + SLLIUW, + // Shift with Add (Bitmanip) + SH1ADD, + SH2ADD, + SH3ADD, + // Bitmanip Logical with negate op (Bitmanip) + ANDN, + ORN, + XNOR, + // Accelerator operations + ACCEL_OP, + ACCEL_OP_FS1, + ACCEL_OP_FD, + ACCEL_OP_LOAD, + ACCEL_OP_STORE, + // Zicond instruction + CZERO_EQZ, + CZERO_NEZ + } fu_op; + + typedef struct packed { + fu_t fu; + fu_op operation; + riscv::xlen_t operand_a; + riscv::xlen_t operand_b; + riscv::xlen_t imm; + logic [TRANS_ID_BITS-1:0] trans_id; + } fu_data_t; + + function automatic logic op_is_branch(input fu_op op); + unique case (op) inside + EQ, NE, LTS, GES, LTU, GEU: return 1'b1; + default: return 1'b0; // all other ops + endcase + endfunction + + // ------------------------------- + // Extract Src/Dst FP Reg from Op + // ------------------------------- + // function used in instr_trace svh + // is_rs1_fpr function is kept to allow cva6 compilation with instr_trace feature + function automatic logic is_rs1_fpr(input fu_op op); + unique case (op) inside + [FMUL : FNMADD], // Computational Operations (except ADD/SUB) + FCVT_F2I, // Float-Int Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_F2X, // FPR-GPR Moves +
FCMP, // Comparisons + FCLASS, // Classifications + [VFMIN : VFCPKCD_D], // Additional Vectorial FP ops + ACCEL_OP_FS1: + return 1'b1; // Accelerator instructions + default: return 1'b0; // all other ops + endcase + endfunction + + // function used in instr_trace svh + // is_rs2_fpr function is kept to allow cva6 compilation with instr_trace feature + function automatic logic is_rs2_fpr(input fu_op op); + unique case (op) inside + [FSD : FSB], // FP Stores + [FADD : FMIN_MAX], // Computational Operations (no sqrt) + [FMADD : FNMADD], // Fused Computational Operations + FCVT_F2F, // Vectorial F2F Conversions require target + [FSGNJ : FMV_F2X], // Sign Injections and moves mapped to SGNJ + FCMP, // Comparisons + [VFMIN : VFCPKCD_D]: + return 1'b1; // Additional Vectorial FP ops + default: return 1'b0; // all other ops + endcase + endfunction + + // function used in instr_trace svh + // is_imm_fpr function is kept to allow cva6 compilation with instr_trace feature + // ternary operations encode the rs3 address in the imm field, also add/sub + function automatic logic is_imm_fpr(input fu_op op); + unique case (op) inside + [FADD : FSUB], // ADD/SUB need inputs as Operand B/C + [FMADD : FNMADD], // Fused Computational Operations + [VFCPKAB_S : VFCPKCD_D]: + return 1'b1; // Vectorial FP cast and pack ops + default: return 1'b0; // all other ops + endcase + endfunction + + // function used in instr_trace svh + // is_rd_fpr function is kept to allow cva6 compilation with instr_trace feature + function automatic logic is_rd_fpr(input fu_op op); + unique case (op) inside + [FLD : FLB], // FP Loads + [FADD : FNMADD], // Computational Operations + FCVT_I2F, // Int-Float Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_X2F, // GPR-FPR Moves + [VFMIN : VFSGNJX], // Vectorial MIN/MAX and SGNJ + [VFCPKAB_S : VFCPKCD_D], // Vectorial FP cast and pack ops + ACCEL_OP_FD: + return 1'b1; // Accelerator instructions + default: return 1'b0; // all other ops +
endcase + endfunction + + function automatic logic is_amo(fu_op op); + case (op) inside + [AMO_LRW : AMO_MINDU]: begin + return 1'b1; + end + default: return 1'b0; + endcase + endfunction + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] vaddr; + logic overflow; + riscv::xlen_t data; + logic [(riscv::XLEN/8)-1:0] be; + fu_t fu; + fu_op operation; + logic [TRANS_ID_BITS-1:0] trans_id; + } lsu_ctrl_t; + + // --------------- + // IF/ID Stage + // --------------- + // store the decompressed instruction + typedef struct packed { + logic [riscv::VLEN-1:0] address; // the address of the instructions from below + logic [31:0] instruction; // instruction word + branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path + exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions + } fetch_entry_t; + + // --------------- + // ID/EX/WB Stage + // --------------- + + localparam RVFI = cva6_config_pkg::CVA6ConfigRvfiTrace; + + typedef struct packed { + logic [riscv::VLEN-1:0] pc; // PC of instruction + logic [TRANS_ID_BITS-1:0] trans_id; // this can potentially be simplified, we could index the scoreboard entry + // with the transaction id in any case make the width more generic + fu_t fu; // functional unit to use + fu_op op; // operation to perform in each functional unit + logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1 + logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2 + logic [REG_ADDR_SIZE-1:0] rd; // register destination address + riscv::xlen_t result; // for unfinished instructions this field also holds the immediate, + // for unfinished floating-point that are partly encoded in rs2, this field also holds rs2 + // for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB) + // this field holds the address of the third operand from the floating-point register file + logic valid; // is the result 
valid + logic use_imm; // should we use the immediate as operand b? + logic use_zimm; // use zimm as operand a + logic use_pc; // set if we need to use the PC as operand a, PC from exception + exception_t ex; // exception has occurred + branchpredict_sbe_t bp; // branch predict scoreboard data structure + logic is_compressed; // signals a compressed instruction, we need this information at the commit stage if + // we want to jump accordingly e.g.: +4, +2 + logic vfp; // is this a vector floating-point instruction? + } scoreboard_entry_t; + + // --------------- + // MMU instantiation + // --------------- + localparam bit MMU_PRESENT = cva6_config_pkg::CVA6ConfigMmuPresent; + + localparam int unsigned INSTR_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigInstrTlbEntries; + localparam int unsigned DATA_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigDataTlbEntries; + + // ------------------- + // Performance counter + // ------------------- + localparam bit PERF_COUNTER_EN = cva6_config_pkg::CVA6ConfigPerfCounterEn; + localparam int unsigned MHPMCounterNum = 6; + + // -------------------- + // Atomics + // -------------------- + typedef enum logic [3:0] { + AMO_NONE = 4'b0000, + AMO_LR = 4'b0001, + AMO_SC = 4'b0010, + AMO_SWAP = 4'b0011, + AMO_ADD = 4'b0100, + AMO_AND = 4'b0101, + AMO_OR = 4'b0110, + AMO_XOR = 4'b0111, + AMO_MAX = 4'b1000, + AMO_MAXU = 4'b1001, + AMO_MIN = 4'b1010, + AMO_MINU = 4'b1011, + AMO_CAS1 = 4'b1100, // unused, not part of riscv spec, but provided in OpenPiton + AMO_CAS2 = 4'b1101 // unused, not part of riscv spec, but provided in OpenPiton + } amo_t; + + typedef struct packed { + logic valid; // valid flag + logic is_2M; // + logic is_1G; // + logic [27-1:0] vpn; // VPN (39bits) = 27bits + 12bits offset + logic [ASID_WIDTH-1:0] asid; + riscv::pte_t content; + } tlb_update_t; + + // Bits required for representation of physical address space as 4K pages + // (e.g. 27*4K == 39bit address space).
+ localparam PPN4K_WIDTH = 38; + + typedef struct packed { + logic valid; // valid flag + logic is_4M; // + logic [20-1:0] vpn; //VPN (32bits) = 20bits + 12bits offset + logic [9-1:0] asid; //ASID length = 9 for Sv32 mmu + riscv::pte_sv32_t content; + } tlb_update_sv32_t; + + typedef enum logic [1:0] { + FE_NONE, + FE_INSTR_ACCESS_FAULT, + FE_INSTR_PAGE_FAULT + } frontend_exception_t; + + // ---------------------- + // cache request ports + // ---------------------- + // I$ address translation requests + typedef struct packed { + logic fetch_valid; // address translation valid + logic [riscv::PLEN-1:0] fetch_paddr; // physical address in + exception_t fetch_exception; // exception occurred during fetch + } icache_areq_t; + + typedef struct packed { + logic fetch_req; // address translation request + logic [riscv::VLEN-1:0] fetch_vaddr; // virtual address out + } icache_arsp_t; + + // I$ data requests + typedef struct packed { + logic req; // we request a new word + logic kill_s1; // kill the current request + logic kill_s2; // kill the last request + logic spec; // request is speculative + logic [riscv::VLEN-1:0] vaddr; // 1st cycle: 12 bit index is taken for lookup + } icache_dreq_t; + + typedef struct packed { + logic ready; // icache is ready + logic valid; // signals a valid read + logic [FETCH_WIDTH-1:0] data; // 2+ cycle out: tag + logic [FETCH_USER_WIDTH-1:0] user; // User bits + logic [riscv::VLEN-1:0] vaddr; // virtual address out + exception_t ex; // we've encountered an exception + } icache_drsp_t; + + // AMO request going to cache. this request is unconditionally valid as soon + // as request goes high. + // Furthermore, those signals are kept stable until the response indicates + // completion by asserting ack. 
+ typedef struct packed { + logic req; // this request is valid + amo_t amo_op; // atomic memory operation to perform + logic [1:0] size; // 2'b10 --> word operation, 2'b11 --> double word operation + logic [63:0] operand_a; // address + logic [63:0] operand_b; // data as layouted in the register + } amo_req_t; + + // AMO response coming from cache. + typedef struct packed { + logic ack; // response is valid + logic [63:0] result; // sign-extended, result + } amo_resp_t; + + // D$ data requests + typedef struct packed { + logic [DCACHE_INDEX_WIDTH-1:0] address_index; + logic [DCACHE_TAG_WIDTH-1:0] address_tag; + riscv::xlen_t data_wdata; + logic [DCACHE_USER_WIDTH-1:0] data_wuser; + logic data_req; + logic data_we; + logic [(riscv::XLEN/8)-1:0] data_be; + logic [1:0] data_size; + logic [DCACHE_TID_WIDTH-1:0] data_id; + logic kill_req; + logic tag_valid; + } dcache_req_i_t; + + typedef struct packed { + logic data_gnt; + logic data_rvalid; + logic [DCACHE_TID_WIDTH-1:0] data_rid; + riscv::xlen_t data_rdata; + logic [DCACHE_USER_WIDTH-1:0] data_ruser; + } dcache_req_o_t; + + // ---------------------- + // Arithmetic Functions + // ---------------------- + function automatic riscv::xlen_t sext32(logic [31:0] operand); + return {{riscv::XLEN - 32{operand[31]}}, operand[31:0]}; + endfunction + + // ---------------------- + // Immediate functions + // ---------------------- + function automatic logic [riscv::VLEN-1:0] uj_imm(logic [31:0] instruction_i); + return { + {44 + riscv::VLEN - 64{instruction_i[31]}}, + instruction_i[19:12], + instruction_i[20], + instruction_i[30:21], + 1'b0 + }; + endfunction + + function automatic logic [riscv::VLEN-1:0] i_imm(logic [31:0] instruction_i); + return {{52 + riscv::VLEN - 64{instruction_i[31]}}, instruction_i[31:20]}; + endfunction + + function automatic logic [riscv::VLEN-1:0] sb_imm(logic [31:0] instruction_i); + return { + {51 + riscv::VLEN - 64{instruction_i[31]}}, + instruction_i[31], + instruction_i[7], + 
instruction_i[30:25], + instruction_i[11:8], + 1'b0 + }; + endfunction + + // ---------------------- + // LSU Functions + // ---------------------- + // align data to address e.g.: shift data to be naturally 64 + function automatic riscv::xlen_t data_align(logic [2:0] addr, logic [63:0] data); + // Set addr[2] to 1'b0 when 32bits + logic [ 2:0] addr_tmp = {(addr[2] && riscv::IS_XLEN64), addr[1:0]}; + logic [63:0] data_tmp = {64{1'b0}}; + case (addr_tmp) + 3'b000: data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-1:0]}; + 3'b001: + data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-9:0], data[riscv::XLEN-1:riscv::XLEN-8]}; + 3'b010: + data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-17:0], data[riscv::XLEN-1:riscv::XLEN-16]}; + 3'b011: + data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-25:0], data[riscv::XLEN-1:riscv::XLEN-24]}; + 3'b100: data_tmp = {data[31:0], data[63:32]}; + 3'b101: data_tmp = {data[23:0], data[63:24]}; + 3'b110: data_tmp = {data[15:0], data[63:16]}; + 3'b111: data_tmp = {data[7:0], data[63:8]}; + endcase + return data_tmp[riscv::XLEN-1:0]; + endfunction + + // generate byte enable mask + function automatic logic [7:0] be_gen(logic [2:0] addr, logic [1:0] size); + case (size) + 2'b11: begin + return 8'b1111_1111; + end + 2'b10: begin + case (addr[2:0]) + 3'b000: return 8'b0000_1111; + 3'b001: return 8'b0001_1110; + 3'b010: return 8'b0011_1100; + 3'b011: return 8'b0111_1000; + 3'b100: return 8'b1111_0000; + default: ; // Do nothing + endcase + end + 2'b01: begin + case (addr[2:0]) + 3'b000: return 8'b0000_0011; + 3'b001: return 8'b0000_0110; + 3'b010: return 8'b0000_1100; + 3'b011: return 8'b0001_1000; + 3'b100: return 8'b0011_0000; + 3'b101: return 8'b0110_0000; + 3'b110: return 8'b1100_0000; + default: ; // Do nothing + endcase + end + 2'b00: begin + case (addr[2:0]) + 3'b000: return 8'b0000_0001; + 3'b001: return 8'b0000_0010; + 3'b010: return 8'b0000_0100; + 3'b011: return 8'b0000_1000; + 3'b100: return 8'b0001_0000; + 3'b101: return 8'b0010_0000; + 3'b110: 
return 8'b0100_0000; + 3'b111: return 8'b1000_0000; + endcase + end + endcase + return 8'b0; + endfunction + + function automatic logic [3:0] be_gen_32(logic [1:0] addr, logic [1:0] size); + case (size) + 2'b10: begin + return 4'b1111; + end + 2'b01: begin + case (addr[1:0]) + 2'b00: return 4'b0011; + 2'b01: return 4'b0110; + 2'b10: return 4'b1100; + default: ; // Do nothing + endcase + end + 2'b00: begin + case (addr[1:0]) + 2'b00: return 4'b0001; + 2'b01: return 4'b0010; + 2'b10: return 4'b0100; + 2'b11: return 4'b1000; + endcase + end + default: return 4'b0; + endcase + return 4'b0; + endfunction + + // ---------------------- + // Extract Bytes from Op + // ---------------------- + function automatic logic [1:0] extract_transfer_size(fu_op op); + case (op) + LD, SD, FLD, FSD, + AMO_LRD, AMO_SCD, + AMO_SWAPD, AMO_ADDD, + AMO_ANDD, AMO_ORD, + AMO_XORD, AMO_MAXD, + AMO_MAXDU, AMO_MIND, + AMO_MINDU: begin + return 2'b11; + end + LW, LWU, SW, FLW, FSW, + AMO_LRW, AMO_SCW, + AMO_SWAPW, AMO_ADDW, + AMO_ANDW, AMO_ORW, + AMO_XORW, AMO_MAXW, + AMO_MAXWU, AMO_MINW, + AMO_MINWU: begin + return 2'b10; + end + LH, LHU, SH, FLH, FSH: return 2'b01; + LB, LBU, SB, FLB, FSB: return 2'b00; + default: return 2'b11; + endcase + endfunction +endpackage diff --git a/test/type_param/core/include/config_pkg.sv b/test/type_param/core/include/config_pkg.sv new file mode 100644 index 0000000..90d6bfe --- /dev/null +++ b/test/type_param/core/include/config_pkg.sv @@ -0,0 +1,181 @@ +// Copyright 2023 Thales DIS France SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Jean-Roch COULON - Thales + +package config_pkg; + + // --------------- + // Global Config + // --------------- + localparam int unsigned ILEN = 32; + localparam int unsigned NRET = 1; + + /// The NoC type is a top-level parameter, hence we need a bit more + /// information on what protocol those type parameters are supporting. + /// Currently two values are supported: + typedef enum { + /// The "classic" AXI4 protocol. + NOC_TYPE_AXI4_ATOP, + /// In the OpenPiton setting the WT cache is connected to the L15. + NOC_TYPE_L15_BIG_ENDIAN, + NOC_TYPE_L15_LITTLE_ENDIAN + } noc_type_e; + + /// Cache type parameter + typedef enum logic [1:0] { + WB = 0, + WT = 1, + HPDCACHE = 2 + } cache_type_t; + + localparam NrMaxRules = 16; + + typedef struct packed { + /// Number of commit ports, i.e., maximum number of instructions that the + /// core can retire per cycle. It can be beneficial to have more commit + /// ports than issue ports, for the scoreboard to empty out in case one + /// instruction stalls a little longer. + int unsigned NrCommitPorts; + /// AXI parameters.
+ int unsigned AxiAddrWidth; + int unsigned AxiDataWidth; + int unsigned AxiIdWidth; + int unsigned AxiUserWidth; + int unsigned NrLoadBufEntries; + bit FpuEn; + bit XF16; + bit XF16ALT; + bit XF8; + bit RVA; + bit RVB; + bit RVV; + bit RVC; + bit RVZCB; + bit XFVec; + bit CvxifEn; + bit ZiCondExtEn; + // Calculated + bit RVF; + bit RVD; + bit FpPresent; + bit NSX; + int unsigned FLen; + bit RVFVec; + bit XF16Vec; + bit XF16ALTVec; + bit XF8Vec; + int unsigned NrRgprPorts; + int unsigned NrWbPorts; + bit EnableAccelerator; + bit RVS; //Supervisor mode + bit RVU; //User mode + // Debug Module + // address to which a hart should jump when it was requested to halt + logic [63:0] HaltAddress; + logic [63:0] ExceptionAddress; + /// Return address stack depth, good values are around 2 to 4. + int unsigned RASDepth; + /// Branch target buffer entries. + int unsigned BTBEntries; + /// Branch history (2-bit saturation counter) size, to keep track of + /// branch otucomes. + int unsigned BHTEntries; + /// Offset of the debug module. + logic [63:0] DmBaseAddress; + /// Number of PMP entries. + int unsigned NrPMPEntries; + /// Physical Memory Protection (PMP) CSR reset values and read-only bits + logic [15:0][63:0] PMPCfgRstVal; + logic [15:0][63:0] PMPAddrRstVal; + bit [15:0] PMPEntryReadOnly; + /// Set to the bus type in use. + noc_type_e NOCType; + /// Physical Memory Attributes (PMAs) + /// Number of non idempotent rules. + int unsigned NrNonIdempotentRules; + /// Base which needs to match. + logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase; + /// Bit mask which bits to consider when matching the rule. + logic [NrMaxRules-1:0][63:0] NonIdempotentLength; + /// Number of regions which have execute property. + int unsigned NrExecuteRegionRules; + /// Base which needs to match. + logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase; + /// Bit mask which bits to consider when matching the rule. 
+ logic [NrMaxRules-1:0][63:0] ExecuteRegionLength; + /// Number of regions which have cached property. + int unsigned NrCachedRegionRules; + /// Base which needs to match. + logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase; + /// Bit mask which bits to consider when matching the rule. + logic [NrMaxRules-1:0][63:0] CachedRegionLength; + /// Maximum number of outstanding stores. + int unsigned MaxOutstandingStores; + bit DebugEn; + bit NonIdemPotenceEn; + bit AxiBurstWriteEn; + } cva6_cfg_t; + + + /// Empty configuration to sanity check proper parameter passing. Whenever + /// you develop a module that resides within the core, assign this constant. + localparam cva6_cfg_t cva6_cfg_empty = '0; + + + /// Utility function being called to check parameters. Not all values make + /// sense for all parameters, here is the place to sanity check them. + function automatic void check_cfg(cva6_cfg_t Cfg); + // pragma translate_off +`ifndef VERILATOR + assert (Cfg.RASDepth > 0); + assert (2 ** $clog2(Cfg.BTBEntries) == Cfg.BTBEntries); + assert (2 ** $clog2(Cfg.BHTEntries) == Cfg.BHTEntries); + assert (Cfg.NrNonIdempotentRules <= NrMaxRules); + assert (Cfg.NrExecuteRegionRules <= NrMaxRules); + assert (Cfg.NrCachedRegionRules <= NrMaxRules); + assert (Cfg.NrPMPEntries <= 16); +`endif + // pragma translate_on + endfunction + + function automatic logic range_check(logic [63:0] base, logic [63:0] len, logic [63:0] address); + // if len is a power of two, and base is properly aligned, this check could be simplified + // Extend base by one bit to prevent an overflow. 
+ return (address >= base) && (({1'b0, address}) < (65'(base) + len)); + endfunction : range_check + + + function automatic logic is_inside_nonidempotent_regions(cva6_cfg_t Cfg, logic [63:0] address); + logic [NrMaxRules-1:0] pass; + pass = '0; + for (int unsigned k = 0; k < Cfg.NrNonIdempotentRules; k++) begin + pass[k] = range_check(Cfg.NonIdempotentAddrBase[k], Cfg.NonIdempotentLength[k], address); + end + return |pass; + endfunction : is_inside_nonidempotent_regions + + function automatic logic is_inside_execute_regions(cva6_cfg_t Cfg, logic [63:0] address); + // if we don't specify any region we assume everything is accessible + logic [NrMaxRules-1:0] pass; + pass = '0; + for (int unsigned k = 0; k < Cfg.NrExecuteRegionRules; k++) begin + pass[k] = range_check(Cfg.ExecuteRegionAddrBase[k], Cfg.ExecuteRegionLength[k], address); + end + return |pass; + endfunction : is_inside_execute_regions + + function automatic logic is_inside_cacheable_regions(cva6_cfg_t Cfg, logic [63:0] address); + automatic logic [NrMaxRules-1:0] pass; + pass = '0; + for (int unsigned k = 0; k < Cfg.NrCachedRegionRules; k++) begin + pass[k] = range_check(Cfg.CachedRegionAddrBase[k], Cfg.CachedRegionLength[k], address); + end + return |pass; + endfunction : is_inside_cacheable_regions + +endpackage diff --git a/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv b/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv new file mode 100644 index 0000000..ec4db64 --- /dev/null +++ b/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv @@ -0,0 +1,150 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Jean-Roch COULON - Thales + + +package cva6_config_pkg; + + localparam CVA6ConfigXlen = 64; + + localparam CVA6ConfigFpuEn = 1; + localparam CVA6ConfigF16En = 0; + localparam CVA6ConfigF16AltEn = 0; + localparam CVA6ConfigF8En = 0; + localparam CVA6ConfigFVecEn = 0; + + localparam CVA6ConfigCvxifEn = 1; + localparam CVA6ConfigCExtEn = 1; + localparam CVA6ConfigZcbExtEn = 1; + localparam CVA6ConfigAExtEn = 1; + localparam CVA6ConfigBExtEn = 1; + localparam CVA6ConfigVExtEn = 0; + localparam CVA6ConfigZiCondExtEn = 1; + + localparam CVA6ConfigAxiIdWidth = 4; + localparam CVA6ConfigAxiAddrWidth = 64; + localparam CVA6ConfigAxiDataWidth = 64; + localparam CVA6ConfigFetchUserEn = 0; + localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; + localparam CVA6ConfigDataUserEn = 0; + localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; + + localparam CVA6ConfigIcacheByteSize = 16384; + localparam CVA6ConfigIcacheSetAssoc = 4; + localparam CVA6ConfigIcacheLineWidth = 128; + localparam CVA6ConfigDcacheByteSize = 32768; + localparam CVA6ConfigDcacheSetAssoc = 8; + localparam CVA6ConfigDcacheLineWidth = 128; + + localparam CVA6ConfigDcacheIdWidth = 1; + localparam CVA6ConfigMemTidWidth = 2; + + localparam CVA6ConfigWtDcacheWbufDepth = 8; + + localparam CVA6ConfigNrCommitPorts = 2; + localparam CVA6ConfigNrScoreboardEntries = 8; + + localparam CVA6ConfigFPGAEn = 0; + + localparam CVA6ConfigNrLoadPipeRegs = 1; + localparam CVA6ConfigNrStorePipeRegs = 0; + localparam CVA6ConfigNrLoadBufEntries = 2; + + localparam CVA6ConfigInstrTlbEntries = 16; + localparam CVA6ConfigDataTlbEntries = 16; + + localparam CVA6ConfigRASDepth = 2; + localparam CVA6ConfigBTBEntries = 32; + localparam CVA6ConfigBHTEntries = 128; + + localparam CVA6ConfigNrPMPEntries = 8; + + localparam CVA6ConfigPerfCounterEn = 1; + + localparam config_pkg::cache_type_t 
CVA6ConfigDcacheType = config_pkg::WT; + + localparam CVA6ConfigMmuPresent = 1; + + localparam CVA6ConfigRvfiTrace = 1; + + localparam config_pkg::cva6_cfg_t cva6_cfg = '{ + NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), + AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), + AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), + AxiIdWidth: unsigned'(CVA6ConfigAxiIdWidth), + AxiUserWidth: unsigned'(CVA6ConfigDataUserWidth), + NrLoadBufEntries: unsigned'(CVA6ConfigNrLoadBufEntries), + FpuEn: bit'(CVA6ConfigFpuEn), + XF16: bit'(CVA6ConfigF16En), + XF16ALT: bit'(CVA6ConfigF16AltEn), + XF8: bit'(CVA6ConfigF8En), + RVA: bit'(CVA6ConfigAExtEn), + RVB: bit'(CVA6ConfigBExtEn), + RVV: bit'(CVA6ConfigVExtEn), + RVC: bit'(CVA6ConfigCExtEn), + RVZCB: bit'(CVA6ConfigZcbExtEn), + XFVec: bit'(CVA6ConfigFVecEn), + CvxifEn: bit'(CVA6ConfigCvxifEn), + ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), + // Extended: fields marked "Calculated" in config_pkg::cva6_cfg_t (RVF..EnableAccelerator are zeroed here) + RVF: + bit'( + 0 + ), + RVD: bit'(0), + FpPresent: bit'(0), + NSX: bit'(0), + FLen: unsigned'(0), + RVFVec: bit'(0), + XF16Vec: bit'(0), + XF16ALTVec: bit'(0), + XF8Vec: bit'(0), + NrRgprPorts: unsigned'(0), + NrWbPorts: unsigned'(0), + EnableAccelerator: bit'(0), + RVS: bit'(1), + RVU: bit'(1), + HaltAddress: 64'h800, + ExceptionAddress: 64'h808, + RASDepth: unsigned'(CVA6ConfigRASDepth), + BTBEntries: unsigned'(CVA6ConfigBTBEntries), + BHTEntries: unsigned'(CVA6ConfigBHTEntries), + DmBaseAddress: 64'h0, + NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), + PMPCfgRstVal: {16{64'h0}}, + PMPAddrRstVal: {16{64'h0}}, + PMPEntryReadOnly: 16'd0, + NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, + // non-idempotent regions (two rules, both with base 0 and length 0) + NrNonIdempotentRules: + unsigned'( + 2 + ), + NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), + NonIdempotentLength: 1024'({64'b0, 64'b0}), + NrExecuteRegionRules: unsigned'(3), + // DRAM, Boot ROM, Debug Module (concatenation order: rule index 2 down to 
0) + ExecuteRegionAddrBase: + 1024'( + {64'h8000_0000, 64'h1_0000, 64'h0} + ), + ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), + // cached region 
+ NrCachedRegionRules: + unsigned'( + 1 + ), + CachedRegionAddrBase: 1024'({64'h8000_0000}), + CachedRegionLength: 1024'({64'h40000000}), + MaxOutstandingStores: unsigned'(7), + DebugEn: bit'(1), + NonIdemPotenceEn: bit'(0), + AxiBurstWriteEn: bit'(0) + }; + +endpackage diff --git a/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv b/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv new file mode 100644 index 0000000..1abe537 --- /dev/null +++ b/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv @@ -0,0 +1,123 @@ +// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies +// Alternatives (CEA) +// +// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Authors: Cesar Fuguet +// Date: February, 2023 +// Description: +// Default package with parameters for the HPDcache in a CVA6 platform. +// Users can copy this file, rename it, and adapt the configuration values as +// needed. 
+ +package hpdcache_params_pkg; + // Imports from the CVA6 configuration package + // {{{ + import cva6_config_pkg::CVA6ConfigXlen; + import cva6_config_pkg::CVA6ConfigDcacheByteSize; + import cva6_config_pkg::CVA6ConfigDcacheSetAssoc; + import cva6_config_pkg::CVA6ConfigDcacheLineWidth; + import cva6_config_pkg::CVA6ConfigDcacheIdWidth; + import cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth; + import cva6_config_pkg::CVA6ConfigNrLoadBufEntries; + // }}} + + // Definition of constants used only in this file + // {{{ + localparam int unsigned __BYTES_PER_WAY = CVA6ConfigDcacheByteSize / CVA6ConfigDcacheSetAssoc; + + localparam int unsigned __BYTES_PER_CACHELINE = CVA6ConfigDcacheLineWidth / 8; + // }}} + + // Definition of global constants for the HPDcache data and directory + // {{{ + // HPDcache physical address width (in bits) + localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN; + + // HPDcache number of sets + localparam int unsigned PARAM_SETS = __BYTES_PER_WAY / __BYTES_PER_CACHELINE; + + // HPDcache number of ways + localparam int unsigned PARAM_WAYS = CVA6ConfigDcacheSetAssoc; + + // HPDcache word width (bits) + localparam int unsigned PARAM_WORD_WIDTH = CVA6ConfigXlen; + + // HPDcache cache-line width (in words of PARAM_WORD_WIDTH bits) + localparam int unsigned PARAM_CL_WORDS = CVA6ConfigDcacheLineWidth / PARAM_WORD_WIDTH; + + // HPDcache number of words in the request data channels (request and response) + localparam int unsigned PARAM_REQ_WORDS = 1; + + // HPDcache request transaction ID width (bits) + localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = CVA6ConfigDcacheIdWidth; + + // HPDcache request source ID width (bits) + localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = 3; + // }}} + + // Definition of constants and types for HPDcache data memory + // {{{ + localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = 128 / PARAM_WORD_WIDTH; + localparam int unsigned PARAM_DATA_SETS_PER_RAM = PARAM_SETS; + + // Whether the HPDcache DATA RAM macros 
implement: + // - Write byte enable 
(1'b1) + // - Write bit mask (1'b0) + localparam bit PARAM_DATA_RAM_WBYTEENABLE = 1'b1; + + // Define the number of contiguous memory words that can be accessed + // simultaneously from the cache. + // - This limits the maximum width for the data channel from requesters + // - This impacts the refill latency (more ACCESS_WORDS -> less REFILL LATENCY) + localparam int unsigned PARAM_ACCESS_WORDS = PARAM_CL_WORDS / 2; + // }}} + + // Definition of constants and types for the Miss Status Holding Register (MSHR) + // {{{ + // HPDcache MSHR number of sets + localparam int unsigned PARAM_MSHR_SETS = 2; + + // HPDcache MSHR number of ways + localparam int unsigned PARAM_MSHR_WAYS = (CVA6ConfigNrLoadBufEntries > 4) ? 4 : 2; + + // HPDcache MSHR number of ways in the same SRAM word + localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = (PARAM_MSHR_WAYS > 1) ? 2 : 1; + + // HPDcache MSHR number of sets in the same SRAM + localparam int unsigned PARAM_MSHR_SETS_PER_RAM = PARAM_MSHR_SETS; + + // Whether the HPDcache MSHR RAM implements: + // - Write byte enable (1'b1) + // - Write bit mask (1'b0) + localparam bit PARAM_MSHR_RAM_WBYTEENABLE = 1'b1; + + // Whether the HPDcache MSHR uses FFs or SRAM (regbank selected when sets * ways <= 16) + localparam bit PARAM_MSHR_USE_REGBANK = (PARAM_MSHR_SETS * PARAM_MSHR_WAYS) <= 16; + localparam bit PARAM_REFILL_CORE_RSP_FEEDTHROUGH = 1'b1; + // }}} + + // Definition of constants and types for the Write Buffer (WBUF) + // {{{ + // HPDcache Write-Buffer number of entries in the directory + localparam int unsigned PARAM_WBUF_DIR_ENTRIES = CVA6ConfigWtDcacheWbufDepth; + + // HPDcache Write-Buffer number of entries in the data buffer + localparam int unsigned PARAM_WBUF_DATA_ENTRIES = CVA6ConfigWtDcacheWbufDepth; + + // HPDcache Write-Buffer number of words per entry + localparam int unsigned PARAM_WBUF_WORDS = PARAM_REQ_WORDS; + + // HPDcache Write-Buffer threshold counter width (in bits) + localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = 3; + 
localparam bit PARAM_WBUF_SEND_FEEDTHROUGH 
= 1'b0; + // }}} + + // Definition of constants and types for the Replay Table (RTAB) + // {{{ + localparam int PARAM_RTAB_ENTRIES = 4; + // }}} +endpackage diff --git a/test/type_param/core/include/cvxif_pkg.sv b/test/type_param/core/include/cvxif_pkg.sv new file mode 100644 index 0000000..39e77b4 --- /dev/null +++ b/test/type_param/core/include/cvxif_pkg.sv @@ -0,0 +1,110 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Guillaume CHAUVON (guillaume.chauvon@thalesgroup.com) + +// Package for the CoreV-X-Interface for the CVA6 + +package cvxif_pkg; + + localparam X_DATAWIDTH = riscv::XLEN; + localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS; //2 or 3 + localparam X_ID_WIDTH = ariane_pkg::TRANS_ID_BITS; + localparam X_MEM_WIDTH = 64; + localparam X_RFR_WIDTH = riscv::XLEN; + localparam X_RFW_WIDTH = riscv::XLEN; + + typedef struct packed { + logic [15:0] instr; + logic [1:0] mode; + logic [X_ID_WIDTH-1:0] id; + } x_compressed_req_t; + + typedef struct packed { + logic [31:0] instr; + logic accept; + } x_compressed_resp_t; + + typedef struct packed { + logic [31:0] instr; + logic [1:0] mode; + logic [X_ID_WIDTH-1:0] id; + logic [X_NUM_RS-1:0][X_RFR_WIDTH-1:0] rs; + logic [X_NUM_RS-1:0] rs_valid; + } x_issue_req_t; + + typedef struct packed { + logic accept; + logic writeback; + logic dualwrite; + logic dualread; + logic loadstore; + logic exc; + } x_issue_resp_t; + + typedef struct packed { + logic [X_ID_WIDTH-1:0] id; + logic x_commit_kill; + } x_commit_t; + + typedef struct packed { + logic [X_ID_WIDTH-1:0] id; + logic [31:0] addr; + logic [1:0] mode; + logic we; + logic [1:0] size; + logic [X_MEM_WIDTH-1:0] wdata; + logic last; + logic spec; + } x_mem_req_t; + 
+ typedef struct packed { + logic exc; + logic [5:0] exccode; + } x_mem_resp_t; + + typedef struct packed { + logic [X_ID_WIDTH-1:0] id; + logic [X_MEM_WIDTH-1:0] rdata; + logic err; + } x_mem_result_t; + + typedef struct packed { + logic [X_ID_WIDTH-1:0] id; + logic [X_RFW_WIDTH-1:0] data; + logic [4:0] rd; + logic we; + logic exc; + logic [5:0] exccode; + } x_result_t; + + typedef struct packed { + logic x_compressed_valid; + x_compressed_req_t x_compressed_req; + logic x_issue_valid; + x_issue_req_t x_issue_req; + logic x_commit_valid; + x_commit_t x_commit; + logic x_mem_ready; + x_mem_resp_t x_mem_resp; + logic x_mem_result_valid; + x_mem_result_t x_mem_result; + logic x_result_ready; + } cvxif_req_t; + + typedef struct packed { + logic x_compressed_ready; + x_compressed_resp_t x_compressed_resp; + logic x_issue_ready; + x_issue_resp_t x_issue_resp; + logic x_mem_valid; + x_mem_req_t x_mem_req; + logic x_result_valid; + x_result_t x_result; + } cvxif_resp_t; + +endpackage diff --git a/test/type_param/core/include/instr_tracer_pkg.sv b/test/type_param/core/include/instr_tracer_pkg.sv new file mode 100644 index 0000000..bd36f09 --- /dev/null +++ b/test/type_param/core/include/instr_tracer_pkg.sv @@ -0,0 +1,202 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 16.05.2017 +// Description: Instruction Tracer Defines + +`ifndef VERILATOR +package instr_tracer_pkg; + + parameter INSTR_NOP = 32'h00_00_00_13; + + parameter INSTR_LUI = {25'b?, riscv::OpcodeLui}; + parameter INSTR_AUIPC = {25'b?, riscv::OpcodeAuipc}; + parameter INSTR_JAL = {25'b?, riscv::OpcodeJal}; + parameter INSTR_JALR = {17'b?, 3'b000, 5'b?, riscv::OpcodeJalr}; + // BRANCH + parameter INSTR_BEQZ = {7'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BEQ = {7'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BNEZ = {7'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BNE = {7'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BLTZ = {7'b?, 5'b0, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BLT = {7'b?, 5'b?, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BGEZ = {7'b?, 5'b0, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BGE = {7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BLTU = {7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch}; + parameter INSTR_BGEU = {7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch}; + + // OP-IMM + parameter INSTR_LI = {12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_ADDI = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_SLTI = {17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_SLTIU = {17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_XORI = {17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_ORI = {17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_ANDI = {17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_SLLI = {6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_SRLI = {6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm}; + parameter INSTR_SRAI = {6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm}; + + // OP-IMM-32 + parameter 
INSTR_ADDIW = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32}; + parameter INSTR_SLLIW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32}; + parameter INSTR_SRLIW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32}; + parameter INSTR_SRAIW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32}; + + // OP + parameter INSTR_ADD = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp}; + parameter INSTR_SUB = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp}; + parameter INSTR_SLL = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp}; + parameter INSTR_SLT = {7'b0000000, 10'b?, 3'b010, 5'b?, riscv::OpcodeOp}; + parameter INSTR_SLTU = {7'b0000000, 10'b?, 3'b011, 5'b?, riscv::OpcodeOp}; + parameter INSTR_XOR = {7'b0000000, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp}; + parameter INSTR_SRL = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp}; + parameter INSTR_SRA = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp}; + parameter INSTR_OR = {7'b0000000, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp}; + parameter INSTR_AND = {7'b0000000, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp}; + parameter INSTR_MUL = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp}; + + // OP32 + parameter INSTR_ADDW = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32}; + parameter INSTR_SUBW = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32}; + parameter INSTR_SLLW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp32}; + parameter INSTR_SRLW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32}; + parameter INSTR_SRAW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32}; + parameter INSTR_MULW = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32}; + + // MISC-MEM + parameter INSTR_FENCE = {4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem}; + parameter INSTR_FENCEI = {17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem}; + + // SYSTEM + parameter INSTR_CSRW = {12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem}; + parameter INSTR_CSRRW = {12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem}; + parameter INSTR_CSRR = {12'b?, 
5'b0, 3'b010, 5'b?, riscv::OpcodeSystem}; + parameter INSTR_CSRRS = {12'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeSystem}; + parameter INSTR_CSRS = {12'b?, 5'b?, 3'b010, 5'b0, riscv::OpcodeSystem}; + parameter INSTR_CSRRC = {12'b?, 5'b?, 3'b011, 5'b?, riscv::OpcodeSystem}; + parameter INSTR_CSRC = {12'b?, 5'b?, 3'b011, 5'b0, riscv::OpcodeSystem}; + + parameter INSTR_CSRWI = {17'b?, 3'b101, 5'b0, riscv::OpcodeSystem}; + parameter INSTR_CSRRWI = {17'b?, 3'b101, 5'b?, riscv::OpcodeSystem}; + parameter INSTR_CSRSI = {17'b?, 3'b110, 5'b0, riscv::OpcodeSystem}; + parameter INSTR_CSRRSI = {17'b?, 3'b110, 5'b?, riscv::OpcodeSystem}; + parameter INSTR_CSRCI = {17'b?, 3'b111, 5'b0, riscv::OpcodeSystem}; + parameter INSTR_CSRRCI = {17'b?, 3'b111, 5'b?, riscv::OpcodeSystem}; + + parameter INSTR_ECALL = {12'b000000000000, 13'b0, riscv::OpcodeSystem}; + parameter INSTR_EBREAK = {12'b000000000001, 13'b0, riscv::OpcodeSystem}; + parameter INSTR_MRET = {12'b001100000010, 13'b0, riscv::OpcodeSystem}; + parameter INSTR_SRET = {12'b000100000010, 13'b0, riscv::OpcodeSystem}; + parameter INSTR_DRET = {12'b011110110010, 13'b0, riscv::OpcodeSystem}; + parameter INSTR_WFI = {12'b000100000101, 13'b0, riscv::OpcodeSystem}; + parameter INSTR_SFENCE = {12'b0001001?????, 13'b?, riscv::OpcodeSystem}; + + // RV32M + parameter INSTR_PMUL = {7'b0000001, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp}; + parameter INSTR_DIV = {7'b0000001, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp}; + parameter INSTR_DIVU = {7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp}; + parameter INSTR_REM = {7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp}; + parameter INSTR_REMU = {7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp}; + + // RVFD + parameter INSTR_FMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd}; + parameter INSTR_FMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub}; + parameter INSTR_FNSMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub}; + parameter INSTR_FNMADD = {5'b?, 2'b?, 5'b?, 5'b?, 
3'b?, 5'b?, riscv::OpcodeNmadd}; + + parameter INSTR_FADD = {5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FSUB = {5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FMUL = {5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FDIV = {5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FSQRT = {5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FSGNJ = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FSGNJN = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FSGNJX = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FMIN = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FMAX = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FLE = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FLT = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FEQ = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp}; + + parameter INSTR_FCVT_F2F = {5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FMV_F2X = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FCLASS = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FMV_X2F = {5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FCVT_F2I = {5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + parameter INSTR_FCVT_I2F = {5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + + // A + parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo}; + + // Load/Stores + parameter [31:0] LB = 32'b?????????????????000?????0000011; + parameter [31:0] LH = 32'b?????????????????001?????0000011; + parameter [31:0] LW = 32'b?????????????????010?????0000011; + 
parameter [31:0] LD = 32'b?????????????????011?????0000011; + parameter [31:0] LBU = 32'b?????????????????100?????0000011; + parameter [31:0] LHU = 32'b?????????????????101?????0000011; + parameter [31:0] LWU = 32'b?????????????????110?????0000011; + parameter [31:0] FLW = 32'b?????????????????010?????0000111; + parameter [31:0] FLD = 32'b?????????????????011?????0000111; + parameter [31:0] FLQ = 32'b?????????????????100?????0000111; + parameter [31:0] SB = 32'b?????????????????000?????0100011; + parameter [31:0] SH = 32'b?????????????????001?????0100011; + parameter [31:0] SW = 32'b?????????????????010?????0100011; + parameter [31:0] SD = 32'b?????????????????011?????0100011; + parameter [31:0] FSW = 32'b?????????????????010?????0100111; + parameter [31:0] FSD = 32'b?????????????????011?????0100111; + parameter [31:0] FSQ = 32'b?????????????????100?????0100111; + parameter [31:0] C_ADDI4SPN = 32'b????????????????000???????????00; + parameter [31:0] C_FLD = 32'b????????????????001???????????00; + parameter [31:0] C_LW = 32'b????????????????010???????????00; + parameter [31:0] C_FLW = 32'b????????????????011???????????00; + parameter [31:0] C_FSD = 32'b????????????????101???????????00; + parameter [31:0] C_SW = 32'b????????????????110???????????00; + parameter [31:0] C_FSW = 32'b????????????????111???????????00; + parameter [31:0] C_ADDI = 32'b????????????????000???????????01; + parameter [31:0] C_JAL = 32'b????????????????001???????????01; + parameter [31:0] C_LI = 32'b????????????????010???????????01; + parameter [31:0] C_LUI = 32'b????????????????011???????????01; + parameter [31:0] C_SRLI = 32'b????????????????100?00????????01; + parameter [31:0] C_SRAI = 32'b????????????????100?01????????01; + parameter [31:0] C_ANDI = 32'b????????????????100?10????????01; + parameter [31:0] C_SUB = 32'b????????????????100011???00???01; + parameter [31:0] C_XOR = 32'b????????????????100011???01???01; + parameter [31:0] C_OR = 32'b????????????????100011???10???01; + parameter 
[31:0] C_AND = 32'b????????????????100011???11???01; + parameter [31:0] C_SUBW = 32'b????????????????100111???00???01; + parameter [31:0] C_ADDW = 32'b????????????????100111???01???01; + parameter [31:0] C_J = 32'b????????????????101???????????01; + parameter [31:0] C_BEQZ = 32'b????????????????110???????????01; + parameter [31:0] C_BNEZ = 32'b????????????????111???????????01; + parameter [31:0] C_SLLI = 32'b????????????????000???????????10; + parameter [31:0] C_FLDSP = 32'b????????????????001???????????10; + parameter [31:0] C_LWSP = 32'b????????????????010???????????10; + parameter [31:0] C_FLWSP = 32'b????????????????011???????????10; + parameter [31:0] C_MV = 32'b????????????????1000??????????10; + parameter [31:0] C_ADD = 32'b????????????????1001??????????10; + parameter [31:0] C_FSDSP = 32'b????????????????101???????????10; + parameter [31:0] C_SWSP = 32'b????????????????110???????????10; + parameter [31:0] C_FSWSP = 32'b????????????????111???????????10; + parameter [31:0] C_NOP = 32'b????????????????0000000000000001; + parameter [31:0] C_ADDI16SP = 32'b????????????????011?00010?????01; + parameter [31:0] C_JR = 32'b????????????????1000?????0000010; + parameter [31:0] C_JALR = 32'b????????????????1001?????0000010; + parameter [31:0] C_EBREAK = 32'b????????????????1001000000000010; + parameter [31:0] C_LD = 32'b????????????????011???????????00; + parameter [31:0] C_SD = 32'b????????????????111???????????00; + parameter [31:0] C_ADDIW = 32'b????????????????001???????????01; + parameter [31:0] C_LDSP = 32'b????????????????011???????????10; + parameter [31:0] C_SDSP = 32'b????????????????111???????????10; + +endpackage +`endif diff --git a/test/type_param/core/include/riscv_pkg.sv b/test/type_param/core/include/riscv_pkg.sv new file mode 100644 index 0000000..18ae2cf --- /dev/null +++ b/test/type_param/core/include/riscv_pkg.sv @@ -0,0 +1,851 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. 
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: riscv_pkg.sv
+ * Author: Florian Zaruba
+ * Date: 30.6.2017
+ *
+ * Description: Common RISC-V definitions.
+ */
+
+package riscv;  // types, opcodes, cause codes, CSR addresses and instruction-encoding helpers
+
+  // ----------------------
+  // Import cva6 config from cva6_config_pkg
+  // ----------------------
+  localparam XLEN = cva6_config_pkg::CVA6ConfigXlen;
+  localparam FPU_EN = cva6_config_pkg::CVA6ConfigFpuEn;
+
+  // ----------------------
+  // Data and Address length
+  // ----------------------
+  typedef enum logic [3:0] {
+    ModeOff = 0,
+    ModeSv32 = 1,
+    ModeSv39 = 8,
+    ModeSv48 = 9,
+    ModeSv57 = 10,
+    ModeSv64 = 11
+  } vm_mode_t;
+
+  // Warning: When using STD_CACHE, configuration must be PLEN=56 and VLEN=64
+  // Warning: VLEN must be greater than or equal to PLEN. NOTE(review): the XLEN == 32 values below (VLEN=32, PLEN=34) do not satisfy this -- confirm
+  localparam VLEN = (XLEN == 32) ? 32 : 64; // virtual address length
+  localparam PLEN = (XLEN == 32) ? 34 : 56; // physical address length
+
+  localparam IS_XLEN32 = (XLEN == 32) ? 1'b1 : 1'b0;
+  localparam IS_XLEN64 = (XLEN == 32) ? 1'b0 : 1'b1;
+  localparam ModeW = (XLEN == 32) ? 1 : 4; // width of satp_t.mode
+  localparam ASIDW = (XLEN == 32) ? 9 : 16; // width of satp_t.asid
+  localparam PPNW = (XLEN == 32) ? 22 : 44; // width of satp_t.ppn
+  localparam vm_mode_t MODE_SV = (XLEN == 32) ? ModeSv32 : ModeSv39;
+  localparam SV = (MODE_SV == ModeSv32) ? 32 : 39;
+  localparam VPN2 = (VLEN - 31 < 8) ? VLEN - 31 : 8;
+  localparam XLEN_ALIGN_BYTES = $clog2(XLEN / 8);
+
+  typedef logic [XLEN-1:0] xlen_t;
+
+  // --------------------
+  // Privilege Spec
+  // --------------------
+  typedef enum logic [1:0] {
+    PRIV_LVL_M = 2'b11,
+    PRIV_LVL_S = 2'b01,
+    PRIV_LVL_U = 2'b00
+  } priv_lvl_t;
+
+  // type which holds xlen
+  typedef enum logic [1:0] {
+    XLEN_32 = 2'b01,
+    XLEN_64 = 2'b10,
+    XLEN_128 = 2'b11
+  } xlen_e;
+
+  typedef enum logic [1:0] {
+    Off = 2'b00,
+    Initial = 2'b01,
+    Clean = 2'b10,
+    Dirty = 2'b11
+  } xs_t;
+
+  typedef struct packed {
+    logic sd; // signal dirty state - read-only
+    logic [62:34] wpri6; // writes preserved reads ignored
+    xlen_e uxl; // variable user mode xlen - hardwired to zero
+    logic [12:0] wpri5; // writes preserved reads ignored
+    logic mxr; // make executable readable
+    logic sum; // permit supervisor user memory access
+    logic wpri4; // writes preserved reads ignored
+    xs_t xs; // extension register - hardwired to zero
+    xs_t fs; // floating point extension register
+    logic [1:0] wpri3; // writes preserved reads ignored
+    xs_t vs; // vector extension register
+    logic spp; // holds the previous privilege mode up to supervisor
+    logic wpri2; // writes preserved reads ignored
+    logic ube; // UBE controls whether explicit load and store memory accesses made from U-mode are little-endian (UBE=0) or big-endian (UBE=1)
+    logic spie; // supervisor interrupts enable bit active prior to trap
+    logic [1:0] wpri1; // writes preserved reads ignored
+    logic sie; // supervisor interrupts enable
+    logic wpri0; // writes preserved reads ignored
+  } sstatus_rv_t;
+
+  typedef struct packed {
+    logic sd; // signal dirty state - read-only
+    logic [62:36] wpri4; // writes preserved reads ignored
+    xlen_e sxl; // variable supervisor mode xlen - hardwired to zero
+    xlen_e uxl; // variable user mode xlen - hardwired to zero
+    logic [8:0] wpri3; // writes preserved reads ignored
+    logic tsr; // trap sret
+    logic tw; // time wait
+    logic tvm; // trap virtual memory
+    logic mxr; // make executable readable
+    logic sum; // permit supervisor user memory access
+    logic mprv; // modify privilege - privilege level for ld/st
+    xs_t xs; // extension register - hardwired to zero
+    xs_t fs; // floating point extension register
+    priv_lvl_t mpp; // holds the previous privilege mode up to machine
+    xs_t vs; // vector extension register
+    logic spp; // holds the previous privilege mode up to supervisor
+    logic mpie; // machine interrupts enable bit active prior to trap
+    logic ube; // UBE controls whether explicit load and store memory accesses made from U-mode are little-endian (UBE=0) or big-endian (UBE=1)
+    logic spie; // supervisor interrupts enable bit active prior to trap
+    logic wpri2; // writes preserved reads ignored
+    logic mie; // machine interrupts enable
+    logic wpri1; // writes preserved reads ignored
+    logic sie; // supervisor interrupts enable
+    logic wpri0; // writes preserved reads ignored
+  } mstatus_rv_t;
+
+  typedef struct packed {
+    logic [ModeW-1:0] mode;
+    logic [ASIDW-1:0] asid;
+    logic [PPNW-1:0] ppn;
+  } satp_t;
+
+  // --------------------
+  // Instruction Types
+  // --------------------
+  typedef struct packed {
+    logic [31:25] funct7;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7] rd;
+    logic [6:0] opcode;
+  } rtype_t;
+
+  typedef struct packed {
+    logic [31:27] rs3;
+    logic [26:25] funct2;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7] rd;
+    logic [6:0] opcode;
+  } r4type_t;
+
+  typedef struct packed {
+    logic [31:27] funct5;
+    logic [26:25] fmt;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] rm;
+    logic [11:7] rd;
+    logic [6:0] opcode;
+  } rftype_t; // floating-point
+
+  typedef struct packed {
+    logic [31:30] funct2;
+    logic [29:25] vecfltop;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:14] repl;
+    logic [13:12] vfmt;
+    logic [11:7] rd;
+    logic [6:0] opcode;
+  } rvftype_t; // vectorial floating-point
+
+  typedef struct packed {
+    logic [31:20] imm;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7] rd;
+    logic [6:0] opcode;
+  } itype_t;
+
+  typedef struct packed {
+    logic [31:25] imm;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7] imm0;
+    logic [6:0] opcode;
+  } stype_t;
+
+  typedef struct packed {
+    logic [31:12] imm;
+    logic [11:7] rd;
+    logic [6:0] opcode;
+  } utype_t;
+
+  // atomic instructions
+  typedef struct packed {
+    logic [31:27] funct5;
+    logic aq;
+    logic rl;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7] rd;
+    logic [6:0] opcode;
+  } atype_t;
+
+  typedef union packed {
+    logic [31:0] instr;
+    rtype_t rtype;
+    r4type_t r4type;
+    rftype_t rftype;
+    rvftype_t rvftype;
+    itype_t itype;
+    stype_t stype;
+    utype_t utype;
+    atype_t atype;
+  } instruction_t;
+
+  // --------------------
+  // Opcodes
+  // --------------------
+  // RV32/64G listings:
+  // Quadrant 0
+  localparam OpcodeLoad = 7'b00_000_11;
+  localparam OpcodeLoadFp = 7'b00_001_11;
+  localparam OpcodeCustom0 = 7'b00_010_11;
+  localparam OpcodeMiscMem = 7'b00_011_11;
+  localparam OpcodeOpImm = 7'b00_100_11;
+  localparam OpcodeAuipc = 7'b00_101_11;
+  localparam OpcodeOpImm32 = 7'b00_110_11;
+  // Quadrant 1
+  localparam OpcodeStore = 7'b01_000_11;
+  localparam OpcodeStoreFp = 7'b01_001_11;
+  localparam OpcodeCustom1 = 7'b01_010_11;
+  localparam OpcodeAmo = 7'b01_011_11;
+  localparam OpcodeOp = 7'b01_100_11;
+  localparam OpcodeLui = 7'b01_101_11;
+  localparam OpcodeOp32 = 7'b01_110_11;
+  // Quadrant 2
+  localparam OpcodeMadd = 7'b10_000_11;
+  localparam OpcodeMsub = 7'b10_001_11;
+  localparam OpcodeNmsub = 7'b10_010_11;
+  localparam OpcodeNmadd = 7'b10_011_11;
+  localparam OpcodeOpFp = 7'b10_100_11;
+  localparam OpcodeVec = 7'b10_101_11;
+  localparam OpcodeCustom2 = 7'b10_110_11;
+  // Quadrant 3
+  localparam OpcodeBranch = 7'b11_000_11;
+  localparam OpcodeJalr = 7'b11_001_11;
+  localparam OpcodeRsrvd2 = 7'b11_010_11;
+  localparam OpcodeJal = 7'b11_011_11;
+  localparam OpcodeSystem = 7'b11_100_11;
+  localparam OpcodeRsrvd3 = 7'b11_101_11;
+  localparam OpcodeCustom3 = 7'b11_110_11;
+
+  // RV64C/RV32C listings:
+  // Quadrant 0
+  localparam OpcodeC0 = 2'b00;
+  localparam OpcodeC0Addi4spn = 3'b000;
+  localparam OpcodeC0Fld = 3'b001;
+  localparam OpcodeC0Lw = 3'b010;
+  localparam OpcodeC0Ld = 3'b011;
+  localparam OpcodeC0Zcb = 3'b100;
+  localparam OpcodeC0Fsd = 3'b101;
+  localparam OpcodeC0Sw = 3'b110;
+  localparam OpcodeC0Sd = 3'b111;
+  // Quadrant 1
+  localparam OpcodeC1 = 2'b01;
+  localparam OpcodeC1Addi = 3'b000;
+  localparam OpcodeC1Addiw = 3'b001; //for RV64I only
+  localparam OpcodeC1Jal = 3'b001; //for RV32I only
+  localparam OpcodeC1Li = 3'b010;
+  localparam OpcodeC1LuiAddi16sp = 3'b011;
+  localparam OpcodeC1MiscAlu = 3'b100;
+  localparam OpcodeC1J = 3'b101;
+  localparam OpcodeC1Beqz = 3'b110;
+  localparam OpcodeC1Bnez = 3'b111;
+  // Quadrant 2
+  localparam OpcodeC2 = 2'b10;
+  localparam OpcodeC2Slli = 3'b000;
+  localparam OpcodeC2Fldsp = 3'b001;
+  localparam OpcodeC2Lwsp = 3'b010;
+  localparam OpcodeC2Ldsp = 3'b011;
+  localparam OpcodeC2JalrMvAdd = 3'b100;
+  localparam OpcodeC2Fsdsp = 3'b101;
+  localparam OpcodeC2Swsp = 3'b110;
+  localparam OpcodeC2Sdsp = 3'b111;
+
+  // ----------------------
+  // Virtual Memory
+  // ----------------------
+  // memory management, pte for sv39
+  typedef struct packed {
+    logic [9:0] reserved;
+    logic [44-1:0] ppn; // PPN length for sv39 (44 bits)
+    logic [1:0] rsw;
+    logic d;
+    logic a;
+    logic g;
+    logic u;
+    logic x;
+    logic w;
+    logic r;
+    logic v;
+  } pte_t;
+
+  // memory management, pte for sv32
+  typedef struct packed {
+    logic [22-1:0] ppn; // PPN length for sv32 (22 bits)
+    logic [1:0] rsw;
+    logic d;
+    logic a;
+    logic g;
+    logic u;
+    logic x;
+    logic w;
+    logic r;
+    logic v;
+  } pte_sv32_t;
+
+  // ----------------------
+  // Exception Cause Codes
+  // ----------------------
+  localparam logic [XLEN-1:0] INSTR_ADDR_MISALIGNED = 0;
+  localparam logic [XLEN-1:0] INSTR_ACCESS_FAULT = 1; // Illegal access as governed by PMPs and PMAs
+  localparam logic [XLEN-1:0] ILLEGAL_INSTR = 2;
+  localparam logic [XLEN-1:0] BREAKPOINT = 3;
+  localparam logic [XLEN-1:0] LD_ADDR_MISALIGNED = 4;
+  localparam logic [XLEN-1:0] LD_ACCESS_FAULT = 5; // Illegal access as governed by PMPs and PMAs
+  localparam logic [XLEN-1:0] ST_ADDR_MISALIGNED = 6;
+  localparam logic [XLEN-1:0] ST_ACCESS_FAULT = 7; // Illegal access as governed by PMPs and PMAs
+  localparam logic [XLEN-1:0] ENV_CALL_UMODE = 8; // environment call from user mode
+  localparam logic [XLEN-1:0] ENV_CALL_SMODE = 9; // environment call from supervisor mode
+  localparam logic [XLEN-1:0] ENV_CALL_MMODE = 11; // environment call from machine mode
+  localparam logic [XLEN-1:0] INSTR_PAGE_FAULT = 12; // Instruction page fault
+  localparam logic [XLEN-1:0] LOAD_PAGE_FAULT = 13; // Load page fault
+  localparam logic [XLEN-1:0] STORE_PAGE_FAULT = 15; // Store page fault
+  localparam logic [XLEN-1:0] DEBUG_REQUEST = 24; // Debug request
+
+  localparam int unsigned IRQ_S_SOFT = 1;
+  localparam int unsigned IRQ_M_SOFT = 3;
+  localparam int unsigned IRQ_S_TIMER = 5;
+  localparam int unsigned IRQ_M_TIMER = 7;
+  localparam int unsigned IRQ_S_EXT = 9;
+  localparam int unsigned IRQ_M_EXT = 11;
+
+  localparam logic [XLEN-1:0] MIP_SSIP = 1 << IRQ_S_SOFT;
+  localparam logic [XLEN-1:0] MIP_MSIP = 1 << IRQ_M_SOFT;
+  localparam logic [XLEN-1:0] MIP_STIP = 1 << IRQ_S_TIMER;
+  localparam logic [XLEN-1:0] MIP_MTIP = 1 << IRQ_M_TIMER;
+  localparam logic [XLEN-1:0] MIP_SEIP = 1 << IRQ_S_EXT;
+  localparam logic [XLEN-1:0] MIP_MEIP = 1 << IRQ_M_EXT;
+
+  localparam logic [XLEN-1:0] S_SW_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_SOFT);
+  localparam logic [XLEN-1:0] M_SW_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_SOFT);
+  localparam logic [XLEN-1:0] S_TIMER_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_TIMER);
+  localparam logic [XLEN-1:0] M_TIMER_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_TIMER);
+  localparam logic [XLEN-1:0] S_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_EXT);
+  localparam logic [XLEN-1:0] M_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_EXT);
+
+  // -----
+  // CSRs
+  // -----
+  typedef enum logic [11:0] {
+    // Floating-Point CSRs
+    CSR_FFLAGS = 12'h001,
+    CSR_FRM = 12'h002,
+    CSR_FCSR = 12'h003,
+    CSR_FTRAN = 12'h800,
+    // Vector CSRs
+    CSR_VSTART = 12'h008,
+    CSR_VXSAT = 12'h009,
+    CSR_VXRM = 12'h00A,
+    CSR_VCSR = 12'h00F,
+    CSR_VL = 12'hC20,
+    CSR_VTYPE = 12'hC21,
+    CSR_VLENB = 12'hC22,
+    // Supervisor Mode CSRs
+    CSR_SSTATUS = 12'h100,
+    CSR_SIE = 12'h104,
+    CSR_STVEC = 12'h105,
+    CSR_SCOUNTEREN = 12'h106,
+    CSR_SSCRATCH = 12'h140,
+    CSR_SEPC = 12'h141,
+    CSR_SCAUSE = 12'h142,
+    CSR_STVAL = 12'h143,
+    CSR_SIP = 12'h144,
+    CSR_SATP = 12'h180,
+    // Machine Mode CSRs
+    CSR_MSTATUS = 12'h300,
+    CSR_MISA = 12'h301,
+    CSR_MEDELEG = 12'h302,
+    CSR_MIDELEG = 12'h303,
+    CSR_MIE = 12'h304,
+    CSR_MTVEC = 12'h305,
+    CSR_MCOUNTEREN = 12'h306,
+    CSR_MSTATUSH = 12'h310,
+    CSR_MCOUNTINHIBIT = 12'h320,
+    CSR_MHPM_EVENT_3 = 12'h323, //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_4 = 12'h324, //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_5 = 12'h325, //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_6 = 12'h326, //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_7 = 12'h327, //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_8 = 12'h328, //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_9 = 12'h329, //Reserved
+    CSR_MHPM_EVENT_10 = 12'h32A, //Reserved
+    CSR_MHPM_EVENT_11 = 12'h32B, //Reserved
+    CSR_MHPM_EVENT_12 = 12'h32C, //Reserved
+    CSR_MHPM_EVENT_13 = 12'h32D, //Reserved
+    CSR_MHPM_EVENT_14 = 12'h32E, //Reserved
+    CSR_MHPM_EVENT_15 = 12'h32F, //Reserved
+    CSR_MHPM_EVENT_16 = 12'h330, //Reserved
+    CSR_MHPM_EVENT_17 = 12'h331, //Reserved
+    CSR_MHPM_EVENT_18 = 12'h332, //Reserved
+    CSR_MHPM_EVENT_19 = 12'h333, //Reserved
+    CSR_MHPM_EVENT_20 = 12'h334, //Reserved
+    CSR_MHPM_EVENT_21 = 12'h335, //Reserved
+    CSR_MHPM_EVENT_22 = 12'h336, //Reserved
+    CSR_MHPM_EVENT_23 = 12'h337, //Reserved
+    CSR_MHPM_EVENT_24 = 12'h338, //Reserved
+    CSR_MHPM_EVENT_25 = 12'h339, //Reserved
+    CSR_MHPM_EVENT_26 = 12'h33A, //Reserved
+    CSR_MHPM_EVENT_27 = 12'h33B, //Reserved
+    CSR_MHPM_EVENT_28 = 12'h33C, //Reserved
+    CSR_MHPM_EVENT_29 = 12'h33D, //Reserved
+    CSR_MHPM_EVENT_30 = 12'h33E, //Reserved
+    CSR_MHPM_EVENT_31 = 12'h33F, //Reserved
+    CSR_MSCRATCH = 12'h340,
+    CSR_MEPC = 12'h341,
+    CSR_MCAUSE = 12'h342,
+    CSR_MTVAL = 12'h343,
+    CSR_MIP = 12'h344,
+    CSR_MENVCFG = 12'h30A,
+    CSR_MENVCFGH = 12'h31A,
+    CSR_PMPCFG0 = 12'h3A0,
+    CSR_PMPCFG1 = 12'h3A1,
+    CSR_PMPCFG2 = 12'h3A2,
+    CSR_PMPCFG3 = 12'h3A3,
+    CSR_PMPADDR0 = 12'h3B0,
+    CSR_PMPADDR1 = 12'h3B1,
+    CSR_PMPADDR2 = 12'h3B2,
+    CSR_PMPADDR3 = 12'h3B3,
+    CSR_PMPADDR4 = 12'h3B4,
+    CSR_PMPADDR5 = 12'h3B5,
+    CSR_PMPADDR6 = 12'h3B6,
+    CSR_PMPADDR7 = 12'h3B7,
+    CSR_PMPADDR8 = 12'h3B8,
+    CSR_PMPADDR9 = 12'h3B9,
+    CSR_PMPADDR10 = 12'h3BA,
+    CSR_PMPADDR11 = 12'h3BB,
+    CSR_PMPADDR12 = 12'h3BC,
+    CSR_PMPADDR13 = 12'h3BD,
+    CSR_PMPADDR14 = 12'h3BE,
+    CSR_PMPADDR15 = 12'h3BF,
+    CSR_MVENDORID = 12'hF11,
+    CSR_MARCHID = 12'hF12,
+    CSR_MIMPID = 12'hF13,
+    CSR_MHARTID = 12'hF14,
+    CSR_MCONFIGPTR = 12'hF15,
+    CSR_MCYCLE = 12'hB00,
+    CSR_MCYCLEH = 12'hB80,
+    CSR_MINSTRET = 12'hB02,
+    CSR_MINSTRETH = 12'hB82,
+    //Performance Counters
+    CSR_MHPM_COUNTER_3 = 12'hB03,
+    CSR_MHPM_COUNTER_4 = 12'hB04,
+    CSR_MHPM_COUNTER_5 = 12'hB05,
+    CSR_MHPM_COUNTER_6 = 12'hB06,
+    CSR_MHPM_COUNTER_7 = 12'hB07,
+    CSR_MHPM_COUNTER_8 = 12'hB08,
+    CSR_MHPM_COUNTER_9 = 12'hB09, // reserved
+    CSR_MHPM_COUNTER_10 = 12'hB0A, // reserved
+    CSR_MHPM_COUNTER_11 = 12'hB0B, // reserved
+    CSR_MHPM_COUNTER_12 = 12'hB0C, // reserved
+    CSR_MHPM_COUNTER_13 = 12'hB0D, // reserved
+    CSR_MHPM_COUNTER_14 = 12'hB0E, // reserved
+    CSR_MHPM_COUNTER_15 = 12'hB0F, // reserved
+    CSR_MHPM_COUNTER_16 = 12'hB10, // reserved
+    CSR_MHPM_COUNTER_17 = 12'hB11, // reserved
+    CSR_MHPM_COUNTER_18 = 12'hB12, // reserved
+    CSR_MHPM_COUNTER_19 = 12'hB13, // reserved
+    CSR_MHPM_COUNTER_20 = 12'hB14, // reserved
+    CSR_MHPM_COUNTER_21 = 12'hB15, // reserved
+    CSR_MHPM_COUNTER_22 = 12'hB16, // reserved
+    CSR_MHPM_COUNTER_23 = 12'hB17, // reserved
+    CSR_MHPM_COUNTER_24 = 12'hB18, // reserved
+    CSR_MHPM_COUNTER_25 = 12'hB19, // reserved
+    CSR_MHPM_COUNTER_26 = 12'hB1A, // reserved
+    CSR_MHPM_COUNTER_27 = 12'hB1B, // reserved
+    CSR_MHPM_COUNTER_28 = 12'hB1C, // reserved
+    CSR_MHPM_COUNTER_29 = 12'hB1D, // reserved
+    CSR_MHPM_COUNTER_30 = 12'hB1E, // reserved
+    CSR_MHPM_COUNTER_31 = 12'hB1F, // reserved
+    CSR_MHPM_COUNTER_3H = 12'hB83,
+    CSR_MHPM_COUNTER_4H = 12'hB84,
+    CSR_MHPM_COUNTER_5H = 12'hB85,
+    CSR_MHPM_COUNTER_6H = 12'hB86,
+    CSR_MHPM_COUNTER_7H = 12'hB87,
+    CSR_MHPM_COUNTER_8H = 12'hB88,
+    CSR_MHPM_COUNTER_9H = 12'hB89, // reserved
+    CSR_MHPM_COUNTER_10H = 12'hB8A, // reserved
+    CSR_MHPM_COUNTER_11H = 12'hB8B, // reserved
+    CSR_MHPM_COUNTER_12H = 12'hB8C, // reserved
+    CSR_MHPM_COUNTER_13H = 12'hB8D, // reserved
+    CSR_MHPM_COUNTER_14H = 12'hB8E, // reserved
+    CSR_MHPM_COUNTER_15H = 12'hB8F, // reserved
+    CSR_MHPM_COUNTER_16H = 12'hB90, // reserved
+    CSR_MHPM_COUNTER_17H = 12'hB91, // reserved
+    CSR_MHPM_COUNTER_18H = 12'hB92, // reserved
+    CSR_MHPM_COUNTER_19H = 12'hB93, // reserved
+    CSR_MHPM_COUNTER_20H = 12'hB94, // reserved
+    CSR_MHPM_COUNTER_21H = 12'hB95, // reserved
+    CSR_MHPM_COUNTER_22H = 12'hB96, // reserved
+    CSR_MHPM_COUNTER_23H = 12'hB97, // reserved
+    CSR_MHPM_COUNTER_24H = 12'hB98, // reserved
+    CSR_MHPM_COUNTER_25H = 12'hB99, // reserved
+    CSR_MHPM_COUNTER_26H = 12'hB9A, // reserved
+    CSR_MHPM_COUNTER_27H = 12'hB9B, // reserved
+    CSR_MHPM_COUNTER_28H = 12'hB9C, // reserved
+    CSR_MHPM_COUNTER_29H = 12'hB9D, // reserved
+    CSR_MHPM_COUNTER_30H = 12'hB9E, // reserved
+    CSR_MHPM_COUNTER_31H = 12'hB9F, // reserved
+    // Cache Control (platform specific)
+    CSR_DCACHE = 12'h7C1,
+    CSR_ICACHE = 12'h7C0,
+    // Accelerator memory consistency (platform specific)
+    CSR_ACC_CONS = 12'h7C2,
+    // Triggers
+    CSR_TSELECT = 12'h7A0,
+    CSR_TDATA1 = 12'h7A1,
+    CSR_TDATA2 = 12'h7A2,
+    CSR_TDATA3 = 12'h7A3,
+    CSR_TINFO = 12'h7A4,
+    // Debug CSR
+    CSR_DCSR = 12'h7b0,
+    CSR_DPC = 12'h7b1,
+    CSR_DSCRATCH0 = 12'h7b2, // optional
+    CSR_DSCRATCH1 = 12'h7b3, // optional
+    // Counters and Timers (User Mode - R/O Shadows)
+    CSR_CYCLE = 12'hC00,
+    CSR_CYCLEH = 12'hC80,
+    CSR_TIME = 12'hC01,
+    CSR_TIMEH = 12'hC81,
+    CSR_INSTRET = 12'hC02,
+    CSR_INSTRETH = 12'hC82,
+    // Performance counters (User Mode - R/O Shadows)
+    CSR_HPM_COUNTER_3 = 12'hC03,
+    CSR_HPM_COUNTER_4 = 12'hC04,
+    CSR_HPM_COUNTER_5 = 12'hC05,
+    CSR_HPM_COUNTER_6 = 12'hC06,
+    CSR_HPM_COUNTER_7 = 12'hC07,
+    CSR_HPM_COUNTER_8 = 12'hC08,
+    CSR_HPM_COUNTER_9 = 12'hC09, // reserved
+    CSR_HPM_COUNTER_10 = 12'hC0A, // reserved
+    CSR_HPM_COUNTER_11 = 12'hC0B, // reserved
+    CSR_HPM_COUNTER_12 = 12'hC0C, // reserved
+    CSR_HPM_COUNTER_13 = 12'hC0D, // reserved
+    CSR_HPM_COUNTER_14 = 12'hC0E, // reserved
+    CSR_HPM_COUNTER_15 = 12'hC0F, // reserved
+    CSR_HPM_COUNTER_16 = 12'hC10, // reserved
+    CSR_HPM_COUNTER_17 = 12'hC11, // reserved
+    CSR_HPM_COUNTER_18 = 12'hC12, // reserved
+    CSR_HPM_COUNTER_19 = 12'hC13, // reserved
+    CSR_HPM_COUNTER_20 = 12'hC14, // reserved
+    CSR_HPM_COUNTER_21 = 12'hC15, // reserved
+    CSR_HPM_COUNTER_22 = 12'hC16, // reserved
+    CSR_HPM_COUNTER_23 = 12'hC17, // reserved
+    CSR_HPM_COUNTER_24 = 12'hC18, // reserved
+    CSR_HPM_COUNTER_25 = 12'hC19, // reserved
+    CSR_HPM_COUNTER_26 = 12'hC1A, // reserved
+    CSR_HPM_COUNTER_27 = 12'hC1B, // reserved
+    CSR_HPM_COUNTER_28 = 12'hC1C, // reserved
+    CSR_HPM_COUNTER_29 = 12'hC1D, // reserved
+    CSR_HPM_COUNTER_30 = 12'hC1E, // reserved
+    CSR_HPM_COUNTER_31 = 12'hC1F, // reserved
+    CSR_HPM_COUNTER_3H = 12'hC83,
+    CSR_HPM_COUNTER_4H = 12'hC84,
+    CSR_HPM_COUNTER_5H = 12'hC85,
+    CSR_HPM_COUNTER_6H = 12'hC86,
+    CSR_HPM_COUNTER_7H = 12'hC87,
+    CSR_HPM_COUNTER_8H = 12'hC88,
+    CSR_HPM_COUNTER_9H = 12'hC89, // reserved
+    CSR_HPM_COUNTER_10H = 12'hC8A, // reserved
+    CSR_HPM_COUNTER_11H = 12'hC8B, // reserved
+    CSR_HPM_COUNTER_12H = 12'hC8C, // reserved
+    CSR_HPM_COUNTER_13H = 12'hC8D, // reserved
+    CSR_HPM_COUNTER_14H = 12'hC8E, // reserved
+    CSR_HPM_COUNTER_15H = 12'hC8F, // reserved
+    CSR_HPM_COUNTER_16H = 12'hC90, // reserved
+    CSR_HPM_COUNTER_17H = 12'hC91, // reserved
+    CSR_HPM_COUNTER_18H = 12'hC92, // reserved
+    CSR_HPM_COUNTER_19H = 12'hC93, // reserved
+    CSR_HPM_COUNTER_20H = 12'hC94, // reserved
+    CSR_HPM_COUNTER_21H = 12'hC95, // reserved
+    CSR_HPM_COUNTER_22H = 12'hC96, // reserved
+    CSR_HPM_COUNTER_23H = 12'hC97, // reserved
+    CSR_HPM_COUNTER_24H = 12'hC98, // reserved
+    CSR_HPM_COUNTER_25H = 12'hC99, // reserved
+    CSR_HPM_COUNTER_26H = 12'hC9A, // reserved
+    CSR_HPM_COUNTER_27H = 12'hC9B, // reserved
+    CSR_HPM_COUNTER_28H = 12'hC9C, // reserved
+    CSR_HPM_COUNTER_29H = 12'hC9D, // reserved
+    CSR_HPM_COUNTER_30H = 12'hC9E, // reserved
+    CSR_HPM_COUNTER_31H = 12'hC9F // reserved
+  } csr_reg_t;
+
+  localparam logic [63:0] SSTATUS_UIE = 'h00000001;
+  localparam logic [63:0] SSTATUS_SIE = 'h00000002;
+  localparam logic [63:0] SSTATUS_SPIE = 'h00000020;
+  localparam logic [63:0] SSTATUS_SPP = 'h00000100;
+  localparam logic [63:0] SSTATUS_FS = 'h00006000;
+  localparam logic [63:0] SSTATUS_XS = 'h00018000;
+  localparam logic [63:0] SSTATUS_SUM = 'h00040000;
+  localparam logic [63:0] SSTATUS_MXR = 'h00080000;
+  localparam logic [63:0] SSTATUS_UPIE = 'h00000010;
+  localparam logic [63:0] SSTATUS_UXL = 64'h0000000300000000;
+  localparam logic [63:0] SSTATUS_SD = {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000}; // bit 63 set when XLEN != 32, bit 31 otherwise
+
+  localparam logic [63:0] MSTATUS_UIE = 'h00000001;
+  localparam logic [63:0] MSTATUS_SIE = 'h00000002;
+  localparam logic [63:0] MSTATUS_HIE = 'h00000004;
+  localparam logic [63:0] MSTATUS_MIE = 'h00000008;
+  localparam logic [63:0] MSTATUS_UPIE = 'h00000010;
+  localparam logic [63:0] MSTATUS_SPIE = 'h00000020;
+  localparam logic [63:0] MSTATUS_HPIE = 'h00000040;
+  localparam logic [63:0] MSTATUS_MPIE = 'h00000080;
+  localparam logic [63:0] MSTATUS_SPP = 'h00000100;
+  localparam logic [63:0] MSTATUS_HPP = 'h00000600;
+  localparam logic [63:0] MSTATUS_MPP = 'h00001800;
+  localparam logic [63:0] MSTATUS_FS = 'h00006000;
+  localparam logic [63:0] MSTATUS_XS = 'h00018000;
+  localparam logic [63:0] MSTATUS_MPRV = 'h00020000;
+  localparam logic [63:0] MSTATUS_SUM = 'h00040000;
+  localparam logic [63:0] MSTATUS_MXR = 'h00080000;
+  localparam logic [63:0] MSTATUS_TVM = 'h00100000;
+  localparam logic [63:0] MSTATUS_TW = 'h00200000;
+  localparam logic [63:0] MSTATUS_TSR = 'h00400000;
+  localparam logic [63:0] MSTATUS_UXL = {30'h0000000, IS_XLEN64, IS_XLEN64, 32'h00000000}; // bits [33:32], set only when XLEN != 32
+  localparam logic [63:0] MSTATUS_SXL = {28'h0000000, IS_XLEN64, IS_XLEN64, 34'h00000000}; // bits [35:34], set only when XLEN != 32
+  localparam logic [63:0] MSTATUS_SD = {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000}; // bit 63 set when XLEN != 32, bit 31 otherwise
+
+  typedef enum logic [2:0] {
+    CSRRW = 3'h1,
+    CSRRS = 3'h2,
+    CSRRC = 3'h3,
+    CSRRWI = 3'h5,
+    CSRRSI = 3'h6,
+    CSRRCI = 3'h7
+  } csr_op_t;
+
+  // decoded CSR address
+  typedef struct packed {
+    logic [1:0] rw;
+    priv_lvl_t priv_lvl;
+    logic [7:0] address;
+  } csr_addr_t;
+
+  typedef union packed {
+    csr_reg_t address;
+    csr_addr_t csr_decode;
+  } csr_t;
+
+  // Floating-Point control and status register (32-bit!)
+  typedef struct packed {
+    logic [31:15] reserved; // reserved for L extension, return 0 otherwise
+    logic [6:0] fprec; // div/sqrt precision control
+    logic [2:0] frm; // float rounding mode
+    logic [4:0] fflags; // float exception flags
+  } fcsr_t;
+
+  // PMP
+  typedef enum logic [1:0] {
+    OFF = 2'b00,
+    TOR = 2'b01,
+    NA4 = 2'b10,
+    NAPOT = 2'b11
+  } pmp_addr_mode_t;
+
+  // PMP Access Type
+  typedef enum logic [2:0] {
+    ACCESS_NONE = 3'b000,
+    ACCESS_READ = 3'b001,
+    ACCESS_WRITE = 3'b010,
+    ACCESS_EXEC = 3'b100
+  } pmp_access_t;
+
+  typedef struct packed {
+    logic x;
+    logic w;
+    logic r;
+  } pmpcfg_access_t;
+
+  // packed struct of a PMP configuration register (8bit)
+  typedef struct packed {
+    logic locked; // lock this configuration
+    logic [1:0] reserved;
+    pmp_addr_mode_t addr_mode; // Off, TOR, NA4, NAPOT
+    pmpcfg_access_t access_type;
+  } pmpcfg_t;
+
+  // -----
+  // Debug
+  // -----
+  typedef struct packed {
+    logic [31:28] xdebugver;
+    logic [27:16] zero2;
+    logic ebreakm;
+    logic zero1;
+    logic ebreaks;
+    logic ebreaku;
+    logic stepie;
+    logic stopcount;
+    logic stoptime;
+    logic [8:6] cause;
+    logic zero0;
+    logic mprven;
+    logic nmip;
+    logic step;
+    priv_lvl_t prv;
+  } dcsr_t;
+
+  // Instruction Generation *incomplete*
+  function automatic logic [31:0] jal(logic [4:0] rd, logic [20:0] imm);
+    // OpCode Jal
+    return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h6f};
+  endfunction
+
+  function automatic logic [31:0] jalr(logic [4:0] rd, logic [4:0] rs1, logic [11:0] offset);
+    // OpCode Jalr
+    return {offset[11:0], rs1, 3'b0, rd, 7'h67};
+  endfunction
+
+  function automatic logic [31:0] andi(logic [4:0] rd, logic [4:0] rs1, logic [11:0] imm);
+    // OpCode andi
+    return {imm[11:0], rs1, 3'h7, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] slli(logic [4:0] rd, logic [4:0] rs1, logic [5:0] shamt);
+    // OpCode slli
+    return {6'b0, shamt[5:0], rs1, 3'h1, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] srli(logic [4:0] rd, logic [4:0] rs1, logic [5:0] shamt);
+    // OpCode srli
+    return {6'b0, shamt[5:0], rs1, 3'h5, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] load(logic [2:0] size, logic [4:0] dest, logic [4:0] base,
+                                       logic [11:0] offset);
+    // OpCode Load
+    return {offset[11:0], base, size, dest, 7'h03};
+  endfunction
+
+  function automatic logic [31:0] auipc(logic [4:0] rd, logic [20:0] imm);
+    // OpCode Auipc. NOTE(review): imm is scrambled exactly as in jal() above -- confirm this bit order is intended for auipc
+    return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h17};
+  endfunction
+
+  function automatic logic [31:0] store(logic [2:0] size, logic [4:0] src, logic [4:0] base,
+                                        logic [11:0] offset);
+    // OpCode Store
+    return {offset[11:5], src, base, size, offset[4:0], 7'h23};
+  endfunction
+
+  function automatic logic [31:0] float_load(logic [2:0] size, logic [4:0] dest, logic [4:0] base,
+                                             logic [11:0] offset);
+    // OpCode LoadFp (same value as OpcodeLoadFp)
+    return {offset[11:0], base, size, dest, 7'b00_001_11};
+  endfunction
+
+  function automatic logic [31:0] float_store(logic [2:0] size, logic [4:0] src, logic [4:0] base,
+                                              logic [11:0] offset);
+    // OpCode StoreFp (same value as OpcodeStoreFp)
+    return {offset[11:5], src, base, size, offset[4:0], 7'b01_001_11};
+  endfunction
+
+  function automatic logic [31:0] csrw(csr_reg_t csr, logic [4:0] rs1);
+    // CSRRW, rd, OpCode System
+    return {csr, rs1, 3'h1, 5'h0, 7'h73};
+  endfunction
+
+  function automatic logic [31:0] csrr(csr_reg_t csr, logic [4:0] dest);
+    // rs1, CSRRS, rd, OpCode System
+    return {csr, 5'h0, 3'h2, dest, 7'h73};
+  endfunction
+
+  function automatic logic [31:0] branch(logic [4:0] src2, logic [4:0] src1, logic [2:0] funct3,
+                                         logic [11:0] offset);
+    // OpCode Branch
+    return {offset[11], offset[9:4], src2, src1, funct3, offset[3:0], offset[10], 7'b11_000_11};
+  endfunction
+
+  function automatic logic [31:0] ebreak();
+    return 32'h00100073;
+  endfunction
+
+  function automatic logic [31:0] wfi();
+    return 32'h10500073;
+  endfunction
+
+  function automatic logic [31:0] nop();
+    return 32'h00000013;
+  endfunction
+
+  function automatic logic [31:0] illegal();
+    return 32'h00000000;
+  endfunction
+
+
+  // trace log compatible with Spike's commit log feature: prints priv_lvl, pc and the instruction word, plus rd and result when rd_fpr is set or rd != 0
+  // pragma translate_off
+  function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr,
+                                 logic [4:0] rd, logic [63:0] result, logic rd_fpr);
+    string rd_s;
+    string instr_word;
+
+    automatic string rf_s = rd_fpr ? "f" : "x";
+
+    if (instr[1:0] != 2'b11) begin // two LSBs are not 2'b11: log only the lower 16 bits
+      instr_word = $sformatf("(0x%h)", instr[15:0]);
+    end else begin
+      instr_word = $sformatf("(0x%h)", instr);
+    end
+
+    if (rd < 10) rd_s = $sformatf("%s %0d", rf_s, rd);
+    else rd_s = $sformatf("%s%0d", rf_s, rd);
+
+    if (rd_fpr || rd != 0) begin
+      // 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000
+      return $sformatf("%d 0x%h %s %s 0x%h\n", priv_lvl, pc, instr_word, rd_s, result);
+    end else begin
+      // 0 0x000000008000019c (0x0040006f)
+      return $sformatf("%d 0x%h %s\n", priv_lvl, pc, instr_word);
+    end
+  endfunction
+
+  typedef struct {
+    byte priv;
+    longint unsigned pc;
+    byte is_fp;
+    byte rd;
+    longint unsigned data;
+    int unsigned instr;
+    byte was_exception;
+  } commit_log_t;
+  // pragma translate_on
+
+endpackage
diff --git a/test/type_param/core/include/std_cache_pkg.sv b/test/type_param/core/include/std_cache_pkg.sv
new file mode 100644
index 0000000..ae812c9
--- /dev/null
+++ b/test/type_param/core/include/std_cache_pkg.sv
@@ -0,0 +1,98 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba , ETH Zurich
+// Michael Schaffner , ETH Zurich
+// Date: 15.08.2018
+
+// ******* WIP *******
+// Description: package for the standard Ariane cache subsystem.
+
+package std_cache_pkg;
+
+  // Calculated parameters, all derived from the ariane_pkg data-cache settings
+  localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8);
+  localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_BYTE_OFFSET);
+  localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC * 2;
+  localparam DCACHE_SET_ASSOC_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC);
+  // localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not
+
+  typedef struct packed {
+    logic [1:0] id; // id for which we handle the miss
+    logic valid;
+    logic we;
+    logic [55:0] addr;
+    logic [7:0][7:0] wdata;
+    logic [7:0] be;
+  } mshr_t;
+
+  typedef struct packed {
+    logic valid;
+    logic [63:0] addr;
+    logic [7:0] be;
+    logic [1:0] size;
+    logic we;
+    logic [63:0] wdata;
+    logic bypass;
+  } miss_req_t;
+
+  typedef struct packed {
+    logic req;
+    ariane_pkg::ad_req_t reqtype;
+    ariane_pkg::amo_t amo;
+    logic [3:0] id;
+    logic [63:0] addr;
+    logic [63:0] wdata;
+    logic we;
+    logic [7:0] be;
+    logic [1:0] size;
+  } bypass_req_t;
+
+  typedef struct packed {
+    logic gnt;
+    logic valid;
+    logic [63:0] rdata;
+  } bypass_rsp_t;
+
+  typedef struct packed {
+    logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array
+    logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array
+    logic valid; // state array
+    logic dirty; // state array
+  } cache_line_t;
+
+  // cache line byte enable
+  typedef struct packed {
+    logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array
+    logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array
+    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits)
+  } cl_be_t;
+
+  // convert one-hot to binary -> needed for cache replacement; returns the index of the lowest set bit of `in`. NOTE(review): no return statement executes when `in` is all zeros
+  function automatic logic [DCACHE_SET_ASSOC_WIDTH-1:0] one_hot_to_bin(
+      input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] in);
+    for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
+      if (in[i]) return i;
+    end
+  endfunction
+  // get the first (lowest-index) bit set, returns one hot value. NOTE(review): no return statement executes when `valid_dirty` is all zeros
+  function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] get_victim_cl(
+      input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid_dirty);
+    // one-hot return vector
+    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] oh = '0;
+    for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
+      if (valid_dirty[i]) begin
+        oh[i] = 1'b1;
+        return oh;
+      end
+    end
+  endfunction
+endpackage : std_cache_pkg
+
+
diff --git a/test/type_param/core/include/wt_cache_pkg.sv b/test/type_param/core/include/wt_cache_pkg.sv
new file mode 100644
index 0000000..9a8c0ce
--- /dev/null
+++ b/test/type_param/core/include/wt_cache_pkg.sv
@@ -0,0 +1,344 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Package for OpenPiton compatible L1 cache subsystem + +// this is needed to propagate the +// configuration in case Ariane is +// instantiated in OpenPiton +`ifdef PITON_ARIANE +`include "l15.tmp.h" +`include "define.tmp.h" +`endif + +package wt_cache_pkg; + + // these parameters need to coincide with the + // L1.5 parameterization, do not change +`ifdef PITON_ARIANE + +`ifndef CONFIG_L15_ASSOCIATIVITY + `define CONFIG_L15_ASSOCIATIVITY 4 +`endif + +`ifndef TLB_CSM_WIDTH + `define TLB_CSM_WIDTH 33 +`endif + + localparam L15_SET_ASSOC = `CONFIG_L15_ASSOCIATIVITY; + localparam L15_TLB_CSM_WIDTH = `TLB_CSM_WIDTH; +`else + localparam L15_SET_ASSOC = ariane_pkg::DCACHE_SET_ASSOC;// align with dcache for compatibility with the standard Ariane setup + localparam L15_TLB_CSM_WIDTH = 33; +`endif + localparam L15_TID_WIDTH = ariane_pkg::MEM_TID_WIDTH; + localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC); + localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC); + localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC); + + // FIFO depths of L15 adapter + localparam ADAPTER_REQ_FIFO_DEPTH = 2; + localparam ADAPTER_RTRN_FIFO_DEPTH = 2; + + + // Calculated parameters (derived from the ariane_pkg cache geometry) + localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8); + localparam ICACHE_NUM_WORDS = 2 ** (ariane_pkg::ICACHE_INDEX_WIDTH - ICACHE_OFFSET_WIDTH); + localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS); // excluding byte offset + + localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8); + localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_OFFSET_WIDTH); + localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS); // excluding byte offset + + localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH / riscv::XLEN; + localparam DCACHE_NUM_BANKS_WIDTH = $clog2(DCACHE_NUM_BANKS); + + // write buffer parameterization + localparam DCACHE_WBUF_DEPTH =
ariane_pkg::WT_DCACHE_WBUF_DEPTH; + localparam DCACHE_MAX_TX = 2 ** L15_TID_WIDTH; + localparam CACHE_ID_WIDTH = L15_TID_WIDTH; + + + typedef struct packed { + logic [ariane_pkg::DCACHE_TAG_WIDTH+(ariane_pkg::DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES)-1:0] wtag; + riscv::xlen_t data; + logic [ariane_pkg::DCACHE_USER_WIDTH-1:0] user; + logic [(riscv::XLEN/8)-1:0] dirty; // byte is dirty + logic [(riscv::XLEN/8)-1:0] valid; // byte is valid + logic [(riscv::XLEN/8)-1:0] txblock; // byte is part of transaction in-flight + logic checked; // if cache state of this word has been checked + logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache + } wbuffer_t; + + // TX status registers are indexed with the transaction ID + // they basically store which bytes from which buffer entry are part + // of that transaction + + typedef struct packed { + logic vld; + logic [(riscv::XLEN/8)-1:0] be; // bytes that are part of the transaction + logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] ptr; // index of the write buffer entry + } tx_stat_t; + + // local interfaces between caches and L15 adapter + typedef enum logic [1:0] { + DCACHE_STORE_REQ, + DCACHE_LOAD_REQ, + DCACHE_ATOMIC_REQ, + DCACHE_INT_REQ + } dcache_out_t; + + typedef enum logic [2:0] { + DCACHE_INV_REQ, // no ack from the core required + DCACHE_STORE_ACK, // note: this may contain an invalidation vector, too + DCACHE_LOAD_ACK, + DCACHE_ATOMIC_ACK, + DCACHE_INT_ACK + } dcache_in_t; + + typedef enum logic [0:0] { + ICACHE_INV_REQ, // no ack from the core required + ICACHE_IFILL_ACK + } icache_in_t; + + // icache interface + typedef struct packed { + logic vld; // invalidate only affected way + logic all; // invalidate all ways + logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate + logic [L1I_WAY_WIDTH-1:0] way; // way to invalidate + } icache_inval_t; + + typedef struct packed { + logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way; // way to replace + logic [riscv::PLEN-1:0] paddr; // physical address + logic nc; // noncacheable + logic
[CACHE_ID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } icache_req_t; + + typedef struct packed { + icache_in_t rtype; // see definitions above + logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data; // full cache line width + logic [ariane_pkg::ICACHE_USER_LINE_WIDTH-1:0] user; // user bits + icache_inval_t inv; // invalidation vector + logic [CACHE_ID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } icache_rtrn_t; + + // dcache interface + typedef struct packed { + logic vld; // invalidate only affected way + logic all; // invalidate all ways + logic [ariane_pkg::DCACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate + logic [L15_WAY_WIDTH-1:0] way; // way to invalidate + } dcache_inval_t; + + typedef struct packed { + dcache_out_t rtype; // see definitions above + logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) + logic [L1D_WAY_WIDTH-1:0] way; // way to replace + logic [riscv::PLEN-1:0] paddr; // physical address + riscv::xlen_t data; // word width of processor (no block stores at the moment) + logic [ariane_pkg::DATA_USER_WIDTH-1:0] user; // user width of processor (no block stores at the moment) + logic nc; // noncacheable + logic [CACHE_ID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + ariane_pkg::amo_t amo_op; // amo opcode + } dcache_req_t; + + typedef struct packed { + dcache_in_t rtype; // see definitions above + logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // full cache line width + logic [ariane_pkg::DCACHE_USER_LINE_WIDTH-1:0] user; // user bits + dcache_inval_t inv; // invalidation vector + logic [CACHE_ID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } dcache_rtrn_t; + + + // taken from iop.h in openpiton + // to l1.5 (only marked subset is used) + typedef enum logic [4:0] { + L15_LOAD_RQ = 5'b00000, // load request + L15_IMISS_RQ = 5'b10000, // instruction fill request + L15_STORE_RQ = 5'b00001, //
store request + L15_ATOMIC_RQ = 5'b00110, // atomic op + //L15_CAS1_RQ = 5'b00010, // compare and swap1 packet (OpenSparc atomics) + //L15_CAS2_RQ = 5'b00011, // compare and swap2 packet (OpenSparc atomics) + //L15_SWAP_RQ = 5'b00110, // swap packet (OpenSparc atomics) + L15_STRLOAD_RQ = 5'b00100, // unused + L15_STRST_RQ = 5'b00101, // unused + L15_STQ_RQ = 5'b00111, // unused + L15_INT_RQ = 5'b01001, // interrupt request + L15_FWD_RQ = 5'b01101, // unused + L15_FWD_RPY = 5'b01110, // unused + L15_RSVD_RQ = 5'b11111 // unused + } l15_reqtypes_t; + + // from l1.5 (only marked subset is used) + typedef enum logic [3:0] { + L15_LOAD_RET = 4'b0000, // load packet + // L15_INV_RET = 4'b0011, // invalidate packet, not unique... + L15_ST_ACK = 4'b0100, // store ack packet + //L15_AT_ACK = 4'b0011, // unused, not unique... + L15_INT_RET = 4'b0111, // interrupt packet + L15_TEST_RET = 4'b0101, // unused + L15_FP_RET = 4'b1000, // unused + L15_IFILL_RET = 4'b0001, // instruction fill packet + L15_EVICT_REQ = 4'b0011, // eviction request + L15_ERR_RET = 4'b1100, // unused + L15_STRLOAD_RET = 4'b0010, // unused + L15_STRST_ACK = 4'b0110, // unused + L15_FWD_RQ_RET = 4'b1010, // unused + L15_FWD_RPY_RET = 4'b1011, // unused + L15_RSVD_RET = 4'b1111, // unused + L15_CPX_RESTYPE_ATOMIC_RES = 4'b1110 // custom type for atomic responses + } l15_rtrntypes_t; + + + typedef struct packed { + logic l15_val; // valid signal, asserted with request + logic l15_req_ack; // ack for response + l15_reqtypes_t l15_rqtype; // see l15_reqtypes_t above for encoding + logic l15_nc; // non-cacheable bit + logic [2:0] l15_size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) + logic [L15_TID_WIDTH-1:0] l15_threadid; // currently 0 or 1 + logic l15_prefetch; // unused in openpiton + logic l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment + logic l15_blockstore; // unused in openpiton + logic l15_blockinitstore; // unused in openpiton + logic
[L15_WAY_WIDTH-1:0] l15_l1rplway; // way to replace + logic [39:0] l15_address; // physical address + logic [63:0] l15_data; // word to write + logic [63:0] l15_data_next_entry; // unused in Ariane (only used for CAS atomic requests) + logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data; // unused in Ariane + logic [3:0] l15_amo_op; // atomic operation type + } l15_req_t; + + typedef struct packed { + logic l15_ack; // ack for request struct + logic l15_header_ack; // ack for request struct + logic l15_val; // valid signal for return struct + l15_rtrntypes_t l15_returntype; // see l15_rtrntypes_t above for encoding + logic l15_l2miss; // unused in Ariane + logic [1:0] l15_error; // unused in openpiton + logic l15_noncacheable; // non-cacheable bit + logic l15_atomic; // asserted in load return and store ack packets of atomic tx + logic [L15_TID_WIDTH-1:0] l15_threadid; // used as transaction ID + logic l15_prefetch; // unused in openpiton + logic l15_f4b; // 4byte instruction fill from I/O space (nc). + logic [63:0] l15_data_0; // used for both caches + logic [63:0] l15_data_1; // used for both caches + logic [63:0] l15_data_2; // currently only used for I$ + logic [63:0] l15_data_3; // currently only used for I$ + logic l15_inval_icache_all_way; // invalidate all ways + logic l15_inval_dcache_all_way; // unused in openpiton + logic [15:4] l15_inval_address_15_4; // invalidate selected cacheline + logic l15_cross_invalidate; // unused in openpiton + logic [L15_WAY_WIDTH-1:0] l15_cross_invalidate_way; // unused in openpiton + logic l15_inval_dcache_inval; // invalidate selected cacheline and way + logic l15_inval_icache_inval; // unused in openpiton + logic [L15_WAY_WIDTH-1:0] l15_inval_way; // way to invalidate + logic l15_blockinitstore; // unused in openpiton + } l15_rtrn_t; + + // swap endianness in a 64bit word (reverses the byte order) + function automatic logic [63:0] swendian64(input logic [63:0] in); + automatic logic [63:0] out; + for (int k = 0; k < 64; k += 8) begin + out[k+:8] = in[63-k-:8]; + end +
return out; + endfunction + + function automatic logic [5:0] popcnt64(input logic [63:0] in); // counts the set bits of in; NOTE(review): the 6-bit result wraps to 0 when all 64 bits are set + logic [5:0] cnt = 0; + foreach (in[k]) begin + cnt += 6'(in[k]); + end + return cnt; + endfunction : popcnt64 + + function automatic logic [(riscv::XLEN/8)-1:0] to_byte_enable8( + input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); // byte-enable mask: 1, 2 or 4 bytes starting at offset, otherwise all bytes + logic [(riscv::XLEN/8)-1:0] be; + be = '0; + unique case (size) + 2'b00: be[offset] = '1; // byte + 2'b01: be[offset+:2] = '1; // hword + 2'b10: be[offset+:4] = '1; // word + default: be = '1; // dword + endcase // size + return be; + endfunction : to_byte_enable8 + + function automatic logic [(riscv::XLEN/8)-1:0] to_byte_enable4( + input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); // byte-enable mask: 1 or 2 bytes starting at offset, otherwise all 4 bytes + logic [3:0] be; + be = '0; + unique case (size) + 2'b00: be[offset] = '1; // byte + 2'b01: be[offset+:2] = '1; // hword + default: be = '1; // word + endcase // size + return be; + endfunction : to_byte_enable4 + + // openpiton requires the data to be replicated in case of smaller sizes than dwords + function automatic riscv::xlen_t repData64(input riscv::xlen_t data, + input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, + input logic [1:0] size); + riscv::xlen_t out; + unique case (size) + 2'b00: for (int k = 0; k < 8; k++) out[k*8+:8] = data[offset*8+:8]; // byte + 2'b01: for (int k = 0; k < 4; k++) out[k*16+:16] = data[offset*8+:16]; // hword + 2'b10: for (int k = 0; k < 2; k++) out[k*32+:32] = data[offset*8+:32]; // word + default: out = data; // dword + endcase // size + return out; + endfunction : repData64 + + function automatic riscv::xlen_t repData32(input riscv::xlen_t data, + input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, + input logic [1:0] size); + riscv::xlen_t out; + unique case (size) + 2'b00: for (int k = 0; k < 4; k++) out[k*8+:8] = data[offset*8+:8]; // byte + 2'b01: for (int k = 0; k < 2; k++) out[k*16+:16] = data[offset*8+:16]; // hword + default: out = data; // word + endcase // size + return
out; + endfunction : repData32 + + // note: this is openpiton specific. cannot transmit unaligned words. + // hence we default to individual bytes in that case, and they have to be transmitted + // one after the other + function automatic logic [1:0] toSize64(input logic [7:0] be); + logic [1:0] size; + unique case (be) + 8'b1111_1111: size = 2'b11; // dword + 8'b0000_1111, 8'b1111_0000: size = 2'b10; // word + 8'b1100_0000, 8'b0011_0000, 8'b0000_1100, 8'b0000_0011: size = 2'b01; // hword + default: size = 2'b00; // individual bytes + endcase // be + return size; + endfunction : toSize64 + + + function automatic logic [1:0] toSize32(input logic [3:0] be); // 4-byte variant of toSize64: maps a byte-enable mask to a size code + logic [1:0] size; + unique case (be) + 4'b1111: size = 2'b10; // word + 4'b1100, 4'b0011: size = 2'b01; // hword + default: size = 2'b00; // individual bytes + endcase // be + return size; + endfunction : toSize32 + +endpackage diff --git a/test/type_param/core/instr_realign.sv b/test/type_param/core/instr_realign.sv new file mode 100644 index 0000000..043a131 --- /dev/null +++ b/test/type_param/core/instr_realign.sv @@ -0,0 +1,361 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba +// Description: Instruction Re-aligner +// +// This module takes 32-bit aligned cache blocks and extracts the instructions.
+// As we are supporting the compressed instruction set extension, a 32 bit instruction word +// can hold up to 2 compressed instructions. +// Furthermore those instructions can be arbitrarily interleaved which makes it possible to fetch +// only the lower part of a 32 bit instruction. +// Furthermore we need to handle the case if we want to start fetching from an unaligned +// instruction e.g. a branch. + + +module instr_realign + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, // clears the "last instruction was unaligned" state + input logic valid_i, // data_i/address_i are valid; gates valid_o and the state update + output logic serving_unaligned_o, // we have an unaligned instruction in [0] + input logic [riscv::VLEN-1:0] address_i, // address belonging to data_i + input logic [FETCH_WIDTH-1:0] data_i, // fetched block, split into 16 bit slots + output logic [INSTR_PER_FETCH-1:0] valid_o, // per output slot: instr_o/addr_o are valid + output logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_o, // address of each extracted instruction + output logic [INSTR_PER_FETCH-1:0][31:0] instr_o // extracted instructions (compressed ones zero-extended) +); + // as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions + logic [3:0] instr_is_compressed; + + for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin + // LSB != 2'b11 + assign instr_is_compressed[i] = ~&data_i[i*16+:2]; + end + + // save the unaligned part of the instruction to this ff + logic [15:0] unaligned_instr_d, unaligned_instr_q; + // the last instruction was unaligned + logic unaligned_d, unaligned_q; + // register to save the unaligned address + logic [riscv::VLEN-1:0] unaligned_address_d, unaligned_address_q; + // we have an unaligned instruction + assign serving_unaligned_o = unaligned_q; + + // Instruction re-alignment + if (FETCH_WIDTH == 32) begin : realign_bp_32 + always_comb begin : re_align + unaligned_d = unaligned_q; + unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + unaligned_instr_d = data_i[31:16]; + + valid_o[0] = valid_i; + instr_o[0] = (unaligned_q) ? {data_i[15:0], unaligned_instr_q} : data_i[31:0]; + addr_o[0] = (unaligned_q) ?
unaligned_address_q : address_i; + + valid_o[1] = 1'b0; + instr_o[1] = '0; + addr_o[1] = {address_i[riscv::VLEN-1:2], 2'b10}; + + // this instruction is compressed or the last instruction was unaligned + if (instr_is_compressed[0] || unaligned_q) begin + // check if this instruction is still unaligned, i.e. it is not compressed + // if its compressed re-set unaligned flag + // for 32 bit we can simply check the next instruction and whether it is compressed or not + // if it is compressed the next fetch will contain an aligned instruction + // is instruction 1 also compressed + // yes? -> no problem, no -> we've got an unaligned instruction + if (instr_is_compressed[1]) begin + unaligned_d = 1'b0; + valid_o[1] = valid_i; + instr_o[1] = {16'b0, data_i[31:16]}; + end else begin + // save the upper bits for next cycle + unaligned_d = 1'b1; + unaligned_instr_d = data_i[31:16]; + unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + end + end // else -> normal fetch + + // we started to fetch on a unaligned boundary with a whole instruction -> wait until we've + // received the next instruction + if (valid_i && address_i[1]) begin + // the instruction is not compressed so we can't do anything in this cycle + if (!instr_is_compressed[0]) begin + valid_o = '0; + unaligned_d = 1'b1; + unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + unaligned_instr_d = data_i[15:0]; + // the instruction is compressed -> only slot 0 is valid in this cycle + end else begin + valid_o = {{INSTR_PER_FETCH - 1{1'b0}}, 1'b1}; + end + end + end + // TODO(zarubaf): Fix 64 bit FETCH_WIDTH, maybe generalize to arbitrary fetch width + end else if (FETCH_WIDTH == 64) begin : realign_bp_64 + initial begin + $error("Not propperly implemented"); + end + always_comb begin : re_align + unaligned_d = unaligned_q; + unaligned_address_d = unaligned_address_q; + unaligned_instr_d = unaligned_instr_q; + + valid_o = '0; + valid_o[0] = valid_i; + + instr_o[0] = data_i[31:0]; + addr_o[0] = address_i;
+ + instr_o[1] = '0; + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b010}; + + instr_o[2] = {16'b0, data_i[47:32]}; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b100}; + + instr_o[3] = {16'b0, data_i[63:48]}; + addr_o[3] = {address_i[riscv::VLEN-1:3], 3'b110}; + + // last instruction was unaligned + if (unaligned_q) begin + instr_o[0] = {data_i[15:0], unaligned_instr_q}; + addr_o[0] = unaligned_address_q; + // for 64 bit there exist the following options: + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | I | U | -> again unaligned + // | * | C | I | U | -> aligned + // | * | I | C | U | -> aligned + // | I | C | C | U | -> again unaligned + // | * | C | C | C | U | -> aligned + // Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half + // * = don't care + if (instr_is_compressed[1]) begin + instr_o[1] = {16'b0, data_i[31:16]}; + valid_o[1] = valid_i; + + if (instr_is_compressed[2]) begin + if (instr_is_compressed[3]) begin + unaligned_d = 1'b0; + valid_o[3] = valid_i; + end else begin + // continues to be unaligned + end + end else begin + unaligned_d = 1'b0; + instr_o[2] = data_i[63:32]; + valid_o[2] = valid_i; + end + // instruction 1 is not compressed + end else begin + instr_o[1] = data_i[47:16]; + valid_o[1] = valid_i; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + if (instr_is_compressed[2]) begin + unaligned_d = 1'b0; + instr_o[2] = {16'b0, data_i[63:48]}; + valid_o[2] = valid_i; + end else begin + // continues to be unaligned + end + end + end else if (instr_is_compressed[0]) begin // instruction zero is RVC + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | I | C | -> again unaligned + // | * | C | I | C | -> aligned + // | * | I | C | C | -> aligned + // | I | C | C | C | -> again unaligned + // | * | C | C | C | C | -> aligned + if (instr_is_compressed[1]) begin + instr_o[1] = {16'b0, data_i[31:16]}; + valid_o[1] = valid_i; + + if (instr_is_compressed[2]) begin + valid_o[2] = valid_i; + if
(instr_is_compressed[3]) begin + valid_o[3] = valid_i; + end else begin + // this instruction is unaligned + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[3]; + end + end else begin + instr_o[2] = data_i[63:32]; + valid_o[2] = valid_i; + end + // instruction 1 is not compressed -> check slot 3 + end else begin + instr_o[1] = data_i[47:16]; + valid_o[1] = valid_i; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + if (instr_is_compressed[3]) begin + instr_o[2] = data_i[63:48]; + valid_o[2] = valid_i; + end else begin + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[2]; + end + end + + // Full instruction in slot zero + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | C | I | + // | * | C | C | I | + // | * | I | I | + end else begin + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + + if (instr_is_compressed[2]) begin + instr_o[1] = {16'b0, data_i[47:32]}; + valid_o[1] = valid_i; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + if (instr_is_compressed[3]) begin + // | * | C | C | I | + valid_o[2] = valid_i; + instr_o[2] = {16'b0, data_i[63:48]}; // compressed instruction of slot 3; addr_o[2] already points at it + end else begin + // this instruction is unaligned + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[2]; + end + end else begin + // two regular instructions back-to-back + instr_o[1] = data_i[63:32]; + valid_o[1] = valid_i; + end + end + + // -------------------------- + // Unaligned fetch + // -------------------------- + // Address was not 64 bit aligned + case (address_i[2:1]) + // this means the previous instruction was either compressed or unaligned + // in any case we don't care + 2'b01: begin + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | I | x -> again unaligned + // | * | C | I | x -> aligned + // | * | I | C | x -> aligned + // | I | C | C | x -> again unaligned + // | * | C | C | C | x -> aligned + addr_o[0] = {address_i[riscv::VLEN-1:3],
3'b010}; + + if (instr_is_compressed[1]) begin + instr_o[0] = {16'b0, data_i[31:16]}; + valid_o[0] = valid_i; + + if (instr_is_compressed[2]) begin + valid_o[1] = valid_i; + instr_o[1] = {16'b0, data_i[47:32]}; + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + if (instr_is_compressed[3]) begin + instr_o[2] = {16'b0, data_i[63:48]}; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + valid_o[2] = valid_i; + end else begin + // this instruction is unaligned + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[3]; + end + end else begin + instr_o[1] = data_i[63:32]; + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + valid_o[1] = valid_i; + end + // instruction 1 is not compressed -> check slot 3 + end else begin + instr_o[0] = data_i[47:16]; + valid_o[0] = valid_i; + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b110}; + if (instr_is_compressed[3]) begin + instr_o[1] = data_i[63:48]; + valid_o[1] = valid_i; + end else begin + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[1]; + end + end + end + 2'b10: begin + valid_o = '0; + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | C | * | <- unaligned + // | C | C | * | <- aligned + // | I | * | <- aligned + if (instr_is_compressed[2]) begin + valid_o[0] = valid_i; + instr_o[0] = data_i[47:32]; + // second instruction is also compressed + if (instr_is_compressed[3]) begin + valid_o[1] = valid_i; + instr_o[1] = data_i[63:48]; + // regular instruction -> unaligned + end else begin + unaligned_d = 1'b1; + unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110}; + unaligned_instr_d = data_i[63:48]; + end + // instruction is a regular instruction + end else begin + valid_o[0] = valid_i; + instr_o[0] = data_i[63:32]; + addr_o[0] = address_i; + end + end + // we started to fetch on a unaligned boundary with a whole instruction -> wait until we've + // received the next instruction + 2'b11: begin + valid_o = '0; + if
(!instr_is_compressed[3]) begin + unaligned_d = 1'b1; + unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110}; + unaligned_instr_d = data_i[63:48]; + end else begin + valid_o[3] = valid_i; + end + end + endcase + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + unaligned_q <= 1'b0; + unaligned_address_q <= '0; + unaligned_instr_q <= '0; + end else begin + if (valid_i) begin + unaligned_address_q <= unaligned_address_d; + unaligned_instr_q <= unaligned_instr_d; + end + + if (flush_i) begin + unaligned_q <= 1'b0; + end else if (valid_i) begin + unaligned_q <= unaligned_d; + end + end + end +endmodule diff --git a/test/type_param/core/issue_read_operands.sv b/test/type_param/core/issue_read_operands.sv new file mode 100644 index 0000000..2e32486 --- /dev/null +++ b/test/type_param/core/issue_read_operands.sv @@ -0,0 +1,604 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.04.2017 +// Description: Issues instruction from the scoreboard and fetches the operands +// This also includes all the forwarding logic + + +module issue_read_operands + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type rs3_len_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // flush + input logic flush_i, + // stall + input logic stall_i, + // coming from decoder + input scoreboard_entry_t issue_instr_i, + input logic issue_instr_valid_i, + output logic issue_ack_o, + // lookup rd in scoreboard + output logic [REG_ADDR_SIZE-1:0] rs1_o, + input riscv::xlen_t rs1_i, + input logic rs1_valid_i, + output logic [REG_ADDR_SIZE-1:0] rs2_o, + input riscv::xlen_t rs2_i, + input logic rs2_valid_i, + output logic [REG_ADDR_SIZE-1:0] rs3_o, + input rs3_len_t rs3_i, + input logic rs3_valid_i, + // get clobber input + input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_i, + input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_i, + // To FU, just single issue for now + output fu_data_t fu_data_o, + output riscv::xlen_t rs1_forwarding_o, // unregistered version of fu_data_o.operanda + output riscv::xlen_t rs2_forwarding_o, // unregistered version of fu_data_o.operandb + output logic [riscv::VLEN-1:0] pc_o, + output logic is_compressed_instr_o, + // ALU 1 + input logic flu_ready_i, // Fixed latency unit ready to accept a new request + output logic alu_valid_o, // Output is valid + // Branches and Jumps + output logic branch_valid_o, // this is a valid branch instruction + output branchpredict_sbe_t branch_predict_o, + // LSU + input logic lsu_ready_i, // FU is ready + output logic lsu_valid_o, // Output is valid + // MULT + output logic mult_valid_o, // Output is valid + // FPU + input logic fpu_ready_i, // FU is ready + output logic fpu_valid_o, // Output is valid + output logic [1:0] fpu_fmt_o, // FP fmt field 
from instr. + output logic [2:0] fpu_rm_o, // FP rm field from instr. + // CSR + output logic csr_valid_o, // Output is valid + // CVXIF + output logic cvxif_valid_o, + input logic cvxif_ready_i, + output logic [31:0] cvxif_off_instr_o, + // commit port + input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i, + + output logic stall_issue_o // stall signal, we do not want to fetch any more entries + // committing instruction instruction + // from scoreboard + // input scoreboard_entry commit_instr_i, + // output logic commit_ack_o +); + logic stall; + logic fu_busy; // functional unit is busy + riscv::xlen_t operand_a_regfile, operand_b_regfile; // operands coming from regfile + rs3_len_t + operand_c_regfile, + operand_c_fpr, + operand_c_gpr; // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 + // output flipflop (ID <-> EX) + riscv::xlen_t operand_a_n, operand_a_q, operand_b_n, operand_b_q, imm_n, imm_q, imm_forward_rs3; + + logic alu_valid_q; + logic mult_valid_q; + logic fpu_valid_q; + logic [ 1:0] fpu_fmt_q; + logic [ 2:0] fpu_rm_q; + logic lsu_valid_q; + logic csr_valid_q; + logic branch_valid_q; + logic cvxif_valid_q; + logic [31:0] cvxif_off_instr_q; + + logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; + fu_op operator_n, operator_q; // operation to perform + fu_t fu_n, fu_q; // functional unit to use + + // forwarding signals + logic forward_rs1, forward_rs2, forward_rs3; + + // original instruction stored in tval + riscv::instruction_t orig_instr; + assign orig_instr = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]); + + // ID <-> EX registers + + assign rs1_forwarding_o = operand_a_n[riscv::VLEN-1:0]; //forwarding or unregistered rs1 value + assign rs2_forwarding_o = operand_b_n[riscv::VLEN-1:0]; //forwarding or unregistered rs2 value + + assign fu_data_o.operand_a = 
operand_a_q; + assign fu_data_o.operand_b = operand_b_q; + assign fu_data_o.fu = fu_q; + assign fu_data_o.operation = operator_q; + assign fu_data_o.trans_id = trans_id_q; + assign fu_data_o.imm = imm_q; + assign alu_valid_o = alu_valid_q; + assign branch_valid_o = branch_valid_q; + assign lsu_valid_o = lsu_valid_q; + assign csr_valid_o = csr_valid_q; + assign mult_valid_o = mult_valid_q; + assign fpu_valid_o = fpu_valid_q; + assign fpu_fmt_o = fpu_fmt_q; + assign fpu_rm_o = fpu_rm_q; + assign cvxif_valid_o = CVA6Cfg.CvxifEn ? cvxif_valid_q : '0; + assign cvxif_off_instr_o = CVA6Cfg.CvxifEn ? cvxif_off_instr_q : '0; + assign stall_issue_o = stall; + // --------------- + // Issue Stage + // --------------- + + // select the right busy signal + // this obviously depends on the functional unit we need + always_comb begin : unit_busy + unique case (issue_instr_i.fu) + NONE: fu_busy = 1'b0; + ALU, CTRL_FLOW, CSR, MULT: fu_busy = ~flu_ready_i; + LOAD, STORE: fu_busy = ~lsu_ready_i; + CVXIF: fu_busy = ~cvxif_ready_i; + default: begin + if (CVA6Cfg.FpPresent && (issue_instr_i.fu == FPU || issue_instr_i.fu == FPU_VEC)) begin + fu_busy = ~fpu_ready_i; + end else begin + fu_busy = 1'b0; + end + end + endcase + end + + // --------------- + // Register stage + // --------------- + // check that all operands are available, otherwise stall + // forward corresponding register + always_comb begin : operands_available + stall = stall_i; + // operand forwarding signals + forward_rs1 = 1'b0; + forward_rs2 = 1'b0; + forward_rs3 = 1'b0; // FPR only + // poll the scoreboard for those values + rs1_o = issue_instr_i.rs1; + rs2_o = issue_instr_i.rs2; + rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field + + // 0. check that we are not using the zimm type in RS1 + // as this is an immediate we do not have to wait on anything here + // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr) + // 2. 
poll the scoreboard + if (!issue_instr_i.use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i.op + )) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE : + rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin + // check if the clobbering instruction is not a CSR instruction, CSR instructions can only + // be fetched through the register file since they can't be forwarded + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs1_valid_i && (CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i.op + ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs1] != CSR) || + (CVA6Cfg.RVS && issue_instr_i.op == SFENCE_VMA)))) begin + forward_rs1 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end + + if ((CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i.op + )) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE : + rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs2_valid_i && (CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i.op + ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs2] != CSR) || + (CVA6Cfg.RVS && issue_instr_i.op == SFENCE_VMA)))) begin + forward_rs2 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end + + // Only check clobbered gpr for OFFLOADED instruction + if ((CVA6Cfg.FpPresent && is_imm_fpr( + issue_instr_i.op + )) ? rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE : + issue_instr_i.op == OFFLOAD && CVA6Cfg.NrRgprPorts == 3 ? + rd_clobber_gpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE : 0) begin + // if the operand is available, forward it. 
CSRs don't write to/from FPR so no need to check + if (rs3_valid_i) begin + forward_rs3 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end + end + + // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_rs3 + assign imm_forward_rs3 = rs3_i; + end else begin : gen_fp_rs3 + assign imm_forward_rs3 = {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, rs3_i}; + end + + // Forwarding/Output MUX + always_comb begin : forwarding_operand_select + // default is regfiles (gpr or fpr) + operand_a_n = operand_a_regfile; + operand_b_n = operand_b_regfile; + // immediates are the third operands in the store case + // for FP operations, the imm field can also be the third operand from the regfile + if (CVA6Cfg.NrRgprPorts == 3) begin + imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? + {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : + issue_instr_i.op == OFFLOAD ? operand_c_regfile : issue_instr_i.result; + end else begin + imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? 
+ {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : issue_instr_i.result; + end + trans_id_n = issue_instr_i.trans_id; + fu_n = issue_instr_i.fu; + operator_n = issue_instr_i.op; + // or should we forward + if (forward_rs1) begin + operand_a_n = rs1_i; + end + + if (forward_rs2) begin + operand_b_n = rs2_i; + end + + if (CVA6Cfg.FpPresent && forward_rs3) begin + imm_n = imm_forward_rs3; + end + + // use the PC as operand a + if (issue_instr_i.use_pc) begin + operand_a_n = { + {riscv::XLEN - riscv::VLEN{issue_instr_i.pc[riscv::VLEN-1]}}, issue_instr_i.pc + }; + end + + // use the zimm as operand a + if (issue_instr_i.use_zimm) begin + // zero extend operand a + operand_a_n = {{riscv::XLEN - 5{1'b0}}, issue_instr_i.rs1[4:0]}; + end + // or is it an immediate (including PC), this is not the case for a store, control flow, and accelerator instructions + // also make sure operand B is not already used as an FP operand + if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && (issue_instr_i.fu != ACCEL) && !(CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i.op + ))) begin + operand_b_n = issue_instr_i.result; + end + end + + // FU select, assert the correct valid out signal (in the next cycle) + // This needs to be like this to make verilator happy. I know its ugly. 
+ always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + alu_valid_q <= 1'b0; + lsu_valid_q <= 1'b0; + mult_valid_q <= 1'b0; + fpu_valid_q <= 1'b0; + fpu_fmt_q <= 2'b0; + fpu_rm_q <= 3'b0; + csr_valid_q <= 1'b0; + branch_valid_q <= 1'b0; + end else begin + alu_valid_q <= 1'b0; + lsu_valid_q <= 1'b0; + mult_valid_q <= 1'b0; + fpu_valid_q <= 1'b0; + fpu_fmt_q <= 2'b0; + fpu_rm_q <= 3'b0; + csr_valid_q <= 1'b0; + branch_valid_q <= 1'b0; + // Exception pass through: + // If an exception has occurred simply pass it through + // we do not want to issue this instruction + if (!issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin + case (issue_instr_i.fu) + ALU: begin + alu_valid_q <= 1'b1; + end + CTRL_FLOW: begin + branch_valid_q <= 1'b1; + end + MULT: begin + mult_valid_q <= 1'b1; + end + LOAD, STORE: begin + lsu_valid_q <= 1'b1; + end + CSR: begin + csr_valid_q <= 1'b1; + end + default: begin + if (issue_instr_i.fu == FPU && CVA6Cfg.FpPresent) begin + fpu_valid_q <= 1'b1; + fpu_fmt_q <= orig_instr.rftype.fmt; // fmt bits from instruction + fpu_rm_q <= orig_instr.rftype.rm; // rm bits from instruction + end else if (issue_instr_i.fu == FPU_VEC && CVA6Cfg.FpPresent) begin + fpu_valid_q <= 1'b1; + fpu_fmt_q <= orig_instr.rvftype.vfmt; // vfmt bits from instruction + fpu_rm_q <= {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction + end + end + endcase + end + // if we got a flush request, de-assert the valid flag, otherwise we will start this + // functional unit with the wrong inputs + if (flush_i) begin + alu_valid_q <= 1'b0; + lsu_valid_q <= 1'b0; + mult_valid_q <= 1'b0; + fpu_valid_q <= 1'b0; + csr_valid_q <= 1'b0; + branch_valid_q <= 1'b0; + end + end + end + + if (CVA6Cfg.CvxifEn) begin + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + cvxif_valid_q <= 1'b0; + cvxif_off_instr_q <= 32'b0; + end else begin + cvxif_valid_q <= 1'b0; + cvxif_off_instr_q <= 32'b0; + if (!issue_instr_i.ex.valid && 
issue_instr_valid_i && issue_ack_o) begin + case (issue_instr_i.fu) + CVXIF: begin + cvxif_valid_q <= 1'b1; + cvxif_off_instr_q <= orig_instr; + end + default: ; + endcase + end + if (flush_i) begin + cvxif_valid_q <= 1'b0; + cvxif_off_instr_q <= 32'b0; + end + end + end + end + + // We can issue an instruction if we do not detect that any other instruction is writing the same + // destination register. + // We also need to check if there is an unresolved branch in the scoreboard. + always_comb begin : issue_scoreboard + // default assignment + issue_ack_o = 1'b0; + // check that we didn't stall, that the instruction we got is valid + // and that the functional unit we need is not busy + if (issue_instr_valid_i) begin + // check that the corresponding functional unit is not busy + if (!stall && !fu_busy) begin + // ----------------------------------------- + // WAW - Write After Write Dependency Check + // ----------------------------------------- + // no other instruction has the same destination register -> issue the instruction + if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + issue_instr_i.op + )) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE) : + (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin + issue_ack_o = 1'b1; + end + // or check that the target destination register will be written in this cycle by the + // commit stage + for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) + if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + issue_instr_i.op + )) ? 
(we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd[4:0]) : + (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd[4:0])) begin + issue_ack_o = 1'b1; + end + + end + // we can also issue the instruction under the following two circumstances: + // we can do this even if we are stalled or no functional unit is ready (as we don't need one) + // the decoder needs to make sure that the instruction is marked as valid when it does not + // need any functional unit or if an exception occurred previous to the execute stage. + // 1. we already got an exception + if (issue_instr_i.ex.valid) begin + issue_ack_o = 1'b1; + end + // 2. it is an instruction which does not need any functional unit + if (issue_instr_i.fu == NONE) begin + issue_ack_o = 1'b1; + end + end + // after a multiplication was issued we can only issue another multiplication + // otherwise we will get contentions on the fixed latency bus + if (mult_valid_q && issue_instr_i.fu inside {ALU, CTRL_FLOW, CSR}) begin + issue_ack_o = 1'b0; + end + end + + // ---------------------- + // Integer Register File + // ---------------------- + logic [ CVA6Cfg.NrRgprPorts-1:0][riscv::XLEN-1:0] rdata; + logic [ CVA6Cfg.NrRgprPorts-1:0][ 4:0] raddr_pack; + + // pack signals + logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_pack; + logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_pack; + logic [CVA6Cfg.NrCommitPorts-1:0] we_pack; + + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_rs3 + assign raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + end else begin : gen_no_rs3 + assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + end + + for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_write_back_port + assign waddr_pack[i] = waddr_i[i]; + assign wdata_pack[i] = wdata_i[i]; + assign we_pack[i] = we_gpr_i[i]; + end + if (ariane_pkg::FPGA_EN) begin : gen_fpga_regfile + ariane_regfile_fpga #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (riscv::XLEN), + 
.NR_READ_PORTS(CVA6Cfg.NrRgprPorts), + .ZERO_REG_ZERO(1) + ) i_ariane_regfile_fpga ( + .test_en_i(1'b0), + .raddr_i (raddr_pack), + .rdata_o (rdata), + .waddr_i (waddr_pack), + .wdata_i (wdata_pack), + .we_i (we_pack), + .* + ); + end else begin : gen_asic_regfile + ariane_regfile #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (riscv::XLEN), + .NR_READ_PORTS(CVA6Cfg.NrRgprPorts), + .ZERO_REG_ZERO(1) + ) i_ariane_regfile ( + .test_en_i(1'b0), + .raddr_i (raddr_pack), + .rdata_o (rdata), + .waddr_i (waddr_pack), + .wdata_i (wdata_pack), + .we_i (we_pack), + .* + ); + end + + // ----------------------------- + // Floating-Point Register File + // ----------------------------- + logic [2:0][CVA6Cfg.FLen-1:0] fprdata; + + // pack signals + logic [2:0][4:0] fp_raddr_pack; + logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] fp_wdata_pack; + + generate + if (CVA6Cfg.FpPresent) begin : float_regfile_gen + assign fp_raddr_pack = { + issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0] + }; + for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_fp_wdata_pack + assign fp_wdata_pack[i] = {wdata_i[i][CVA6Cfg.FLen-1:0]}; + end + if (ariane_pkg::FPGA_EN) begin : gen_fpga_fp_regfile + ariane_regfile_fpga #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (CVA6Cfg.FLen), + .NR_READ_PORTS(3), + .ZERO_REG_ZERO(0) + ) i_ariane_fp_regfile_fpga ( + .test_en_i(1'b0), + .raddr_i (fp_raddr_pack), + .rdata_o (fprdata), + .waddr_i (waddr_pack), + .wdata_i (fp_wdata_pack), + .we_i (we_fpr_i), + .* + ); + end else begin : gen_asic_fp_regfile + ariane_regfile #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (CVA6Cfg.FLen), + .NR_READ_PORTS(3), + .ZERO_REG_ZERO(0) + ) i_ariane_fp_regfile ( + .test_en_i(1'b0), + .raddr_i (fp_raddr_pack), + .rdata_o (fprdata), + .waddr_i (waddr_pack), + .wdata_i (fp_wdata_pack), + .we_i (we_fpr_i), + .* + ); + end + end else begin : no_fpr_gen + assign fprdata = '{default: '0}; + end + endgenerate + + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_operand_c + assign 
operand_c_fpr = {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[2]}; + assign operand_c_gpr = rdata[2]; + end else begin + assign operand_c_fpr = fprdata[2]; + end + + assign operand_a_regfile = (CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i.op + )) ? {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[0]} : rdata[0]; + assign operand_b_regfile = (CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i.op + )) ? {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[1]} : rdata[1]; + assign operand_c_regfile = (CVA6Cfg.NrRgprPorts == 3) ? ((CVA6Cfg.FpPresent && is_imm_fpr( + issue_instr_i.op + )) ? operand_c_fpr : operand_c_gpr) : operand_c_fpr; + + + // ---------------------- + // Registers (ID <-> EX) + // ---------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + operand_a_q <= '{default: 0}; + operand_b_q <= '{default: 0}; + imm_q <= '0; + fu_q <= NONE; + operator_q <= ADD; + trans_id_q <= '0; + pc_o <= '0; + is_compressed_instr_o <= 1'b0; + branch_predict_o <= {cf_t'(0), {riscv::VLEN{1'b0}}}; + end else begin + operand_a_q <= operand_a_n; + operand_b_q <= operand_b_n; + imm_q <= imm_n; + fu_q <= fu_n; + operator_q <= operator_n; + trans_id_q <= trans_id_n; + pc_o <= issue_instr_i.pc; + is_compressed_instr_o <= issue_instr_i.is_compressed; + branch_predict_o <= issue_instr_i.bp; + end + end + + //pragma translate_off + initial begin + assert (CVA6Cfg.NrRgprPorts == 2 || (CVA6Cfg.NrRgprPorts == 3 && CVA6Cfg.CvxifEn)) + else + $fatal( + 1, + "If CVXIF is enable, ariane regfile can have either 2 or 3 read ports. Else it has 2 read ports." 
+ ); + end + + assert property (@(posedge clk_i) (branch_valid_q) |-> (!$isunknown( + operand_a_q + ) && !$isunknown( + operand_b_q + ))) + else $warning("Got unknown value in one of the operands"); + + //pragma translate_on +endmodule + + diff --git a/test/type_param/core/issue_stage.sv b/test/type_param/core/issue_stage.sv new file mode 100644 index 0000000..64b8cb5 --- /dev/null +++ b/test/type_param/core/issue_stage.sv @@ -0,0 +1,199 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 21.05.2017 +// Description: Issue stage dispatches instructions to the FUs and keeps track of them +// in a scoreboard like data-structure. 
+ + +module issue_stage + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + output logic sb_full_o, + input logic flush_unissued_instr_i, // wired to flush_i of issue_read_operands below + input logic flush_i, // not referenced by name in this module; presumably reaches the scoreboard through its .* connection - TODO confirm + input logic stall_i, // Stall issue stage + // from ISSUE + input scoreboard_entry_t decoded_instr_i, + input logic decoded_instr_valid_i, + input logic is_ctrl_flow_i, + output logic decoded_instr_ack_o, + // to EX + output [riscv::VLEN-1:0] rs1_forwarding_o, // unregistered version of fu_data_o.operanda, low VLEN bits of rs1_forwarding_xlen + output [riscv::VLEN-1:0] rs2_forwarding_o, // unregistered version of fu_data_o.operandb, low VLEN bits of rs2_forwarding_xlen + output fu_data_t fu_data_o, + output logic [riscv::VLEN-1:0] pc_o, + output logic is_compressed_instr_o, + input logic flu_ready_i, + output logic alu_valid_o, + // ex just resolved our predicted branch, we are ready to accept new requests + input logic resolve_branch_i, + + input logic lsu_ready_i, + output logic lsu_valid_o, + // branch prediction + output logic branch_valid_o, // use branch prediction unit + output branchpredict_sbe_t branch_predict_o, // Branch predict Out + + output logic mult_valid_o, + + input logic fpu_ready_i, + output logic fpu_valid_o, + output logic [1:0] fpu_fmt_o, // FP fmt field from instr. + output logic [2:0] fpu_rm_o, // FP rm field from instr. 
+ + output logic csr_valid_o, + + // CVXIF + //Issue interface + output logic x_issue_valid_o, // driven by cvxif_valid_o of issue_read_operands + input logic x_issue_ready_i, // feeds cvxif_ready_i of issue_read_operands + output logic [31:0] x_off_instr_o, // driven by cvxif_off_instr_o of issue_read_operands + + // to accelerator dispatcher + output scoreboard_entry_t issue_instr_o, // copy of the instruction the scoreboard hands to issue_read_operands + output logic issue_instr_hs_o, // issue handshake: scoreboard valid AND read-operands ack + + // write back port + input logic [CVA6Cfg.NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_i, + input bp_resolve_t resolved_branch_i, + input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, + input exception_t [CVA6Cfg.NrWbPorts-1:0] ex_ex_i, // exception from execute stage or CVXIF offloaded instruction; connected to ex_i of the scoreboard + input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, + input logic x_we_i, + + // commit port + input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i, + + output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o, + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, + + output logic stall_issue_o, // Used in Performance Counters + + //RVFI + output logic [TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, + output logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] rvfi_commit_pointer_o +); + // --------------------------------------------------- + // Scoreboard (SB) <-> Issue and Read Operands (IRO); the rs3_len_t typedef below is XLEN wide with 3 GPR read ports and FLen wide otherwise, and is passed to both sub-modules as a type parameter + // --------------------------------------------------- + typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? 
riscv::XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t; + + fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro; + fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro; + + logic [ REG_ADDR_SIZE-1:0] rs1_iro_sb; + riscv::xlen_t rs1_sb_iro; + logic rs1_valid_sb_iro; + + logic [ REG_ADDR_SIZE-1:0] rs2_iro_sb; + riscv::xlen_t rs2_sb_iro; + logic rs2_valid_iro_sb; + + logic [ REG_ADDR_SIZE-1:0] rs3_iro_sb; + rs3_len_t rs3_sb_iro; + logic rs3_valid_iro_sb; + + scoreboard_entry_t issue_instr_sb_iro; + logic issue_instr_valid_sb_iro; + logic issue_ack_iro_sb; + + riscv::xlen_t rs1_forwarding_xlen; + riscv::xlen_t rs2_forwarding_xlen; + + assign rs1_forwarding_o = rs1_forwarding_xlen[riscv::VLEN-1:0]; + assign rs2_forwarding_o = rs2_forwarding_xlen[riscv::VLEN-1:0]; + + assign issue_instr_o = issue_instr_sb_iro; + assign issue_instr_hs_o = issue_instr_valid_sb_iro & issue_ack_iro_sb; + + + // --------------------------------------------------------- + // 2. Manage instructions in a scoreboard + // --------------------------------------------------------- + scoreboard #( + .CVA6Cfg (CVA6Cfg), + .rs3_len_t(rs3_len_t) + ) i_scoreboard ( + .sb_full_o (sb_full_o), + .unresolved_branch_i(1'b0), // tied low at this level + .rd_clobber_gpr_o (rd_clobber_gpr_sb_iro), + .rd_clobber_fpr_o (rd_clobber_fpr_sb_iro), + .rs1_i (rs1_iro_sb), + .rs1_o (rs1_sb_iro), + .rs1_valid_o (rs1_valid_sb_iro), + .rs2_i (rs2_iro_sb), + .rs2_o (rs2_sb_iro), + .rs2_valid_o (rs2_valid_iro_sb), // scoreboard output despite the _iro_sb suffix + .rs3_i (rs3_iro_sb), + .rs3_o (rs3_sb_iro), + .rs3_valid_o (rs3_valid_iro_sb), // scoreboard output despite the _iro_sb suffix + + .decoded_instr_i (decoded_instr_i), + .decoded_instr_valid_i(decoded_instr_valid_i), + .decoded_instr_ack_o (decoded_instr_ack_o), + .issue_instr_o (issue_instr_sb_iro), + .issue_instr_valid_o (issue_instr_valid_sb_iro), + .issue_ack_i (issue_ack_iro_sb), + + .resolved_branch_i(resolved_branch_i), + .trans_id_i (trans_id_i), + .wbdata_i (wbdata_i), + .ex_i 
(ex_ex_i), + .* // remaining scoreboard ports connect implicitly by matching name + ); + + // --------------------------------------------------------- + // 3. 
Issue instruction and read operand, also commit + // --------------------------------------------------------- + issue_read_operands #( + .CVA6Cfg (CVA6Cfg), + .rs3_len_t(rs3_len_t) + ) i_issue_read_operands ( + .flush_i (flush_unissued_instr_i), // explicit connection, so .* does not pick up this module's flush_i + .issue_instr_i (issue_instr_sb_iro), + .issue_instr_valid_i(issue_instr_valid_sb_iro), + .issue_ack_o (issue_ack_iro_sb), + .fu_data_o (fu_data_o), + .flu_ready_i (flu_ready_i), + .rs1_o (rs1_iro_sb), + .rs1_i (rs1_sb_iro), + .rs1_valid_i (rs1_valid_sb_iro), + .rs2_o (rs2_iro_sb), + .rs2_i (rs2_sb_iro), + .rs2_valid_i (rs2_valid_iro_sb), + .rs3_o (rs3_iro_sb), + .rs3_i (rs3_sb_iro), + .rs3_valid_i (rs3_valid_iro_sb), + .rd_clobber_gpr_i (rd_clobber_gpr_sb_iro), + .rd_clobber_fpr_i (rd_clobber_fpr_sb_iro), + .alu_valid_o (alu_valid_o), + .branch_valid_o (branch_valid_o), + .csr_valid_o (csr_valid_o), + .cvxif_valid_o (x_issue_valid_o), + .cvxif_ready_i (x_issue_ready_i), + .cvxif_off_instr_o (x_off_instr_o), + .mult_valid_o (mult_valid_o), + .rs1_forwarding_o (rs1_forwarding_xlen), + .rs2_forwarding_o (rs2_forwarding_xlen), + .stall_issue_o (stall_issue_o), + .* // remaining ports connect implicitly by matching name + ); + +endmodule diff --git a/test/type_param/core/load_store_unit.sv b/test/type_param/core/load_store_unit.sv new file mode 100644 index 0000000..14a281f --- /dev/null +++ b/test/type_param/core/load_store_unit.sv @@ -0,0 +1,493 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.04.2017 +// Description: Load Store Unit, handles address calculation and memory interface signals + + +module load_store_unit // instantiates the MMU (or a registered pass-through when none is generated), store_unit, load_unit, lsu_bypass and two shift_reg return pipelines + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned ASID_WIDTH = 1 // width of asid_i and asid_to_be_flushed_i, forwarded to the MMU instance +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic stall_st_pending_i, + output logic no_st_pending_o, + input logic amo_valid_commit_i, + + input fu_data_t fu_data_i, // operand_a, operand_b, imm, fu, operation and trans_id are read from this bundle + output logic lsu_ready_o, // FU is ready e.g. not busy + input logic lsu_valid_i, // Input is valid + + output logic [TRANS_ID_BITS-1:0] load_trans_id_o, // ID of scoreboard entry at which to write back + output riscv::xlen_t load_result_o, + output logic load_valid_o, + output exception_t load_exception_o, // to WB, signal exception status LD exception + + output logic [TRANS_ID_BITS-1:0] store_trans_id_o, // ID of scoreboard entry at which to write back + output riscv::xlen_t store_result_o, + output logic store_valid_o, + output exception_t store_exception_o, // to WB, signal exception status ST exception + + input logic commit_i, // commit the pending store + output logic commit_ready_o, // commit queue is ready to accept another commit request + input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, // passed on to the load unit + + input logic enable_translation_i, // enable virtual memory translation + input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores + + // icache translation requests + input 
icache_arsp_t icache_areq_i, + output icache_areq_t icache_areq_o, + + input riscv::priv_lvl_t priv_lvl_i, // From CSR register file + input riscv::priv_lvl_t ld_st_priv_lvl_i, // From CSR register file + input logic sum_i, // From CSR register file + input logic mxr_i, // From CSR register file + input logic [riscv::PPNW-1:0] satp_ppn_i, // From 
CSR register file + input logic [ ASID_WIDTH-1:0] asid_i, // From CSR register file + input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + input logic flush_tlb_i, + // Performance counters + output logic itlb_miss_o, + output logic dtlb_miss_o, + + // interface to dcache: index 0 is the MMU (PTW) port, 1 the load unit, 2 the store unit + input dcache_req_o_t [ 2:0] dcache_req_ports_i, + output dcache_req_i_t [ 2:0] dcache_req_ports_o, + input logic dcache_wbuffer_empty_i, + input logic dcache_wbuffer_not_ni_i, + // AMO interface + output amo_req_t amo_req_o, + input amo_resp_t amo_resp_i, + // PMP + input riscv::pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + + //RVFI + output lsu_ctrl_t rvfi_lsu_ctrl_o, // mirrors the internal lsu_ctrl signal + output [riscv::PLEN-1:0] rvfi_mem_paddr_o // driven by the store unit +); + // data is misaligned + logic data_misaligned; + // -------------------------------------- + // 1st register stage - (stall registers) + // -------------------------------------- + // those are the signals which are always correct + // e.g.: they keep the value in the stall case + lsu_ctrl_t lsu_ctrl; + + logic pop_st; + logic pop_ld; + + // ------------------------------ + // Address Generation Unit (AGU) + // ------------------------------ + // virtual address as calculated by the AGU in the first cycle + logic [ riscv::VLEN-1:0] vaddr_i; + riscv::xlen_t vaddr_xlen; + logic overflow; + logic [(riscv::XLEN/8)-1:0] be_i; + + assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a)); // signed sum of immediate and operand_a + assign vaddr_i = vaddr_xlen[riscv::VLEN-1:0]; // low VLEN bits only + // we work with SV39 or SV32, so if VM is enabled, check that all bits [XLEN-1:38] or [XLEN-1:31] are equal + assign overflow = (riscv::IS_XLEN64 && (!((&vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b1 || (|vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 
1'b0))); // can only be set when IS_XLEN64 is true + + logic st_valid_i; + logic ld_valid_i; + logic ld_translation_req; + logic st_translation_req; + logic [riscv::VLEN-1:0] ld_vaddr; + logic [riscv::VLEN-1:0] st_vaddr; + logic 
translation_req; + logic translation_valid; + logic [riscv::VLEN-1:0] mmu_vaddr; + logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen; + exception_t mmu_exception; + logic dtlb_hit; + logic [ riscv::PPNW-1:0] dtlb_ppn; + + logic ld_valid; + logic [TRANS_ID_BITS-1:0] ld_trans_id; + riscv::xlen_t ld_result; + logic st_valid; + logic [TRANS_ID_BITS-1:0] st_trans_id; + riscv::xlen_t st_result; + + logic [ 11:0] page_offset; + logic page_offset_matches; + + exception_t misaligned_exception; + exception_t ld_ex; + exception_t st_ex; + + // ------------------- + // MMU e.g.: TLBs/PTW (mmu for XLEN 64, cva6_mmu_sv32 for XLEN 32, registered pass-through otherwise) + // ------------------- + if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39 + mmu #( + .CVA6Cfg (CVA6Cfg), + .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES), + .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES), + .ASID_WIDTH (ASID_WIDTH) + ) i_cva6_mmu ( + // misaligned bypass + .misaligned_ex_i(misaligned_exception), // carries the locally detected misaligned or overflow exception + .lsu_is_store_i (st_translation_req), + .lsu_req_i (translation_req), + .lsu_vaddr_i (mmu_vaddr), + .lsu_valid_o (translation_valid), + .lsu_paddr_o (mmu_paddr), + .lsu_exception_o(mmu_exception), + .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request + .lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request + // connecting PTW to D$ IF + .req_port_i (dcache_req_ports_i[0]), + .req_port_o (dcache_req_ports_o[0]), + // icache address translation requests + .icache_areq_i (icache_areq_i), + .asid_to_be_flushed_i, + .vaddr_to_be_flushed_i, + .icache_areq_o (icache_areq_o), + .pmpcfg_i, + .pmpaddr_i, + .* // remaining ports connect implicitly by matching name + ); + end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32 + cva6_mmu_sv32 #( + .CVA6Cfg (CVA6Cfg), + .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES), + .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES), + 
.ASID_WIDTH (ASID_WIDTH) + ) i_cva6_mmu ( + // misaligned bypass + .misaligned_ex_i(misaligned_exception), // carries the locally detected misaligned or overflow exception + .lsu_is_store_i (st_translation_req), + .lsu_req_i (translation_req), + .lsu_vaddr_i 
(mmu_vaddr), + .lsu_valid_o (translation_valid), + .lsu_paddr_o (mmu_paddr), + .lsu_exception_o(mmu_exception), + .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request + .lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request + // connecting PTW to D$ IF + .req_port_i (dcache_req_ports_i[0]), + .req_port_o (dcache_req_ports_o[0]), + // icache address translation requests + .icache_areq_i (icache_areq_i), + .asid_to_be_flushed_i, + .vaddr_to_be_flushed_i, + .icache_areq_o (icache_areq_o), + .pmpcfg_i, + .pmpaddr_i, + .* // remaining ports connect implicitly by matching name + ); + end else begin : gen_no_mmu // no MMU instance: the virtual address is resized to PLEN and returned through a register + + if (riscv::VLEN > riscv::PLEN) begin // truncate to PLEN + assign mmu_vaddr_plen = mmu_vaddr[riscv::PLEN-1:0]; + assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; + end else begin // zero-extend to PLEN + assign mmu_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, mmu_vaddr}; + assign fetch_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, icache_areq_i.fetch_vaddr}; + end + + assign icache_areq_o.fetch_valid = icache_areq_i.fetch_req; + assign icache_areq_o.fetch_paddr = fetch_vaddr_plen; + assign icache_areq_o.fetch_exception = '0; + + assign dcache_req_ports_o[0].address_index = '0; + assign dcache_req_ports_o[0].address_tag = '0; + assign dcache_req_ports_o[0].data_wdata = '0; + assign dcache_req_ports_o[0].data_req = 1'b0; // port 0 never issues a request in this configuration + assign dcache_req_ports_o[0].data_be = '1; + assign dcache_req_ports_o[0].data_size = 2'b11; + assign dcache_req_ports_o[0].data_we = 1'b0; + assign dcache_req_ports_o[0].kill_req = '0; + assign dcache_req_ports_o[0].tag_valid = 1'b0; + + assign itlb_miss_o = 1'b0; + assign dtlb_miss_o = 1'b0; + assign dtlb_ppn = mmu_vaddr_plen[riscv::PLEN-1:12]; + assign dtlb_hit = 1'b1; // always reported as a hit + + always_ff @(posedge clk_i or negedge 
rst_ni) begin + if (~rst_ni) begin + mmu_paddr <= '0; + translation_valid <= '0; + mmu_exception <= '0; + end else begin + mmu_paddr <= mmu_vaddr_plen; + translation_valid <= translation_req; + mmu_exception <= misaligned_exception; // only the locally detected exception is forwarded + end + end + end + + + logic 
store_buffer_empty; + // ------------------ + // Store Unit (every port is connected explicitly, there is no .* on this instance) + // ------------------ + store_unit #( + .CVA6Cfg(CVA6Cfg) + ) i_store_unit ( + .clk_i, + .rst_ni, + .flush_i, + .stall_st_pending_i, + .no_st_pending_o, + .store_buffer_empty_o(store_buffer_empty), + + .valid_i (st_valid_i), + .lsu_ctrl_i(lsu_ctrl), + .pop_st_o (pop_st), + .commit_i, + .commit_ready_o, + .amo_valid_commit_i, + + .valid_o (st_valid), + .trans_id_o (st_trans_id), + .result_o (st_result), + .ex_o (st_ex), + // MMU port + .translation_req_o (st_translation_req), + .vaddr_o (st_vaddr), + .rvfi_mem_paddr_o (rvfi_mem_paddr_o), + .paddr_i (mmu_paddr), + .ex_i (mmu_exception), + .dtlb_hit_i (dtlb_hit), + // Load Unit + .page_offset_i (page_offset), + .page_offset_matches_o(page_offset_matches), + // AMOs + .amo_req_o, + .amo_resp_i, + // to memory arbiter + .req_port_i (dcache_req_ports_i[2]), + .req_port_o (dcache_req_ports_o[2]) + ); + + // ------------------ + // Load Unit (ports not listed here are connected by the trailing .*) + // ------------------ + load_unit #( + .CVA6Cfg(CVA6Cfg) + ) i_load_unit ( + .valid_i (ld_valid_i), + .lsu_ctrl_i(lsu_ctrl), + .pop_ld_o (pop_ld), + + .valid_o (ld_valid), + .trans_id_o (ld_trans_id), + .result_o (ld_result), + .ex_o (ld_ex), + // MMU port + .translation_req_o (ld_translation_req), + .vaddr_o (ld_vaddr), + .paddr_i (mmu_paddr), + .ex_i (mmu_exception), + .dtlb_hit_i (dtlb_hit), + .dtlb_ppn_i (dtlb_ppn), + // to store unit + .page_offset_o (page_offset), + .page_offset_matches_i(page_offset_matches), + .store_buffer_empty_i (store_buffer_empty), + // to memory arbiter + .req_port_i (dcache_req_ports_i[1]), + .req_port_o (dcache_req_ports_o[1]), + .dcache_wbuffer_not_ni_i, + .commit_tran_id_i, + .* + ); + + // ---------------------------- + // Output Pipeline Register + // ---------------------------- + + // amount of pipeline registers inserted for load/store return path (valid, trans_id, 
result and exception travel together as one flattened vector) + // can be tuned to trade-off IPC vs. 
cycle time + + shift_reg #( + .dtype(logic [$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1:0]), + .Depth(cva6_config_pkg::CVA6ConfigNrLoadPipeRegs) + ) i_pipe_reg_load ( + .clk_i, + .rst_ni, + .d_i({ld_valid, ld_trans_id, ld_result, ld_ex}), + .d_o({load_valid_o, load_trans_id_o, load_result_o, load_exception_o}) + ); + + shift_reg #( + .dtype(logic [$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1:0]), + .Depth(cva6_config_pkg::CVA6ConfigNrStorePipeRegs) + ) i_pipe_reg_store ( + .clk_i, + .rst_ni, + .d_i({st_valid, st_trans_id, st_result, st_ex}), + .d_o({store_valid_o, store_trans_id_o, store_result_o, store_exception_o}) + ); + + // determine whether this is a load or store; the MMU request and address are taken from whichever unit matches lsu_ctrl.fu + always_comb begin : which_op + + ld_valid_i = 1'b0; + st_valid_i = 1'b0; + + translation_req = 1'b0; + mmu_vaddr = {riscv::VLEN{1'b0}}; // defaults kept when the request is neither LOAD nor STORE + + // check the operation to activate the right functional unit accordingly + unique case (lsu_ctrl.fu) + // all loads go here + LOAD: begin + ld_valid_i = lsu_ctrl.valid; + translation_req = ld_translation_req; + mmu_vaddr = ld_vaddr; + end + // all stores go here + STORE: begin + st_valid_i = lsu_ctrl.valid; + translation_req = st_translation_req; + mmu_vaddr = st_vaddr; + end + // not relevant for the LSU + default: ; + endcase + end + + + // --------------- + // Byte Enable + // --------------- + // we can generate the byte enable from the virtual address since the last + // 12 bit are the same anyway + // and we can always generate the byte enable from the address at hand + + if (riscv::IS_XLEN64) begin : gen_8b_be + assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operation)); + end else begin : gen_4b_be + assign be_i = be_gen_32(vaddr_i[1:0], extract_transfer_size(fu_data_i.operation)); + end + + // ------------------------ + // Misaligned 
Exception + // ------------------------ + // we can detect a misaligned exception immediately + // the misaligned exception is passed to the 
functional unit via the MMU, which in case + // can augment the exception if other memory related exceptions like a page fault or access errors occur + always_comb begin : data_misaligned_detection + + misaligned_exception = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0}; // default: all zero, i.e. no exception (presumably cause, tval, valid in exception_t order - TODO confirm) + + data_misaligned = 1'b0; + + if (lsu_ctrl.valid) begin + case (lsu_ctrl.operation) + // double word + LD, SD, FLD, FSD, + AMO_LRD, AMO_SCD, + AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, + AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND, + AMO_MINDU: begin + if (riscv::IS_XLEN64 && lsu_ctrl.vaddr[2:0] != 3'b000) begin // double-word alignment is only checked when XLEN is 64 + data_misaligned = 1'b1; + end + end + // word + LW, LWU, SW, FLW, FSW, + AMO_LRW, AMO_SCW, + AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, + AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW, + AMO_MINWU: begin + if (lsu_ctrl.vaddr[1:0] != 2'b00) begin + data_misaligned = 1'b1; + end + end + // half word + LH, LHU, SH, FLH, FSH: begin + if (lsu_ctrl.vaddr[0] != 1'b0) begin + data_misaligned = 1'b1; + end + end + // byte -> is always aligned + default: ; + endcase + end + + if (data_misaligned) begin + + if (lsu_ctrl.fu == LOAD) begin + misaligned_exception = { + riscv::LD_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + }; + + end else if (lsu_ctrl.fu == STORE) begin + misaligned_exception = { + riscv::ST_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + }; + end + end + + if (ariane_pkg::MMU_PRESENT && en_ld_st_translation_i && lsu_ctrl.overflow) begin // evaluated last, so an address overflow replaces any misaligned cause with an access fault + + if (lsu_ctrl.fu == LOAD) begin + misaligned_exception = { + riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + }; + + end else if (lsu_ctrl.fu == STORE) begin + misaligned_exception = { + riscv::ST_ACCESS_FAULT, 
{{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + }; + end + end + end + + // ------------------ + // LSU Control + // ------------------ + // new data arrives here + lsu_ctrl_t lsu_req_i; + + assign lsu_req_i = { // positional packing; presumably follows the field order of lsu_ctrl_t - TODO confirm + lsu_valid_i, + vaddr_i, + overflow, + 
fu_data_i.operand_b, + be_i, + fu_data_i.fu, + fu_data_i.operation, + fu_data_i.trans_id + }; + + lsu_bypass #( + .CVA6Cfg(CVA6Cfg) + ) lsu_bypass_i ( + .lsu_req_i (lsu_req_i), + .lsu_req_valid_i(lsu_valid_i), + .pop_ld_i (pop_ld), + .pop_st_i (pop_st), + + .lsu_ctrl_o(lsu_ctrl), + .ready_o (lsu_ready_o), + .* + ); + + assign rvfi_lsu_ctrl_o = lsu_ctrl; + +endmodule + diff --git a/test/type_param/core/load_unit.sv b/test/type_param/core/load_unit.sv new file mode 100644 index 0000000..512b498 --- /dev/null +++ b/test/type_param/core/load_unit.sv @@ -0,0 +1,534 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba , ETH Zurich +// Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Load Unit, takes care of all load requests +// +// Contributor: Cesar Fuguet , CEA List +// Date: August 29, 2023 +// Modification: add support for multiple outstanding load operations +// to the data cache + +module load_unit + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + // load unit input port + input logic valid_i, + input lsu_ctrl_t lsu_ctrl_i, + output logic pop_ld_o, + // load unit output port + output logic valid_o, + output logic [TRANS_ID_BITS-1:0] trans_id_o, + output riscv::xlen_t result_o, + output exception_t ex_o, + // MMU -> Address Translation + output logic translation_req_o, // request address translation + output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out + input logic [riscv::PLEN-1:0] paddr_i, // physical address in + input exception_t ex_i, // exception which may have happened earlier. 
for example: mis-aligned exception + input logic dtlb_hit_i, // hit on the dtlb, sent in the same cycle as the request + input logic [riscv::PPNW-1:0] dtlb_ppn_i, // ppn on the dtlb, sent in the same cycle as the request + // address checker + output logic [11:0] page_offset_o, + input logic page_offset_matches_i, + input logic store_buffer_empty_i, // the entire store-buffer is empty + input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, + // D$ interface + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o, + input logic dcache_wbuffer_not_ni_i +); + enum logic [3:0] { + IDLE, + WAIT_GNT, + SEND_TAG, + WAIT_PAGE_OFFSET, + ABORT_TRANSACTION, + ABORT_TRANSACTION_NI, + WAIT_TRANSLATION, + WAIT_FLUSH, + WAIT_WB_EMPTY + } + state_d, state_q; + + // in order to decouple the response interface from the request interface, + // we need a buffer which can hold all inflight memory load requests + typedef struct packed { + logic [TRANS_ID_BITS-1:0] trans_id; // scoreboard identifier + logic [riscv::XLEN_ALIGN_BYTES-1:0] address_offset; // least significant bits of the address + fu_op operation; // type of load + } ldbuf_t; + + + // to support a throughput of one load per cycle, if the number of entries + // of the load buffer is 1, implement a fall-through mode. This however + // adds a combinational path between the request and response interfaces + // towards the cache. + localparam logic LDBUF_FALLTHROUGH = (CVA6Cfg.NrLoadBufEntries == 1); + localparam int unsigned REQ_ID_BITS = CVA6Cfg.NrLoadBufEntries > 1 ? 
$clog2( + CVA6Cfg.NrLoadBufEntries + ) : 1; + + typedef logic [REQ_ID_BITS-1:0] ldbuf_id_t; + + logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_valid_q, ldbuf_valid_d; + logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_flushed_q, ldbuf_flushed_d; + ldbuf_t [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_q; + logic ldbuf_empty, ldbuf_full; + ldbuf_id_t ldbuf_free_index; + logic ldbuf_w; + ldbuf_t ldbuf_wdata; + ldbuf_id_t ldbuf_windex; + logic ldbuf_r; + ldbuf_t ldbuf_rdata; + ldbuf_id_t ldbuf_rindex; + ldbuf_id_t ldbuf_last_id_q; + + assign ldbuf_full = &ldbuf_valid_q; + + // + // buffer of outstanding loads + + // write in the first available slot + generate + if (CVA6Cfg.NrLoadBufEntries > 1) begin : ldbuf_free_index_multi_gen + lzc #( + .WIDTH(CVA6Cfg.NrLoadBufEntries), + .MODE (1'b0) // NOTE(review): common_cells lzc documents MODE 0 as trailing-zero count (lowest free index) - confirm + ) lzc_windex_i ( + .in_i (~ldbuf_valid_q), + .cnt_o (ldbuf_free_index), + .empty_o(ldbuf_empty) + ); + end else begin : ldbuf_free_index_single_gen + assign ldbuf_free_index = 1'b0; + end + endgenerate + + assign ldbuf_windex = (LDBUF_FALLTHROUGH && ldbuf_r) ? ldbuf_rindex : ldbuf_free_index; + + always_comb begin : ldbuf_comb + ldbuf_flushed_d = ldbuf_flushed_q; + ldbuf_valid_d = ldbuf_valid_q; + + // In case of flush, raise the flushed flag in all slots. 
+ if (flush_i) begin + ldbuf_flushed_d = '1; + end + // Free read entry (in the case of fall-through mode, free the entry + // only if there is no pending load) + if (ldbuf_r && (!LDBUF_FALLTHROUGH || !ldbuf_w)) begin + ldbuf_valid_d[ldbuf_rindex] = 1'b0; + end + // Track a new outstanding operation in the load buffer + if (ldbuf_w) begin + ldbuf_flushed_d[ldbuf_windex] = 1'b0; + ldbuf_valid_d[ldbuf_windex] = 1'b1; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : ldbuf_ff + if (!rst_ni) begin + ldbuf_flushed_q <= '0; + ldbuf_valid_q <= '0; + ldbuf_last_id_q <= '0; + ldbuf_q <= '0; + end else begin + ldbuf_flushed_q <= ldbuf_flushed_d; + ldbuf_valid_q <= ldbuf_valid_d; + if (ldbuf_w) begin + ldbuf_last_id_q <= ldbuf_windex; + ldbuf_q[ldbuf_windex] <= ldbuf_wdata; + end + end + end + + // page offset is defined as the lower 12 bits, feed through for address checker + assign page_offset_o = lsu_ctrl_i.vaddr[11:0]; + // feed-through the virtual address for VA translation + assign vaddr_o = lsu_ctrl_i.vaddr; + // this is a read-only interface so set the write enable to 0 + assign req_port_o.data_we = 1'b0; + assign req_port_o.data_wdata = '0; + // compose the load buffer write data, control is handled in the FSM + assign ldbuf_wdata = { + lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[riscv::XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation + }; + // output address + // output the lower DCACHE_INDEX_WIDTH bits of the virtual address as the index to the cache + assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + // translation from last cycle, again: control is handled in the FSM + assign req_port_o.address_tag = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH + + ariane_pkg::DCACHE_INDEX_WIDTH-1 : + ariane_pkg::DCACHE_INDEX_WIDTH]; + // request id = index of the load buffer's entry + assign req_port_o.data_id = ldbuf_windex; + // directly forward exception fields (valid bit is set below) + assign ex_o.cause = ex_i.cause; + assign ex_o.tval = 
ex_i.tval; + + // Check 
that non-idempotent (NI) operations follow the necessary conditions + logic paddr_ni; + logic not_commit_time; + logic inflight_stores; + logic stall_ni; + assign paddr_ni = config_pkg::is_inside_nonidempotent_regions( + CVA6Cfg, {{52 - riscv::PPNW{1'b0}}, dtlb_ppn_i, 12'd0} + ); + assign not_commit_time = commit_tran_id_i != lsu_ctrl_i.trans_id; + assign inflight_stores = (!dcache_wbuffer_not_ni_i || !store_buffer_empty_i); + assign stall_ni = (inflight_stores || not_commit_time) && (paddr_ni && CVA6Cfg.NonIdemPotenceEn); + + // --------------- + // Load Control + // --------------- + always_comb begin : load_control + automatic logic accept_req; + + // default assignments + state_d = state_q; + translation_req_o = 1'b0; + req_port_o.data_req = 1'b0; + // tag control + req_port_o.kill_req = 1'b0; + req_port_o.tag_valid = 1'b0; + req_port_o.data_be = lsu_ctrl_i.be; + req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operation); + pop_ld_o = 1'b0; + + // In IDLE and SEND_TAG states, this unit can accept a new load request + // when the load buffer is not full or if there is a response and the + // load buffer is in fall-through mode + accept_req = (valid_i && (!ldbuf_full || (LDBUF_FALLTHROUGH && ldbuf_r))); + + case (state_q) + IDLE: begin + if (accept_req) begin + // start the translation process even though we do not know if the addresses match + // this should ease timing + translation_req_o = 1'b1; + // check if the page offset matches with a store, if it does then stall and wait + if (!page_offset_matches_i) begin + // make a load request to memory + req_port_o.data_req = 1'b1; + // we got no data grant so wait for the grant before sending the tag + if (!req_port_i.data_gnt) begin + state_d = WAIT_GNT; + end else begin + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = ABORT_TRANSACTION; + end else begin + if (!stall_ni) begin + // we got a grant and a hit on the DTLB so we can send the tag in the next cycle + state_d = SEND_TAG; + pop_ld_o = 1'b1; + // 
translation valid but this is to NC and the WB is not yet empty. + end else if (CVA6Cfg.NonIdemPotenceEn) begin + state_d = ABORT_TRANSACTION_NI; + end + end + end + end else begin + // wait for the store buffer to drain and the page offset to not match anymore + state_d = WAIT_PAGE_OFFSET; + end + end + end + + // wait here for the page offset to not match anymore + WAIT_PAGE_OFFSET: begin + // we make a new request as soon as the page offset does not match anymore + if (!page_offset_matches_i) begin + state_d = WAIT_GNT; + end + end + + WAIT_GNT: begin + // keep the translation request up + translation_req_o = 1'b1; + // keep the request up + req_port_o.data_req = 1'b1; + // we finally got a data grant + if (req_port_i.data_gnt) begin + // so we send the tag in the next cycle + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = ABORT_TRANSACTION; + end else begin + if (!stall_ni) begin + // we got a grant and a hit on the DTLB so we can send the tag in the next cycle + state_d = SEND_TAG; + pop_ld_o = 1'b1; + // translation valid but this is to NC and the WB is not yet empty. 
+ end else if (CVA6Cfg.NonIdemPotenceEn) begin + state_d = ABORT_TRANSACTION_NI; + end + end + + end + // otherwise we keep waiting on our grant + end + // we know for sure that the tag we want to send is valid + SEND_TAG: begin + req_port_o.tag_valid = 1'b1; + state_d = IDLE; + + if (accept_req) begin + // start the translation process even though we do not know if the addresses match + // this should ease timing + translation_req_o = 1'b1; + // check if the page offset matches with a store, if it does stall and wait + if (!page_offset_matches_i) begin + // make a load request to memory + req_port_o.data_req = 1'b1; + // we got no data grant so wait for the grant before sending the tag + if (!req_port_i.data_gnt) begin + state_d = WAIT_GNT; + end else begin + // we got a grant so we can send the tag in the next cycle + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = ABORT_TRANSACTION; + end else begin + if (!stall_ni) begin + // we got a grant and a hit on the DTLB so we can send the tag in the next cycle + state_d = SEND_TAG; + pop_ld_o = 1'b1; + // translation valid but this is to NC and the WB is not yet empty. 
+ end else if (CVA6Cfg.NonIdemPotenceEn) begin + state_d = ABORT_TRANSACTION_NI; + end + end + end + end else begin + // wait for the store buffer to drain and the page offset to not match anymore + state_d = WAIT_PAGE_OFFSET; + end + end + // ---------- + // Exception + // ---------- + // if we got an exception we need to kill the request immediately + if (ex_i.valid) begin + req_port_o.kill_req = 1'b1; + end + end + + WAIT_FLUSH: begin + // the D$ arbiter will take care of presenting this to the memory only in case we + // have an outstanding request + req_port_o.kill_req = 1'b1; + req_port_o.tag_valid = 1'b1; + // we've killed the current request so we can go back to idle + state_d = IDLE; + end + + default: begin + // abort the previous request - free the D$ arbiter + // we are here because of a TLB miss, we need to abort the current request and give way for the + // PTW walker to satisfy the TLB miss + if (state_q == ABORT_TRANSACTION && ariane_pkg::MMU_PRESENT) begin + req_port_o.kill_req = 1'b1; + req_port_o.tag_valid = 1'b1; + // re-do the translation + state_d = WAIT_TRANSLATION; + end else if (state_q == ABORT_TRANSACTION_NI && CVA6Cfg.NonIdemPotenceEn) begin + req_port_o.kill_req = 1'b1; + req_port_o.tag_valid = 1'b1; + // wait until the WB is empty + state_d = WAIT_WB_EMPTY; + end else if (state_q == WAIT_WB_EMPTY && CVA6Cfg.NonIdemPotenceEn && dcache_wbuffer_not_ni_i) begin + // Wait until the write-back buffer is empty in the data cache. + // the write buffer is empty, so lets go and re-do the translation. 
+ state_d = WAIT_TRANSLATION; + end else if(state_q == WAIT_TRANSLATION && (ariane_pkg::MMU_PRESENT || CVA6Cfg.NonIdemPotenceEn)) begin + translation_req_o = 1'b1; + // we've got a hit and we can continue with the request process + if (dtlb_hit_i) state_d = WAIT_GNT; + + // we got an exception + if (ex_i.valid) begin + // the next state will be the idle state + state_d = IDLE; + // pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction + pop_ld_o = ~req_port_i.data_rvalid; + end + end else begin + state_d = IDLE; + end + end + endcase + + // a flush overrides any state transition chosen above: go to WAIT_FLUSH, which kills the current request + if (flush_i) begin + state_d = WAIT_FLUSH; + end + end + + // track the load data for later usage + assign ldbuf_w = req_port_o.data_req & req_port_i.data_gnt; + + // --------------- + // Retire Load + // --------------- + assign ldbuf_rindex = (CVA6Cfg.NrLoadBufEntries > 1) ? ldbuf_id_t'(req_port_i.data_rid) : 1'b0, + ldbuf_rdata = ldbuf_q[ldbuf_rindex]; + + // decoupled rvalid process + always_comb begin : rvalid_output + // read the pending load buffer + ldbuf_r = req_port_i.data_rvalid; + trans_id_o = ldbuf_q[ldbuf_rindex].trans_id; + valid_o = 1'b0; + ex_o.valid = 1'b0; + + // we got an rvalid and its corresponding request was not flushed + if (req_port_i.data_rvalid && !ldbuf_flushed_q[ldbuf_rindex]) begin + // if the response corresponds to the last request, check that we are not killing it + if ((ldbuf_last_id_q != ldbuf_rindex) || !req_port_o.kill_req) valid_o = 1'b1; + // the output is also valid if we got an exception. An exception arrives one cycle after + // dtlb_hit_i is asserted, i.e. when we are in SEND_TAG. Otherwise, the exception + // corresponds to the next request that is already being translated (see below). 
+ if (ex_i.valid && (state_q == SEND_TAG)) begin + valid_o = 1'b1; + ex_o.valid = 1'b1; + end + end + + // an exception occurred during translation + // exceptions can retire out-of-order -> but we need to give priority to non-excepting load and stores + // so we simply check if we got an rvalid if so we prioritize it by not retiring the exception - we simply go for another + // round in the load FSM + if ((ariane_pkg::MMU_PRESENT || CVA6Cfg.NonIdemPotenceEn) && (state_q == WAIT_TRANSLATION) && !req_port_i.data_rvalid && ex_i.valid && valid_i) begin + trans_id_o = lsu_ctrl_i.trans_id; + valid_o = 1'b1; + ex_o.valid = 1'b1; + end + end + + + // FSM state register (resets to IDLE) + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + end else begin + state_q <= state_d; + end + end + + // --------------- + // Sign Extend + // --------------- + riscv::xlen_t shifted_data; + + // realign as needed + assign shifted_data = req_port_i.data_rdata >> {ldbuf_rdata.address_offset, 3'b000}; + + /* // result mux (leaner code, but more logic stages. 
+ // can be used instead of the code below (in between //result mux fast) if timing is not so critical) + always_comb begin + unique case (ldbuf_rdata.operation) + LWU: result_o = shifted_data[31:0]; + LHU: result_o = shifted_data[15:0]; + LBU: result_o = shifted_data[7:0]; + LW: result_o = 64'(signed'(shifted_data[31:0])); + LH: result_o = 64'(signed'(shifted_data[15:0])); + LB: result_o = 64'(signed'(shifted_data[ 7:0])); + default: result_o = shifted_data; + endcase + end */ + + // result mux fast + logic [ (riscv::XLEN/8)-1:0] rdata_sign_bits; + logic [riscv::XLEN_ALIGN_BYTES-1:0] rdata_offset; + logic rdata_sign_bit, rdata_is_signed, rdata_is_fp_signed; + + + // prepare these signals for faster selection in the next cycle + assign rdata_is_signed = ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::LH, ariane_pkg::LB}; + assign rdata_is_fp_signed = ldbuf_rdata.operation inside {ariane_pkg::FLW, ariane_pkg::FLH, ariane_pkg::FLB}; + assign rdata_offset = ((ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::FLW}) & riscv::IS_XLEN64) ? ldbuf_rdata.address_offset + 3 : + ( ldbuf_rdata.operation inside {ariane_pkg::LH, ariane_pkg::FLH}) ? 
ldbuf_rdata.address_offset + 1 : + ldbuf_rdata.address_offset; + + for (genvar i = 0; i < (riscv::XLEN / 8); i++) begin : gen_sign_bits + assign rdata_sign_bits[i] = req_port_i.data_rdata[(i+1)*8-1]; + end + + + // select correct sign bit in parallel to result shifter above + // pull to 0 if unsigned; forced to 1 for FLW, FLH and FLB (rdata_is_fp_signed) + assign rdata_sign_bit = rdata_is_signed & rdata_sign_bits[rdata_offset] | rdata_is_fp_signed; + + // result mux + always_comb begin + unique case (ldbuf_rdata.operation) + ariane_pkg::LW, ariane_pkg::LWU: + result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]}; + ariane_pkg::LH, ariane_pkg::LHU: + result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]}; + ariane_pkg::LB, ariane_pkg::LBU: + result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]}; + default: begin + // FLW, FLH and FLB have been defined here in default case to improve Code Coverage + if (CVA6Cfg.FpPresent) begin + unique case (ldbuf_rdata.operation) + ariane_pkg::FLW: begin + result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]}; + end + ariane_pkg::FLH: begin + result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]}; + end + ariane_pkg::FLB: begin + result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]}; + end + default: begin + result_o = shifted_data[riscv::XLEN-1:0]; + end + endcase + end else begin + result_o = shifted_data[riscv::XLEN-1:0]; + end + end + endcase + end + // end result mux fast + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + initial + assert (ariane_pkg::DCACHE_TID_WIDTH >= REQ_ID_BITS) + else $fatal(1, "CVA6ConfigDcacheIdWidth parameter is not wide enough to encode pending loads"); + // check invalid offsets (these conditions actually trigger a load address misaligned exception); NOTE: a violation is 
reported with $fatal, not a warning + addr_offset0 : + assert property (@(posedge 
clk_i) disable iff (~rst_ni) + ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LW, ariane_pkg::LWU}) |-> ldbuf_wdata.address_offset < 5) + else $fatal(1, "invalid address offset used with {LW, LWU}"); + addr_offset1 : + assert property (@(posedge clk_i) disable iff (~rst_ni) + ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LH, ariane_pkg::LHU}) |-> ldbuf_wdata.address_offset < 7) + else $fatal(1, "invalid address offset used with {LH, LHU}"); + addr_offset2 : + assert property (@(posedge clk_i) disable iff (~rst_ni) + ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LB, ariane_pkg::LBU}) |-> ldbuf_wdata.address_offset < 8) + else $fatal(1, "invalid address offset used with {LB, LBU}"); +`endif + //pragma translate_on + +endmodule diff --git a/test/type_param/core/lsu_bypass.sv b/test/type_param/core/lsu_bypass.sv new file mode 100644 index 0000000..96f6d50 --- /dev/null +++ b/test/type_param/core/lsu_bypass.sv @@ -0,0 +1,122 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.04.2017 +// Description: Load Store Unit, handles address calculation and memory interface signals + + +// ------------------ +// LSU Control +// ------------------ +// The LSU consists of two independent blocks which share a common address translation block. 
+// The one block is the load unit, the other one is the store unit. They will signal their readiness +// with separate signals. If they are not ready the LSU control should keep the last applied signals stable. +// Furthermore it can be the case that another request for one of the two units arrives in which case +// the LSU control should sample it and store it for later application to the units. It does so by storing it in a +// two element FIFO. This is necessary as we only know very late in the cycle whether the load/store will succeed (address check, +// TLB hit mainly). So we better unconditionally allow another request to arrive and store this request in case we need to. +module lsu_bypass + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + + input lsu_ctrl_t lsu_req_i, + input logic lsu_req_valid_i, + input logic pop_ld_i, + input logic pop_st_i, + + output lsu_ctrl_t lsu_ctrl_o, + output logic ready_o +); + + lsu_ctrl_t [1:0] mem_n, mem_q; + logic read_pointer_n, read_pointer_q; + logic write_pointer_n, write_pointer_q; + logic [1:0] status_cnt_n, status_cnt_q; + + logic empty; + assign empty = (status_cnt_q == 0); + assign ready_o = empty; + + always_comb begin + automatic logic [1:0] status_cnt; + automatic logic write_pointer; + automatic logic read_pointer; + + status_cnt = status_cnt_q; + write_pointer = write_pointer_q; + read_pointer = read_pointer_q; + + mem_n = mem_q; + // we've got a valid LSU request: store it at the write pointer + if (lsu_req_valid_i) begin + mem_n[write_pointer_q] = lsu_req_i; + write_pointer++; + status_cnt++; + end + + if (pop_ld_i) begin + // invalidate the entry at the read pointer + mem_n[read_pointer_q].valid = 1'b0; + read_pointer++; + status_cnt--; + end + + if (pop_st_i) begin + // invalidate the entry at the read pointer + mem_n[read_pointer_q].valid = 1'b0; + read_pointer++; + status_cnt--; + end + 
+ if (pop_st_i && pop_ld_i) mem_n = '0; + + if (flush_i) 
begin + status_cnt = '0; + write_pointer = '0; + read_pointer = '0; + mem_n = '0; + end + // drive the next-state signals from the updated pointers and counter + read_pointer_n = read_pointer; + write_pointer_n = write_pointer; + status_cnt_n = status_cnt; + end + + // output assignment: pass the incoming request through when the FIFO is empty, otherwise output the entry at the read pointer + always_comb begin : output_assignments + if (empty) begin + lsu_ctrl_o = lsu_req_i; + end else begin + lsu_ctrl_o = mem_q[read_pointer_q]; + end + end + + // registers + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + mem_q <= '0; + status_cnt_q <= '0; + write_pointer_q <= '0; + read_pointer_q <= '0; + end else begin + mem_q <= mem_n; + status_cnt_q <= status_cnt_n; + write_pointer_q <= write_pointer_n; + read_pointer_q <= read_pointer_n; + end + end +endmodule + diff --git a/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv b/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv new file mode 100644 index 0000000..d194306 --- /dev/null +++ b/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv @@ -0,0 +1,565 @@ +// Copyright (c) 2021 Thales. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Sebastien Jacq Thales Research & Technology +// Date: 17/07/2021 +// +// Additional contributions by: +// Sebastien Jacq - sjthales on github.com +// +// Description: Memory Management Unit for CV32A6, contains TLB and +// address translation unit. Sv32 as defined in RISC-V +// privilege specification 1.11-WIP. 
+// This module is an adaptation of the MMU Sv39 developed
+// by Florian Zaruba to the Sv32 standard.
+//
+// =========================================================================== //
+// Revisions  :
+// Date        Version  Author       Description
+// 2020-02-17  0.1      S.Jacq       MMU Sv32 for CV32A6
+// =========================================================================== //
+
+// Structure of this module:
+//   - i_itlb / i_dtlb : first-level instruction and data TLBs
+//   - i_shared_tlb    : second-level TLB looked up on an ITLB/DTLB miss
+//   - i_ptw           : hardware page table walker, refills the shared TLB
+//   - i_pmp_if / i_pmp_data : PMP checks on the translated fetch / load-store address
+// The instruction interface is purely combinational. The data interface registers
+// the request and the DTLB response and answers one cycle later (lsu_valid_o,
+// lsu_paddr_o, lsu_exception_o); only lsu_dtlb_hit_o / lsu_dtlb_ppn_o are same-cycle.
+module cva6_mmu_sv32
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg           = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           INSTR_TLB_ENTRIES = 2,
+    parameter int unsigned           DATA_TLB_ENTRIES  = 2,
+    parameter int unsigned           ASID_WIDTH        = 1
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,
+    input logic enable_translation_i,
+    input logic en_ld_st_translation_i,  // enable virtual memory translation for load/stores
+    // IF interface
+    input icache_arsp_t icache_areq_i,
+    output icache_areq_t icache_areq_o,
+    // LSU interface
+    // this is a more minimalistic interface because the actual addressing logic is handled
+    // in the LSU as we distinguish load and stores, what we do here is simple address translation
+    input exception_t misaligned_ex_i,
+    input logic lsu_req_i,  // request address translation
+    input logic [riscv::VLEN-1:0] lsu_vaddr_i,  // virtual address in
+    input logic lsu_is_store_i,  // the translation is requested by a store
+    // if we need to walk the page table we can't grant in the same cycle
+    // Cycle 0
+    output logic lsu_dtlb_hit_o,  // sent in the same cycle as the request if translation hits in the DTLB
+    output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o,  // ppn (send same cycle as hit)
+    // Cycle 1
+    output logic lsu_valid_o,  // translation is valid
+    output logic [riscv::PLEN-1:0] lsu_paddr_o,  // translated address
+    output exception_t lsu_exception_o,  // address translation threw an exception
+    // General control signals
+    input riscv::priv_lvl_t priv_lvl_i,
+    input riscv::priv_lvl_t ld_st_priv_lvl_i,
+    input logic sum_i,
+    input logic mxr_i,
+    // input logic flag_mprv_i,
+    input logic [riscv::PPNW-1:0] satp_ppn_i,
+    input logic [ASID_WIDTH-1:0] asid_i,
+    input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
+    input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
+    input logic flush_tlb_i,
+    // Performance counters
+    output logic itlb_miss_o,
+    output logic dtlb_miss_o,
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+    // PMP
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
+);
+
+  logic iaccess_err;  // insufficient privilege to access this instruction page
+  logic daccess_err;  // insufficient privilege to access this data page
+  logic ptw_active;  // PTW is currently walking a page table
+  logic walking_instr;  // PTW is walking because of an ITLB miss
+  logic ptw_error;  // PTW threw an exception
+  logic ptw_access_exception;  // PTW threw an access exception (PMPs)
+  logic [riscv::PLEN-1:0] ptw_bad_paddr;  // PTW PMP exception bad physical addr
+
+  logic [riscv::VLEN-1:0] update_vaddr;
+  tlb_update_sv32_t update_itlb, update_dtlb, update_shared_tlb;
+
+  logic itlb_lu_access;
+  riscv::pte_sv32_t itlb_content;
+  logic itlb_is_4M;
+  logic itlb_lu_hit;
+
+  logic dtlb_lu_access;
+  riscv::pte_sv32_t dtlb_content;
+  logic dtlb_is_4M;
+  logic dtlb_lu_hit;
+
+  logic shared_tlb_access;
+  logic [riscv::VLEN-1:0] shared_tlb_vaddr;
+  logic shared_tlb_hit;
+
+  logic itlb_req;
+
+
+  // Assignments
+  assign itlb_lu_access = icache_areq_i.fetch_req;
+  assign dtlb_lu_access = lsu_req_i;
+
+
+  cva6_tlb_sv32 #(
+      .CVA6Cfg    (CVA6Cfg),
+      .TLB_ENTRIES(INSTR_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_itlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .update_i(update_itlb),
+
+      .lu_access_i          (itlb_lu_access),
+      .lu_asid_i            (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i           (icache_areq_i.fetch_vaddr),
+      .lu_content_o         (itlb_content),
+
+      .lu_is_4M_o(itlb_is_4M),
+      .lu_hit_o  (itlb_lu_hit)
+  );
+
+  cva6_tlb_sv32 #(
+      .CVA6Cfg    (CVA6Cfg),
+      .TLB_ENTRIES(DATA_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_dtlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .update_i(update_dtlb),
+
+      .lu_access_i          (dtlb_lu_access),
+      .lu_asid_i            (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i           (lsu_vaddr_i),
+      .lu_content_o         (dtlb_content),
+
+      .lu_is_4M_o(dtlb_is_4M),
+      .lu_hit_o  (dtlb_lu_hit)
+  );
+
+  cva6_shared_tlb_sv32 #(
+      .CVA6Cfg         (CVA6Cfg),
+      .SHARED_TLB_DEPTH(64),
+      .SHARED_TLB_WAYS (2),
+      .ASID_WIDTH      (ASID_WIDTH)
+  ) i_shared_tlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .enable_translation_i  (enable_translation_i),
+      .en_ld_st_translation_i(en_ld_st_translation_i),
+
+      .asid_i       (asid_i),
+      // from TLBs
+      // did we miss?
+      .itlb_access_i(itlb_lu_access),
+      .itlb_hit_i   (itlb_lu_hit),
+      .itlb_vaddr_i (icache_areq_i.fetch_vaddr),
+
+      .dtlb_access_i(dtlb_lu_access),
+      .dtlb_hit_i   (dtlb_lu_hit),
+      .dtlb_vaddr_i (lsu_vaddr_i),
+
+      // to TLBs, update logic
+      .itlb_update_o(update_itlb),
+      .dtlb_update_o(update_dtlb),
+
+      // Performance counters
+      .itlb_miss_o(itlb_miss_o),
+      .dtlb_miss_o(dtlb_miss_o),
+
+      .shared_tlb_access_o(shared_tlb_access),
+      .shared_tlb_hit_o   (shared_tlb_hit),
+      .shared_tlb_vaddr_o (shared_tlb_vaddr),
+
+      .itlb_req_o         (itlb_req),
+      // to update shared tlb
+      .shared_tlb_update_i(update_shared_tlb)
+  );
+
+  cva6_ptw_sv32 #(
+      .CVA6Cfg   (CVA6Cfg),
+      .ASID_WIDTH(ASID_WIDTH)
+  ) i_ptw (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_i),
+
+      .ptw_active_o          (ptw_active),
+      .walking_instr_o       (walking_instr),
+      .ptw_error_o           (ptw_error),
+      .ptw_access_exception_o(ptw_access_exception),
+
+      .lsu_is_store_i(lsu_is_store_i),
+      // PTW memory interface
+      .req_port_i    (req_port_i),
+      .req_port_o    (req_port_o),
+
+      // to Shared TLB, update logic
+      .shared_tlb_update_o(update_shared_tlb),
+
+      .update_vaddr_o(update_vaddr),
+
+      .asid_i(asid_i),
+
+      // from shared TLB
+      // did we miss?
+      .shared_tlb_access_i(shared_tlb_access),
+      .shared_tlb_hit_i   (shared_tlb_hit),
+      .shared_tlb_vaddr_i (shared_tlb_vaddr),
+
+      .itlb_req_i(itlb_req),
+
+      // from CSR file
+      .satp_ppn_i(satp_ppn_i),  // ppn from satp
+      .mxr_i     (mxr_i),
+
+      // Performance counters
+      .shared_tlb_miss_o(),  //open for now
+
+      // PMP
+      .pmpcfg_i   (pmpcfg_i),
+      .pmpaddr_i  (pmpaddr_i),
+      .bad_paddr_o(ptw_bad_paddr)
+
+  );
+
+  // Debug-only integrated logic analyzer hookup, intentionally left commented out.
+  // ila_1 i_ila_1 (
+  //     .clk(clk_i), // input wire clk
+  //     .probe0({req_port_o.address_tag, req_port_o.address_index}),
+  //     .probe1(req_port_o.data_req), // input wire [63:0]  probe1
+  //     .probe2(req_port_i.data_gnt), // input wire [0:0]  probe2
+  //     .probe3(req_port_i.data_rdata), // input wire [0:0]  probe3
+  //     .probe4(req_port_i.data_rvalid), // input wire [0:0]  probe4
+  //     .probe5(ptw_error), // input wire [1:0]  probe5
+  //     .probe6(update_vaddr), // input wire [0:0]  probe6
+  //     .probe7(update_itlb.valid), // input wire [0:0]  probe7
+  //     .probe8(update_dtlb.valid), // input wire [0:0]  probe8
+  //     .probe9(dtlb_lu_access), // input wire [0:0]  probe9
+  //     .probe10(lsu_vaddr_i), // input wire [0:0]  probe10
+  //     .probe11(dtlb_lu_hit), // input wire [0:0]  probe11
+  //     .probe12(itlb_lu_access), // input wire [0:0]  probe12
+  //     .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0]  probe13
+  //     .probe14(itlb_lu_hit) // input wire [0:0]  probe14
+  // );
+
+  //-----------------------
+  // Instruction Interface
+  //-----------------------
+  logic match_any_execute_region;
+  logic pmp_instr_allow;
+
+  // The instruction interface is a simple request response interface
+  always_comb begin : instr_interface
+    // MMU disabled: just pass through
+    icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+    if (riscv::PLEN > riscv::VLEN)
+      icache_areq_o.fetch_paddr = {
+        {riscv::PLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr
+      };  // play through in case we disabled address translation
+    else
+      icache_areq_o.fetch_paddr = {
+        2'b00, icache_areq_i.fetch_vaddr[riscv::VLEN-1:0]
+      };  // play through in case we disabled address translation
+    // two potential exception sources:
+    // 1. HPTW threw an exception -> signal with a page fault exception
+    // 2. We got an access error because of insufficient permissions -> throw an access exception
+    icache_areq_o.fetch_exception = '0;
+    // Check whether we are allowed to access this memory region from a fetch perspective
+    iaccess_err = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
+                                                 || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
+
+    // MMU enabled: address from TLB, request delayed until hit. Error when TLB
+    // hit and no access right or TLB hit and translated address not valid (e.g.
+    // AXI decode error), or when PTW performs walk due to ITLB miss and raises
+    // an error.
+    if (enable_translation_i) begin
+      // we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
+      if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
+        icache_areq_o.fetch_exception = {
+          riscv::INSTR_ACCESS_FAULT,
+          {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+          1'b1
+        };
+      end
+
+      // by default the request is held back until the ITLB hits or the PTW reports an error
+      icache_areq_o.fetch_valid = 1'b0;
+
+      // 4K page
+      icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
+      // Mega page
+      if (itlb_is_4M) begin
+        icache_areq_o.fetch_paddr[21:12] = icache_areq_i.fetch_vaddr[21:12];
+      end
+
+
+      // ---------
+      // ITLB Hit
+      // --------
+      // if we hit the ITLB output the request signal immediately
+      if (itlb_lu_hit) begin
+        icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+        // we got an access error
+        if (iaccess_err) begin
+          // throw a page fault
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT,
+            {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+            1'b1
+          };  //to check on wave --> not connected
+        end else if (!pmp_instr_allow) begin
+          // tval is zero-extended exactly like the other fetch exceptions in this block
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT,
+            {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+            1'b1
+          };  //to check on wave --> not connected
+        end
+      end else
+      // ---------
+      // ITLB Miss
+      // ---------
+      // watch out for exceptions happening during walking the page table
+      if (ptw_active && walking_instr) begin
+        icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
+        if (ptw_error)
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
+          };  //to check on wave
+        // TODO(moschn,zarubaf): What should the value of tval be in this case?
+        else
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1
+          };  //to check on wave --> not connected
+      end
+    end
+    // if it didn't match any execute region throw an `Instruction Access Fault`
+    // or: if we are not translating, check PMPs immediately on the paddr
+    if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin
+      icache_areq_o.fetch_exception = {
+        riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1
+      };  //to check on wave --> not connected
+    end
+  end
+
+  // check for execute flag on memory
+  assign match_any_execute_region = config_pkg::is_inside_execute_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
+  );
+
+  // Instruction fetch
+  // CVA6Cfg is forwarded explicitly (as the PTW does for its own PMP instance) so this
+  // checker does not silently fall back to the module's default configuration.
+  pmp #(
+      .CVA6Cfg   (CVA6Cfg),
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_if (
+      .addr_i       (icache_areq_o.fetch_paddr),
+      .priv_lvl_i,
+      // we will always execute on the instruction fetch port
+      .access_type_i(riscv::ACCESS_EXEC),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (pmp_instr_allow)
+  );
+
+  //-----------------------
+  // Data Interface
+  //-----------------------
+  logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
+  riscv::pte_sv32_t dtlb_pte_n, dtlb_pte_q;
+  exception_t misaligned_ex_n, misaligned_ex_q;
+  logic lsu_req_n, lsu_req_q;
+  logic lsu_is_store_n, lsu_is_store_q;
+  logic dtlb_hit_n, dtlb_hit_q;
+  logic dtlb_is_4M_n, dtlb_is_4M_q;
+
+  // check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
+  assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
+
+  // Wires to PMP checks
+  riscv::pmp_access_t pmp_access_type;
+  logic               pmp_data_allow;
+  localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;
+  // The data interface is simpler and only consists of a request/response interface
+  always_comb begin : data_interface
+    // save request and DTLB response
+    lsu_vaddr_n     = lsu_vaddr_i;
+    lsu_req_n       = lsu_req_i;
+    misaligned_ex_n = misaligned_ex_i;
+    dtlb_pte_n      = dtlb_content;
+    dtlb_hit_n      = dtlb_lu_hit;
+    lsu_is_store_n  = lsu_is_store_i;
+    dtlb_is_4M_n    = dtlb_is_4M;
+
+    // translation disabled: physical address == (zero-extended) virtual address
+    if (riscv::PLEN > riscv::VLEN) begin
+      lsu_paddr_o    = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_q};
+      lsu_dtlb_ppn_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n[riscv::VLEN-1:12]};
+    end else begin
+      lsu_paddr_o    = {2'b00, lsu_vaddr_q[riscv::VLEN-1:0]};
+      lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PPNW-1:0];
+    end
+    lsu_valid_o     = lsu_req_q;
+    lsu_exception_o = misaligned_ex_q;
+    pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
+
+    // mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
+    misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
+
+    // Check if the User flag is set, then we may only access it in supervisor mode
+    // if SUM is enabled
+    daccess_err = (ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) ||  // SUM is not set and we are trying to access a user page in supervisor mode
+    (ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u);  // this is not a user page but we are in user mode and trying to access it
+    // translation is enabled and no misaligned exception occurred
+    if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
+      lsu_valid_o = 1'b0;
+      // 4K page
+      lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
+      lsu_dtlb_ppn_o = dtlb_content.ppn;
+      // Mega page: paddr[21:12] is taken from the virtual address
+      if (dtlb_is_4M_q) begin
+        lsu_paddr_o[21:12] = lsu_vaddr_q[21:12];
+      end
+      // lsu_dtlb_ppn_o is the same-cycle PPN, indexed [PPNW-1:0] (bit 0 == paddr bit 12).
+      // The megapage bits are therefore ppn[9:0], and they must be qualified with the
+      // current DTLB lookup (dtlb_is_4M), matching dtlb_content/lsu_vaddr_n used above.
+      if (dtlb_is_4M) begin
+        lsu_dtlb_ppn_o[9:0] = lsu_vaddr_n[21:12];
+      end
+      // ---------
+      // DTLB Hit
+      // --------
+      if (dtlb_hit_q && lsu_req_q) begin
+        lsu_valid_o = 1'b1;
+        // exception priority:
+        // PAGE_FAULTS have higher priority than ACCESS_FAULTS
+        // virtual memory based exceptions are PAGE_FAULTS
+        // physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
+
+        // this is a store
+        if (lsu_is_store_q) begin
+          // check if the page is write-able and we are not violating privileges
+          // also check if the dirty flag is set
+          if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };  //to check on wave
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
+            };  //only 32 bits on 34b of lsu_paddr_o are returned.
+          end
+
+          // this is a load
+        end else begin
+          // check for sufficient access privileges - throw a page fault if necessary
+          if (daccess_err) begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
+            };  //only 32 bits on 34b of lsu_paddr_o are returned.
+          end
+        end
+      end else
+
+      // ---------
+      // DTLB Miss
+      // ---------
+      // watch out for exceptions
+      if (ptw_active && !walking_instr) begin
+        // page table walker threw an exception
+        if (ptw_error) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // ptw_error is reported as a page fault of the original access type
+          if (lsu_is_store_q) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end else begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end
+        end
+
+        if (ptw_access_exception) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // A PMP violation during the walk is an access fault of the ORIGINAL access
+          // type: a walk triggered by a store must report a store access fault.
+          if (lsu_is_store_q) begin
+            lsu_exception_o = {riscv::ST_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
+          end else begin
+            lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
+          end
+        end
+      end
+    end  // If translation is not enabled, check the paddr immediately against PMPs
+    else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
+      if (lsu_is_store_q) begin
+        lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
+      end else begin
+        lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
+      end
+    end
+  end
+
+  // Load/store PMP check
+  // CVA6Cfg is forwarded explicitly, see i_pmp_if above.
+  pmp #(
+      .CVA6Cfg   (CVA6Cfg),
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_data (
+      .addr_i       (lsu_paddr_o),
+      .priv_lvl_i   (ld_st_priv_lvl_i),
+      .access_type_i(pmp_access_type),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (pmp_data_allow)
+  );
+
+  // ----------
+  // Registers
+  // ----------
+  // pipeline stage between the LSU request (cycle 0) and the translation response (cycle 1)
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      lsu_vaddr_q     <= '0;
+      lsu_req_q       <= '0;
+      misaligned_ex_q <= '0;
+      dtlb_pte_q      <= '0;
+      dtlb_hit_q      <= '0;
+      lsu_is_store_q  <= '0;
+      dtlb_is_4M_q    <= '0;
+    end else begin
+      lsu_vaddr_q     <= lsu_vaddr_n;
+      lsu_req_q       <= lsu_req_n;
+      misaligned_ex_q <= misaligned_ex_n;
+      dtlb_pte_q      <= dtlb_pte_n;
+      dtlb_hit_q      <= dtlb_hit_n;
+      lsu_is_store_q  <= lsu_is_store_n;
+      dtlb_is_4M_q    <= dtlb_is_4M_n;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv b/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv
new file mode 100644
index 0000000..4bd736b
--- /dev/null
+++ b/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv
@@ -0,0 +1,400 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq Thales Research & Technology
+// Date: 17/07/2021
+//
+// Additional contributions by:
+//         Sebastien Jacq - sjthales on github.com
+//
+// Description: Hardware-PTW (Page-Table-Walker) for MMU Sv32.
+//              This module is an adaptation of the Sv39 PTW developed
+//              by Florian Zaruba and David Schaffenrath to the Sv32 standard.
+//
+// =========================================================================== //
+// Revisions  :
+// Date        Version  Author       Description
+// 2020-02-17  0.1      S.Jacq       PTW Sv32 for CV32A6
+// =========================================================================== //
+
+/* verilator lint_off WIDTH */
+
+// Two-level page table walker. Started by a shared-TLB miss, it reads PTEs through
+// req_port_o/req_port_i, and either emits a shared-TLB update (shared_tlb_update_o.valid)
+// or pulses ptw_error_o / ptw_access_exception_o for one cycle.
+module cva6_ptw_sv32
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int ASID_WIDTH = 1
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // flush everything, we need to do this because
+    // actually everything we do is speculative at this stage
+    // e.g.: there could be a CSR instruction that changes everything
+    output logic ptw_active_o,  // high whenever the FSM is not in IDLE
+    output logic walking_instr_o,  // set when walking for the ITLB (instruction fetch)
+    output logic ptw_error_o,  // set when an error occurred
+    output logic ptw_access_exception_o,  // set when a PMP access exception occurred
+
+    input logic lsu_is_store_i,  // this translation was triggered by a store
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+
+    // to Shared TLB, update logic
+    output tlb_update_sv32_t shared_tlb_update_o,
+
+    // virtual address of the walk in progress
+    output logic [riscv::VLEN-1:0] update_vaddr_o,
+
+    input logic [ASID_WIDTH-1:0] asid_i,
+
+    // from shared TLB
+    input logic shared_tlb_access_i,
+    input logic shared_tlb_hit_i,
+    input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i,
+
+    // the shared TLB lookup that missed was made on behalf of the ITLB
+    input logic itlb_req_i,
+
+    // from CSR file
+    input logic [riscv::PPNW-1:0] satp_ppn_i,  // ppn from satp
+    input logic mxr_i,
+
+    // Performance counters
+    output logic shared_tlb_miss_o,
+
+    // PMP
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
+    output logic [riscv::PLEN-1:0] bad_paddr_o
+
+);
+
+  // input registers
+  logic data_rvalid_q;
+  riscv::xlen_t data_rdata_q;
+
+  // registered read data reinterpreted as an Sv32 page table entry
+  riscv::pte_sv32_t pte;
+  assign pte = riscv::pte_sv32_t'(data_rdata_q);
+
+
+  enum logic [2:0] {
+    IDLE,
+    WAIT_GRANT,
+    PTE_LOOKUP,
+    WAIT_RVALID,
+    PROPAGATE_ERROR,
+    PROPAGATE_ACCESS_ERROR,
+    LATENCY
+  }
+      state_q, state_d;
+
+  // SV32 defines two levels of page tables
+  enum logic {
+    LVL1,
+    LVL2
+  }
+      ptw_lvl_q, ptw_lvl_n;
+
+  // is this an instruction page table walk?
+  logic is_instr_ptw_q, is_instr_ptw_n;
+  logic global_mapping_q, global_mapping_n;
+  // latched tag signal
+  logic tag_valid_n, tag_valid_q;
+  // register the ASID
+  logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
+  // register the VPN we need to walk, SV32 defines a 32 bit virtual address
+  logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
+  // 4 byte aligned physical pointer
+  logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
+
+  // Assignments
+  assign update_vaddr_o = vaddr_q;
+
+  assign ptw_active_o = (state_q != IDLE);
+  //assign walking_instr_o = is_instr_ptw_q;
+  assign walking_instr_o = is_instr_ptw_q;
+  // directly output the correct physical address
+  assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
+  assign req_port_o.address_tag   = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
+  // we are never going to kill this request
+  assign req_port_o.kill_req = '0;
+  // we are never going to write with the HPTW
+  assign req_port_o.data_wdata = '0;
+  // we only issue one single request at a time
+  assign req_port_o.data_id = '0;
+
+  // -----------
+  // Shared TLB Update
+  // -----------
+  assign shared_tlb_update_o.vpn = vaddr_q[riscv::SV-1:12];
+  // update the correct page table level
+  // a leaf found while still at the first level is flagged as a 4M page
+  assign shared_tlb_update_o.is_4M = (ptw_lvl_q == LVL1);
+  // output the correct ASID
+  assign shared_tlb_update_o.asid = tlb_update_asid_q;
+  // set the global mapping bit
+  // NOTE(review): presumably bit 5 is the `g` field of riscv::pte_sv32_t - confirm against the package
+  assign shared_tlb_update_o.content = pte | (global_mapping_q << 5);
+
+
+  assign req_port_o.tag_valid = tag_valid_q;
+
+  logic allow_access;
+
+  // the failing pointer is only exposed while the access exception is being signalled
+  assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
+
+  pmp #(
+      .CVA6Cfg   (CVA6Cfg),
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_ptw (
+      .addr_i       (ptw_pptr_q),
+      // PTW access are always checked as if in S-Mode...
+      .priv_lvl_i   (riscv::PRIV_LVL_S),
+      // ...and they are always loads
+      .access_type_i(riscv::ACCESS_READ),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (allow_access)
+  );
+
+
+  assign req_port_o.data_be = be_gen_32(req_port_o.address_index[1:0], req_port_o.data_size);
+
+  //-------------------
+  // Page table walker
+  //-------------------
+  // A virtual address va is translated into a physical address pa as follows:
+  // 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv32,
+  //    PAGESIZE=2^12 and LEVELS=2.)
+  // 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
+  //    Sv32, PTESIZE=4.)
+  // 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault
+  //    exception.
+  // 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
+  //    Otherwise, this PTE is a pointer to the next level of the page table.
+  //    Let i=i-1. If i < 0, stop and raise a page-fault exception. Otherwise, let
+  //    a = pte.ppn × PAGESIZE and go to step 2.
+  // 5. A leaf PTE has been found. Determine if the requested memory access
+  //    is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
+  //    raise a page-fault exception. Otherwise, the translation is successful.
+  //    (This walker never writes the PTE: a clear pte.a, or a clear pte.d on a
+  //    store, is reported as an error and left to software.)
+  //    The translated physical address is given as follows:
+  //      - pa.pgoff = va.pgoff.
+  //      - If i > 0, then this is a superpage translation and
+  //        pa.ppn[i-1:0] = va.vpn[i-1:0].
+  //      - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
+  always_comb begin : ptw
+    // default assignments
+    // PTW memory interface
+    tag_valid_n               = 1'b0;
+    req_port_o.data_req       = 1'b0;
+    req_port_o.data_size      = 2'b10;
+    req_port_o.data_we        = 1'b0;
+    ptw_error_o               = 1'b0;
+    ptw_access_exception_o    = 1'b0;
+    shared_tlb_update_o.valid = 1'b0;
+    is_instr_ptw_n            = is_instr_ptw_q;
+    ptw_lvl_n                 = ptw_lvl_q;
+    ptw_pptr_n                = ptw_pptr_q;
+    state_d                   = state_q;
+    global_mapping_n          = global_mapping_q;
+    // input registers
+    tlb_update_asid_n         = tlb_update_asid_q;
+    vaddr_n                   = vaddr_q;
+
+    shared_tlb_miss_o         = 1'b0;
+
+    case (state_q)
+
+      IDLE: begin
+        // by default we start with the top-most page table
+        ptw_lvl_n        = LVL1;
+        global_mapping_n = 1'b0;
+        is_instr_ptw_n   = 1'b0;
+        // if we got a Shared TLB miss
+        if (shared_tlb_access_i & ~shared_tlb_hit_i) begin
+          ptw_pptr_n = {
+            satp_ppn_i, shared_tlb_vaddr_i[riscv::SV-1:22], 2'b0
+          };  // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4
+          is_instr_ptw_n = itlb_req_i;
+          tlb_update_asid_n = asid_i;
+          vaddr_n = shared_tlb_vaddr_i;
+          state_d = WAIT_GRANT;
+          shared_tlb_miss_o = 1'b1;
+        end
+      end
+
+      WAIT_GRANT: begin
+        // send a request out
+        req_port_o.data_req = 1'b1;
+        // wait for the grant
+        if (req_port_i.data_gnt) begin
+          // send the tag valid signal one cycle later
+          tag_valid_n = 1'b1;
+          state_d     = PTE_LOOKUP;
+        end
+      end
+
+      PTE_LOOKUP: begin
+        // we wait for the valid signal
+        if (data_rvalid_q) begin
+
+          // check if the global mapping bit is set
+          if (pte.g) global_mapping_n = 1'b1;
+
+          // -------------
+          // Invalid PTE
+          // -------------
+          // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
+          if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR;
+          // -----------
+          // Valid PTE
+          // -----------
+          else begin
+            //state_d = IDLE;
+            // default next state; the branches below override it on errors or a next-level walk
+            state_d = LATENCY;
+            // it is a valid PTE
+            // if pte.r = 1 or pte.x = 1 it is a valid PTE
+            if (pte.r || pte.x) begin
+              // Valid translation found (either 4M or 4K entry)
+              if (is_instr_ptw_q) begin
+                // ------------
+                // Update ITLB
+                // ------------
+                // If page is not executable, we can directly raise an error. This
+                // doesn't put a useless entry into the TLB. The same idea applies
+                // to the access flag since we let the access flag be managed by SW.
+                if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR;
+                else shared_tlb_update_o.valid = 1'b1;
+
+              end else begin
+                // ------------
+                // Update DTLB
+                // ------------
+                // Check if the access flag has been set, otherwise throw a page-fault
+                // and let the software handle those bits.
+                // If page is not readable (there are no write-only pages)
+                // we can directly raise an error. This doesn't put a useless
+                // entry into the TLB.
+                if (pte.a && (pte.r || (pte.x && mxr_i))) begin
+                  shared_tlb_update_o.valid = 1'b1;
+                end else begin
+                  state_d = PROPAGATE_ERROR;
+                end
+                // Request is a store: perform some additional checks
+                // If the request was a store and the page is not write-able, raise an error
+                // the same applies if the dirty flag is not set
+                if (lsu_is_store_i && (!pte.w || !pte.d)) begin
+                  shared_tlb_update_o.valid = 1'b0;
+                  state_d = PROPAGATE_ERROR;
+                end
+              end
+              // check if the ppn is correctly aligned:
+              // 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
+              // exception.
+              if (ptw_lvl_q == LVL1 && pte.ppn[9:0] != '0) begin
+                state_d = PROPAGATE_ERROR;
+                shared_tlb_update_o.valid = 1'b0;
+              end
+              // this is a pointer to the next page table level
+            end else begin
+              // pointer to next level of page table
+              if (ptw_lvl_q == LVL1) begin
+                // we are in the second level now
+                ptw_lvl_n  = LVL2;
+                ptw_pptr_n = {pte.ppn, vaddr_q[21:12], 2'b0};
+              end
+
+              state_d = WAIT_GRANT;
+
+              if (ptw_lvl_q == LVL2) begin
+                // Should already be the last level page table => Error
+                ptw_lvl_n = LVL2;
+                state_d   = PROPAGATE_ERROR;
+              end
+            end
+          end
+
+          // Check if this access was actually allowed from a PMP perspective
+          // (evaluated last on purpose: it overrides every decision taken above)
+          if (!allow_access) begin
+            shared_tlb_update_o.valid = 1'b0;
+            // we have to return the failed address in bad_addr
+            ptw_pptr_n = ptw_pptr_q;
+            state_d = PROPAGATE_ACCESS_ERROR;
+          end
+        end
+        // we've got a data WAIT_GRANT so tell the cache that the tag is valid
+      end
+      // Propagate error to MMU/LSU
+      PROPAGATE_ERROR: begin
+        state_d     = LATENCY;
+        ptw_error_o = 1'b1;
+      end
+      PROPAGATE_ACCESS_ERROR: begin
+        state_d                = LATENCY;
+        ptw_access_exception_o = 1'b1;
+      end
+      // wait for the rvalid before going back to IDLE
+      WAIT_RVALID: begin
+        if (data_rvalid_q) state_d = IDLE;
+      end
+      // single-cycle state inserted before returning to IDLE
+      LATENCY: begin
+        state_d = IDLE;
+      end
+      default: begin
+        state_d = IDLE;
+      end
+    endcase
+
+    // -------
+    // Flush
+    // -------
+    // should we have flushed before we got an rvalid, wait for it until going back to IDLE
+    if (flush_i) begin
+      // on a flush check whether we are
+      // 1. in the PTE Lookup check whether we still need to wait for an rvalid
+      // 2. waiting for a grant, if so: wait for it
+      // if not, go back to IDLE (through LATENCY)
+      if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
+                ((state_q == WAIT_GRANT) && req_port_i.data_gnt))
+        state_d = WAIT_RVALID;
+      else state_d = LATENCY;
+    end
+  end
+
+  // sequential process
+  // read data and rvalid from the memory port are registered here before the FSM uses them
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      state_q           <= IDLE;
+      is_instr_ptw_q    <= 1'b0;
+      ptw_lvl_q         <= LVL1;
+      tag_valid_q       <= 1'b0;
+      tlb_update_asid_q <= '0;
+      vaddr_q           <= '0;
+      ptw_pptr_q        <= '0;
+      global_mapping_q  <= 1'b0;
+      data_rdata_q      <= '0;
+      data_rvalid_q     <= 1'b0;
+    end else begin
+      state_q           <= state_d;
+      ptw_pptr_q        <= ptw_pptr_n;
+      is_instr_ptw_q    <= is_instr_ptw_n;
+      ptw_lvl_q         <= ptw_lvl_n;
+      tag_valid_q       <= tag_valid_n;
+      tlb_update_asid_q <= tlb_update_asid_n;
+      vaddr_q           <= vaddr_n;
+      global_mapping_q  <= global_mapping_n;
+      data_rdata_q      <= req_port_i.data_rdata;
+      data_rvalid_q     <= req_port_i.data_rvalid;
+    end
+  end
+
+endmodule
+/* verilator lint_on WIDTH */
diff --git a/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv b/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv
new file mode 100644
index 0000000..98e2a04
--- /dev/null
+++ b/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv
@@ -0,0 +1,367 @@
+// Copyright (c) 2023 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: Sebastien Jacq - Thales Research & Technology +// Date: 08/03/2023 +// +// Description: N-way associative shared TLB, it allows to reduce the number +// of ITLB and DTLB entries. +// + +/* verilator lint_off WIDTH */ + +module cva6_shared_tlb_sv32 + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int SHARED_TLB_DEPTH = 64, + parameter int SHARED_TLB_WAYS = 2, + parameter int ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + + input logic enable_translation_i, // CSRs indicate to enable SV32 + input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores + + input logic [ASID_WIDTH-1:0] asid_i, + + // from TLBs + // did we miss? + input logic itlb_access_i, + input logic itlb_hit_i, + input logic [riscv::VLEN-1:0] itlb_vaddr_i, + + input logic dtlb_access_i, + input logic dtlb_hit_i, + input logic [riscv::VLEN-1:0] dtlb_vaddr_i, + + // to TLBs, update logic + output tlb_update_sv32_t itlb_update_o, + output tlb_update_sv32_t dtlb_update_o, + + // Performance counters + output logic itlb_miss_o, + output logic dtlb_miss_o, + + output logic shared_tlb_access_o, + output logic shared_tlb_hit_o, + output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o, + + output logic itlb_req_o, + + // Update shared TLB in case of miss + input tlb_update_sv32_t shared_tlb_update_i + +); + + function logic [SHARED_TLB_WAYS-1:0] shared_tlb_way_bin2oh(input logic [$clog2(SHARED_TLB_WAYS +)-1:0] in); + logic [SHARED_TLB_WAYS-1:0] out; + out = '0; + out[in] = 1'b1; + return out; + endfunction + + typedef struct packed { + logic [8:0] asid; //9 bits wide + logic [9:0] vpn1; //10 bits wide + logic [9:0] vpn0; //10 bits wide + logic is_4M; + } shared_tag_t; + + shared_tag_t shared_tag_wr; + shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd; + + logic [SHARED_TLB_DEPTH-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, 
shared_tag_valid_d; + + logic [ SHARED_TLB_WAYS-1:0] shared_tag_valid; + + logic [ SHARED_TLB_WAYS-1:0] tag_wr_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_wr_addr; + logic [ $bits(shared_tag_t)-1:0] tag_wr_data; + + logic [ SHARED_TLB_WAYS-1:0] tag_rd_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_rd_addr; + logic [ $bits(shared_tag_t)-1:0] tag_rd_data [SHARED_TLB_WAYS-1:0]; + + logic [ SHARED_TLB_WAYS-1:0] tag_req; + logic [ SHARED_TLB_WAYS-1:0] tag_we; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_addr; + + logic [ SHARED_TLB_WAYS-1:0] pte_wr_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_wr_addr; + logic [$bits(riscv::pte_sv32_t)-1:0] pte_wr_data; + + logic [ SHARED_TLB_WAYS-1:0] pte_rd_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_rd_addr; + logic [$bits(riscv::pte_sv32_t)-1:0] pte_rd_data [SHARED_TLB_WAYS-1:0]; + + logic [ SHARED_TLB_WAYS-1:0] pte_req; + logic [ SHARED_TLB_WAYS-1:0] pte_we; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_addr; + + logic [9:0] vpn0_d, vpn1_d, vpn0_q, vpn1_q; + + riscv::pte_sv32_t [SHARED_TLB_WAYS-1:0] pte; + + logic [riscv::VLEN-1-12:0] itlb_vpn_q; + logic [riscv::VLEN-1-12:0] dtlb_vpn_q; + + logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d; + + logic shared_tlb_access_q, shared_tlb_access_d; + logic shared_tlb_hit_d; + logic [riscv::VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d; + + logic itlb_req_d, itlb_req_q; + logic dtlb_req_d, dtlb_req_q; + + // replacement strategy + logic [SHARED_TLB_WAYS-1:0] way_valid; + logic update_lfsr; // shift the LFSR + logic [$clog2(SHARED_TLB_WAYS)-1:0] inv_way; // first non-valid encountered + logic [$clog2(SHARED_TLB_WAYS)-1:0] rnd_way; // random index for replacement + logic [$clog2(SHARED_TLB_WAYS)-1:0] repl_way; // way to replace + logic [SHARED_TLB_WAYS-1:0] repl_way_oh_d; // way to replace (onehot) + logic all_ways_valid; // we need to switch repl strategy since all are valid + + assign shared_tlb_access_o = shared_tlb_access_q; + assign shared_tlb_hit_o = shared_tlb_hit_d; + 
assign shared_tlb_vaddr_o = shared_tlb_vaddr_q; + + assign itlb_req_o = itlb_req_q; + + /////////////////////////////////////////////////////// + // tag comparison, hit generation + /////////////////////////////////////////////////////// + always_comb begin : itlb_dtlb_miss + itlb_miss_o = 1'b0; + dtlb_miss_o = 1'b0; + vpn0_d = vpn0_q; + vpn1_d = vpn1_q; + + tag_rd_en = '0; + pte_rd_en = '0; + + itlb_req_d = 1'b0; + dtlb_req_d = 1'b0; + + tlb_update_asid_d = tlb_update_asid_q; + + shared_tlb_access_d = '0; + shared_tlb_vaddr_d = shared_tlb_vaddr_q; + + tag_rd_addr = '0; + pte_rd_addr = '0; + + // if we got an ITLB miss + if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin + tag_rd_en = '1; + tag_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + pte_rd_en = '1; + pte_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + + vpn0_d = itlb_vaddr_i[21:12]; + vpn1_d = itlb_vaddr_i[31:22]; + + itlb_miss_o = 1'b1; + itlb_req_d = 1'b1; + + tlb_update_asid_d = asid_i; + + shared_tlb_access_d = 1'b1; + shared_tlb_vaddr_d = itlb_vaddr_i; + + // we got an DTLB miss + end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin + tag_rd_en = '1; + tag_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + pte_rd_en = '1; + pte_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + + vpn0_d = dtlb_vaddr_i[21:12]; + vpn1_d = dtlb_vaddr_i[31:22]; + + dtlb_miss_o = 1'b1; + dtlb_req_d = 1'b1; + + tlb_update_asid_d = asid_i; + + shared_tlb_access_d = 1'b1; + shared_tlb_vaddr_d = dtlb_vaddr_i; + end + end //itlb_dtlb_miss + + always_comb begin : tag_comparison + shared_tlb_hit_d = 1'b0; + dtlb_update_o = '0; + itlb_update_o = '0; + //number of ways + for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin + if (shared_tag_valid[i] && ((tlb_update_asid_q == shared_tag_rd[i].asid) || pte[i].g) && vpn1_q == shared_tag_rd[i].vpn1) begin + if (shared_tag_rd[i].is_4M || vpn0_q == shared_tag_rd[i].vpn0) begin + shared_tlb_hit_d = 1'b1; + if 
(itlb_req_q) begin + itlb_update_o.valid = 1'b1; + itlb_update_o.vpn = itlb_vpn_q; + itlb_update_o.is_4M = shared_tag_rd[i].is_4M; + itlb_update_o.asid = tlb_update_asid_q; + itlb_update_o.content = pte[i]; + end else if (dtlb_req_q) begin + dtlb_update_o.valid = 1'b1; + dtlb_update_o.vpn = dtlb_vpn_q; + dtlb_update_o.is_4M = shared_tag_rd[i].is_4M; + dtlb_update_o.asid = tlb_update_asid_q; + dtlb_update_o.content = pte[i]; + end + end + end + end + end //tag_comparison + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + itlb_vpn_q <= '0; + dtlb_vpn_q <= '0; + tlb_update_asid_q <= '0; + shared_tlb_access_q <= '0; + shared_tlb_vaddr_q <= '0; + shared_tag_valid_q <= '0; + vpn0_q <= '0; + vpn1_q <= '0; + itlb_req_q <= '0; + dtlb_req_q <= '0; + shared_tag_valid <= '0; + end else begin + itlb_vpn_q <= itlb_vaddr_i[riscv::SV-1:12]; + dtlb_vpn_q <= dtlb_vaddr_i[riscv::SV-1:12]; + tlb_update_asid_q <= tlb_update_asid_d; + shared_tlb_access_q <= shared_tlb_access_d; + shared_tlb_vaddr_q <= shared_tlb_vaddr_d; + shared_tag_valid_q <= shared_tag_valid_d; + vpn0_q <= vpn0_d; + vpn1_q <= vpn1_d; + itlb_req_q <= itlb_req_d; + dtlb_req_q <= dtlb_req_d; + shared_tag_valid <= shared_tag_valid_q[tag_rd_addr]; + end + end + + // ------------------ + // Update and Flush + // ------------------ + always_comb begin : update_flush + shared_tag_valid_d = shared_tag_valid_q; + tag_wr_en = '0; + pte_wr_en = '0; + + if (flush_i) begin + shared_tag_valid_d = '0; + end else if (shared_tlb_update_i.valid) begin + for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin + if (repl_way_oh_d[i]) begin + shared_tag_valid_d[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]][i] = 1'b1; + tag_wr_en[i] = 1'b1; + pte_wr_en[i] = 1'b1; + end + end + end + end //update_flush + + assign shared_tag_wr.asid = shared_tlb_update_i.asid; + assign shared_tag_wr.vpn1 = shared_tlb_update_i.vpn[19:10]; + assign shared_tag_wr.vpn0 = shared_tlb_update_i.vpn[9:0]; + 
assign shared_tag_wr.is_4M = shared_tlb_update_i.is_4M; + + assign tag_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]; + assign tag_wr_data = shared_tag_wr; + + assign pte_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]; + assign pte_wr_data = shared_tlb_update_i.content; + + assign way_valid = shared_tag_valid_q[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]]; + assign repl_way = (all_ways_valid) ? rnd_way : inv_way; + assign update_lfsr = shared_tlb_update_i.valid & all_ways_valid; + assign repl_way_oh_d = (shared_tlb_update_i.valid) ? shared_tlb_way_bin2oh(repl_way) : '0; + + lzc #( + .WIDTH(SHARED_TLB_WAYS) + ) i_lzc ( + .in_i (~way_valid), + .cnt_o (inv_way), + .empty_o(all_ways_valid) + ); + + lfsr #( + .LfsrWidth(8), + .OutWidth ($clog2(SHARED_TLB_WAYS)) + ) i_lfsr ( + .clk_i (clk_i), + .rst_ni(rst_ni), + .en_i (update_lfsr), + .out_o (rnd_way) + ); + + /////////////////////////////////////////////////////// + // memory arrays and regs + /////////////////////////////////////////////////////// + + assign tag_req = tag_wr_en | tag_rd_en; + assign tag_we = tag_wr_en; + assign tag_addr = tag_wr_en ? tag_wr_addr : tag_rd_addr; + + assign pte_req = pte_wr_en | pte_rd_en; + assign pte_we = pte_wr_en; + assign pte_addr = pte_wr_en ? 
pte_wr_addr : pte_rd_addr; + + for (genvar i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_sram + // Tag RAM + sram #( + .DATA_WIDTH($bits(shared_tag_t)), + .NUM_WORDS (SHARED_TLB_DEPTH) + ) tag_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (tag_req[i]), + .we_i (tag_we[i]), + .addr_i (tag_addr), + .wuser_i('0), + .wdata_i(tag_wr_data), + .be_i ('1), + .ruser_o(), + .rdata_o(tag_rd_data[i]) + ); + + assign shared_tag_rd[i] = shared_tag_t'(tag_rd_data[i]); + + // PTE RAM + sram #( + .DATA_WIDTH($bits(riscv::pte_sv32_t)), + .NUM_WORDS (SHARED_TLB_DEPTH) + ) pte_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (pte_req[i]), + .we_i (pte_we[i]), + .addr_i (pte_addr), + .wuser_i('0), + .wdata_i(pte_wr_data), + .be_i ('1), + .ruser_o(), + .rdata_o(pte_rd_data[i]) + ); + assign pte[i] = riscv::pte_sv32_t'(pte_rd_data[i]); + end +endmodule + +/* verilator lint_on WIDTH */ diff --git a/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv b/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv new file mode 100644 index 0000000..79a7c98 --- /dev/null +++ b/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv @@ -0,0 +1,281 @@ +// Copyright (c) 2021 Thales. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Sebastien Jacq Thales Research & Technology +// Date: 17/07/2021 +// +// Additional contributions by: +// Sebastien Jacq - sjthales on github.com +// +// Description: Translation Lookaside Buffer, Sv32 , fully set-associative +// This module is an adaptation of the Sv39 TLB developed +// by Florian Zaruba and David Schaffenrath to the Sv32 standard. +// +// =========================================================================== // +// Revisions : +// Date Version Author Description +// 2020-02-17 0.1 S.Jacq TLB Sv32 for CV32A6 +// =========================================================================== // + +module cva6_tlb_sv32 + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned TLB_ENTRIES = 4, + parameter int unsigned ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // Flush signal + // Update TLB + input tlb_update_sv32_t update_i, + // Lookup signals + input logic lu_access_i, + input logic [ASID_WIDTH-1:0] lu_asid_i, + input logic [riscv::VLEN-1:0] lu_vaddr_i, + output riscv::pte_sv32_t lu_content_o, + input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + output logic lu_is_4M_o, + output logic lu_hit_o +); + + // Sv32 defines two levels of page tables + struct packed { + logic [8:0] asid; //9 bits wide + logic [9:0] vpn1; //10 bits wide + logic [9:0] vpn0; //10 bits wide + logic is_4M; + logic valid; + } [TLB_ENTRIES-1:0] + tags_q, tags_n; + + riscv::pte_sv32_t [TLB_ENTRIES-1:0] content_q, content_n; + logic [9:0] vpn0, vpn1; + logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic + logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy + //------------- + // Translation + //------------- + always_comb begin : translation + vpn0 = lu_vaddr_i[21:12]; + vpn1 = lu_vaddr_i[31:22]; + + + // 
default assignment + lu_hit = '{default: 0}; + lu_hit_o = 1'b0; + lu_content_o = '{default: 0}; + lu_is_4M_o = 1'b0; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + // first level match, this may be a mega page, check the ASID flags as well + // if the entry is associated to a global address, don't match the ASID (ASID is don't care) + if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[ASID_WIDTH-1:0]) || content_q[i].g) && vpn1 == tags_q[i].vpn1) begin + if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin + lu_is_4M_o = tags_q[i].is_4M; + lu_content_o = content_q[i]; + lu_hit_o = 1'b1; + lu_hit[i] = 1'b1; + end + end + end + end + + logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high + logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high + logic [TLB_ENTRIES-1:0] vaddr_vpn0_match; + logic [TLB_ENTRIES-1:0] vaddr_vpn1_match; + + + assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i); + assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i); + + // ------------------ + // Update and Flush + // ------------------ + always_comb begin : update_flush + tags_n = tags_q; + content_n = content_q; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + + vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[21:12] == tags_q[i].vpn0); + vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[31:22] == tags_q[i].vpn1); + + if (flush_i) begin + // invalidate logic + // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case) + if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0; + // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages + else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // the entry is flushed if it's not global and asid and vaddr 
both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case) + else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case) + else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // normal replacement + end else if (update_i.valid & replace_en[i]) begin + // update tag array + tags_n[i] = '{ + asid: update_i.asid, + vpn1: update_i.vpn[19:10], + vpn0: update_i.vpn[9:0], + is_4M: update_i.is_4M, + valid: 1'b1 + }; + // and content as well + content_n[i] = update_i.content; + end + end + end + + // ----------------------------------------------- + // PLRU - Pseudo Least Recently Used Replacement + // ----------------------------------------------- + logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n; + logic en; + int unsigned idx_base, shift, new_index; + always_comb begin : plru_replacement + plru_tree_n = plru_tree_q; + en = '0; + idx_base = '0; + shift = '0; + new_index = '0; + // The PLRU-tree indexing: + // lvl0 0 + // / \ + // / \ + // lvl1 1 2 + // / \ / \ + // lvl2 3 4 5 6 + // / \ /\/\ /\ + // ... ... ... ... + // Just predefine which nodes will be set/cleared + // E.g. 
for a TLB with 8 entries, the for-loop is semantically + // equivalent to the following pseudo-code: + // unique case (1'b1) + // lu_hit[7]: plru_tree_n[0, 2, 6] = {1, 1, 1}; + // lu_hit[6]: plru_tree_n[0, 2, 6] = {1, 1, 0}; + // lu_hit[5]: plru_tree_n[0, 2, 5] = {1, 0, 1}; + // lu_hit[4]: plru_tree_n[0, 2, 5] = {1, 0, 0}; + // lu_hit[3]: plru_tree_n[0, 1, 4] = {0, 1, 1}; + // lu_hit[2]: plru_tree_n[0, 1, 4] = {0, 1, 0}; + // lu_hit[1]: plru_tree_n[0, 1, 3] = {0, 0, 1}; + // lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0}; + // default: begin /* No hit */ end + // endcase + for ( + int unsigned i = 0; i < TLB_ENTRIES; i++ + ) begin + // we got a hit so update the pointer as it was least recently used + if (lu_hit[i] & lu_access_i) begin + // Set the nodes to the values we would expect + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... + shift = $clog2(TLB_ENTRIES) - lvl; + // to circumvent the 32 bit integer arithmetic assignment + new_index = ~((i >> (shift - 1)) & 32'b1); + plru_tree_n[idx_base+(i>>shift)] = new_index[0]; + end + end + end + // Decode tree to write enable signals + // Next for-loop basically creates the following logic for e.g. an 8 entry + // TLB (note: pseudo-code obviously): + // replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1} + // replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0} + // replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1} + // replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0} + // replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1} + // replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0} + // replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1} + // replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0} + // For each entry traverse the tree. 
If every tree-node matches, + // the corresponding bit of the entry's index, this is + // the next entry to replace. + for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin + en = 1'b1; + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... + shift = $clog2(TLB_ENTRIES) - lvl; + + // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1); + new_index = (i >> (shift - 1)) & 32'b1; + if (new_index[0]) begin + en &= plru_tree_q[idx_base+(i>>shift)]; + end else begin + en &= ~plru_tree_q[idx_base+(i>>shift)]; + end + end + replace_en[i] = en; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + tags_q <= '{default: 0}; + content_q <= '{default: 0}; + plru_tree_q <= '{default: 0}; + end else begin + tags_q <= tags_n; + content_q <= content_n; + plru_tree_q <= plru_tree_n; + end + end + //-------------- + // Sanity checks + //-------------- + + //pragma translate_off +`ifndef VERILATOR + + initial begin : p_assertions + assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1)) + else begin + $error("TLB size must be a multiple of 2 and greater than 1"); + $stop(); + end + assert (ASID_WIDTH >= 1) + else begin + $error("ASID width must be at least 1"); + $stop(); + end + end + + // Just for checking + function int countSetBits(logic [TLB_ENTRIES-1:0] vector); + automatic int count = 0; + foreach (vector[idx]) begin + count += vector[idx]; + end + return count; + endfunction + + assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1)) + else begin + $error("More then one hit in TLB!"); + $stop(); + end + assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1)) + else begin + $error("More then one TLB entry selected for next replace!"); + $stop(); + end + +`endif + //pragma translate_on + +endmodule diff --git a/test/type_param/core/mmu_sv39/mmu.sv b/test/type_param/core/mmu_sv39/mmu.sv new file 
mode 100644 index 0000000..39e9f34 --- /dev/null +++ b/test/type_param/core/mmu_sv39/mmu.sv @@ -0,0 +1,519 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 19/04/2017 +// Description: Memory Management Unit for Ariane, contains TLB and +// address translation unit. SV39 as defined in RISC-V +// privilege specification 1.11-WIP + + +module mmu + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned INSTR_TLB_ENTRIES = 4, + parameter int unsigned DATA_TLB_ENTRIES = 4, + parameter int unsigned ASID_WIDTH = 1 +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic enable_translation_i, + input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores + // IF interface + input icache_arsp_t icache_areq_i, + output icache_areq_t icache_areq_o, + // LSU interface + // this is a more minimalistic interface because the actual addressing logic is handled + // in the LSU as we distinguish load and stores, what we do here is simple address translation + input exception_t misaligned_ex_i, + input logic lsu_req_i, // request address translation + input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in + input logic lsu_is_store_i, // the translation is requested by a store + // 
if we need to walk the page table we can't grant in the same cycle + // Cycle 0 + output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB + output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit) + // Cycle 1 + output logic lsu_valid_o, // translation is valid + output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address + output exception_t lsu_exception_o, // address translation threw an exception + // General control signals + input riscv::priv_lvl_t priv_lvl_i, + input riscv::priv_lvl_t ld_st_priv_lvl_i, + input logic sum_i, + input logic mxr_i, + // input logic flag_mprv_i, + input logic [riscv::PPNW-1:0] satp_ppn_i, + input logic [ASID_WIDTH-1:0] asid_i, + input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + input logic flush_tlb_i, + // Performance counters + output logic itlb_miss_o, + output logic dtlb_miss_o, + // PTW memory interface + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o, + // PMP + input riscv::pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][riscv::PLEN-3:0] pmpaddr_i +); + + logic iaccess_err; // insufficient privilege to access this instruction page + logic daccess_err; // insufficient privilege to access this data page + logic ptw_active; // PTW is currently walking a page table + logic walking_instr; // PTW is walking because of an ITLB miss + logic ptw_error; // PTW threw an exception + logic ptw_access_exception; // PTW threw an access exception (PMPs) + logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr + + logic [riscv::VLEN-1:0] update_vaddr; + tlb_update_t update_ptw_itlb, update_ptw_dtlb; + + logic itlb_lu_access; + riscv::pte_t itlb_content; + logic itlb_is_2M; + logic itlb_is_1G; + logic itlb_lu_hit; + + logic dtlb_lu_access; + riscv::pte_t dtlb_content; + logic dtlb_is_2M; + logic dtlb_is_1G; + logic dtlb_lu_hit; + + + // Assignments + assign 
itlb_lu_access = icache_areq_i.fetch_req; + assign dtlb_lu_access = lsu_req_i; + + + tlb #( + .CVA6Cfg (CVA6Cfg), + .TLB_ENTRIES(INSTR_TLB_ENTRIES), + .ASID_WIDTH (ASID_WIDTH) + ) i_itlb ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(flush_tlb_i), + + .update_i(update_ptw_itlb), + + .lu_access_i (itlb_lu_access), + .lu_asid_i (asid_i), + .asid_to_be_flushed_i (asid_to_be_flushed_i), + .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i), + .lu_vaddr_i (icache_areq_i.fetch_vaddr), + .lu_content_o (itlb_content), + + .lu_is_2M_o(itlb_is_2M), + .lu_is_1G_o(itlb_is_1G), + .lu_hit_o (itlb_lu_hit) + ); + + tlb #( + .CVA6Cfg (CVA6Cfg), + .TLB_ENTRIES(DATA_TLB_ENTRIES), + .ASID_WIDTH (ASID_WIDTH) + ) i_dtlb ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(flush_tlb_i), + + .update_i(update_ptw_dtlb), + + .lu_access_i (dtlb_lu_access), + .lu_asid_i (asid_i), + .asid_to_be_flushed_i (asid_to_be_flushed_i), + .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i), + .lu_vaddr_i (lsu_vaddr_i), + .lu_content_o (dtlb_content), + + .lu_is_2M_o(dtlb_is_2M), + .lu_is_1G_o(dtlb_is_1G), + .lu_hit_o (dtlb_lu_hit) + ); + + + ptw #( + .CVA6Cfg (CVA6Cfg), + .ASID_WIDTH(ASID_WIDTH) + ) i_ptw ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .ptw_active_o (ptw_active), + .walking_instr_o (walking_instr), + .ptw_error_o (ptw_error), + .ptw_access_exception_o(ptw_access_exception), + .enable_translation_i (enable_translation_i), + + .update_vaddr_o(update_vaddr), + .itlb_update_o (update_ptw_itlb), + .dtlb_update_o (update_ptw_dtlb), + + .itlb_access_i(itlb_lu_access), + .itlb_hit_i (itlb_lu_hit), + .itlb_vaddr_i (icache_areq_i.fetch_vaddr), + + .dtlb_access_i(dtlb_lu_access), + .dtlb_hit_i (dtlb_lu_hit), + .dtlb_vaddr_i (lsu_vaddr_i), + + .req_port_i (req_port_i), + .req_port_o (req_port_o), + .pmpcfg_i, + .pmpaddr_i, + .bad_paddr_o(ptw_bad_paddr), + .* + ); + + // ila_1 i_ila_1 ( + // .clk(clk_i), // input wire clk + // .probe0({req_port_o.address_tag, req_port_o.address_index}), + // 
.probe1(req_port_o.data_req), // input wire [63:0] probe1 + // .probe2(req_port_i.data_gnt), // input wire [0:0] probe2 + // .probe3(req_port_i.data_rdata), // input wire [0:0] probe3 + // .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4 + // .probe5(ptw_error), // input wire [1:0] probe5 + // .probe6(update_vaddr), // input wire [0:0] probe6 + // .probe7(update_ptw_itlb.valid), // input wire [0:0] probe7 + // .probe8(update_ptw_dtlb.valid), // input wire [0:0] probe8 + // .probe9(dtlb_lu_access), // input wire [0:0] probe9 + // .probe10(lsu_vaddr_i), // input wire [0:0] probe10 + // .probe11(dtlb_lu_hit), // input wire [0:0] probe11 + // .probe12(itlb_lu_access), // input wire [0:0] probe12 + // .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13 + // .probe14(itlb_lu_hit) // input wire [0:0] probe13 + // ); + + //----------------------- + // Instruction Interface + //----------------------- + logic match_any_execute_region; + logic pmp_instr_allow; + + // The instruction interface is a simple request response interface + always_comb begin : instr_interface + // MMU disabled: just pass through + icache_areq_o.fetch_valid = icache_areq_i.fetch_req; + icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation + // two potential exception sources: + // 1. HPTW threw an exception -> signal with a page fault exception + // 2. We got an access error because of insufficient permissions -> throw an access exception + icache_areq_o.fetch_exception = '0; + // Check whether we are allowed to access this memory region from a fetch perspective + iaccess_err = icache_areq_i.fetch_req && enable_translation_i + && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u) + || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u)); + + // MMU enabled: address from TLB, request delayed until hit. Error when TLB + // hit and no access right or TLB hit and translated address not valid (e.g. 
+ // AXI decode error), or when PTW performs walk due to ITLB miss and raises + // an error. + if (enable_translation_i) begin + // we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal + if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + 1'b1 + }; + end + + icache_areq_o.fetch_valid = 1'b0; + + // 4K page + icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]}; + // Mega page + if (itlb_is_2M) begin + icache_areq_o.fetch_paddr[20:12] = icache_areq_i.fetch_vaddr[20:12]; + end + // Giga page + if (itlb_is_1G) begin + icache_areq_o.fetch_paddr[29:12] = icache_areq_i.fetch_vaddr[29:12]; + end + + // --------- + // ITLB Hit + // -------- + // if we hit the ITLB output the request signal immediately + if (itlb_lu_hit) begin + icache_areq_o.fetch_valid = icache_areq_i.fetch_req; + // we got an access error + if (iaccess_err) begin + // throw a page fault + icache_areq_o.fetch_exception = { + riscv::INSTR_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + 1'b1 + }; + end else if (!pmp_instr_allow) begin + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + {{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + 1'b1 + }; + end + end else + // --------- + // ITLB Miss + // --------- + // watch out for exceptions happening during walking the page table + if (ptw_active && walking_instr) begin + icache_areq_o.fetch_valid = ptw_error | ptw_access_exception; + if (ptw_error) + icache_areq_o.fetch_exception = { + riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1 + }; + else + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN - 
riscv::VLEN{1'b0}}, update_vaddr}, 1'b1 + }; + end + end + // if it didn't match any execute region throw an `Instruction Access Fault` + // or: if we are not translating, check PMPs immediately on the paddr + if ((!match_any_execute_region && !ptw_error) || (!enable_translation_i && !pmp_instr_allow)) begin + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + {{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}, + 1'b1 + }; + end + end + + // check for execute flag on memory + assign match_any_execute_region = config_pkg::is_inside_execute_regions( + CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr} + ); + + // Instruction fetch + pmp #( + .CVA6Cfg (CVA6Cfg), + .PLEN (riscv::PLEN), + .PMP_LEN (riscv::PLEN - 2), + .NR_ENTRIES(CVA6Cfg.NrPMPEntries) + ) i_pmp_if ( + .addr_i (icache_areq_o.fetch_paddr), + .priv_lvl_i, + // we will always execute on the instruction fetch port + .access_type_i(riscv::ACCESS_EXEC), + // Configuration + .conf_addr_i (pmpaddr_i), + .conf_i (pmpcfg_i), + .allow_o (pmp_instr_allow) + ); + + //----------------------- + // Data Interface + //----------------------- + logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q; + riscv::pte_t dtlb_pte_n, dtlb_pte_q; + exception_t misaligned_ex_n, misaligned_ex_q; + logic lsu_req_n, lsu_req_q; + logic lsu_is_store_n, lsu_is_store_q; + logic dtlb_hit_n, dtlb_hit_q; + logic dtlb_is_2M_n, dtlb_is_2M_q; + logic dtlb_is_1G_n, dtlb_is_1G_q; + + // check if we need to do translation or if we are always ready (e.g.: we are not translating anything) + assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1; + + // Wires to PMP checks + riscv::pmp_access_t pmp_access_type; + logic pmp_data_allow; + localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 
29 : riscv::PPNW - 1; + // The data interface is simpler and only consists of a request/response interface + always_comb begin : data_interface + // save request and DTLB response + lsu_vaddr_n = lsu_vaddr_i; + lsu_req_n = lsu_req_i; + misaligned_ex_n = misaligned_ex_i; + dtlb_pte_n = dtlb_content; + dtlb_hit_n = dtlb_lu_hit; + lsu_is_store_n = lsu_is_store_i; + dtlb_is_2M_n = dtlb_is_2M; + dtlb_is_1G_n = dtlb_is_1G; + + lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0]; + lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PLEN-1:12]; + lsu_valid_o = lsu_req_q; + lsu_exception_o = misaligned_ex_q; + pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ; + + // mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions + misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i; + + // Check if the User flag is set, then we may only access it in supervisor mode + // if SUM is enabled + daccess_err = en_ld_st_translation_i && ((ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode + (ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u)); // this is not a user page but we are in user mode and trying to access it + // translation is enabled and no misaligned exception occurred + if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin + lsu_valid_o = 1'b0; + // 4K page + lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]}; + lsu_dtlb_ppn_o = dtlb_content.ppn; + // Mega page + if (dtlb_is_2M_q) begin + lsu_paddr_o[20:12] = lsu_vaddr_q[20:12]; + lsu_dtlb_ppn_o[20:12] = lsu_vaddr_n[20:12]; + end + // Giga page + if (dtlb_is_1G_q) begin + lsu_paddr_o[PPNWMin:12] = lsu_vaddr_q[PPNWMin:12]; + lsu_dtlb_ppn_o[PPNWMin:12] = lsu_vaddr_n[PPNWMin:12]; + end + // --------- + // DTLB Hit + // -------- + if (dtlb_hit_q && lsu_req_q) begin + lsu_valid_o = 1'b1; + // exception priority: + // PAGE_FAULTS have higher priority than ACCESS_FAULTS + // 
virtual memory based exceptions are PAGE_FAULTS + // physical memory based exceptions are ACCESS_FAULTS (PMA/PMP) + + // this is a store + if (lsu_is_store_q) begin + // check if the page is write-able and we are not violating privileges + // also check if the dirty flag is set + if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin + lsu_exception_o = { + riscv::STORE_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + 1'b1 + }; + // Check if any PMPs are violated + end else if (!pmp_data_allow) begin + lsu_exception_o = { + riscv::ST_ACCESS_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + 1'b1 + }; + end + + // this is a load + end else begin + // check for sufficient access privileges - throw a page fault if necessary + if (daccess_err) begin + lsu_exception_o = { + riscv::LOAD_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + 1'b1 + }; + // Check if any PMPs are violated + end else if (!pmp_data_allow) begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + 1'b1 + }; + end + end + end else + + // --------- + // DTLB Miss + // --------- + // watch out for exceptions + if (ptw_active && !walking_instr) begin + // page table walker threw an exception + if (ptw_error) begin + // an error makes the translation valid + lsu_valid_o = 1'b1; + // the page table walker can only throw page faults + if (lsu_is_store_q) begin + lsu_exception_o = { + riscv::STORE_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr}, + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::LOAD_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr}, + 1'b1 + }; + end + end + + if (ptw_access_exception) begin + // an error makes the translation valid + lsu_valid_o = 1'b1; + // Any fault of the page table walk should be based of the original 
access type + if (lsu_is_store_q) begin + lsu_exception_o = { + riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1 + }; + end + end + end + end // If translation is not enabled, check the paddr immediately against PMPs + else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin + if (lsu_is_store_q) begin + lsu_exception_o = { + riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1 + }; + end + end + end + + // Load/store PMP check + pmp #( + .CVA6Cfg (CVA6Cfg), + .PLEN (riscv::PLEN), + .PMP_LEN (riscv::PLEN - 2), + .NR_ENTRIES(CVA6Cfg.NrPMPEntries) + ) i_pmp_data ( + .addr_i (lsu_paddr_o), + .priv_lvl_i (ld_st_priv_lvl_i), + .access_type_i(pmp_access_type), + // Configuration + .conf_addr_i (pmpaddr_i), + .conf_i (pmpcfg_i), + .allow_o (pmp_data_allow) + ); + + // ---------- + // Registers + // ---------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + lsu_vaddr_q <= '0; + lsu_req_q <= '0; + misaligned_ex_q <= '0; + dtlb_pte_q <= '0; + dtlb_hit_q <= '0; + lsu_is_store_q <= '0; + dtlb_is_2M_q <= '0; + dtlb_is_1G_q <= '0; + end else begin + lsu_vaddr_q <= lsu_vaddr_n; + lsu_req_q <= lsu_req_n; + misaligned_ex_q <= misaligned_ex_n; + dtlb_pte_q <= dtlb_pte_n; + dtlb_hit_q <= dtlb_hit_n; + lsu_is_store_q <= lsu_is_store_n; + dtlb_is_2M_q <= dtlb_is_2M_n; + dtlb_is_1G_q <= dtlb_is_1G_n; + end + end +endmodule diff --git a/test/type_param/core/mmu_sv39/ptw.sv b/test/type_param/core/mmu_sv39/ptw.sv new file mode 100644 index 0000000..2d0e378 --- /dev/null +++ b/test/type_param/core/mmu_sv39/ptw.sv @@ -0,0 +1,409 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: David Schaffenrath, TU Graz +// Author: Florian Zaruba, ETH Zurich +// Date: 24.4.2017 +// Description: Hardware-PTW (SV39 page table walker: on an ITLB/DTLB miss it walks up to three levels, updates the missing TLB and reports page faults and PMP access faults) + +/* verilator lint_off WIDTH */ + +module ptw + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush everything, we need to do this because + // actually everything we do is speculative at this stage + // e.g.: there could be a CSR instruction that changes everything + output logic ptw_active_o, + output logic walking_instr_o, // set when the current walk was started by an ITLB miss + output logic ptw_error_o, // set when an error occurred + output logic ptw_access_exception_o, // set when a PMP access exception occurred + input logic enable_translation_i, // CSRs indicate to enable SV39 + input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores + + input logic lsu_is_store_i, // this translation was triggered by a store + // PTW memory interface + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o, + + + // to TLBs, update logic + output tlb_update_t itlb_update_o, + output tlb_update_t dtlb_update_o, + + output logic [riscv::VLEN-1:0] update_vaddr_o, + + input logic [ ASID_WIDTH-1:0] asid_i, + // from TLBs + 
// did we miss? + input logic itlb_access_i, + input logic itlb_hit_i, + input logic [riscv::VLEN-1:0] itlb_vaddr_i, + + input logic dtlb_access_i, + input logic dtlb_hit_i, + input logic [riscv::VLEN-1:0] dtlb_vaddr_i, + // from CSR file + input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp + input logic mxr_i, + // Performance counters + output logic itlb_miss_o, + output logic dtlb_miss_o, + // PMP + + input riscv::pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + output logic [riscv::PLEN-1:0] bad_paddr_o + +); + + // input registers: registered copies of req_port_i.data_rvalid / req_port_i.data_rdata + logic data_rvalid_q; + logic [63:0] data_rdata_q; + + riscv::pte_t pte; + assign pte = riscv::pte_t'(data_rdata_q); + + enum logic [2:0] { + IDLE, + WAIT_GRANT, + PTE_LOOKUP, + WAIT_RVALID, + PROPAGATE_ERROR, + PROPAGATE_ACCESS_ERROR + } + state_q, state_d; + + // SV39 defines three levels of page tables + enum logic [1:0] { + LVL1, + LVL2, + LVL3 + } + ptw_lvl_q, ptw_lvl_n; + + // is this an instruction page table walk? 
+ logic is_instr_ptw_q, is_instr_ptw_n; + logic global_mapping_q, global_mapping_n; + // latched tag signal + logic tag_valid_n, tag_valid_q; + // register the ASID + logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; + // register the VPN we need to walk, SV39 defines a 39 bit virtual address + logic [riscv::VLEN-1:0] vaddr_q, vaddr_n; + // 8 byte aligned physical pointer (always built with the three low bits set to zero) + logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n; + + // Assignments + assign update_vaddr_o = vaddr_q; + + assign ptw_active_o = (state_q != IDLE); + assign walking_instr_o = is_instr_ptw_q; + // directly output the correct physical address + assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0]; + assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH]; + // we are never going to kill this request + assign req_port_o.kill_req = '0; + // we are never going to write with the HPTW + assign req_port_o.data_wdata = 64'b0; + // we only issue one single request at a time + assign req_port_o.data_id = '0; + // ----------- + // TLB Update + // ----------- + assign itlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]}; + assign dtlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]}; + // update the correct page table level + assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2); + assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1); + assign dtlb_update_o.is_2M = (ptw_lvl_q == LVL2); + assign dtlb_update_o.is_1G = (ptw_lvl_q == LVL1); + // output the correct ASID + assign itlb_update_o.asid = tlb_update_asid_q; + assign dtlb_update_o.asid = tlb_update_asid_q; + // set the global mapping bit + assign itlb_update_o.content = pte | (global_mapping_q << 5); + assign dtlb_update_o.content = pte | (global_mapping_q << 5); + + assign req_port_o.tag_valid = tag_valid_q; + + logic allow_access; + + assign bad_paddr_o = ptw_access_exception_o ? 
ptw_pptr_q : 'b0; + + pmp #( + .CVA6Cfg (CVA6Cfg), + .PLEN (riscv::PLEN), + .PMP_LEN (riscv::PLEN - 2), + .NR_ENTRIES(CVA6Cfg.NrPMPEntries) + ) i_pmp_ptw ( + .addr_i (ptw_pptr_q), + // PTW access are always checked as if in S-Mode... + .priv_lvl_i (riscv::PRIV_LVL_S), + // ...and they are always loads + .access_type_i(riscv::ACCESS_READ), + // Configuration + .conf_addr_i (pmpaddr_i), + .conf_i (pmpcfg_i), + .allow_o (allow_access) + ); + + //------------------- + // Page table walker + //------------------- + // A virtual address va is translated into a physical address pa as follows: + // 1. Let a be satp.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39, + // PAGESIZE=2^12 and LEVELS=3.) + // 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For + // Sv39, PTESIZE=8.) + // 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault + // exception. + // 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5. + // Otherwise, this PTE is a pointer to the next level of the page table. + // Let i=i-1. If i < 0, stop and raise a page-fault exception. Otherwise, let + // a = pte.ppn × PAGESIZE and go to step 2. + // 5. A leaf PTE has been found. Determine if the requested memory access + // is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and + // raise a page-fault exception. Otherwise, the translation is successful. + // Set pte.a to 1, and, if the memory access is a store, set pte.d to 1. + // The translated physical address is given as follows: + // - pa.pgoff = va.pgoff. + // - If i > 0, then this is a superpage translation and + // pa.ppn[i-1:0] = va.vpn[i-1:0]. + // - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. 
+ always_comb begin : ptw + // default assignments + // PTW memory interface + tag_valid_n = 1'b0; + req_port_o.data_req = 1'b0; + req_port_o.data_be = 8'hFF; + req_port_o.data_size = 2'b11; + req_port_o.data_we = 1'b0; + ptw_error_o = 1'b0; + ptw_access_exception_o = 1'b0; + itlb_update_o.valid = 1'b0; + dtlb_update_o.valid = 1'b0; + is_instr_ptw_n = is_instr_ptw_q; + ptw_lvl_n = ptw_lvl_q; + ptw_pptr_n = ptw_pptr_q; + state_d = state_q; + global_mapping_n = global_mapping_q; + // input registers + tlb_update_asid_n = tlb_update_asid_q; + vaddr_n = vaddr_q; + + itlb_miss_o = 1'b0; + dtlb_miss_o = 1'b0; + + case (state_q) + + IDLE: begin + // by default we start with the top-most page table + ptw_lvl_n = LVL1; + global_mapping_n = 1'b0; + is_instr_ptw_n = 1'b0; + // if we got an ITLB miss + if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin + ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:30], 3'b0}; + is_instr_ptw_n = 1'b1; + tlb_update_asid_n = asid_i; + vaddr_n = itlb_vaddr_i; + state_d = WAIT_GRANT; + itlb_miss_o = 1'b1; + // we got a DTLB miss + end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin + ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:30], 3'b0}; + tlb_update_asid_n = asid_i; + vaddr_n = dtlb_vaddr_i; + state_d = WAIT_GRANT; + dtlb_miss_o = 1'b1; + end + end + + WAIT_GRANT: begin + // send a request out + req_port_o.data_req = 1'b1; + // wait for the grant + if (req_port_i.data_gnt) begin + // send the tag valid signal one cycle later + tag_valid_n = 1'b1; + state_d = PTE_LOOKUP; + end + end + + PTE_LOOKUP: begin + // we wait for the valid signal + if (data_rvalid_q) begin + + // check if the global mapping bit is set + if (pte.g) global_mapping_n = 1'b1; + + // ------------- + // Invalid PTE + // ------------- + // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception. 
+ if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR; + // ----------- + // Valid PTE + // ----------- + else begin + state_d = IDLE; + // it is a valid PTE + // if pte.r = 1 or pte.x = 1 it is a valid PTE + if (pte.r || pte.x) begin + // Valid translation found (either 1G, 2M or 4K entry) + if (is_instr_ptw_q) begin + // ------------ + // Update ITLB + // ------------ + // If page is not executable, we can directly raise an error. This + // doesn't put a useless entry into the TLB. The same idea applies + // to the access flag since we let the access flag be managed by SW. + if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR; + else itlb_update_o.valid = 1'b1; + + end else begin + // ------------ + // Update DTLB + // ------------ + // Check if the access flag has been set, otherwise throw a page-fault + // and let the software handle those bits. + // If page is not readable (there are no write-only pages) + // we can directly raise an error. This doesn't put a useless + // entry into the TLB. + if (pte.a && (pte.r || (pte.x && mxr_i))) begin + dtlb_update_o.valid = 1'b1; + end else begin + state_d = PROPAGATE_ERROR; + end + // Request is a store: perform some additional checks + // If the request was a store and the page is not write-able, raise an error + // the same applies if the dirty flag is not set + if (lsu_is_store_i && (!pte.w || !pte.d)) begin + dtlb_update_o.valid = 1'b0; + state_d = PROPAGATE_ERROR; + end + end + // check if the ppn is correctly aligned: + // 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault + // exception. 
+ if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin + state_d = PROPAGATE_ERROR; + dtlb_update_o.valid = 1'b0; + itlb_update_o.valid = 1'b0; + end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin + state_d = PROPAGATE_ERROR; + dtlb_update_o.valid = 1'b0; + itlb_update_o.valid = 1'b0; + end + // this is a pointer to the next page table level + end else begin + // pointer to next level of page table + if (ptw_lvl_q == LVL1) begin + // we are in the second level now + ptw_lvl_n = LVL2; + ptw_pptr_n = {pte.ppn, vaddr_q[29:21], 3'b0}; + end + + if (ptw_lvl_q == LVL2) begin + // here we received a pointer to the third level + ptw_lvl_n = LVL3; + ptw_pptr_n = {pte.ppn, vaddr_q[20:12], 3'b0}; + end + + state_d = WAIT_GRANT; + + if (ptw_lvl_q == LVL3) begin + // Should already be the last level page table => Error + ptw_lvl_n = LVL3; + state_d = PROPAGATE_ERROR; + end + end + end + + // Check if this access was actually allowed from a PMP perspective + if (!allow_access) begin + itlb_update_o.valid = 1'b0; + dtlb_update_o.valid = 1'b0; + // we have to return the failed address in bad_addr + ptw_pptr_n = ptw_pptr_q; + state_d = PROPAGATE_ACCESS_ERROR; + end + end + // we've got a data WAIT_GRANT so tell the cache that the tag is valid + end + // Propagate error to MMU/LSU + PROPAGATE_ERROR: begin + state_d = IDLE; + ptw_error_o = 1'b1; + end + PROPAGATE_ACCESS_ERROR: begin + state_d = IDLE; + ptw_access_exception_o = 1'b1; + end + // wait for the rvalid before going back to IDLE + WAIT_RVALID: begin + if (data_rvalid_q) state_d = IDLE; + end + default: begin + state_d = IDLE; + end + endcase + + // ------- + // Flush + // ------- + // should we have flushed before we got an rvalid, wait for it until going back to IDLE + if (flush_i) begin + // on a flush check whether we are + // 1. in the PTE Lookup check whether we still need to wait for an rvalid + // 2. 
waiting for a grant, if so: wait for it + // if not, go back to idle + if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) || + ((state_q == WAIT_GRANT) && req_port_i.data_gnt)) + state_d = WAIT_RVALID; + else state_d = IDLE; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + is_instr_ptw_q <= 1'b0; + ptw_lvl_q <= LVL1; + tag_valid_q <= 1'b0; + tlb_update_asid_q <= '0; + vaddr_q <= '0; + ptw_pptr_q <= '0; + global_mapping_q <= 1'b0; + data_rdata_q <= '0; + data_rvalid_q <= 1'b0; + end else begin + state_q <= state_d; + ptw_pptr_q <= ptw_pptr_n; + is_instr_ptw_q <= is_instr_ptw_n; + ptw_lvl_q <= ptw_lvl_n; + tag_valid_q <= tag_valid_n; + tlb_update_asid_q <= tlb_update_asid_n; + vaddr_q <= vaddr_n; + global_mapping_q <= global_mapping_n; + data_rdata_q <= req_port_i.data_rdata; + data_rvalid_q <= req_port_i.data_rvalid; + end + end + +endmodule +/* verilator lint_on WIDTH */ diff --git a/test/type_param/core/mmu_sv39/tlb.sv b/test/type_param/core/mmu_sv39/tlb.sv new file mode 100644 index 0000000..3df2cb0 --- /dev/null +++ b/test/type_param/core/mmu_sv39/tlb.sv @@ -0,0 +1,290 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: David Schaffenrath, TU Graz +// Author: Florian Zaruba, ETH Zurich +// Date: 21.4.2017 +// Description: Translation Lookaside Buffer, SV39 +// fully set-associative + + +module tlb + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned TLB_ENTRIES = 4, + parameter int unsigned ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // Flush signal + // Update TLB + input tlb_update_t update_i, + // Lookup signals + input logic lu_access_i, + input logic [ ASID_WIDTH-1:0] lu_asid_i, + input logic [riscv::VLEN-1:0] lu_vaddr_i, + output riscv::pte_t lu_content_o, + input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + output logic lu_is_2M_o, + output logic lu_is_1G_o, + output logic lu_hit_o +); + + // SV39 defines three levels of page tables + struct packed { + logic [ASID_WIDTH-1:0] asid; + logic [riscv::VPN2:0] vpn2; + logic [8:0] vpn1; + logic [8:0] vpn0; + logic is_2M; + logic is_1G; + logic valid; + } [TLB_ENTRIES-1:0] + tags_q, tags_n; + + riscv::pte_t [TLB_ENTRIES-1:0] content_q, content_n; + logic [8:0] vpn0, vpn1; + logic [ riscv::VPN2:0] vpn2; + logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic + logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy + //------------- + // Translation + //------------- + always_comb begin : translation + vpn0 = lu_vaddr_i[20:12]; + vpn1 = lu_vaddr_i[29:21]; + vpn2 = lu_vaddr_i[30+riscv::VPN2:30]; + + // default assignment + lu_hit = '{default: 0}; + lu_hit_o = 1'b0; + lu_content_o = '{default: 0}; + lu_is_1G_o = 1'b0; + lu_is_2M_o = 1'b0; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + // first level match, this may be a giga page, check the ASID flags as well + // if the entry is associated to a global address, don't match the ASID (ASID is don't care) + 
if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid) || content_q[i].g) && vpn2 == tags_q[i].vpn2) begin + // second level + if (tags_q[i].is_1G) begin + lu_is_1G_o = 1'b1; + lu_content_o = content_q[i]; + lu_hit_o = 1'b1; + lu_hit[i] = 1'b1; + // not a giga page hit so check further + end else if (vpn1 == tags_q[i].vpn1) begin + // this could be a 2 mega page hit or a 4 kB hit + // output accordingly + if (tags_q[i].is_2M || vpn0 == tags_q[i].vpn0) begin + lu_is_2M_o = tags_q[i].is_2M; + lu_content_o = content_q[i]; + lu_hit_o = 1'b1; + lu_hit[i] = 1'b1; + end + end + end + end + end + + + + logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2) is 0, active high + logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1) is 0, active high + logic [TLB_ENTRIES-1:0] vaddr_vpn0_match; + logic [TLB_ENTRIES-1:0] vaddr_vpn1_match; + logic [TLB_ENTRIES-1:0] vaddr_vpn2_match; + + assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i); + assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i); + + // ------------------ + // Update and Flush + // ------------------ + always_comb begin : update_flush + tags_n = tags_q; + content_n = content_q; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + + vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[20:12] == tags_q[i].vpn0); + vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[29:21] == tags_q[i].vpn1); + vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+riscv::VPN2:30] == tags_q[i].vpn2); + + if (flush_i) begin + // invalidate logic + // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case) + if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0; + // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages + else if (asid_to_be_flushed_is0 && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && 
vaddr_vpn2_match[i] && tags_q[i].is_2M) ) && (~vaddr_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case) + else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case) + else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // normal replacement + end else if (update_i.valid & replace_en[i]) begin + // update tag array + tags_n[i] = '{ + asid: update_i.asid, + vpn2: update_i.vpn[18+riscv::VPN2:18], + vpn1: update_i.vpn[17:9], + vpn0: update_i.vpn[8:0], + is_1G: update_i.is_1G, + is_2M: update_i.is_2M, + valid: 1'b1 + }; + // and content as well + content_n[i] = update_i.content; + end + end + end + + // ----------------------------------------------- + // PLRU - Pseudo Least Recently Used Replacement + // ----------------------------------------------- + logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n; + always_comb begin : plru_replacement + plru_tree_n = plru_tree_q; + // The PLRU-tree indexing: + // lvl0 0 + // / \ + // / \ + // lvl1 1 2 + // / \ / \ + // lvl2 3 4 5 6 + // / \ /\/\ /\ + // ... ... ... ... + // Just predefine which nodes will be set/cleared + // E.g. 
for a TLB with 8 entries, the for-loop is semantically + // equivalent to the following pseudo-code: + // unique case (1'b1) + // lu_hit[7]: plru_tree_n[0, 2, 6] = {1, 1, 1}; + // lu_hit[6]: plru_tree_n[0, 2, 6] = {1, 1, 0}; + // lu_hit[5]: plru_tree_n[0, 2, 5] = {1, 0, 1}; + // lu_hit[4]: plru_tree_n[0, 2, 5] = {1, 0, 0}; + // lu_hit[3]: plru_tree_n[0, 1, 4] = {0, 1, 1}; + // lu_hit[2]: plru_tree_n[0, 1, 4] = {0, 1, 0}; + // lu_hit[1]: plru_tree_n[0, 1, 3] = {0, 0, 1}; + // lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0}; + // default: begin /* No hit */ end + // endcase + for ( + int unsigned i = 0; i < TLB_ENTRIES; i++ + ) begin + automatic int unsigned idx_base, shift, new_index; + // we got a hit so point the tree away from this entry, as it is now the most recently used one + if (lu_hit[i] & lu_access_i) begin + // Set the nodes to the values we would expect + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... + shift = $clog2(TLB_ENTRIES) - lvl; + // to circumvent the 32 bit integer arithmetic assignment + new_index = ~((i >> (shift - 1)) & 32'b1); + plru_tree_n[idx_base+(i>>shift)] = new_index[0]; + end + end + end + // Decode tree to write enable signals + // Next for-loop basically creates the following logic for e.g. 
an 8 entry + // TLB (note: pseudo-code obviously): + // replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1} + // replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0} + // replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1} + // replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0} + // replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1} + // replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0} + // replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1} + // replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0} + // For each entry traverse the tree. If every tree-node matches + // the corresponding bit of the entry's index, this is + // the next entry to replace. + for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin + automatic logic en; + automatic int unsigned idx_base, shift, new_index; + en = 1'b1; + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... 
+ shift = $clog2(TLB_ENTRIES) - lvl; + + // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1); + new_index = (i >> (shift - 1)) & 32'b1; + if (new_index[0]) begin + en &= plru_tree_q[idx_base+(i>>shift)]; + end else begin + en &= ~plru_tree_q[idx_base+(i>>shift)]; + end + end + replace_en[i] = en; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + tags_q <= '{default: 0}; + content_q <= '{default: 0}; + plru_tree_q <= '{default: 0}; + end else begin + tags_q <= tags_n; + content_q <= content_n; + plru_tree_q <= plru_tree_n; + end + end + //-------------- + // Sanity checks + //-------------- + + //pragma translate_off +`ifndef VERILATOR + + initial begin : p_assertions + assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1)) + else begin + $error("TLB size must be a multiple of 2 and greater than 1"); + $stop(); + end + assert (ASID_WIDTH >= 1) + else begin + $error("ASID width must be at least 1"); + $stop(); + end + end + + // Just for checking + function int countSetBits(logic [TLB_ENTRIES-1:0] vector); + automatic int count = 0; + foreach (vector[idx]) begin + count += vector[idx]; + end + return count; + endfunction + + assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1)) + else begin + $error("More then one hit in TLB!"); + $stop(); + end + assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1)) + else begin + $error("More then one TLB entry selected for next replace!"); + $stop(); + end + +`endif + //pragma translate_on + +endmodule diff --git a/test/type_param/core/mult.sv b/test/type_param/core/mult.sv new file mode 100644 index 0000000..7270389 --- /dev/null +++ b/test/type_param/core/mult.sv @@ -0,0 +1,149 @@ + + +module mult + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input fu_data_t fu_data_i, + input logic mult_valid_i, + 
output riscv::xlen_t result_o, + output logic mult_valid_o, + output logic mult_ready_o, + output logic [TRANS_ID_BITS-1:0] mult_trans_id_o +); + logic mul_valid; + logic div_valid; + logic div_ready_i; // receiver of division result is able to accept the result + logic [TRANS_ID_BITS-1:0] mul_trans_id; + logic [TRANS_ID_BITS-1:0] div_trans_id; + riscv::xlen_t mul_result; + riscv::xlen_t div_result; + + logic div_valid_op; + logic mul_valid_op; + // Input Arbitration + + assign mul_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR }); + + assign div_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW }); + + // --------------------- + // Output Arbitration + // --------------------- + // we give precedence to multiplication as the divider supports stalling and the multiplier is + // just a dumb pipelined multiplier + assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1; + assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id; + assign result_o = (mul_valid) ? 
mul_result : div_result; + assign mult_valid_o = div_valid | mul_valid; + // mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests + + // --------------------- + // Multiplication + // --------------------- + multiplier #( + .CVA6Cfg(CVA6Cfg) + ) i_multiplier ( + .clk_i, + .rst_ni, + .trans_id_i (fu_data_i.trans_id), + .operation_i (fu_data_i.operation), + .operand_a_i (fu_data_i.operand_a), + .operand_b_i (fu_data_i.operand_b), + .result_o (mul_result), + .mult_valid_i (mul_valid_op), + .mult_valid_o (mul_valid), + .mult_trans_id_o(mul_trans_id), + .mult_ready_o () // this unit is unconditionally ready + ); + + // --------------------- + // Division + // --------------------- + riscv::xlen_t + operand_b, + operand_a; // input operands after input MUX (input silencing, word operations or full inputs) + riscv::xlen_t result; // result before result mux + + logic div_signed; // signed or unsigned division + logic rem; // is it a remainder (or not a remainder e.g.: a division) + logic word_op_d, word_op_q; // save whether the operation was a word operation (result gets sign extended from 32 bit) + + // is this a signed op? + assign div_signed = fu_data_i.operation inside {DIV, DIVW, REM, REMW}; + // is this a modulo? + assign rem = fu_data_i.operation inside {REM, REMU, REMW, REMUW}; + + // prepare the input operands and control divider + always_comb begin + // silence the inputs + operand_a = '0; + operand_b = '0; + // control signals + word_op_d = word_op_q; + + // we've got a new division operation + if (mult_valid_i && fu_data_i.operation inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin + // is this a word operation? 
+ if (riscv::IS_XLEN64 && (fu_data_i.operation == DIVW || fu_data_i.operation == DIVUW || fu_data_i.operation == REMW || fu_data_i.operation == REMUW)) begin + // yes so check if we should sign extend this is only done for a signed operation + if (div_signed) begin + operand_a = sext32(fu_data_i.operand_a[31:0]); + operand_b = sext32(fu_data_i.operand_b[31:0]); + end else begin + operand_a = fu_data_i.operand_a[31:0]; + operand_b = fu_data_i.operand_b[31:0]; + end + + // save whether we want sign extend the result or not, this is done for all word operations + word_op_d = 1'b1; + end else begin + // regular op + operand_a = fu_data_i.operand_a; + operand_b = fu_data_i.operand_b; + word_op_d = 1'b0; + end + end + end + + // --------------------- + // Serial Divider + // --------------------- + serdiv #( + .CVA6Cfg(CVA6Cfg), + .WIDTH (riscv::XLEN) + ) i_div ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .id_i (fu_data_i.trans_id), + .op_a_i (operand_a), + .op_b_i (operand_b), + .opcode_i ({rem, div_signed}), // 00: udiv, 10: urem, 01: div, 11: rem + .in_vld_i (div_valid_op), + .in_rdy_o (mult_ready_o), + .flush_i (flush_i), + .out_vld_o(div_valid), + .out_rdy_i(div_ready_i), + .id_o (div_trans_id), + .res_o (result) + ); + + // Result multiplexer + // if it was a word operation (signed or unsigned) the bit will be set and the result will be sign extended accordingly + assign div_result = (riscv::IS_XLEN64 && word_op_q) ? sext32(result) : result; + + // --------------------- + // Registers + // --------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + word_op_q <= '0; + end else begin + word_op_q <= word_op_d; + end + end +endmodule diff --git a/test/type_param/core/multiplier.sv b/test/type_param/core/multiplier.sv new file mode 100644 index 0000000..e13d614 --- /dev/null +++ b/test/type_param/core/multiplier.sv @@ -0,0 +1,158 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba
+//
+// Description: Multiplication Unit with one pipeline register
+// This unit relies on retiming features of the synthesizer
+// The carry-less multiplier (CLMUL/CLMULH/CLMULR) is only generated when CVA6Cfg.RVB is set
+
+
+module multiplier
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic                     clk_i,           // clock, registers update on the rising edge
+    input  logic                     rst_ni,          // asynchronous reset, active low
+    input  logic [TRANS_ID_BITS-1:0] trans_id_i,      // transaction ID travelling with the operation
+    input  logic                     mult_valid_i,    // an operation is presented this cycle
+    input  fu_op                     operation_i,     // operation to execute
+    input  riscv::xlen_t             operand_a_i,
+    input  riscv::xlen_t             operand_b_i,
+    output riscv::xlen_t             result_o,        // selected by the registered operation
+    output logic                     mult_valid_o,    // registered valid, one cycle after the input
+    output logic                     mult_ready_o,    // constant 1: the unit never back-pressures
+    output logic [TRANS_ID_BITS-1:0] mult_trans_id_o  // registered copy of trans_id_i
+);
+  // Carry-less multiplication
+  logic [riscv::XLEN-1:0]
+      clmul_q, clmul_d, clmulr_q, clmulr_d, operand_a, operand_b, operand_a_rev, operand_b_rev;
+  logic clmul_rmode, clmul_hmode;
+
+  if (CVA6Cfg.RVB) begin : gen_bitmanip
+    // checking for clmul_rmode and clmul_hmode
+    assign clmul_rmode = (operation_i == CLMULR);
+    assign clmul_hmode = (operation_i == CLMULH);
+
+    // operand_a and b reverse generator
+    for (genvar i = 0; i < riscv::XLEN; i++) begin
+      assign operand_a_rev[i] = operand_a_i[(riscv::XLEN-1)-i];
+      assign operand_b_rev[i] = operand_b_i[(riscv::XLEN-1)-i];
+    end
+
+    // operand_a and operand_b selection
+    assign operand_a = (clmul_rmode | clmul_hmode) ? operand_a_rev : operand_a_i;
+    assign operand_b = (clmul_rmode | clmul_hmode) ? operand_b_rev : operand_b_i;
+
+    // implementation: XOR-accumulate operand_a shifted by every set bit position of operand_b
+    always_comb begin
+      clmul_d = '0;
+      for (int i = 0; i < riscv::XLEN; i++) begin  // operand_b only has bits 0 .. XLEN-1
+        clmul_d = (|((operand_b >> i) & 1)) ? clmul_d ^ (operand_a << i) : clmul_d;
+      end
+    end
+
+    // clmulr + clmulh result generator
+    for (genvar i = 0; i < riscv::XLEN; i++) begin
+      assign clmulr_d[i] = clmul_d[(riscv::XLEN-1)-i];
+    end
+  end
+
+  // Pipeline register
+  logic [TRANS_ID_BITS-1:0] trans_id_q;
+  logic mult_valid_q;
+  fu_op operator_d, operator_q;
+  logic [riscv::XLEN*2-1:0] mult_result_d, mult_result_q;
+
+  // control registers
+  logic sign_a, sign_b;
+  logic mult_valid;
+
+  // control signals
+  assign mult_valid_o = mult_valid_q;
+  assign mult_trans_id_o = trans_id_q;
+  assign mult_ready_o = 1'b1;
+
+  assign mult_valid = mult_valid_i && (operation_i inside {MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR});
+
+  // Sign Select MUX
+  always_comb begin
+    sign_a = 1'b0;  // default: both operands are treated as unsigned
+    sign_b = 1'b0;
+
+    // signed multiplication
+    if (operation_i == MULH) begin
+      sign_a = 1'b1;
+      sign_b = 1'b1;
+      // signed - unsigned multiplication
+    end else if (operation_i == MULHSU) begin
+      sign_a = 1'b1;
+    end
+  end
+
+
+  // single stage version
+  assign mult_result_d = $signed(
+      {operand_a_i[riscv::XLEN-1] & sign_a, operand_a_i}
+  ) * $signed(
+      {operand_b_i[riscv::XLEN-1] & sign_b, operand_b_i}
+  );
+
+
+  assign operator_d = operation_i;
+
+  always_comb begin : p_selmux
+    unique case (operator_q)
+      MULH, MULHU, MULHSU: result_o = mult_result_q[riscv::XLEN*2-1:riscv::XLEN];
+      CLMUL: result_o = clmul_q;
+      CLMULH: result_o = clmulr_q >> 1;  // bit-reversed product of the reversed operands, shifted by one
+      CLMULR: result_o = clmulr_q;
+      // MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
+      default: begin
+        if (operator_q == MULW && riscv::IS_XLEN64) result_o = sext32(mult_result_q[31:0]);
+        else result_o = mult_result_q[riscv::XLEN-1:0];  // including MUL
+      end
+    endcase
+  end
+  if (CVA6Cfg.RVB) begin
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        clmul_q  <= '0;
+        clmulr_q <= '0;
+      end else begin
+        clmul_q  <= clmul_d;
+        clmulr_q <= clmulr_d;
+      end
+    end
+  end else begin
+    // no bitmanip logic is generated: tie off the registers that p_selmux still reads
+    assign clmul_q  = '0;
+    assign clmulr_q = '0;
+  end
+  // -----------------------
+  // Output pipeline register
+  // -----------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      mult_valid_q  <= '0;
+      trans_id_q    <= '0;
+      operator_q    <= MUL;
+      mult_result_q <= '0;
+    end else begin
+      // Input silencing
+      trans_id_q    <= trans_id_i;
+      // Output Register
+      mult_valid_q  <= mult_valid;
+      operator_q    <= operator_d;
+      mult_result_q <= mult_result_d;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/perf_counters.sv b/test/type_param/core/perf_counters.sv
new file mode 100644
index 0000000..ff6d0d1
--- /dev/null
+++ b/test/type_param/core/perf_counters.sv
@@ -0,0 +1,226 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 06.10.2017
+// Description: Performance counters: six 64-bit event counters (mhpmcounter3..8), each with its own event selector (mhpmevent3..8)
+
+
+module perf_counters
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg  = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           NumPorts = 3                            // number of miss ports
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic debug_mode_i,  // debug mode
+    // SRAM like interface
+    input logic [11:0] addr_i,  // read/write address (up to 6 counters possible)
+    input logic we_i,  // write enable
+    input riscv::xlen_t data_i,  // data to write
+    output riscv::xlen_t data_o,  // data to read
+    // from commit stage
+    input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,  // the instruction we want to commit
+    input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,  // acknowledge that we are indeed committing
+    // from L1 caches
+    input logic l1_icache_miss_i,
+    input logic l1_dcache_miss_i,
+    // from MMU
+    input logic itlb_miss_i,
+    input logic dtlb_miss_i,
+    // from issue stage
+    input logic sb_full_i,
+    // from frontend
+    input logic if_empty_i,
+    // from PC Gen
+    input exception_t ex_i,
+    input logic eret_i,
+    input bp_resolve_t resolved_branch_i,
+    // for newly added events
+    input exception_t branch_exceptions_i,  //Branch exceptions->execute unit-> branch_exception_o
+    input icache_dreq_t l1_icache_access_i,
+    input dcache_req_i_t [2:0] l1_dcache_access_i,
+    input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,  //For Cache eviction (3ports-LOAD,STORE,PTW)
+    input logic i_tlb_flush_i,
+    input logic stall_issue_i,  //stall-read operands
+    input logic [31:0] mcountinhibit_i
+);
+
+  logic [63:0] generic_counter_d[6:1];  // next counter values, index 1..6
+  logic [63:0] generic_counter_q[6:1];  // registered counter values
+
+  //internal signal to keep track of exception
+  logic read_access_exception, update_access_exception;
+
+  logic events[6:1];  // per counter: the selected event is active this cycle
+  //internal signal for MUX select line input
+  logic [4:0] mhpmevent_d[6:1];
+  logic [4:0] mhpmevent_q[6:1];
+  // internal signal to detect event on multiple commit ports
+  logic [CVA6Cfg.NrCommitPorts-1:0] load_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] store_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] branch_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] call_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] return_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] int_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] fp_event;
+
+  //Multiplexer
+  always_comb begin : Mux
+    events[6:1] = '{default: 0};
+    load_event = '{default: 0};
+    store_event = '{default: 0};
+    branch_event = '{default: 0};
+    call_event = '{default: 0};
+    return_event = '{default: 0};
+    int_event = '{default: 0};
+    fp_event = '{default: 0};
+
+    for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
+      load_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == LOAD);
+      store_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == STORE);
+      branch_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == CTRL_FLOW);
+      call_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == CTRL_FLOW && (commit_instr_i[j].op == ADD || commit_instr_i[j].op == JALR) && (commit_instr_i[j].rd == 'd1 || commit_instr_i[j].rd == 'd5));
+      return_event[j] = commit_ack_i[j] & (commit_instr_i[j].op == JALR && commit_instr_i[j].rd == 'd0);
+      int_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == ALU || commit_instr_i[j].fu == MULT);
+      fp_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == FPU || commit_instr_i[j].fu == FPU_VEC);
+    end
+
+    for (int unsigned i = 1; i <= 6; i++) begin
+      case (mhpmevent_q[i])
+        5'b00000: events[i] = 0;
+        5'b00001: events[i] = l1_icache_miss_i;  //L1 I-Cache misses
+        5'b00010: events[i] = l1_dcache_miss_i;  //L1 D-Cache misses
+        5'b00011: events[i] = itlb_miss_i;  //ITLB misses
+        5'b00100: events[i] = dtlb_miss_i;  //DTLB misses
+        5'b00101: events[i] = |load_event;  //Load accesses
+        5'b00110: events[i] = |store_event;  //Store accesses
+        5'b00111: events[i] = ex_i.valid;  //Exceptions
+        5'b01000: events[i] = eret_i;  //Exception handler returns
+        5'b01001: events[i] = |branch_event;  // Branch instructions
+        5'b01010:
+        events[i] = resolved_branch_i.valid && resolved_branch_i.is_mispredict;  //Branch mispredicts
+        5'b01011: events[i] = branch_exceptions_i.valid;  //Branch exceptions
+        // The standard software calling convention uses register x1 to hold the return address on a call
+        // the unconditional jump is decoded as ADD op
+        5'b01100: events[i] = |call_event;  //Call
+        5'b01101: events[i] = |return_event;  //Return
+        5'b01110: events[i] = sb_full_i;  //MSB Full
+        5'b01111: events[i] = if_empty_i;  //Instruction fetch Empty
+        5'b10000: events[i] = l1_icache_access_i.req;  //L1 I-Cache accesses
+        5'b10001:
+        events[i] = l1_dcache_access_i[0].data_req || l1_dcache_access_i[1].data_req || l1_dcache_access_i[2].data_req;  //L1 D-Cache accesses
+        5'b10010:
+        events[i] = l1_dcache_miss_i && ((&miss_vld_bits_i[0]) || (&miss_vld_bits_i[1]) || (&miss_vld_bits_i[2]));  //eviction: miss while all DCACHE_SET_ASSOC ways are valid (reduction-AND instead of a fixed 8'hFF)
+        5'b10011: events[i] = i_tlb_flush_i;  //I-TLB flush
+        5'b10100: events[i] = |int_event;  //Integer instructions
+        5'b10101: events[i] = |fp_event;  //Floating Point Instructions
+        5'b10110: events[i] = stall_issue_i;  //Pipeline bubbles
+        default: events[i] = 0;
+      endcase
+    end
+
+  end
+
+  always_comb begin : generic_counter
+    generic_counter_d = generic_counter_q;
+    data_o = 'b0;
+    mhpmevent_d = mhpmevent_q;
+    read_access_exception = 1'b0;
+    update_access_exception = 1'b0;
+
+    // Increment the non-inhibited counters with active events
+    for (int unsigned i = 1; i <= 6; i++) begin
+      if ((!debug_mode_i) && (!we_i)) begin
+        if ((events[i]) == 1 && (!mcountinhibit_i[i+2])) begin  // counter i is mhpmcounter(i+2), hence inhibit bit i+2
+          generic_counter_d[i] = generic_counter_q[i] + 1'b1;
+        end
+      end
+    end
+
+    //Read
+    unique case (addr_i)
+      riscv::CSR_MHPM_COUNTER_3,
+      riscv::CSR_MHPM_COUNTER_4,
+      riscv::CSR_MHPM_COUNTER_5,
+      riscv::CSR_MHPM_COUNTER_6,
+      riscv::CSR_MHPM_COUNTER_7,
+      riscv::CSR_MHPM_COUNTER_8 :begin
+        if (riscv::XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0];
+        else data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1];
+      end
+      riscv::CSR_MHPM_COUNTER_3H,
+      riscv::CSR_MHPM_COUNTER_4H,
+      riscv::CSR_MHPM_COUNTER_5H,
+      riscv::CSR_MHPM_COUNTER_6H,
+      riscv::CSR_MHPM_COUNTER_7H,
+      riscv::CSR_MHPM_COUNTER_8H :begin
+        if (riscv::XLEN == 32)
+          data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32];
+        else read_access_exception = 1'b1;  // the upper-half CSRs are only accessible when XLEN == 32
+      end
+      riscv::CSR_MHPM_EVENT_3,
+      riscv::CSR_MHPM_EVENT_4,
+      riscv::CSR_MHPM_EVENT_5,
+      riscv::CSR_MHPM_EVENT_6,
+      riscv::CSR_MHPM_EVENT_7,
+      riscv::CSR_MHPM_EVENT_8 :
+      data_o = mhpmevent_q[addr_i-riscv::CSR_MHPM_EVENT_3+1];
+      default: data_o = 'b0;
+    endcase
+
+    //Write
+    if (we_i) begin
+      unique case (addr_i)
+        riscv::CSR_MHPM_COUNTER_3,
+        riscv::CSR_MHPM_COUNTER_4,
+        riscv::CSR_MHPM_COUNTER_5,
+        riscv::CSR_MHPM_COUNTER_6,
+        riscv::CSR_MHPM_COUNTER_7,
+        riscv::CSR_MHPM_COUNTER_8 :begin
+          if (riscv::XLEN == 32)
+            generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0] = data_i;
+          else generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1] = data_i;
+        end
+        riscv::CSR_MHPM_COUNTER_3H,
+        riscv::CSR_MHPM_COUNTER_4H,
+        riscv::CSR_MHPM_COUNTER_5H,
+        riscv::CSR_MHPM_COUNTER_6H,
+        riscv::CSR_MHPM_COUNTER_7H,
+        riscv::CSR_MHPM_COUNTER_8H :begin
+          if (riscv::XLEN == 32)
+            generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32] = data_i;
+          else update_access_exception = 1'b1;
+        end
+        riscv::CSR_MHPM_EVENT_3,
+        riscv::CSR_MHPM_EVENT_4,
+        riscv::CSR_MHPM_EVENT_5,
+        riscv::CSR_MHPM_EVENT_6,
+        riscv::CSR_MHPM_EVENT_7,
+        riscv::CSR_MHPM_EVENT_8 :
+        mhpmevent_d[addr_i-riscv::CSR_MHPM_EVENT_3+1] = data_i;
+        default: update_access_exception = 1'b1;
+      endcase
+    end
+  end
+
+  //Registers
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      generic_counter_q <= '{default: 0};
+      mhpmevent_q       <= '{default: 0};
+    end else begin
+      generic_counter_q <= generic_counter_d;
+      mhpmevent_q       <= mhpmevent_d;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/pmp/src/pmp.sv b/test/type_param/core/pmp/src/pmp.sv
new file mode 100644
index 0000000..a3adbb9
--- /dev/null
+++ b/test/type_param/core/pmp/src/pmp.sv
@@ -0,0 +1,94 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Moritz Schneider, ETH Zurich
+// Date: 2.10.2019
+// Description: purely combinatorial PMP unit (with extraction for more complex configs such as NAPOT)
+
+module pmp #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg    = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           PLEN       = 34,                          // rv64: 56
+    parameter int unsigned           PMP_LEN    = 32,                          // rv64: 54
+    parameter int unsigned           NR_ENTRIES = 4                            // entries 0 .. NR_ENTRIES-1 are evaluated
+) (
+    // Input
+    input logic [PLEN-1:0] addr_i,
+    input riscv::pmp_access_t access_type_i,
+    input riscv::priv_lvl_t priv_lvl_i,
+    // Configuration
+    input logic [15:0][PMP_LEN-1:0] conf_addr_i,  // always 16 slots wide, regardless of NR_ENTRIES
+    input riscv::pmpcfg_t [15:0] conf_i,
+    // Output
+    output logic allow_o  // 1: the access is granted
+);
+  // if there are no PMPs we can always grant the access.
+  if (NR_ENTRIES > 0) begin : gen_pmp
+    logic [NR_ENTRIES-1:0] match;
+
+    for (genvar i = 0; i < NR_ENTRIES; i++) begin
+      logic [PMP_LEN-1:0] conf_addr_prev;
+
+      assign conf_addr_prev = (i == 0) ? '0 : conf_addr_i[i-1];  // entry 0 uses address 0 as its lower bound
+
+      pmp_entry #(
+          .CVA6Cfg(CVA6Cfg),
+          .PLEN   (PLEN),
+          .PMP_LEN(PMP_LEN)
+      ) i_pmp_entry (
+          .addr_i          (addr_i),
+          .conf_addr_i     (conf_addr_i[i]),
+          .conf_addr_prev_i(conf_addr_prev),
+          .conf_addr_mode_i(conf_i[i].addr_mode),
+          .match_o         (match[i])
+      );
+    end
+
+    always_comb begin
+      int i;
+
+      allow_o = 1'b0;
+      for (i = 0; i < NR_ENTRIES; i++) begin
+        // either we are in S or U mode or the config is locked in which
+        // case it also applies in M mode
+        if (priv_lvl_i != riscv::PRIV_LVL_M || conf_i[i].locked) begin
+          if (match[i]) begin
+            if ((access_type_i & conf_i[i].access_type) != access_type_i) allow_o = 1'b0;
+            else allow_o = 1'b1;
+            break;  // the lowest-numbered applicable matching entry decides
+          end
+        end
+      end
+      if (i == NR_ENTRIES) begin  // no PMP entry matched the address
+        // allow all accesses from M-mode for no pmp match
+        if (priv_lvl_i == riscv::PRIV_LVL_M) allow_o = 1'b1;
+        // disallow accesses for all other modes
+        else
+          allow_o = 1'b0;
+      end
+    end
+  end else assign allow_o = 1'b1;
+
+  // synthesis translate_off
+  always_comb begin
+    logic no_locked;
+    no_locked = 1'b0;
+    if (priv_lvl_i == riscv::PRIV_LVL_M) begin
+      no_locked = 1'b1;
+      for (int i = 0; i < NR_ENTRIES; i++) begin
+        if (conf_i[i].locked && conf_i[i].addr_mode != riscv::OFF) begin
+          no_locked &= 1'b0;
+        end else no_locked &= 1'b1;
+      end
+      if (no_locked == 1'b1) assert (allow_o == 1'b1);  // M-mode with no active locked entry must be allowed
+    end
+  end
+  // synthesis translate_on
+
+endmodule
diff --git a/test/type_param/core/pmp/src/pmp_entry.sv b/test/type_param/core/pmp/src/pmp_entry.sv
new file mode 100644
index 0000000..667ae18
--- /dev/null
+++ b/test/type_param/core/pmp/src/pmp_entry.sv
@@ -0,0 +1,125 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Moritz Schneider, ETH Zurich
+// Date: 2.10.2019
+// Description: single PMP entry: matches addr_i against one address/mode pair (OFF, TOR, NA4, NAPOT)
+
+module pmp_entry #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           PLEN    = 56,
+    parameter int unsigned           PMP_LEN = 54
+) (
+    // Input
+    input logic [PLEN-1:0] addr_i,
+
+    // Configuration
+    input logic [PMP_LEN-1:0] conf_addr_i,
+    input logic [PMP_LEN-1:0] conf_addr_prev_i,  // lower bound used in TOR mode
+    input riscv::pmp_addr_mode_t conf_addr_mode_i,
+
+    // Output
+    output logic match_o  // 1: addr_i lies inside the region described by this entry
+);
+  logic [PLEN-1:0] conf_addr_n;  // inverted conf_addr_i, padded with two ones at the top
+  logic [$clog2(PLEN)-1:0] trail_ones;
+  logic [PLEN-1:0] base;  // NA4/NAPOT: region base address
+  logic [PLEN-1:0] mask;  // NA4/NAPOT: all ones above bit size-1
+  int unsigned size;  // NA4/NAPOT: number of low address bits ignored by the comparison
+  assign conf_addr_n = {2'b11, ~conf_addr_i};
+  lzc #(
+      .WIDTH(PLEN),
+      .MODE (1'b0)  // presumably selects trailing-zero counting -- TODO confirm against lzc
+  ) i_lzc (
+      .in_i   (conf_addr_n),
+      .cnt_o  (trail_ones),
+      .empty_o()
+  );
+
+  always_comb begin
+    case (conf_addr_mode_i)
+      riscv::TOR: begin
+        base = '0;
+        mask = '0;
+        size = '0;
+        // check that the requested address is in between the two
+        // configuration addresses
+        if (addr_i >= ({2'b0, conf_addr_prev_i} << 2) && addr_i < ({2'b0, conf_addr_i} << 2)) begin
+          match_o = 1'b1;
+        end else match_o = 1'b0;
+
+        // synthesis translate_off
+        if (match_o == 0) begin
+          assert (addr_i >= ({2'b0, conf_addr_i} << 2) || addr_i < ({2'b0, conf_addr_prev_i} << 2));
+        end else begin
+          assert (addr_i < ({2'b0, conf_addr_i} << 2) && addr_i >= ({2'b0, conf_addr_prev_i} << 2));
+        end
+        // synthesis translate_on
+
+      end
+      riscv::NA4, riscv::NAPOT: begin
+
+        if (conf_addr_mode_i == riscv::NA4) size = 2;
+        else begin
+          // use the extracted trailing ones
+          size = {{(32 - $clog2(PLEN)) {1'b0}}, trail_ones} + 3;
+        end
+
+        mask = '1 << size;
+        base = ({2'b0, conf_addr_i} << 2) & mask;
+        match_o = (addr_i & mask) == base ? 1'b1 : 1'b0;
+
+        // synthesis translate_off
+        // size extract checks
+        assert (size >= 2);
+        if (conf_addr_mode_i == riscv::NAPOT) begin
+          assert (size > 2);
+          if (size < PMP_LEN) assert (conf_addr_i[size-3] == 0);
+          for (int i = 0; i < PMP_LEN; i++) begin
+            if (size > 3 && i <= size - 4) begin
+              assert (conf_addr_i[i] == 1);  // check that all the rest are ones
+            end
+          end
+        end
+
+        if (size < PLEN - 1) begin
+          if (base + 2 ** size > base) begin  // check for overflow
+            if (match_o == 0) begin
+              assert (addr_i >= base + 2 ** size || addr_i < base);
+            end else begin
+              assert (addr_i < base + 2 ** size && addr_i >= base);
+            end
+          end else begin
+            if (match_o == 0) begin
+              assert (addr_i - 2 ** size >= base || addr_i < base);
+            end else begin
+              assert (addr_i - 2 ** size < base && addr_i >= base);
+            end
+          end
+        end
+        // synthesis translate_on
+
+      end
+      riscv::OFF: begin
+        match_o = 1'b0;
+        base = '0;
+        mask = '0;
+        size = '0;
+      end
+      default: begin
+        match_o = 0;
+        base = '0;
+        mask = '0;
+        size = '0;
+      end
+    endcase
+  end
+
+endmodule
diff --git a/test/type_param/core/scoreboard.sv b/test/type_param/core/scoreboard.sv
new file mode 100644
index 0000000..5ea29cd
--- /dev/null
+++ b/test/type_param/core/scoreboard.sv
@@ -0,0 +1,452 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.04.2017 +// Description: Scoreboard - keeps track of all decoded, issued and committed instructions + +module scoreboard #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type rs3_len_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + output logic sb_full_o, + input logic flush_unissued_instr_i, // flush only un-issued instructions + input logic flush_i, // flush whole scoreboard + input logic unresolved_branch_i, // we have an unresolved branch + // list of clobbered registers to issue stage + output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o, + output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o, + + // regfile like interface to operand read stage + input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i, + output riscv::xlen_t rs1_o, + output logic rs1_valid_o, + + input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i, + output riscv::xlen_t rs2_o, + output logic rs2_valid_o, + + input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i, + output rs3_len_t rs3_o, + output logic rs3_valid_o, + + // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer + output ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o, + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, + + // instruction to put on top of scoreboard e.g.: top pointer + // we can always put this instruction to the top unless we signal with asserted full_o + input ariane_pkg::scoreboard_entry_t decoded_instr_i, + input logic decoded_instr_valid_i, + output logic decoded_instr_ack_o, + + // instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer + output ariane_pkg::scoreboard_entry_t issue_instr_o, + output logic 
issue_instr_valid_o, + input logic issue_ack_i, + + // write-back port + input ariane_pkg::bp_resolve_t resolved_branch_i, + input logic [CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back + input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, // write data in + input ariane_pkg::exception_t [CVA6Cfg.NrWbPorts-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception) + input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, // data in is valid + input logic x_we_i, // cvxif we for writeback + + // RVFI + output logic [ariane_pkg::TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, + output logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] rvfi_commit_pointer_o +); + + // this is the FIFO struct of the issue queue + typedef struct packed { + logic issued; // this bit indicates whether we issued this instruction e.g.: if it is valid + logic is_rd_fpr_flag; // redundant meta info, added for speed + ariane_pkg::scoreboard_entry_t sbe; // this is the score board entry we will send to ex + } sb_mem_t; + sb_mem_t [ariane_pkg::NR_SB_ENTRIES-1:0] mem_q, mem_n; + + logic issue_full, issue_en; + logic [ariane_pkg::TRANS_ID_BITS:0] issue_cnt_n, issue_cnt_q; + logic [ariane_pkg::TRANS_ID_BITS-1:0] issue_pointer_n, issue_pointer_q; + logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] + commit_pointer_n, commit_pointer_q; + logic [$clog2(CVA6Cfg.NrCommitPorts):0] num_commit; + + // the issue queue is full don't issue any new instructions + // works since aligned to power of 2 + assign issue_full = (issue_cnt_q[ariane_pkg::TRANS_ID_BITS] == 1'b1); + + assign sb_full_o = issue_full; + + ariane_pkg::scoreboard_entry_t decoded_instr; + always_comb begin + decoded_instr = decoded_instr_i; + end + + // output commit instruction directly + always_comb begin : commit_ports + for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + commit_instr_o[i] = mem_q[commit_pointer_q[i]].sbe; 
+ commit_instr_o[i].trans_id = commit_pointer_q[i]; + end + end + + // an instruction is ready for issue if we have place in the issue FIFO and it the decoder says it is valid + always_comb begin + issue_instr_o = decoded_instr_i; + // make sure we assign the correct trans ID + issue_instr_o.trans_id = issue_pointer_q; + // we are ready if we are not full and don't have any unresolved branches, but it can be + // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i == 1) + issue_instr_valid_o = decoded_instr_valid_i & ~unresolved_branch_i & ~issue_full; + decoded_instr_ack_o = issue_ack_i & ~issue_full; + end + + // maintain a FIFO with issued instructions + // keep track of all issued instructions + always_comb begin : issue_fifo + // default assignment + mem_n = mem_q; + issue_en = 1'b0; + + // if we got a acknowledge from the issue stage, put this scoreboard entry in the queue + if (decoded_instr_valid_i && decoded_instr_ack_o && !flush_unissued_instr_i) begin + // the decoded instruction we put in there is valid (1st bit) + // increase the issue counter and advance issue pointer + issue_en = 1'b1; + mem_n[issue_pointer_q] = { + 1'b1, // valid bit + (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + decoded_instr_i.op + )), // whether rd goes to the fpr + decoded_instr // decoded instruction record + }; + end + + // ------------ + // FU NONE + // ------------ + for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin + // The FU is NONE -> this instruction is valid immediately + if (mem_q[i].sbe.fu == ariane_pkg::NONE && mem_q[i].issued) mem_n[i].sbe.valid = 1'b1; + end + + // ------------ + // Write Back + // ------------ + for (int unsigned i = 0; i < CVA6Cfg.NrWbPorts; i++) begin + // check if this instruction was issued (e.g.: it could happen after a flush that there is still + // something in the pipeline e.g. 
an incomplete memory operation) + if (wt_valid_i[i] && mem_q[trans_id_i[i]].issued) begin + mem_n[trans_id_i[i]].sbe.valid = 1'b1; + mem_n[trans_id_i[i]].sbe.result = wbdata_i[i]; + // save the target address of a branch (needed for debug in commit stage) + if (CVA6Cfg.DebugEn) begin + mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; + end + if (mem_n[trans_id_i[i]].sbe.fu == ariane_pkg::CVXIF && ~x_we_i) begin + mem_n[trans_id_i[i]].sbe.rd = 5'b0; + end + // write the exception back if it is valid + if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; + // write the fflags back from the FPU (exception valid is never set), leave tval intact + else if(CVA6Cfg.FpPresent && (mem_q[trans_id_i[i]].sbe.fu == ariane_pkg::FPU || mem_q[trans_id_i[i]].sbe.fu == ariane_pkg::FPU_VEC)) begin + mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause; + end + end + end + + // ------------ + // Commit Port + // ------------ + // we've got an acknowledge from commit + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + if (commit_ack_i[i]) begin + // this instruction is no longer in issue e.g.: it is considered finished + mem_n[commit_pointer_q[i]].issued = 1'b0; + mem_n[commit_pointer_q[i]].sbe.valid = 1'b0; + end + end + + // ------ + // Flush + // ------ + if (flush_i) begin + for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin + // set all valid flags for all entries to zero + mem_n[i].issued = 1'b0; + mem_n[i].sbe.valid = 1'b0; + mem_n[i].sbe.ex.valid = 1'b0; + end + end + end + + // FIFO counter updates + if (CVA6Cfg.NrCommitPorts == 2) begin : gen_commit_ports + assign num_commit = commit_ack_i[1] + commit_ack_i[0]; + end else begin : gen_one_commit_port + assign num_commit = commit_ack_i[0]; + end + + assign issue_cnt_n = (flush_i) ? 
'0 : issue_cnt_q - {{ariane_pkg::TRANS_ID_BITS - $clog2( + CVA6Cfg.NrCommitPorts + ) {1'b0}}, num_commit} + {{ariane_pkg::TRANS_ID_BITS - 1{1'b0}}, issue_en}; + assign commit_pointer_n[0] = (flush_i) ? '0 : commit_pointer_q[0] + num_commit; + assign issue_pointer_n = (flush_i) ? '0 : issue_pointer_q + issue_en; + + // precompute offsets for commit slots + for (genvar k = 1; k < CVA6Cfg.NrCommitPorts; k++) begin : gen_cnt_incr + assign commit_pointer_n[k] = (flush_i) ? '0 : commit_pointer_n[0] + unsigned'(k); + end + + // ------------------- + // RD clobber process + // ------------------- + // rd_clobber output: output currently clobbered destination registers + logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][ariane_pkg::NR_SB_ENTRIES:0] gpr_clobber_vld; + logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][ariane_pkg::NR_SB_ENTRIES:0] fpr_clobber_vld; + ariane_pkg::fu_t [ ariane_pkg::NR_SB_ENTRIES:0] clobber_fu; + + always_comb begin : clobber_assign + gpr_clobber_vld = '0; + fpr_clobber_vld = '0; + + // default (highest entry hast lowest prio in arbiter tree below) + clobber_fu[ariane_pkg::NR_SB_ENTRIES] = ariane_pkg::NONE; + for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin + gpr_clobber_vld[i][ariane_pkg::NR_SB_ENTRIES] = 1'b1; + fpr_clobber_vld[i][ariane_pkg::NR_SB_ENTRIES] = 1'b1; + end + + // check for all valid entries and set the clobber accordingly + for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin + gpr_clobber_vld[mem_q[i].sbe.rd][i] = mem_q[i].issued & ~mem_q[i].is_rd_fpr_flag; + fpr_clobber_vld[mem_q[i].sbe.rd][i] = mem_q[i].issued & mem_q[i].is_rd_fpr_flag; + clobber_fu[i] = mem_q[i].sbe.fu; + end + + // GPR[0] is always free + gpr_clobber_vld[0] = '0; + end + + for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers + // get fu that is going to clobber this register (there should be only one) + rr_arb_tree #( + .NumIn(ariane_pkg::NR_SB_ENTRIES + 1), + .DataType(ariane_pkg::fu_t), + .ExtPrio(1'b1), + 
.AxiVldRdy(1'b1) + ) i_sel_gpr_clobbers ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (gpr_clobber_vld[k]), + .gnt_o (), + .data_i (clobber_fu), + .gnt_i (1'b1), + .req_o (), + .data_o (rd_clobber_gpr_o[k]), + .idx_o () + ); + if (CVA6Cfg.FpPresent) begin + rr_arb_tree #( + .NumIn(ariane_pkg::NR_SB_ENTRIES + 1), + .DataType(ariane_pkg::fu_t), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_fpr_clobbers ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (fpr_clobber_vld[k]), + .gnt_o (), + .data_i (clobber_fu), + .gnt_i (1'b1), + .req_o (), + .data_o (rd_clobber_fpr_o[k]), + .idx_o () + ); + end + end + + // ---------------------------------- + // Read Operands (a.k.a forwarding) + // ---------------------------------- + // read operand interface: same logic as register file + logic [ariane_pkg::NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0] rs1_fwd_req, rs2_fwd_req, rs3_fwd_req; + logic [ariane_pkg::NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] rs_data; + logic rs1_valid, rs2_valid, rs3_valid; + + // WB ports have higher prio than entries + for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb + assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( + issue_instr_o.op + ))); + assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( + issue_instr_o.op + ))); + assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( + issue_instr_o.op + ))); + assign rs_data[k] = wbdata_i[k]; + end + for (genvar k = 0; unsigned'(k) < ariane_pkg::NR_SB_ENTRIES; k++) begin : gen_rs_entries + assign 
rs1_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( + issue_instr_o.op + ))); + assign rs2_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( + issue_instr_o.op + ))); + assign rs3_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( + issue_instr_o.op + ))); + assign rs_data[k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result; + end + + // check whether we are accessing GPR[0] + assign rs1_valid_o = rs1_valid & ((|rs1_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( + issue_instr_o.op + ))); + assign rs2_valid_o = rs2_valid & ((|rs2_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( + issue_instr_o.op + ))); + assign rs3_valid_o = CVA6Cfg.NrRgprPorts == 3 ? 
rs3_valid & ((|rs3_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( + issue_instr_o.op + ))) : rs3_valid; + + // use fixed prio here + // this implicitly gives higher prio to WB ports + rr_arb_tree #( + .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(riscv::XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs1 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs1_fwd_req), + .gnt_o (), + .data_i (rs_data), + .gnt_i (1'b1), + .req_o (rs1_valid), + .data_o (rs1_o), + .idx_o () + ); + + rr_arb_tree #( + .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(riscv::XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs2 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs2_fwd_req), + .gnt_o (), + .data_i (rs_data), + .gnt_i (1'b1), + .req_o (rs2_valid), + .data_o (rs2_o), + .idx_o () + ); + + riscv::xlen_t rs3; + + rr_arb_tree #( + .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(riscv::XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs3 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs3_fwd_req), + .gnt_o (), + .data_i (rs_data), + .gnt_i (1'b1), + .req_o (rs3_valid), + .data_o (rs3), + .idx_o () + ); + + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port + assign rs3_o = rs3[riscv::XLEN-1:0]; + end else begin : gen_fp_three_port + assign rs3_o = rs3[CVA6Cfg.FLen-1:0]; + end + + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin : regs + if (!rst_ni) begin + mem_q <= '{default: sb_mem_t'(0)}; + issue_cnt_q <= '0; + commit_pointer_q <= '0; + issue_pointer_q <= '0; + end else begin + issue_cnt_q <= issue_cnt_n; + issue_pointer_q <= issue_pointer_n; + mem_q <= mem_n; + commit_pointer_q <= commit_pointer_n; + end + end + + //RVFI + assign rvfi_issue_pointer_o = issue_pointer_q; + assign rvfi_commit_pointer_o = commit_pointer_q; + + //pragma translate_off + initial begin + assert 
(ariane_pkg::NR_SB_ENTRIES == 2 ** ariane_pkg::TRANS_ID_BITS) + else $fatal(1, "Scoreboard size needs to be a power of two."); + end + + // assert that zero is never set + assert property (@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE)) + else $fatal(1, "RD 0 should not bet set"); + // assert that we never acknowledge a commit if the instruction is not valid + assert property ( + @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[0] |-> commit_instr_o[0].valid) + else $fatal(1, "Commit acknowledged but instruction is not valid"); + if (CVA6Cfg.NrCommitPorts == 2) begin : gen_two_commit_ports + assert property ( + @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[1] |-> commit_instr_o[1].valid) + else $fatal(1, "Commit acknowledged but instruction is not valid"); + end + // assert that we never give an issue ack signal if the instruction is not valid + assert property (@(posedge clk_i) disable iff (!rst_ni) issue_ack_i |-> issue_instr_valid_o) + else $fatal(1, "Issue acknowledged but instruction is not valid"); + + // there should never be more than one instruction writing the same destination register (except x0) + // check that no functional unit is retiring with the same transaction id + for (genvar i = 0; i < CVA6Cfg.NrWbPorts; i++) begin + for (genvar j = 0; j < CVA6Cfg.NrWbPorts; j++) begin + assert property ( + @(posedge clk_i) disable iff (!rst_ni) wt_valid_i[i] && wt_valid_i[j] && (i != j) |-> (trans_id_i[i] != trans_id_i[j])) + else + $fatal( + 1, + "Two or more functional units are retiring instructions with the same transaction id!" + ); + end + end + //pragma translate_on +endmodule diff --git a/test/type_param/core/serdiv.sv b/test/type_param/core/serdiv.sv new file mode 100644 index 0000000..244ee97 --- /dev/null +++ b/test/type_param/core/serdiv.sv @@ -0,0 +1,269 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Andreas Traber , ETH Zurich +// +// Date: 18.10.2018 +// Description: simple 64bit serial divider; WIDTH sets the operand width, and an IDLE/DIVIDE/FINISH FSM shifts comparator bits (ab_comp) into the result register while in DIVIDE + + +module serdiv + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, // not referenced inside this module body + parameter WIDTH = 64, // width in bits of op_a_i, op_b_i and res_o + parameter STABLE_HANDSHAKE = 0 // Guarantee a stable in_rdy_o during the input handshake.
Keep it at 0 in CVA6 +) ( + input logic clk_i, + input logic rst_ni, + // input IF + input logic [TRANS_ID_BITS-1:0] id_i, // latched when a division is loaded, returned on id_o + input logic [WIDTH-1:0] op_a_i, + input logic [WIDTH-1:0] op_b_i, + input logic [1:0] opcode_i, // 0: udiv, 2: urem, 1: div, 3: rem + // handshake + input logic in_vld_i, // there is a cycle delay from in_rdy_o->in_vld_i, see issue_read_operands.sv stage + output logic in_rdy_o, + input logic flush_i, // forces the FSM to IDLE and masks in_rdy_o/out_vld_o + // output IF + output logic out_vld_o, + input logic out_rdy_i, + output logic [TRANS_ID_BITS-1:0] id_o, + output logic [WIDTH-1:0] res_o // negated out_mux when res_inv_q is set, out_mux otherwise +); + + ///////////////////////////////////// + // signal declarations + ///////////////////////////////////// + + enum logic [1:0] { + IDLE, + DIVIDE, + FINISH + } + state_d, state_q; + + logic [WIDTH-1:0] res_q, res_d; + logic [WIDTH-1:0] op_a_q, op_a_d; + logic [WIDTH-1:0] op_b_q, op_b_d; + logic op_a_sign, op_b_sign; + logic op_b_zero, op_b_zero_q, op_b_zero_d; + logic op_b_neg_one, op_b_neg_one_q, op_b_neg_one_d; + + logic [TRANS_ID_BITS-1:0] id_q, id_d; + + logic rem_sel_d, rem_sel_q; + logic comp_inv_d, comp_inv_q; + logic res_inv_d, res_inv_q; + + logic [WIDTH-1:0] add_mux; + logic [WIDTH-1:0] add_out; + logic [WIDTH-1:0] add_tmp; + logic [WIDTH-1:0] b_mux; + logic [WIDTH-1:0] out_mux; + + logic [$clog2(WIDTH)-1:0] cnt_q, cnt_d; + logic cnt_zero; + + logic [WIDTH-1:0] lzc_a_input, lzc_b_input, op_b; + logic [$clog2(WIDTH)-1:0] lzc_a_result, lzc_b_result; + logic [$clog2(WIDTH+1)-1:0] shift_a; + logic [ $clog2(WIDTH+1):0] div_shift; + + logic a_reg_en, b_reg_en, res_reg_en, ab_comp, pm_sel, load_en; + logic lzc_a_no_one, lzc_b_no_one; + logic div_res_zero_d, div_res_zero_q; + + + ///////////////////////////////////// + // align the input operands + // for faster division + ///////////////////////////////////// + + assign op_a_sign = op_a_i[$high(op_a_i)]; + assign op_b_sign = op_b_i[$high(op_b_i)]; + assign op_b_zero = lzc_b_no_one & ~op_b_sign; // assuming empty_o means "no bit set": the divisor is 0 + assign op_b_neg_one = lzc_b_no_one & op_b_sign; // assuming empty_o means "no bit set": with the sign bit set this can only hold for the inverted lzc input, i.e. op_b_i all ones (-1) +
assign lzc_a_input = (opcode_i[0] & op_a_sign) ? {~op_a_i[$high(op_a_i)-1:0], 1'b1} : op_a_i; + assign lzc_b_input = (opcode_i[0] & op_b_sign) ? ~op_b_i : op_b_i; + + lzc #( + .MODE (1), // count leading zeros + .WIDTH(WIDTH) + ) i_lzc_a ( + .in_i (lzc_a_input), + .cnt_o (lzc_a_result), + .empty_o(lzc_a_no_one) + ); + + lzc #( + .MODE (1), // count leading zeros + .WIDTH(WIDTH) + ) i_lzc_b ( + .in_i (lzc_b_input), + .cnt_o (lzc_b_result), + .empty_o(lzc_b_no_one) + ); + + assign shift_a = (lzc_a_no_one) ? WIDTH : {1'b0, lzc_a_result}; // saturates to WIDTH when lzc_a_no_one is set + assign div_shift = {1'b0, lzc_b_result} - shift_a; // top bit is consumed below as the "result is zero" indicator + + assign op_b = op_b_i <<< $unsigned(div_shift); + + // the division is zero if |opB| > |opA| and can be terminated + assign div_res_zero_d = (load_en) ? div_shift[$high(div_shift)] : div_res_zero_q; + + ///////////////////////////////////// + // Datapath + ///////////////////////////////////// + + assign pm_sel = load_en & ~(opcode_i[0] & (op_a_sign ^ op_b_sign)); // 1: add_out adds, 0: add_out subtracts + + // muxes + assign add_mux = (load_en) ? op_a_i : op_b_q; + + // attention: logical shift by one in case of negative operand B! + assign b_mux = (load_en) ? op_b : {comp_inv_q, (op_b_q[$high(op_b_q):1])}; + + // in case of bad timing, we could output from regs -> needs a cycle more in the FSM + // special cases: quotient with a zero divisor is all ones; remainder with divisor -1 is zero; quotient with divisor -1 is op_a_q + assign out_mux = (rem_sel_q) ? (op_b_neg_one_q ? '0 : op_a_q) : (op_b_zero_q ? '1 : (op_b_neg_one_q ? op_a_q : res_q)); + + // invert if necessary + assign res_o = (res_inv_q) ? -$signed(out_mux) : out_mux; + + // main comparator + assign ab_comp = ((op_a_q == op_b_q) | ((op_a_q > op_b_q) ^ comp_inv_q)) & ((|op_a_q) | op_b_zero_q); + + // main adder + assign add_tmp = (load_en) ? 0 : op_a_q; + assign add_out = (pm_sel) ? add_tmp + add_mux : add_tmp - $signed(add_mux); + + ///////////////////////////////////// + // FSM, counter + ///////////////////////////////////// + + assign cnt_zero = (cnt_q == 0); + assign cnt_d = (load_en) ? div_shift[$clog2(WIDTH)-1:0] : (~cnt_zero) ?
cnt_q - 1 : cnt_q; + + always_comb begin : p_fsm + // default: hold the state, deassert both handshakes and all register enables + state_d = state_q; + in_rdy_o = 1'b0; + out_vld_o = 1'b0; + load_en = 1'b0; + a_reg_en = 1'b0; + b_reg_en = 1'b0; + res_reg_en = 1'b0; + + unique case (state_q) + IDLE: begin + in_rdy_o = 1'b1; + + if (in_vld_i) begin + // CVA6: there is a cycle delay until the valid signal is asserted by the id stage + // Ara: we need a stable handshake + in_rdy_o = (STABLE_HANDSHAKE) ? 1'b1 : 1'b0; + a_reg_en = 1'b1; + b_reg_en = 1'b1; + load_en = 1'b1; + state_d = DIVIDE; + end + end + DIVIDE: begin + if (~(div_res_zero_q | op_b_zero_q | op_b_neg_one_q)) begin + a_reg_en = ab_comp; // op_a_q takes add_out only when the comparator fires + b_reg_en = 1'b1; // op_b_q takes b_mux (shifted right by one position) + res_reg_en = 1'b1; // shift ab_comp into the LSB of res_q + end + // can end the division immediately if the result is known + if (div_res_zero_q | op_b_zero_q | op_b_neg_one_q) begin + out_vld_o = 1'b1; + state_d = FINISH; + if (out_rdy_i) begin + // in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage + state_d = IDLE; + end + end else if (cnt_zero) begin + state_d = FINISH; + end + end + FINISH: begin + out_vld_o = 1'b1; + + if (out_rdy_i) begin + // in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage + state_d = IDLE; + end + end + default: state_d = IDLE; + endcase + + if (flush_i) begin // assigned last, so it overrides the case above (res_reg_en is left as set by the case) + in_rdy_o = 1'b0; + out_vld_o = 1'b0; + a_reg_en = 1'b0; + b_reg_en = 1'b0; + load_en = 1'b0; + state_d = IDLE; + end + end + + ///////////////////////////////////// + // regs, flags + ///////////////////////////////////// + + // get flags: captured while load_en is set, held otherwise + assign rem_sel_d = (load_en) ? opcode_i[1] : rem_sel_q; + assign comp_inv_d = (load_en) ? opcode_i[0] & op_b_sign : comp_inv_q; + assign op_b_zero_d = (load_en) ? op_b_zero : op_b_zero_q; + assign op_b_neg_one_d = (load_en) ? op_b_neg_one : op_b_neg_one_q; + assign res_inv_d = (load_en) ? (~op_b_zero | opcode_i[1]) & opcode_i[0] & (op_a_sign ^ op_b_sign ^ op_b_neg_one) : res_inv_q; + + // transaction id + assign id_d = (load_en) ?
id_i : id_q; + assign id_o = id_q; + + assign op_a_d = (a_reg_en) ? add_out : op_a_q; + assign op_b_d = (b_reg_en) ? b_mux : op_b_q; + assign res_d = (load_en) ? '0 : (res_reg_en) ? {res_q[$high(res_q)-1:0], ab_comp} : res_q; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (~rst_ni) begin + state_q <= IDLE; + op_a_q <= '0; + op_b_q <= '0; + res_q <= '0; + cnt_q <= '0; + id_q <= '0; + rem_sel_q <= 1'b0; + comp_inv_q <= 1'b0; + res_inv_q <= 1'b0; + op_b_zero_q <= 1'b0; + op_b_neg_one_q <= 1'b0; + div_res_zero_q <= 1'b0; + end else begin + state_q <= state_d; + op_a_q <= op_a_d; + op_b_q <= op_b_d; + res_q <= res_d; + cnt_q <= cnt_d; + id_q <= id_d; + rem_sel_q <= rem_sel_d; + comp_inv_q <= comp_inv_d; + res_inv_q <= res_inv_d; + op_b_zero_q <= op_b_zero_d; + op_b_neg_one_q <= op_b_neg_one_d; + div_res_zero_q <= div_res_zero_d; + end + end + +endmodule diff --git a/test/type_param/core/store_buffer.sv b/test/type_param/core/store_buffer.sv new file mode 100644 index 0000000..d41551d --- /dev/null +++ b/test/type_param/core/store_buffer.sv @@ -0,0 +1,291 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 25.04.2017 +// Description: Store queue persists store requests and pushes them to memory +// if they are no longer speculative (speculative queue of DEPTH_SPEC entries feeding a commit queue of DEPTH_COMMIT entries that drives req_port_o) + + +module store_buffer + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // if we flush we need to pause the transactions on the memory + // otherwise we will run in a deadlock with the memory arbiter + input logic stall_st_pending_i, // Stall issuing non-speculative request + output logic no_st_pending_o, // non-speculative queue is empty (e.g.: everything is committed to the memory hierarchy) + output logic store_buffer_empty_o, // there is no store pending in either the speculative unit or the non-speculative queue + + input logic [11:0] page_offset_i, // page offset to check (the last 12 bits), to see if the current load matches a buffered store + output logic page_offset_matches_o, // the above input page offset matches -> let the store buffer drain + + input logic commit_i, // move the speculative entry at the read pointer into the commit queue + output logic commit_ready_o, // commit queue is ready to accept another commit request + output logic ready_o, // the store queue is ready to accept a new request + // NOTE: as implemented, ready_o = (speculative_status_cnt_n < DEPTH_SPEC) || commit_i, + // i.e. it reflects space in the speculative queue; the commit queue is not consulted + input logic valid_i, // this is a valid store + input logic valid_without_flush_i, // just tell if the address is valid which we are currently putting and do not take any further action + + input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue + output [riscv::PLEN-1:0] rvfi_mem_paddr_o, // address field of commit_queue_n at commit_read_pointer_n + input riscv::xlen_t data_i, // data which is placed in the queue + input logic [(riscv::XLEN/8)-1:0] be_i, // byte enable in + input logic [1:0] data_size_i, // type of request we are
making (e.g.: bytes to write) + + // D$ interface + input dcache_req_o_t req_port_i, // only data_gnt is read in this module + output dcache_req_i_t req_port_o +); + + // the store queue has two parts: + // 1. Speculative queue + // 2. Commit queue which is non-speculative, e.g.: the store will definitely happen. + struct packed { + logic [riscv::PLEN-1:0] address; + riscv::xlen_t data; + logic [(riscv::XLEN/8)-1:0] be; + logic [1:0] data_size; + logic valid; // this entry is valid, we need this for checking if the address offset matches + } + speculative_queue_n[DEPTH_SPEC-1:0], + speculative_queue_q[DEPTH_SPEC-1:0], + commit_queue_n[DEPTH_COMMIT-1:0], + commit_queue_q[DEPTH_COMMIT-1:0]; + + // keep a status count for both buffers (declared one bit wider than the pointers; compared against DEPTH_SPEC / DEPTH_COMMIT) + logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt_n, speculative_status_cnt_q; + logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt_n, commit_status_cnt_q; + // Speculative queue + logic [$clog2(DEPTH_SPEC)-1:0] speculative_read_pointer_n, speculative_read_pointer_q; + logic [$clog2(DEPTH_SPEC)-1:0] speculative_write_pointer_n, speculative_write_pointer_q; + // Commit Queue + logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q; + logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q; + + assign store_buffer_empty_o = (speculative_status_cnt_q == 0) & no_st_pending_o; + // ---------------------------------------- + // Speculative Queue - Core Interface + // ---------------------------------------- + always_comb begin : core_if + automatic logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt; // scratch copy so a push and a pop in the same cycle cancel out + speculative_status_cnt = speculative_status_cnt_q; + + // default assignments + speculative_status_cnt_n = speculative_status_cnt_q; + speculative_read_pointer_n = speculative_read_pointer_q; + speculative_write_pointer_n = speculative_write_pointer_q; + speculative_queue_n = speculative_queue_q; + // LSU interface + // we are ready to accept a new entry and the input data is valid + if (valid_i) begin +
speculative_queue_n[speculative_write_pointer_q].address = paddr_i; + speculative_queue_n[speculative_write_pointer_q].data = data_i; + speculative_queue_n[speculative_write_pointer_q].be = be_i; + speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i; + speculative_queue_n[speculative_write_pointer_q].valid = 1'b1; + // advance the write pointer + speculative_write_pointer_n = speculative_write_pointer_q + 1'b1; + speculative_status_cnt++; + end + + // evict the current entry out of this queue, the commit queue will thankfully take it and commit it + // to the memory hierarchy + if (commit_i) begin + // invalidate + speculative_queue_n[speculative_read_pointer_q].valid = 1'b0; + // advance the read pointer + speculative_read_pointer_n = speculative_read_pointer_q + 1'b1; + speculative_status_cnt--; + end + + speculative_status_cnt_n = speculative_status_cnt; + + // when we flush evict the speculative stores + if (flush_i) begin + // reset all valid flags + for (int unsigned i = 0; i < DEPTH_SPEC; i++) speculative_queue_n[i].valid = 1'b0; + + speculative_write_pointer_n = speculative_read_pointer_q; // rewinds to the registered (_q) read pointer, not the _n value + // also reset the status count + speculative_status_cnt_n = 'b0; + end + + // ready when the next-cycle speculative count is below DEPTH_SPEC, or whenever commit_i is asserted (the commit queue is not consulted here) + ready_o = (speculative_status_cnt_n < (DEPTH_SPEC)) || commit_i; + end + + // ---------------------------------------- + // Commit Queue - Memory Interface + // ---------------------------------------- + + // we will never kill a request in the store buffer since we already know that the translation is valid + // e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault + assign req_port_o.kill_req = 1'b0; + assign req_port_o.data_we = 1'b1; // we will always write in the store queue + assign req_port_o.tag_valid = 1'b0; + + // we do not require an acknowledgement for writes, thus we do not need to identify uniquely the responses + assign
req_port_o.data_id = '0; + // those signals can directly be output to the memory + assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + // if we got a new request we already saved the tag from the previous cycle + assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH + + ariane_pkg::DCACHE_INDEX_WIDTH-1 : + ariane_pkg::DCACHE_INDEX_WIDTH]; + assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data; + assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be; + assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size; + + assign rvfi_mem_paddr_o = commit_queue_n[commit_read_pointer_n].address; // reads the next-state (_n) queue and pointer, not the registered ones + + always_comb begin : store_if + automatic logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt; + commit_status_cnt = commit_status_cnt_q; + + commit_ready_o = (commit_status_cnt_q < DEPTH_COMMIT); + // no store is pending if we don't have any element in the commit queue e.g.: it is empty + no_st_pending_o = (commit_status_cnt_q == 0); + // default assignments + commit_read_pointer_n = commit_read_pointer_q; + commit_write_pointer_n = commit_write_pointer_q; + + commit_queue_n = commit_queue_q; + + req_port_o.data_req = 1'b0; + + // there should be no commit when we are flushing + // if the entry in the commit queue is valid and not speculative anymore we can issue this instruction + if (commit_queue_q[commit_read_pointer_q].valid && !stall_st_pending_i) begin + req_port_o.data_req = 1'b1; + if (req_port_i.data_gnt) begin + // we can evict it from the commit buffer + commit_queue_n[commit_read_pointer_q].valid = 1'b0; + // advance the read_pointer + commit_read_pointer_n = commit_read_pointer_q + 1'b1; + commit_status_cnt--; + end + end + // we ignore the rvalid signal for now as we assume that the store + // happened if we got a grant + + // shift the store request from the speculative buffer to the
non-speculative + if (commit_i) begin + commit_queue_n[commit_write_pointer_q] = speculative_queue_q[speculative_read_pointer_q]; + commit_write_pointer_n = commit_write_pointer_n + 1'b1; + commit_status_cnt++; + end + + commit_status_cnt_n = commit_status_cnt; + end + + // ------------------ + // Address Checker + // ------------------ + // The load should return the data stored by the most recent store to the + // same physical address. The most direct way to implement this is to + // maintain physical addresses in the store buffer. + + // Of course, there are other micro-architectural techniques to accomplish + // the same thing: you can interlock and wait for the store buffer to + // drain if the load VA matches any store VA modulo the page size (i.e. + // bits 11:0). As a special case, it is correct to bypass if the full VA + // matches, and no younger stores' VAs match in bits 11:0. + // + // checks if the requested load is in the store buffer + // page offsets are virtually and physically the same + always_comb begin : address_checker + page_offset_matches_o = 1'b0; + + // check if the LSBs are identical and the entry is valid (only bits [11:3] are compared, so bits [2:0] are ignored) + for (int unsigned i = 0; i < DEPTH_COMMIT; i++) begin + // Check if the page offset matches and whether the entry is valid, for the commit queue + if ((page_offset_i[11:3] == commit_queue_q[i].address[11:3]) && commit_queue_q[i].valid) begin + page_offset_matches_o = 1'b1; + break; + end + end + + for (int unsigned i = 0; i < DEPTH_SPEC; i++) begin + // do the same for the speculative queue + if ((page_offset_i[11:3] == speculative_queue_q[i].address[11:3]) && speculative_queue_q[i].valid) begin + page_offset_matches_o = 1'b1; + break; + end + end + // or it matches with the entry we are currently putting into the queue + if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin + page_offset_matches_o = 1'b1; + end + end + + + // registers (speculative queue state) + always_ff @(posedge clk_i or negedge rst_ni) begin : p_spec + if (~rst_ni)
begin + speculative_queue_q <= '{default: 0}; + speculative_read_pointer_q <= '0; + speculative_write_pointer_q <= '0; + speculative_status_cnt_q <= '0; + end else begin + speculative_queue_q <= speculative_queue_n; + speculative_read_pointer_q <= speculative_read_pointer_n; + speculative_write_pointer_q <= speculative_write_pointer_n; + speculative_status_cnt_q <= speculative_status_cnt_n; + end + end + + // registers (commit queue state) + always_ff @(posedge clk_i or negedge rst_ni) begin : p_commit + if (~rst_ni) begin + commit_queue_q <= '{default: 0}; + commit_read_pointer_q <= '0; + commit_write_pointer_q <= '0; + commit_status_cnt_q <= '0; + end else begin + commit_queue_q <= commit_queue_n; + commit_read_pointer_q <= commit_read_pointer_n; + commit_write_pointer_q <= commit_write_pointer_n; + commit_status_cnt_q <= commit_status_cnt_n; + end + end + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off + // assert that commit is never set when we are flushing this would be counter intuitive + // as flush and commit is decided in the same stage + commit_and_flush : + assert property (@(posedge clk_i) rst_ni && flush_i |-> !commit_i) + else $error("[Commit Queue] You are trying to commit and flush in the same cycle"); + + speculative_buffer_overflow : + assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i) + else + $error("[Speculative Queue] You are trying to push new data although the buffer is not ready"); + + speculative_buffer_underflow : + assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i) + else $error("[Speculative Queue] You are committing although there are no stores to commit"); + + commit_buffer_overflow : + assert property (@(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_COMMIT) |-> !commit_i) + else $error("[Commit Queue] You are trying to commit a store although the
buffer is full"); + //pragma translate_on +endmodule + + + diff --git a/test/type_param/core/store_unit.sv b/test/type_param/core/store_unit.sv new file mode 100644 index 0000000..fb93818 --- /dev/null +++ b/test/type_param/core/store_unit.sv @@ -0,0 +1,300 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 22.05.2017 +// Description: Store Unit, takes care of all store requests and atomic memory operations (AMOs) + + +module store_unit + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + input logic stall_st_pending_i, + output logic no_st_pending_o, + output logic store_buffer_empty_o, + // store unit input port + input logic valid_i, + input lsu_ctrl_t lsu_ctrl_i, + output logic pop_st_o, + input logic commit_i, + output logic commit_ready_o, + input logic amo_valid_commit_i, + // store unit output port + output logic valid_o, + output logic [TRANS_ID_BITS-1:0] trans_id_o, + output riscv::xlen_t result_o, + output exception_t ex_o, + // MMU -> Address Translation + output logic translation_req_o, // request address translation + output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out + output [riscv::PLEN-1:0] rvfi_mem_paddr_o, +
input logic [riscv::PLEN-1:0] paddr_i, // physical address in + input exception_t ex_i, + input logic dtlb_hit_i, // will be one in the same cycle translation_req was asserted if it hits + // address checker (both ports are wired straight through to store_buffer_i) + input logic [11:0] page_offset_i, + output logic page_offset_matches_o, + // D$ interface + output amo_req_t amo_req_o, + input amo_resp_t amo_resp_i, + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o +); + // it doesn't matter what we are writing back as stores don't return anything + assign result_o = lsu_ctrl_i.data; + + enum logic [1:0] { + IDLE, + VALID_STORE, + WAIT_TRANSLATION, + WAIT_STORE_READY + } + state_d, state_q; + + // store buffer control signals + logic st_ready; // store_buffer_ready & amo_buffer_ready + logic st_valid; + logic st_valid_without_flush; + logic instr_is_amo; + assign instr_is_amo = is_amo(lsu_ctrl_i.operation); + // keep the data and the byte enable for the second cycle (after address translation) + riscv::xlen_t st_data_n, st_data_q; + logic [(riscv::XLEN/8)-1:0] st_be_n, st_be_q; + logic [1:0] st_data_size_n, st_data_size_q; + amo_t amo_op_d, amo_op_q; + + logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; + + // output assignments + assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address + assign trans_id_o = trans_id_q; // transaction id from previous cycle + + always_comb begin : store_control + translation_req_o = 1'b0; + valid_o = 1'b0; + st_valid = 1'b0; + st_valid_without_flush = 1'b0; + pop_st_o = 1'b0; + ex_o = ex_i; // exception input is passed through unchanged + trans_id_n = lsu_ctrl_i.trans_id; + state_d = state_q; + + case (state_q) + // we got a valid store + IDLE: begin + if (valid_i) begin + state_d = VALID_STORE; + translation_req_o = 1'b1; + pop_st_o = 1'b1; + // check if translation was valid and we have space in the store buffer + // otherwise simply stall + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = WAIT_TRANSLATION; + pop_st_o = 1'b0; + end + + if (!st_ready) begin // checked last, so a not-ready buffer takes priority over a TLB miss + state_d = WAIT_STORE_READY; + pop_st_o = 1'b0; + end + end + end + + VALID_STORE: begin
+ valid_o = 1'b1; + // post this store to the store buffer if we are not flushing + if (!flush_i) st_valid = 1'b1; + + st_valid_without_flush = 1'b1; + + // we have another request and it's not an AMO (the AMO buffer only has depth 1) + if ((valid_i && CVA6Cfg.RVA && !instr_is_amo) || (valid_i && !CVA6Cfg.RVA)) begin + + translation_req_o = 1'b1; + state_d = VALID_STORE; + pop_st_o = 1'b1; + + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = WAIT_TRANSLATION; + pop_st_o = 1'b0; + end + + if (!st_ready) begin // same priority as in IDLE: the buffer check overrides the TLB-miss branch + state_d = WAIT_STORE_READY; + pop_st_o = 1'b0; + end + // if we do not have another request go back to idle + end else begin + state_d = IDLE; + end + end + + // the store queue is currently full + WAIT_STORE_READY: begin + // keep the translation request high + translation_req_o = 1'b1; + + if (st_ready && dtlb_hit_i) begin + state_d = IDLE; + end + end + + default: begin + // we didn't receive a valid translation, wait for one + // but we know that the store queue is not full as we could only have landed here if + // it wasn't full + if (state_q == WAIT_TRANSLATION && ariane_pkg::MMU_PRESENT) begin + translation_req_o = 1'b1; + + if (dtlb_hit_i) begin + state_d = IDLE; + end + end + end + endcase + + // ----------------- + // Access Exception + // ----------------- + // we got an address translation exception (access rights, misaligned or page fault) + if (ex_i.valid && (state_q != IDLE)) begin + // the only difference is that we do not want to store this request + pop_st_o = 1'b1; + st_valid = 1'b0; + state_d = IDLE; + valid_o = 1'b1; + end + + if (flush_i) state_d = IDLE; // last assignment: a flush always sends the FSM back to IDLE + end + + // ----------- + // Re-aligner + // ----------- + // re-align the write data to comply with the address offset + always_comb begin + st_be_n = lsu_ctrl_i.be; + // don't shift the data if we are going to perform an AMO as we still need to operate on this data + st_data_n = (CVA6Cfg.RVA && instr_is_amo) ?
lsu_ctrl_i.data[riscv::XLEN-1:0] : + data_align(lsu_ctrl_i.vaddr[2:0], {{64 - riscv::XLEN{1'b0}}, lsu_ctrl_i.data}); + st_data_size_n = extract_transfer_size(lsu_ctrl_i.operation); + // save AMO op for next cycle (word and double-word variants map to the same amo_t value; AMO_NONE when RVA is disabled) + if (CVA6Cfg.RVA) begin + case (lsu_ctrl_i.operation) + AMO_LRW, AMO_LRD: amo_op_d = AMO_LR; + AMO_SCW, AMO_SCD: amo_op_d = AMO_SC; + AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP; + AMO_ADDW, AMO_ADDD: amo_op_d = AMO_ADD; + AMO_ANDW, AMO_ANDD: amo_op_d = AMO_AND; + AMO_ORW, AMO_ORD: amo_op_d = AMO_OR; + AMO_XORW, AMO_XORD: amo_op_d = AMO_XOR; + AMO_MAXW, AMO_MAXD: amo_op_d = AMO_MAX; + AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU; + AMO_MINW, AMO_MIND: amo_op_d = AMO_MIN; + AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU; + default: amo_op_d = AMO_NONE; + endcase + end else begin + amo_op_d = AMO_NONE; + end + end + + logic store_buffer_valid, amo_buffer_valid; + logic store_buffer_ready, amo_buffer_ready; + + // multiplex between store unit and amo buffer, selected by the registered AMO op (amo_op_q) + assign store_buffer_valid = st_valid & (amo_op_q == AMO_NONE); + assign amo_buffer_valid = st_valid & (amo_op_q != AMO_NONE); + + assign st_ready = store_buffer_ready & amo_buffer_ready; + + // --------------- + // Store Queue + // --------------- + store_buffer #( + .CVA6Cfg(CVA6Cfg) + ) store_buffer_i ( + .clk_i, + .rst_ni, + .flush_i, + .stall_st_pending_i, + .no_st_pending_o, + .store_buffer_empty_o, + .page_offset_i, + .page_offset_matches_o, + .commit_i, + .commit_ready_o, + .ready_o (store_buffer_ready), + .valid_i (store_buffer_valid), + // the flush signal can be critical and we need this valid + // signal to check whether the page_offset matches or not, + // functionally it doesn't make a difference whether we use + // the correct valid signal or not as we are flushing + // the whole pipeline anyway + .valid_without_flush_i(st_valid_without_flush), + .paddr_i, + .rvfi_mem_paddr_o (rvfi_mem_paddr_o), + .data_i (st_data_q), + .be_i (st_be_q), + .data_size_i (st_data_size_q), + .req_port_i (req_port_i), +
.req_port_o (req_port_o) + ); + + if (CVA6Cfg.RVA) begin + amo_buffer #( + .CVA6Cfg(CVA6Cfg) + ) i_amo_buffer ( + .clk_i, + .rst_ni, + .flush_i, + .valid_i (amo_buffer_valid), + .ready_o (amo_buffer_ready), + .paddr_i (paddr_i), + .amo_op_i (amo_op_q), + .data_i (st_data_q), + .data_size_i (st_data_size_q), + .amo_req_o (amo_req_o), + .amo_resp_i (amo_resp_i), + .amo_valid_commit_i(amo_valid_commit_i), + .no_st_pending_i (no_st_pending_o) + ); + end else begin + assign amo_buffer_ready = '1; + assign amo_req_o = '0; + end + + // --------------- + // Registers (FSM state plus the store payload, transaction id and AMO op held for the next cycle) + // --------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + st_be_q <= '0; + st_data_q <= '0; + st_data_size_q <= '0; + trans_id_q <= '0; + amo_op_q <= AMO_NONE; + end else begin + state_q <= state_d; + st_be_q <= st_be_n; + st_data_q <= st_data_n; + trans_id_q <= trans_id_n; + st_data_size_q <= st_data_size_n; + amo_op_q <= amo_op_d; + end + end + +endmodule diff --git a/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv b/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv new file mode 100644 index 0000000..1575595 --- /dev/null +++ b/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv @@ -0,0 +1,301 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// ---------------------------- +// AXI to SRAM Adapter +// ---------------------------- +// Author: Florian Zaruba (zarubaf@iis.ee.ethz.ch) +// +// Description: Manages AXI transactions +// Supports all burst accesses but only on aligned addresses and with full data width. +// Assertions should guide you if there is something unsupported happening. +// +module axi2mem #( + parameter int unsigned AXI_ID_WIDTH = 10, + parameter int unsigned AXI_ADDR_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter int unsigned AXI_USER_WIDTH = 10 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + AXI_BUS.Slave slave, + output logic req_o, + output logic we_o, + output logic [AXI_ADDR_WIDTH-1:0] addr_o, + output logic [AXI_DATA_WIDTH/8-1:0] be_o, + output logic [AXI_USER_WIDTH-1:0] user_o, + output logic [AXI_DATA_WIDTH-1:0] data_o, + input logic [AXI_USER_WIDTH-1:0] user_i, + input logic [AXI_DATA_WIDTH-1:0] data_i +); + + // AXI has the following rules governing the use of bursts: + // - for wrapping bursts, the burst length must be 2, 4, 8, or 16 + // - a burst must not cross a 4KB address boundary + // - early termination of bursts is not supported. 
+    typedef enum logic [1:0] { FIXED = 2'b00, INCR = 2'b01, WRAP = 2'b10} axi_burst_t;
+
+    localparam LOG_NR_BYTES = $clog2(AXI_DATA_WIDTH/8);
+
+    typedef struct packed {
+        logic [AXI_ID_WIDTH-1:0]   id;
+        logic [AXI_ADDR_WIDTH-1:0] addr;
+        logic [7:0]                len;
+        logic [2:0]                size;
+        axi_burst_t                burst;
+    } ax_req_t;
+
+    // Registers
+    enum logic [2:0] { IDLE, READ, WRITE, SEND_B, WAIT_WVALID } state_d, state_q;
+    ax_req_t                   ax_req_d, ax_req_q;
+    logic [AXI_ADDR_WIDTH-1:0] req_addr_d, req_addr_q;
+    logic [7:0]                cnt_d, cnt_q;
+
+    function automatic logic [AXI_ADDR_WIDTH-1:0] get_wrap_boundary (input logic [AXI_ADDR_WIDTH-1:0] unaligned_address, input logic [7:0] len); // lower wrap boundary of a WRAP burst
+        logic [AXI_ADDR_WIDTH-1:0] warp_address = '0; // stays '0 for any len other than 1, 3, 7 or 15
+        // for wrapping transfers ax_len can only be 1, 3, 7 or 15: keep the upper address bits and clear the low $clog2(len+1)+LOG_NR_BYTES bits
+        if (len == 4'b1)
+            warp_address[AXI_ADDR_WIDTH-1:1+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:1+LOG_NR_BYTES];
+        else if (len == 4'b11)
+            warp_address[AXI_ADDR_WIDTH-1:2+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:2+LOG_NR_BYTES];
+        else if (len == 4'b111)
+            warp_address[AXI_ADDR_WIDTH-1:3+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:3+LOG_NR_BYTES]; // source slice must match the destination slice
+        else if (len == 4'b1111)
+            warp_address[AXI_ADDR_WIDTH-1:4+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:4+LOG_NR_BYTES]; // source slice must match the destination slice
+
+        return warp_address;
+    endfunction
+
+    logic [AXI_ADDR_WIDTH-1:0] aligned_address;
+    logic [AXI_ADDR_WIDTH-1:0] wrap_boundary;
+    logic [AXI_ADDR_WIDTH-1:0] upper_wrap_boundary;
+    logic [AXI_ADDR_WIDTH-1:0] cons_addr;
+
+    always_comb begin
+        // address generation
+        aligned_address = {ax_req_q.addr[AXI_ADDR_WIDTH-1:LOG_NR_BYTES], {{LOG_NR_BYTES}{1'b0}}};
+        wrap_boundary = get_wrap_boundary(ax_req_q.addr, ax_req_q.len);
+        // this will overflow
+        upper_wrap_boundary = wrap_boundary + ((ax_req_q.len + 1) << LOG_NR_BYTES);
+        // calculate consecutive address
+        cons_addr = aligned_address + (cnt_q << LOG_NR_BYTES);
+
+        // Transaction attributes
+        // default assignments
+        state_d    = state_q;
+        ax_req_d   =
ax_req_q;
+        req_addr_d = req_addr_q;
+        cnt_d      = cnt_q;
+        // Memory default assignments
+        data_o = slave.w_data;
+        user_o = slave.w_user;
+        be_o   = slave.w_strb;
+        we_o   = 1'b0;
+        req_o  = 1'b0;
+        addr_o = '0;
+        // AXI assignments
+        // request
+        slave.aw_ready = 1'b0;
+        slave.ar_ready = 1'b0;
+        // read response channel
+        slave.r_valid  = 1'b0;
+        slave.r_data   = data_i;
+        slave.r_resp   = '0;
+        slave.r_last   = '0;
+        slave.r_id     = ax_req_q.id;
+        slave.r_user   = user_i;
+        // slave write data channel
+        slave.w_ready  = 1'b0;
+        // write response channel
+        slave.b_valid  = 1'b0;
+        slave.b_resp   = 1'b0;
+        slave.b_id     = 1'b0;
+        slave.b_user   = 1'b0;
+
+        case (state_q)
+
+            IDLE: begin
+                // Wait for a read or write
+                // ------------
+                // Read
+                // ------------
+                if (slave.ar_valid) begin
+                    slave.ar_ready = 1'b1;
+                    // sample ax
+                    ax_req_d       = {slave.ar_id, slave.ar_addr, slave.ar_len, slave.ar_size, slave.ar_burst};
+                    state_d        = READ;
+                    //  we can request the first address, this saves us time
+                    req_o          = 1'b1;
+                    addr_o         = slave.ar_addr;
+                    // save the address
+                    req_addr_d     = slave.ar_addr;
+                    // one beat has been requested; cnt_q == ax_len + 1 marks the last beat
+                    cnt_d          = 1;
+                // ------------
+                // Write
+                // ------------
+                end else if (slave.aw_valid) begin
+                    slave.aw_ready = 1'b1;
+                    slave.w_ready  = 1'b1;
+                    addr_o         = slave.aw_addr;
+                    // sample ax
+                    ax_req_d       = {slave.aw_id, slave.aw_addr, slave.aw_len, slave.aw_size, slave.aw_burst};
+                    // we've got our first w_valid so start the write process
+                    if (slave.w_valid) begin
+                        req_o          = 1'b1;
+                        we_o           = 1'b1;
+                        state_d        = (slave.w_last) ? SEND_B : WRITE;
+                        cnt_d          = 1;
+                    // we still have to wait for the first w_valid to arrive
+                    end else
+                        state_d = WAIT_WVALID;
+                end
+            end
+
+            // ~> we are still missing a w_valid
+            WAIT_WVALID: begin
+                slave.w_ready = 1'b1;
+                addr_o = ax_req_q.addr;
+                // we can now make our first request
+                if (slave.w_valid) begin
+                    req_o          = 1'b1;
+                    we_o           = 1'b1;
+                    state_d        = (slave.w_last) ? SEND_B : WRITE;
+                    cnt_d          = 1;
+                end
+            end
+
+            READ: begin
+                // keep request to memory high
+                req_o  = 1'b1;
+                addr_o = req_addr_q;
+                // send the response
+                slave.r_valid = 1'b1;
+                slave.r_data  = data_i;
+                slave.r_user  = user_i;
+                slave.r_id    = ax_req_q.id;
+                slave.r_last  = (cnt_q == ax_req_q.len + 1);
+
+                // check that the master is ready, the slave must not wait on this
+                if (slave.r_ready) begin
+                    // ----------------------------
+                    // Next address generation
+                    // ----------------------------
+                    // handle the correct burst type
+                    case (ax_req_q.burst)
+                        FIXED, INCR: addr_o = cons_addr;
+                        WRAP:  begin
+                            // check if the address reached the upper wrap boundary
+                            if (cons_addr == upper_wrap_boundary) begin
+                                addr_o = wrap_boundary;
+                            // address ran past the upper boundary: re-base the overshoot onto the lower boundary
+                            end else if (cons_addr > upper_wrap_boundary) begin
+                                addr_o = wrap_boundary + (cons_addr - upper_wrap_boundary);
+                            // we are still in the incremental regime
+                            end else begin
+                                addr_o = cons_addr;
+                            end
+                        end
+                    endcase
+                    // we need to change the address here for the upcoming request
+                    // we sent the last byte -> go back to idle
+                    if (slave.r_last) begin
+                        state_d = IDLE;
+                        // we already got everything
+                        req_o = 1'b0;
+                    end
+                    // save the request address for the next cycle
+                    req_addr_d = addr_o;
+                    // advance the beat counter as the master has consumed the read data
+                    cnt_d = cnt_q + 1;
+                    // TODO: configure correct byte-lane
+                end
+            end
+            // ~> we already wrote the first word here
+            WRITE: begin
+
+                slave.w_ready = 1'b1;
+
+                // consume a word here
+                if (slave.w_valid) begin
+                    req_o         = 1'b1;
+                    we_o          = 1'b1;
+                    // ----------------------------
+                    // Next address generation
+                    // ----------------------------
+                    // handle the correct burst type
+                    case (ax_req_q.burst)
+
+                        FIXED, INCR: addr_o = cons_addr;
+                        WRAP:  begin
+                            // check if the address reached the upper wrap boundary
+                            if (cons_addr == upper_wrap_boundary) begin
+                                addr_o = wrap_boundary;
+                            // address ran past the upper boundary: re-base the overshoot onto the lower boundary
+                            end else if (cons_addr > upper_wrap_boundary) begin
+                                addr_o = wrap_boundary + (cons_addr - upper_wrap_boundary);
+                            // we are still in the incremental regime
+                            end else begin
+                                addr_o = cons_addr;
+                            end
+                        end
+                    endcase
+                    // save the request address for the next cycle
+                    req_addr_d = addr_o;
+                    // advance the beat counter as this write data word has been consumed
+                    cnt_d = cnt_q + 1;
+
+                    if (slave.w_last)
+                        state_d = SEND_B;
+                end
+            end
+            // ~> send a write acknowledge back
+            SEND_B: begin
+                slave.b_valid = 1'b1;
+                slave.b_id    = ax_req_q.id;
+                if (slave.b_ready)
+                    state_d = IDLE;
+            end
+
+        endcase
+    end
+
+    `ifndef SYNTHESIS
+    `ifndef VERILATOR
+    // assert that only full data lane transfers allowed
+    // assert property (
+    //   @(posedge clk_i) slave.aw_valid |-> (slave.aw_size == LOG_NR_BYTES)) else $fatal ("Only full data lane transfers allowed");
+    // assert property (
+    //   @(posedge clk_i) slave.ar_valid |-> (slave.ar_size == LOG_NR_BYTES)) else $fatal ("Only full data lane transfers allowed");
+    // assert property (
+    //   @(posedge clk_i) slave.aw_valid |-> (slave.ar_addr[LOG_NR_BYTES-1:0] == '0)) else $fatal ("Unaligned accesses are not allowed at the moment");
+    // assert property (
+    //   @(posedge clk_i) slave.ar_valid |-> (slave.aw_addr[LOG_NR_BYTES-1:0] == '0)) else $fatal ("Unaligned accesses are not allowed at the moment");
+    `endif
+    `endif
+    // --------------
+    // Registers
+    // --------------
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            state_q    <= IDLE;
+            ax_req_q   <= '0;
+            req_addr_q <= '0;
+            cnt_q      <= '0;
+        end else begin
+            state_q    <= state_d;
+            ax_req_q   <= ax_req_d;
+            req_addr_q <= req_addr_d;
+            cnt_q      <= cnt_d;
+        end
+    end
+endmodule
+
+
diff --git a/test/type_param/corev_apu/bootrom/bootrom.sv b/test/type_param/corev_apu/bootrom/bootrom.sv
new file mode 100644
index 0000000..58ba804
--- /dev/null
+++ b/test/type_param/corev_apu/bootrom/bootrom.sv
@@ -0,0 +1,225 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: $filename.v + * + * Description: Auto-generated bootrom + */ + +// Auto-generated code +module bootrom ( + input logic clk_i, + input logic req_i, + input logic [63:0] addr_i, + output logic [63:0] rdata_o +); + localparam int RomSize = 186; + + const logic [RomSize-1:0][63:0] mem = { + 64'h00000000_00000068, + 64'h74646977_2d6f692d, + 64'h67657200_74666968, + 64'h732d6765_72007374, + 64'h70757272_65746e69, + 64'h00646565_70732d74, + 64'h6e657272_75630073, + 64'h656d616e_2d676572, + 64'h00646564_6e657478, + 64'h652d7374_70757272, + 64'h65746e69_00736567, + 64'h6e617200_656c646e, + 64'h61687000_72656c6c, + 64'h6f72746e_6f632d74, + 64'h70757272_65746e69, + 64'h00736c6c_65632d74, + 64'h70757272_65746e69, + 64'h23007469_6c70732d, + 64'h626c7400_65707974, + 64'h2d756d6d_00617369, + 64'h2c766373_69720073, + 64'h75746174_73006765, + 64'h72006570_79745f65, + 64'h63697665_64007963, + 64'h6e657571_6572662d, + 64'h6b636f6c_63007963, + 64'h6e657571_6572662d, + 64'h65736162_656d6974, + 64'h006c6564_6f6d0065, + 64'h6c626974_61706d6f, + 64'h6300736c_6c65632d, + 64'h657a6973_2300736c, + 64'h6c65632d_73736572, + 64'h64646123_09000000, + 64'h02000000_02000000, + 64'h02000000_006c6f72, + 64'h746e6f63_cc000000, + 64'h08000000_03000000, + 64'h00100000_00000000, + 64'h00000018_00000000, + 64'h5b000000_10000000, + 64'h03000000_07000000, + 64'h06000000_05000000, + 
64'h04000000_e4000000, + 64'h10000000_03000000, + 64'h00007265_6d69745f, + 64'h6270612c_706c7570, + 64'h1b000000_0f000000, + 64'h03000000_00003030, + 64'h30303030_38314072, + 64'h656d6974_01000000, + 64'h02000000_04000000, + 64'hf9000000_04000000, + 64'h03000000_02000000, + 64'hef000000_04000000, + 64'h03000000_01000000, + 64'he4000000_04000000, + 64'h03000000_00c20100, + 64'hd6000000_04000000, + 64'h03000000_80f0fa02, + 64'h3f000000_04000000, + 64'h03000000_00100000, + 64'h00000000_00000010, + 64'h00000000_5b000000, + 64'h10000000_03000000, + 64'h00000000_61303535, + 64'h3631736e_1b000000, + 64'h09000000_03000000, + 64'h00000030_30303030, + 64'h30303140_74726175, + 64'h01000000_02000000, + 64'h006c6f72_746e6f63, + 64'hcc000000_08000000, + 64'h03000000_00000c00, + 64'h00000000_00000002, + 64'h00000000_5b000000, + 64'h10000000_03000000, + 64'h07000000_01000000, + 64'h03000000_01000000, + 64'hb8000000_10000000, + 64'h03000000_00000000, + 64'h30746e69_6c632c76, + 64'h63736972_1b000000, + 64'h0d000000_03000000, + 64'h00000030_30303030, + 64'h30324074_6e696c63, + 64'h01000000_b1000000, + 64'h00000000_03000000, + 64'h00007375_622d656c, + 64'h706d6973_00636f73, + 64'h2d657261_622d656e, + 64'h61697261_2c687465, + 64'h1b000000_1f000000, + 64'h03000000_02000000, + 64'h0f000000_04000000, + 64'h03000000_02000000, + 64'h00000000_04000000, + 64'h03000000_00636f73, + 64'h01000000_02000000, + 64'h00000010_00000000, + 64'h00000080_00000000, + 64'h5b000000_10000000, + 64'h03000000_00007972, + 64'h6f6d656d_4f000000, + 64'h07000000_03000000, + 64'h00303030_30303030, + 64'h38407972_6f6d656d, + 64'h01000000_02000000, + 64'h02000000_02000000, + 64'h01000000_a9000000, + 64'h04000000_03000000, + 64'h00006374_6e692d75, + 64'h70632c76_63736972, + 64'h1b000000_0f000000, + 64'h03000000_94000000, + 64'h00000000_03000000, + 64'h01000000_83000000, + 64'h04000000_03000000, + 64'h00000000_72656c6c, + 64'h6f72746e_6f632d74, + 64'h70757272_65746e69, + 64'h01000000_79000000, + 64'h00000000_03000000, + 
64'h00003933_76732c76, + 64'h63736972_70000000, + 64'h0b000000_03000000, + 64'h00006364_66616d69, + 64'h34367672_66000000, + 64'h0b000000_03000000, + 64'h00000076_63736972, + 64'h00656e61_69726120, + 64'h2c687465_1b000000, + 64'h12000000_03000000, + 64'h00000000_79616b6f, + 64'h5f000000_05000000, + 64'h03000000_00000000, + 64'h5b000000_04000000, + 64'h03000000_00757063, + 64'h4f000000_04000000, + 64'h03000000_80f0fa02, + 64'h3f000000_04000000, + 64'h03000000_00000030, + 64'h40757063_01000000, + 64'h00800000_2c000000, + 64'h04000000_03000000, + 64'h00000000_0f000000, + 64'h04000000_03000000, + 64'h01000000_00000000, + 64'h04000000_03000000, + 64'h00000000_73757063, + 64'h01000000_00657261, + 64'h622d656e_61697261, + 64'h2c687465_26000000, + 64'h10000000_03000000, + 64'h00766564_2d657261, + 64'h622d656e_61697261, + 64'h2c687465_1b000000, + 64'h14000000_03000000, + 64'h02000000_0f000000, + 64'h04000000_03000000, + 64'h02000000_00000000, + 64'h04000000_03000000, + 64'h00000000_01000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h0c040000_06010000, + 64'h00000000_10000000, + 64'h11000000_28000000, + 64'h44040000_38000000, + 64'h4a050000_edfe0dd0, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_ffdff06f, + 64'h10500073_03c58593, + 64'h00000597_f1402573, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00040067_07458593, + 64'h00000597_f1402573, + 64'h01f41413_00100413 + }; + + logic [$clog2(RomSize)-1:0] addr_q; + + always_ff @(posedge clk_i) begin + if (req_i) begin + addr_q <= addr_i[$clog2(RomSize)-1+3:3]; + end + end + + // this prevents spurious Xes from propagating into + // the speculative fetch stage of the core + assign rdata_o = (addr_q < RomSize) ? 
mem[addr_q] : '0; +endmodule diff --git a/test/type_param/corev_apu/clint/axi_lite_interface.sv b/test/type_param/corev_apu/clint/axi_lite_interface.sv new file mode 100644 index 0000000..c431dc0 --- /dev/null +++ b/test/type_param/corev_apu/clint/axi_lite_interface.sv @@ -0,0 +1,170 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 17/07/2017 +// Description: AXI Lite compatible interface +// + +module axi_lite_interface #( + parameter int unsigned AXI_ADDR_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter int unsigned AXI_ID_WIDTH = 10, + parameter type axi_req_t = ariane_axi::req_t, + parameter type axi_resp_t = ariane_axi::resp_t +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + input axi_req_t axi_req_i, + output axi_resp_t axi_resp_o, + + output logic [AXI_ADDR_WIDTH-1:0] address_o, + output logic en_o, // transaction is valid + output logic we_o, // write + output logic [(AXI_DATA_WIDTH/8)-1:0] be_o, // byte enable write + input logic [AXI_DATA_WIDTH-1:0] data_i, // data + output logic [AXI_DATA_WIDTH-1:0] data_o +); + + // The RLAST signal is not required, and is considered asserted for every transfer on the read data channel. 
+    enum logic [1:0] { IDLE, READ, WRITE, WRITE_B } state_q, state_d;
+    // save the trans id, we will need it for reflection otherwise we are not plug compatible to the AXI standard
+    logic [AXI_ID_WIDTH-1:0]   trans_id_n, trans_id_q;
+    // address register
+    logic [AXI_ADDR_WIDTH-1:0] address_n,  address_q;
+
+    // pass through read data on the read data channel
+    assign axi_resp_o.r.data = data_i;
+    // send back the transaction id we've latched
+    assign axi_resp_o.r.id = trans_id_q;
+    assign axi_resp_o.b.id = trans_id_q;
+    // set r_last to one as defined by the AXI4 - Lite standard
+    assign axi_resp_o.r.last = 1'b1;
+    // we do not support any errors so set response flag to all zeros
+    assign axi_resp_o.b.resp = 2'b0;
+    assign axi_resp_o.r.resp = 2'b0;
+    // output data which we want to write to the slave
+    assign data_o = axi_req_i.w.data;
+    assign be_o = axi_req_i.w.strb;
+    // ------------------------
+    // AXI4-Lite State Machine: IDLE -> READ -> IDLE for reads, IDLE -> WRITE -> WRITE_B -> IDLE for writes
+    // ------------------------
+    always_comb begin
+        // default signal assignment
+        state_d    = state_q;
+        address_n  = address_q;
+        trans_id_n = trans_id_q;
+
+        // we'll answer a write request only if we got address and data
+        axi_resp_o.aw_ready = 1'b0;
+        axi_resp_o.w_ready  = 1'b0;
+        axi_resp_o.b_valid  = 1'b0;
+
+        axi_resp_o.ar_ready = 1'b0;
+        axi_resp_o.r_valid  = 1'b0;
+
+        address_o      = '0;
+        we_o           = 1'b0;
+        en_o           = 1'b0;
+
+        case (state_q)
+            // we are ready to accept a new request
+            IDLE: begin
+                // we've got a valid write request; aw_ready is asserted in this same cycle (writes take priority over reads)
+                if (axi_req_i.aw_valid) begin
+                    axi_resp_o.aw_ready = 1'b1;
+                    // this costs performance but the interconnect does not obey the AXI standard
+                    // e.g.: we could wait for aw_valid && w_valid to do the transaction.
+                    state_d = WRITE;
+                    // save address
+                    address_n = axi_req_i.aw.addr;
+                    // save the transaction id for reflection
+                    trans_id_n = axi_req_i.aw.id;
+
+                // we've got a valid read request; ar_ready is asserted in this same cycle
+                end else if (axi_req_i.ar_valid) begin
+                    axi_resp_o.ar_ready = 1'b1;
+                    state_d = READ;
+                    // save address
+                    address_n = axi_req_i.ar.addr;
+                    // save the transaction id for reflection
+                    trans_id_n = axi_req_i.ar.id;
+
+                end
+            end
+            // We've got a read request at least one cycle earlier
+            // so data_i will already contain the data we'd like to read
+            READ: begin
+                // assert the enable of the RAM-like interface
+                en_o       = 1'b1;
+                // further assert the correct address
+                address_o = address_q;
+                // the read is valid
+                axi_resp_o.r_valid = 1'b1;
+                // check if we got a valid r_ready and go back to IDLE
+                if (axi_req_i.r_ready)
+                    state_d = IDLE;
+            end
+            // We've got a write request at least one cycle earlier
+            // wait here for the data
+            WRITE: begin
+                if (axi_req_i.w_valid) begin
+                    axi_resp_o.w_ready = 1'b1;
+                    // use the latched address
+                    address_o = address_q;
+                    en_o = 1'b1;
+                    we_o = 1'b1;
+                    // close this request
+                    state_d = WRITE_B;
+                end
+            end
+
+            WRITE_B: begin
+                axi_resp_o.b_valid = 1'b1;
+                // we've already performed the write here so wait for the master's b_ready
+                if (axi_req_i.b_ready)
+                    state_d = IDLE;
+            end
+            default:;
+
+        endcase
+    end
+
+    // ------------------------
+    // Registers
+    // ------------------------
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            state_q    <= IDLE;
+            address_q  <= '0;
+            trans_id_q <= '0;
+        end else begin
+            state_q    <= state_d;
+            address_q  <= address_n;
+            trans_id_q <= trans_id_n;
+        end
+    end
+
+    // ------------------------
+    // Assertions
+    // ------------------------
+    // Listen for illegal transactions
+    //pragma translate_off
+    `ifndef VERILATOR
+    // check that burst length is just one
+    assert property (@(posedge clk_i) axi_req_i.ar_valid |->  ((axi_req_i.ar.len == 8'b0)))
+    else begin $error("AXI Lite
does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end + // do the same for the write channel + assert property (@(posedge clk_i) axi_req_i.aw_valid |-> ((axi_req_i.aw.len == 8'b0))) + else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end + `endif + //pragma translate_on +endmodule diff --git a/test/type_param/corev_apu/clint/clint.sv b/test/type_param/corev_apu/clint/clint.sv new file mode 100644 index 0000000..e76f96d --- /dev/null +++ b/test/type_param/corev_apu/clint/clint.sv @@ -0,0 +1,294 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 15/07/2017 +// Description: A RISC-V privilege spec 1.11 (WIP) compatible CLINT (core local interrupt controller) +// + +// Platforms provide a real-time counter, exposed as a memory-mapped machine-mode register, mtime. mtime must run at +// constant frequency, and the platform must provide a mechanism for determining the timebase of mtime (device tree). 
+ +module clint #( + parameter int unsigned AXI_ADDR_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter int unsigned AXI_ID_WIDTH = 10, + parameter int unsigned NR_CORES = 1, // Number of cores therefore also the number of timecmp registers and timer interrupts + parameter type axi_req_t = ariane_axi::req_t, + parameter type axi_resp_t = ariane_axi::resp_t +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic testmode_i, + input axi_req_t axi_req_i, + output axi_resp_t axi_resp_o, + input logic rtc_i, // Real-time clock in (usually 32.768 kHz) + output logic [NR_CORES-1:0] timer_irq_o, // Timer interrupts + output logic [NR_CORES-1:0] ipi_o // software interrupt (a.k.a inter-process-interrupt) +); + // register offset + localparam logic [15:0] MSIP_BASE = 16'h0; + localparam logic [15:0] MTIMECMP_BASE = 16'h4000; + localparam logic [15:0] MTIME_BASE = 16'hbff8; + + localparam AddrSelWidth = (NR_CORES == 1) ? 1 : $clog2(NR_CORES); + + // signals from AXI 4 Lite + logic [AXI_ADDR_WIDTH-1:0] address; + logic en; + logic we; + logic [7:0] be; + logic [63:0] wdata; + logic [63:0] rdata; + + + // bit 11 and 10 are determining the address offset + logic [15:0] register_address; + assign register_address = address[15:0]; + // actual registers + logic [63:0] mtime_n, mtime_q; + logic [NR_CORES-1:0][63:0] mtimecmp_n, mtimecmp_q; + logic [NR_CORES-1:0] msip_n, msip_q; + // increase the timer + logic increase_timer; + + // ----------------------------- + // AXI Interface Logic + // ----------------------------- + axi_lite_interface #( + .AXI_ADDR_WIDTH ( AXI_ADDR_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .axi_req_t ( axi_req_t ), + .axi_resp_t ( axi_resp_t ) + ) axi_lite_interface_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .axi_req_i ( axi_req_i ), + .axi_resp_o ( axi_resp_o ), + .address_o ( address ), + .en_o ( en ), + .we_o ( we ), + .be_o ( be ), + .data_i ( 
rdata ), + .data_o ( wdata ) + ); + + // ----------------------------- + // Register Update Logic + // ----------------------------- + // APB register write logic + always_comb begin + mtime_n = mtime_q; + mtimecmp_n = mtimecmp_q; + msip_n = msip_q; + // RTC says we should increase the timer + if (increase_timer) + mtime_n = mtime_q + 1; + + // written from APB bus - gets priority + if (en && we) begin + case (register_address) inside + [MSIP_BASE:MSIP_BASE+4*NR_CORES]: begin + msip_n[$unsigned(address[AddrSelWidth-1+2:2])] = wdata[32*address[2]]; + end + + [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin + if (riscv::XLEN == 32) begin + if (be[3:0] == 4'hf) + mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])][31:0] = wdata[31:0]; + else + mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])][63:32] = wdata[63:32]; + + end else begin + mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])] = wdata; + end + end + + [MTIME_BASE:MTIME_BASE+4]: begin + if (riscv::XLEN == 32) begin + if (address[2:0] == 3'h0) + mtime_n[31:0] = wdata[31:0]; + else begin + if (address[2:0] == 3'h4) + mtime_n[63:32] = wdata[63:32]; + end + end else begin + mtime_n = wdata; + end + end + default:; + endcase + end + end + + // APB register read logic + always_comb begin + rdata = 'b0; + + if (en && !we) begin + case (register_address) inside + [MSIP_BASE:MSIP_BASE+4*NR_CORES]: begin + if (riscv::XLEN == 32) + rdata[31:0] = msip_q[$unsigned(address[AddrSelWidth-1+2:2])]; + else + rdata = msip_q[$unsigned(address[AddrSelWidth-1+2:2])]; + end + + [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin + if (riscv::XLEN == 32) begin + if (address[2:0] == 3'h0) + rdata[31:0] = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])][31:0]; + else begin + if (address[2:0] == 3'h4) + rdata[63:32] = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])][63:32]; + end + + end else begin + rdata = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])]; + end + end + + [MTIME_BASE:MTIME_BASE+4]: begin + if (riscv::XLEN == 32) begin 
+ if (address[2:0] == 3'h0) + rdata[31:0] = mtime_q[31:0]; + else begin + if (address[2:0] == 3'h4) + rdata[63:32] = mtime_q[63:32]; + end + end else begin + rdata = mtime_q; + end + end + default:; + endcase + end + end + + // ----------------------------- + // IRQ Generation + // ----------------------------- + // The mtime register has a 64-bit precision on all RV32, RV64, and RV128 systems. Platforms provide a 64-bit + // memory-mapped machine-mode timer compare register (mtimecmp), which causes a timer interrupt to be posted when the + // mtime register contains a value greater than or equal (mtime >= mtimecmp) to the value in the mtimecmp register. + // The interrupt remains posted until it is cleared by writing the mtimecmp register. The interrupt will only be taken + // if interrupts are enabled and the MTIE bit is set in the mie register. + always_comb begin : irq_gen + // check that the mtime cmp register is set to a meaningful value + for (int unsigned i = 0; i < NR_CORES; i++) begin + if (mtime_q >= mtimecmp_q[i]) begin + timer_irq_o[i] = 1'b1; + end else begin + timer_irq_o[i] = 1'b0; + end + end + end + + // ----------------------------- + // RTC time tracking facilities + // ----------------------------- + // 1. 
Put the RTC input through a classic two stage edge-triggered synchronizer to filter out any + // metastability effects (or at least make them unlikely :-)) + clint_sync_wedge i_sync_edge ( + .clk_i, + .rst_ni, + .serial_i ( rtc_i ), + .r_edge_o ( increase_timer ), + .f_edge_o ( ), // left open + .serial_o ( ) // left open + ); + + // Registers + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + mtime_q <= 64'b0; + mtimecmp_q <= 'b0; + msip_q <= '0; + end else begin + mtime_q <= mtime_n; + mtimecmp_q <= mtimecmp_n; + msip_q <= msip_n; + end + end + + assign ipi_o = msip_q; + + // ------------- + // Assertions + // -------------- + //pragma translate_off + `ifndef VERILATOR + // Static assertion check for appropriate bus width + initial begin + assert (AXI_DATA_WIDTH == 64) else $fatal(1, "Timer needs to interface with a 64 bit bus, everything else is not supported"); + end + `endif + //pragma translate_on + +endmodule + +// TODO(zarubaf): Replace by common-cells 2.0 +module clint_sync_wedge #( + parameter int unsigned STAGES = 2 +) ( + input logic clk_i, + input logic rst_ni, + input logic serial_i, + output logic r_edge_o, + output logic f_edge_o, + output logic serial_o +); + logic serial, serial_q; + + assign serial_o = serial_q; + assign f_edge_o = (~serial) & serial_q; + assign r_edge_o = serial & (~serial_q); + + clint_sync #( + .STAGES (STAGES) + ) i_sync ( + .clk_i, + .rst_ni, + .serial_i, + .serial_o ( serial ) + ); + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + serial_q <= 1'b0; + end else begin + serial_q <= serial; + end + end +endmodule + +module clint_sync #( + parameter int unsigned STAGES = 2 +) ( + input logic clk_i, + input logic rst_ni, + input logic serial_i, + output logic serial_o +); + + logic [STAGES-1:0] reg_q; + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + reg_q <= 'h0; + end else begin + reg_q <= {reg_q[STAGES-2:0], serial_i}; + end + end + + assign serial_o 
= reg_q[STAGES-1]; + +endmodule diff --git a/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv b/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv new file mode 100644 index 0000000..90134ca --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv @@ -0,0 +1,88 @@ +// Copyright 2015 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +`define REGS_MAX_ADR 2'd2 + +module apb_timer +#( + parameter APB_ADDR_WIDTH = 12, //APB slaves are 4KB by default + parameter TIMER_CNT = 2 // how many timers should be instantiated +) +( + input logic HCLK, + input logic HRESETn, + input logic [APB_ADDR_WIDTH-1:0] PADDR, + input logic [31:0] PWDATA, + input logic PWRITE, + input logic PSEL, + input logic PENABLE, + output logic [31:0] PRDATA, + output logic PREADY, + output logic PSLVERR, + + output logic [(TIMER_CNT * 2) - 1:0] irq_o // overflow and cmp interrupt +); + + logic [TIMER_CNT-1:0] psel_int, pready, pslverr; + logic [$clog2(TIMER_CNT) - 1:0] slave_address_int; + logic [TIMER_CNT-1:0] [31:0] prdata; + + assign slave_address_int = PADDR[$clog2(TIMER_CNT)+ `REGS_MAX_ADR + 1:`REGS_MAX_ADR + 2]; + + always_comb + begin + psel_int = '0; + psel_int[slave_address_int] = PSEL; + end + + // output mux + always_comb + begin + + if (psel_int != '0) + begin + PRDATA = prdata[slave_address_int]; + PREADY = pready[slave_address_int]; + PSLVERR = 
pslverr[slave_address_int]; + end + else + begin + PRDATA = '0; + PREADY = 1'b1; + PSLVERR = 1'b0; + end + end + + + genvar k; + + generate + for(k = 0; k < TIMER_CNT; k++) + begin : TIMER_GEN + timer #( + .APB_ADDR_WIDTH ( APB_ADDR_WIDTH ) + ) timer_i ( + .HCLK ( HCLK ), + .HRESETn ( HRESETn ), + + .PADDR ( PADDR ), + .PWDATA ( PWDATA ), + .PWRITE ( PWRITE ), + .PSEL ( psel_int[k] ), + .PENABLE ( PENABLE ), + .PRDATA ( prdata[k] ), + .PREADY ( pready[k] ), + .PSLVERR ( pslverr[k] ), + + .irq_o ( irq_o[2*k+1 : 2*k] ) + ); + end +endgenerate +endmodule diff --git a/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv b/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv new file mode 100644 index 0000000..1a3a4f1 --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv @@ -0,0 +1,145 @@ +// Copyright 2015 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+
+// define three registers per timer - timer, cmp and prescaler registers
+`define REGS_MAX_IDX             'd2
+`define REG_TIMER                 2'b00
+`define REG_TIMER_CTRL            2'b01
+`define REG_CMP                   2'b10
+`define PRESCALER_STARTBIT        'd3
+`define PRESCALER_STOPBIT         'd5
+`define ENABLE_BIT                'd0
+// timer: one 32-bit up-counter with a control and a compare register behind an APB port; irq_o[0] = overflow, irq_o[1] = compare match.
+module timer
+#(
+    parameter APB_ADDR_WIDTH = 12  //APB slaves are 4KB by default
+)
+(
+    input  logic                      HCLK,     // clock for all registers
+    input  logic                      HRESETn,  // asynchronous reset, active low
+    input  logic [APB_ADDR_WIDTH-1:0] PADDR,    // only bits [3:2] are decoded here
+    input  logic               [31:0] PWDATA,
+    input  logic                      PWRITE,
+    input  logic                      PSEL,
+    input  logic                      PENABLE,
+    output logic               [31:0] PRDATA,   // zero unless a read access is in progress
+    output logic                      PREADY,   // tied high: no wait states
+    output logic                      PSLVERR,  // tied low: no error responses
+
+    output logic                [1:0] irq_o // overflow and cmp interrupt
+);
+
+    // APB register interface
+    logic [`REGS_MAX_IDX-1:0]       register_adr;
+    assign register_adr = PADDR[`REGS_MAX_IDX + 1:2]; // word index = PADDR[3:2]; slice width now matches register_adr
+    // APB logic: we are always ready to capture the data into our regs
+    // not supporting transfer failure
+    assign PREADY  = 1'b1;
+    assign PSLVERR = 1'b0;
+    // registers
+    logic [0:`REGS_MAX_IDX] [31:0]  regs_q, regs_n; // indexed by REG_TIMER / REG_TIMER_CTRL / REG_CMP
+    logic [31:0] cycle_counter_n, cycle_counter_q;  // free-running counter used by the prescaler
+
+    logic [2:0] prescaler_int;
+
+    //irq logic
+    always_comb
+    begin
+        irq_o = 2'b0;
+
+        // overflow irq
+        if (regs_q[`REG_TIMER] == 32'hffff_ffff)
+            irq_o[0] = 1'b1;
+
+        // compare match irq if compare reg is set (a compare value of 0 disables it)
+        if (regs_q[`REG_CMP] != 'b0 && regs_q[`REG_TIMER] == regs_q[`REG_CMP])
+            irq_o[1] = 1'b1;
+
+    end
+
+    assign prescaler_int = regs_q[`REG_TIMER_CTRL][`PRESCALER_STOPBIT:`PRESCALER_STARTBIT];
+    // register write logic
+    always_comb
+    begin
+        regs_n = regs_q;
+        cycle_counter_n = cycle_counter_q + 1;
+
+        // reset timer after cmp or overflow
+        if (irq_o[0] == 1'b1 || irq_o[1] == 1'b1)
+            regs_n[`REG_TIMER] = 32'b0;
+        else if(regs_q[`REG_TIMER_CTRL][`ENABLE_BIT] && prescaler_int != 'b0 && prescaler_int == cycle_counter_q) // prescaler
+        begin
+            regs_n[`REG_TIMER] = regs_q[`REG_TIMER] + 1; //prescaler mode
+        end
+        else if (regs_q[`REG_TIMER_CTRL][`ENABLE_BIT] && prescaler_int == 'b0) // normal count mode
+            regs_n[`REG_TIMER] = regs_q[`REG_TIMER] + 1;
+
+        // reset prescaler cycle counter
+        if (cycle_counter_q >= regs_q[`REG_TIMER_CTRL]) // NOTE(review): compares against the whole control register, not prescaler_int - confirm intended
+            cycle_counter_n = 32'b0;
+
+        // written from APB bus - gets priority
+        if (PSEL && PENABLE && PWRITE)
+        begin
+
+            case (register_adr)
+                `REG_TIMER:
+                    regs_n[`REG_TIMER] = PWDATA;
+
+                `REG_TIMER_CTRL:
+                    regs_n[`REG_TIMER_CTRL] = PWDATA;
+
+                `REG_CMP:
+                begin
+                    regs_n[`REG_CMP] = PWDATA;
+                    regs_n[`REG_TIMER] = 32'b0; // reset timer if compare register is written
+                end
+            endcase
+        end
+    end
+
+    // APB register read logic
+    always_comb
+    begin
+        PRDATA = 'b0;
+
+        if (PSEL && PENABLE && !PWRITE)
+        begin
+
+            case (register_adr)
+                `REG_TIMER:
+                    PRDATA = regs_q[`REG_TIMER];
+
+                `REG_TIMER_CTRL:
+                    PRDATA = regs_q[`REG_TIMER_CTRL];
+
+                `REG_CMP:
+                    PRDATA = regs_q[`REG_CMP];
+            endcase
+
+        end
+    end
+    // synchronous part
+    always_ff @(posedge HCLK, negedge HRESETn)
+    begin
+        if(~HRESETn)
+        begin
+            regs_q          <= '{default: 32'b0};
+            cycle_counter_q <= 32'b0;
+        end
+        else
+        begin
+            regs_q          <= regs_n;
+            cycle_counter_q <= cycle_counter_n;
+        end
+    end
+
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv
new file mode 100644
index 0000000..ceaa312
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv
@@ -0,0 +1,449 @@
+// Copyright 2014-2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Igor Loi
+// Davide Rossi
+// Florian Zaruba
+
+`define OKAY    2'b00
+`define EXOKAY  2'b01
+`define SLVERR  2'b10
+`define DECERR  2'b11
+// axi2apb: AXI4 slave to APB master bridge. All five AXI channels are buffered; one FSM issues a single APB access per AXI request.
+module axi2apb
+#(
+    parameter AXI4_ADDRESS_WIDTH = 32,
+    parameter AXI4_RDATA_WIDTH   = 32,
+    parameter AXI4_WDATA_WIDTH   = 32,
+    parameter AXI4_ID_WIDTH      = 16,
+    parameter AXI4_USER_WIDTH    = 10,
+    parameter AXI_NUMBYTES       = AXI4_WDATA_WIDTH/8,
+
+    parameter BUFF_DEPTH_SLAVE   = 4,   // depth passed to every channel buffer
+    parameter APB_ADDR_WIDTH     = 32
+)
+(
+    input logic                           ACLK,
+    input logic                           ARESETn,   // asynchronous reset, active low
+    input logic                           test_en_i, // forwarded to the channel buffers
+
+    input  logic [AXI4_ID_WIDTH-1:0]      AWID_i,
+    input  logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR_i,
+    input  logic [ 7:0]                   AWLEN_i,
+    input  logic [ 2:0]                   AWSIZE_i,
+    input  logic [ 1:0]                   AWBURST_i,
+    input  logic                          AWLOCK_i,
+    input  logic [ 3:0]                   AWCACHE_i,
+    input  logic [ 2:0]                   AWPROT_i,
+    input  logic [ 3:0]                   AWREGION_i,
+    input  logic [ AXI4_USER_WIDTH-1:0]   AWUSER_i,
+    input  logic [ 3:0]                   AWQOS_i,
+    input  logic                          AWVALID_i,
+    output logic                          AWREADY_o,
+
+    input  logic [AXI4_WDATA_WIDTH-1:0]   WDATA_i,
+    input  logic [AXI_NUMBYTES-1:0]       WSTRB_i,
+    input  logic                          WLAST_i,
+    input  logic [AXI4_USER_WIDTH-1:0]    WUSER_i,
+    input  logic                          WVALID_i,
+    output logic                          WREADY_o,
+
+    output logic   [AXI4_ID_WIDTH-1:0]    BID_o,
+    output logic   [ 1:0]                 BRESP_o,
+    output logic                          BVALID_o,
+    output logic   [AXI4_USER_WIDTH-1:0]  BUSER_o,
+    input  logic                          BREADY_i,
+
+    input  logic [AXI4_ID_WIDTH-1:0]      ARID_i,
+    input  logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR_i,
+    input  logic [ 7:0]                   ARLEN_i,
+    input  logic [ 2:0]                   ARSIZE_i,
+    input  logic [ 1:0]                   ARBURST_i,
+    input  logic                          ARLOCK_i,
+    input  logic [ 3:0]                   ARCACHE_i,
+    input  logic [ 2:0]                   ARPROT_i,
+    input  logic [ 3:0]                   ARREGION_i,
+    input  logic [ AXI4_USER_WIDTH-1:0]   ARUSER_i,
+    input  logic [ 3:0]                   ARQOS_i,
+    input  logic                          ARVALID_i,
+    output logic                          ARREADY_o,
+
+    output logic [AXI4_ID_WIDTH-1:0]      RID_o,
+    output logic [AXI4_RDATA_WIDTH-1:0]   RDATA_o,
+    output logic [ 1:0]                   RRESP_o,
+    output logic                          RLAST_o,
+    output logic [AXI4_USER_WIDTH-1:0]    RUSER_o,
+    output logic                          RVALID_o,
+    input  logic                          RREADY_i,
+
+    output logic                          PENABLE,
+    output logic                          PWRITE,
+    output logic [APB_ADDR_WIDTH-1:0]     PADDR,
+    output logic                          PSEL,
+    output logic [AXI4_WDATA_WIDTH-1:0]   PWDATA,
+    input  logic [AXI4_RDATA_WIDTH-1:0]   PRDATA,
+    input  logic                          PREADY,
+    input  logic                          PSLVERR   // NOTE(review): never read in this module; BRESP/RRESP are always `OKAY
+);
+
+    // --------------------
+    // AXI write address bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]      AWID;
+    logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR;
+    logic [ 7:0]                   AWLEN;    // NOTE(review): buffered but never read by the FSM
+    logic [ 2:0]                   AWSIZE;
+    logic [ 1:0]                   AWBURST;
+    logic                          AWLOCK;
+    logic [ 3:0]                   AWCACHE;
+    logic [ 2:0]                   AWPROT;
+    logic [ 3:0]                   AWREGION;
+    logic [ AXI4_USER_WIDTH-1:0]   AWUSER;
+    logic [ 3:0]                   AWQOS;
+    logic                          AWVALID;
+    logic                          AWREADY;
+    // --------------------
+    // AXI write data bus
+    // --------------------
+    logic [AXI4_WDATA_WIDTH-1:0]   WDATA;  // from FIFO
+    logic [AXI_NUMBYTES-1:0]       WSTRB;  // from FIFO; NOTE(review): never read, APB writes are always full-width
+    logic                          WLAST;  // from FIFO
+    logic [AXI4_USER_WIDTH-1:0]    WUSER;  // from FIFO
+    logic                          WVALID; // from FIFO
+    logic                          WREADY; // TO FIFO
+    // --------------------
+    // AXI write response bus
+    // --------------------
+    logic   [AXI4_ID_WIDTH-1:0]    BID;
+    logic   [ 1:0]                 BRESP;
+    logic                          BVALID;
+    logic   [AXI4_USER_WIDTH-1:0]  BUSER;
+    logic                          BREADY;
+    // --------------------
+    // AXI read address bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]      ARID;
+    logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR;
+    logic [ 7:0]                   ARLEN;    // NOTE(review): buffered but never read; every read returns one beat with RLAST set
+    logic [ 2:0]                   ARSIZE;
+    logic [ 1:0]                   ARBURST;
+    logic                          ARLOCK;
+    logic [ 3:0]                   ARCACHE;
+    logic [ 2:0]                   ARPROT;
+    logic [ 3:0]                   ARREGION;
+    logic [ AXI4_USER_WIDTH-1:0]   ARUSER;
+    logic [ 3:0]                   ARQOS;
+    logic                          ARVALID;
+    logic                          ARREADY;
+    // --------------------
+    // AXI read data bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]      RID;
+    logic [AXI4_RDATA_WIDTH-1:0]   RDATA;
+    logic [ 1:0]                   RRESP;
+    logic                          RLAST;
+    logic [AXI4_USER_WIDTH-1:0]    RUSER;
+    logic                          RVALID;
+    logic                          RREADY;
+
+    enum logic [2:0] { IDLE,
+                       DONE_SINGLE_RD,
+                       WAIT_W_PREADY,
+                       WAIT_R_PREADY,
+                       SEND_B_RESP
+                    } CS, NS;
+
+    logic [AXI4_ADDRESS_WIDTH-1:0] address;
+    logic                          sample_RDATA; // capture PRDATA into RDATA_Q on the next clock edge
+
+    logic [AXI4_RDATA_WIDTH-1:0]   RDATA_Q;
+
+    logic read_req;
+    logic write_req;
+
+    assign PENABLE = write_req | read_req;
+    assign PWRITE  = write_req;
+    assign PADDR   = address[APB_ADDR_WIDTH-1:0];
+    assign PWDATA  = WDATA;
+    assign PSEL    = 1'b1;  // constantly selected; accesses are qualified by PENABLE only
+
+    // AXI WRITE ADDRESS CHANNEL BUFFER
+    axi_aw_buffer #(
+        .ID_WIDTH     ( AXI4_ID_WIDTH      ),
+        .ADDR_WIDTH   ( AXI4_ADDRESS_WIDTH ),
+        .USER_WIDTH   ( AXI4_USER_WIDTH    ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE   )
+    ) slave_aw_buffer_i (
+        .clk_i           ( ACLK        ),
+        .rst_ni          ( ARESETn     ),
+        .test_en_i       ( test_en_i   ),
+
+        .slave_valid_i   ( AWVALID_i   ),
+        .slave_addr_i    ( AWADDR_i    ),
+        .slave_prot_i    ( AWPROT_i    ),
+        .slave_region_i  ( AWREGION_i  ),
+        .slave_len_i     ( AWLEN_i     ),
+        .slave_size_i    ( AWSIZE_i    ),
+        .slave_burst_i   ( AWBURST_i   ),
+        .slave_lock_i    ( AWLOCK_i    ),
+        .slave_cache_i   ( AWCACHE_i   ),
+        .slave_qos_i     ( AWQOS_i     ),
+        .slave_id_i      ( AWID_i      ),
+        .slave_user_i    ( AWUSER_i    ),
+        .slave_ready_o   ( AWREADY_o   ),
+
+        .master_valid_o  ( AWVALID     ),
+        .master_addr_o   ( AWADDR      ),
+        .master_prot_o   ( AWPROT      ),
+        .master_region_o ( AWREGION    ),
+        .master_len_o    ( AWLEN       ),
+        .master_size_o   ( AWSIZE      ),
+        .master_burst_o  ( AWBURST     ),
+        .master_lock_o   ( AWLOCK      ),
+        .master_cache_o  ( AWCACHE     ),
+        .master_qos_o    ( AWQOS       ),
+        .master_id_o     ( AWID        ),
+        .master_user_o   ( AWUSER      ),
+        .master_ready_i  ( AWREADY     )
+    );
+
+    // AXI READ ADDRESS CHANNEL BUFFER
+    axi_ar_buffer #(
+        .ID_WIDTH     ( AXI4_ID_WIDTH      ),
+        .ADDR_WIDTH   ( AXI4_ADDRESS_WIDTH ),
+        .USER_WIDTH   ( AXI4_USER_WIDTH    ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE   )
+    ) slave_ar_buffer_i (
+        .clk_i           ( ACLK       ),
+        .rst_ni          ( ARESETn    ),
+        .test_en_i       ( test_en_i  ),
+
+        .slave_valid_i   ( ARVALID_i  ),
+        .slave_addr_i    ( ARADDR_i   ),
+        .slave_prot_i    ( ARPROT_i   ),
+        .slave_region_i  ( ARREGION_i ),
+        .slave_len_i     ( ARLEN_i    ),
+        .slave_size_i    ( ARSIZE_i   ),
+        .slave_burst_i   ( ARBURST_i  ),
+        .slave_lock_i    ( ARLOCK_i   ),
+        .slave_cache_i   ( ARCACHE_i  ),
+        .slave_qos_i     ( ARQOS_i    ),
+        .slave_id_i      ( ARID_i     ),
+        .slave_user_i    ( ARUSER_i   ),
+        .slave_ready_o   ( ARREADY_o  ),
+
+        .master_valid_o  ( ARVALID    ),
+        .master_addr_o   ( ARADDR     ),
+        .master_prot_o   ( ARPROT     ),
+        .master_region_o ( ARREGION   ),
+        .master_len_o    ( ARLEN      ),
+        .master_size_o   ( ARSIZE     ),
+        .master_burst_o  ( ARBURST    ),
+        .master_lock_o   ( ARLOCK     ),
+        .master_cache_o  ( ARCACHE    ),
+        .master_qos_o    ( ARQOS      ),
+        .master_id_o     ( ARID       ),
+        .master_user_o   ( ARUSER     ),
+        .master_ready_i  ( ARREADY    )
+    );
+
+    // AXI WRITE DATA CHANNEL BUFFER
+    axi_w_buffer #(
+        .DATA_WIDTH(AXI4_WDATA_WIDTH),
+        .USER_WIDTH(AXI4_USER_WIDTH),
+        .BUFFER_DEPTH(BUFF_DEPTH_SLAVE)
+    ) slave_w_buffer_i (
+        .clk_i          ( ACLK      ),
+        .rst_ni         ( ARESETn   ),
+        .test_en_i      ( test_en_i ),
+
+        .slave_valid_i  ( WVALID_i  ),
+        .slave_data_i   ( WDATA_i   ),
+        .slave_strb_i   ( WSTRB_i   ),
+        .slave_user_i   ( WUSER_i   ),
+        .slave_last_i   ( WLAST_i   ),
+        .slave_ready_o  ( WREADY_o  ),
+
+        .master_valid_o ( WVALID    ),
+        .master_data_o  ( WDATA     ),
+        .master_strb_o  ( WSTRB     ),
+        .master_user_o  ( WUSER     ),
+        .master_last_o  ( WLAST     ),
+        .master_ready_i ( WREADY    )
+    );
+
+    axi_r_buffer #(
+        .ID_WIDTH     ( AXI4_ID_WIDTH    ),
+        .DATA_WIDTH   ( AXI4_RDATA_WIDTH ),
+        .USER_WIDTH   ( AXI4_USER_WIDTH  ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE )
+    ) slave_r_buffer_i (
+        .clk_i          ( ACLK       ),
+        .rst_ni         ( ARESETn    ),
+        .test_en_i      ( test_en_i  ),
+
+        .slave_valid_i  ( RVALID     ),
+        .slave_data_i   ( RDATA      ),
+        .slave_resp_i   ( RRESP      ),
+        .slave_user_i   ( RUSER      ),
+        .slave_id_i     ( RID        ),
+        .slave_last_i   ( RLAST      ),
+        .slave_ready_o  ( RREADY     ),
+
+        .master_valid_o ( RVALID_o   ),
+        .master_data_o  ( RDATA_o    ),
+        .master_resp_o  ( RRESP_o    ),
+        .master_user_o  ( RUSER_o    ),
+        .master_id_o    ( RID_o      ),
+        .master_last_o  ( RLAST_o    ),
+        .master_ready_i ( RREADY_i   )
+    );
+
+    axi_b_buffer #(
+        .ID_WIDTH(AXI4_ID_WIDTH),
+        .USER_WIDTH(AXI4_USER_WIDTH),
+        .BUFFER_DEPTH(BUFF_DEPTH_SLAVE)
+    ) slave_b_buffer (
+        .clk_i          ( ACLK      ),
+        .rst_ni         ( ARESETn   ),
+        .test_en_i      ( test_en_i ),
+
+        .slave_valid_i  ( BVALID    ),
+        .slave_resp_i   ( BRESP     ),
+        .slave_id_i     ( BID       ),
+        .slave_user_i   ( BUSER     ),
+        .slave_ready_o  ( BREADY    ),
+
+        .master_valid_o ( BVALID_o  ),
+        .master_resp_o  ( BRESP_o   ),
+        .master_id_o    ( BID_o     ),
+        .master_user_o  ( BUSER_o   ),
+        .master_ready_i ( BREADY_i  )
+    );
+
+    always_comb begin
+        NS           = CS;   // default: hold state, so no branch leaves NS unassigned (was an inferred latch)
+        read_req     = 1'b0;
+        write_req    = 1'b0;
+        address      = '0;
+
+        sample_RDATA = 1'b0;
+
+        ARREADY      = 1'b0;
+        AWREADY      = 1'b0;
+        WREADY       = 1'b0;
+
+        BVALID       = 1'b0;
+        BRESP        = `OKAY;
+        BID          = AWID;
+        BUSER        = AWUSER;
+
+        RVALID       = 1'b0;
+        RLAST        = 1'b0;
+        RID          = ARID;
+        RUSER        = ARUSER;
+        RRESP        = `OKAY;
+        RDATA        = RDATA_Q;
+
+        case(CS)
+
+            WAIT_R_PREADY: begin
+                read_req     = 1'b1;
+                address      = ARADDR[APB_ADDR_WIDTH - 1 : 0];
+                sample_RDATA = PREADY;
+
+                if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                    NS = DONE_SINGLE_RD;
+                end
+            end
+
+            WAIT_W_PREADY: begin
+                write_req = 1'b1;
+                address   = AWADDR[APB_ADDR_WIDTH - 1:0];
+                // There is a Pending WRITE!!
+                if (PREADY == 1'b1) begin // APB is READY --> WDATA is latched
+                    NS = SEND_B_RESP;
+                end
+            end
+
+            IDLE: begin  // reads take priority over writes
+                if (ARVALID == 1'b1) begin
+                    read_req     = 1'b1;
+                    address      = ARADDR[APB_ADDR_WIDTH - 1:0];
+                    sample_RDATA = PREADY;
+
+                    if(PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                        NS = DONE_SINGLE_RD;
+                    end else begin // APB not ready
+                        NS = WAIT_R_PREADY;
+                    end
+                end else begin
+                    if (AWVALID) begin
+                        address = AWADDR[APB_ADDR_WIDTH - 1:0];
+                        if (WVALID) begin
+                            write_req = 1'b1;
+
+                            // There is a Pending WRITE!!
+                            if (PREADY == 1'b1) begin// APB is READY --> WDATA is latched
+                                NS = SEND_B_RESP;
+                            end else begin // APB not READY
+                                NS = WAIT_W_PREADY;
+                            end
+                        end else begin // GOT ADDRESS WRITE, not DATA
+                            write_req = 1'b0;
+                            address   = '0;
+                            NS        = IDLE;
+                        end
+                    end
+                end
+            end
+
+            SEND_B_RESP: begin
+                // pop the AW and W buffers only once the B buffer accepts the response
+                BVALID  = 1'b1;
+                address = '0;
+
+                if (BREADY) begin
+                    NS      = IDLE;
+                    AWREADY = 1'b1;
+                    WREADY  = 1'b1;
+                end
+            end
+
+            DONE_SINGLE_RD: begin
+                // pop the AR buffer only once the R buffer accepts the data
+                RVALID  = 1'b1;
+                RLAST   = 1;
+                address = '0;
+
+                if (RREADY) begin // ready to send back the rdata
+                    NS      = IDLE;
+                    ARREADY = 1'b1;
+                end
+            end
+
+            default: NS = IDLE;
+
+        endcase
+    end
+
+    always_ff @(posedge ACLK, negedge ARESETn) begin
+        if (ARESETn == 1'b0) begin
+            CS      <= IDLE;
+            RDATA_Q <= '0;
+        end else begin
+            CS <= NS;
+
+            if (sample_RDATA)
+                RDATA_Q <= PRDATA;
+        end
+    end
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv
new file mode 100644
index 0000000..c98b179
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv
@@ -0,0 +1,745 @@
+// Copyright 2014-2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Igor Loi +// Davide Rossi +// Florian Zaruba + +`define OKAY 2'b00 +`define EXOKAY 2'b01 +`define SLVERR 2'b10 +`define DECERR 2'b11 + +module axi2apb_64_32 #( + parameter int unsigned AXI4_ADDRESS_WIDTH = 32, + parameter int unsigned AXI4_RDATA_WIDTH = 64, + parameter int unsigned AXI4_WDATA_WIDTH = 64, + parameter int unsigned AXI4_ID_WIDTH = 16, + parameter int unsigned AXI4_USER_WIDTH = 10, + parameter int unsigned AXI_NUMBYTES = AXI4_WDATA_WIDTH/8, + + parameter int unsigned BUFF_DEPTH_SLAVE = 4, + parameter int unsigned APB_NUM_SLAVES = 8, + parameter int unsigned APB_ADDR_WIDTH = 12 +) +( + input logic ACLK, + input logic ARESETn, + input logic test_en_i, + // --------------------------------------------------------- + // AXI TARG Port Declarations ------------------------------ + // --------------------------------------------------------- + //AXI write address bus -------------- // USED// ----------- + input logic [AXI4_ID_WIDTH-1:0] AWID_i , + input logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR_i , + input logic [ 7:0] AWLEN_i , + input logic [ 2:0] AWSIZE_i , + input logic [ 1:0] AWBURST_i , + input logic AWLOCK_i , + input logic [ 3:0] AWCACHE_i , + input logic [ 2:0] AWPROT_i , + input logic [ 3:0] AWREGION_i , + input logic [ AXI4_USER_WIDTH-1:0] AWUSER_i , + input logic [ 3:0] AWQOS_i , + input logic AWVALID_i , + output logic AWREADY_o , + // --------------------------------------------------------- + + //AXI write data bus -------------- // USED// -------------- + input logic [AXI_NUMBYTES-1:0][7:0] WDATA_i , + input logic [AXI_NUMBYTES-1:0] WSTRB_i , + input logic WLAST_i , + input logic [AXI4_USER_WIDTH-1:0] WUSER_i , + input logic WVALID_i , + output logic WREADY_o , + // --------------------------------------------------------- + + //AXI write response bus -------------- // USED// ---------- + output logic [AXI4_ID_WIDTH-1:0] BID_o , + output logic [ 1:0] BRESP_o , + output logic BVALID_o , + output logic [AXI4_USER_WIDTH-1:0] BUSER_o , + input 
logic BREADY_i , + // --------------------------------------------------------- + + //AXI read address bus ------------------------------------- + input logic [AXI4_ID_WIDTH-1:0] ARID_i , + input logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR_i , + input logic [ 7:0] ARLEN_i , + input logic [ 2:0] ARSIZE_i , + input logic [ 1:0] ARBURST_i , + input logic ARLOCK_i , + input logic [ 3:0] ARCACHE_i , + input logic [ 2:0] ARPROT_i , + input logic [ 3:0] ARREGION_i , + input logic [ AXI4_USER_WIDTH-1:0] ARUSER_i , + input logic [ 3:0] ARQOS_i , + input logic ARVALID_i , + output logic ARREADY_o , + // --------------------------------------------------------- + + //AXI read data bus ---------------------------------------- + output logic [AXI4_ID_WIDTH-1:0] RID_o , + output logic [AXI4_RDATA_WIDTH-1:0] RDATA_o , + output logic [ 1:0] RRESP_o , + output logic RLAST_o , + output logic [AXI4_USER_WIDTH-1:0] RUSER_o , + output logic RVALID_o , + input logic RREADY_i , + // --------------------------------------------------------- + + output logic PENABLE , + output logic PWRITE , + output logic [APB_ADDR_WIDTH-1:0] PADDR , + output logic PSEL , + output logic [31:0] PWDATA , + input logic [31:0] PRDATA , + input logic PREADY , + input logic PSLVERR +); + + // -------------------- + // AXI write address bus + // -------------------- + logic [AXI4_ID_WIDTH-1:0] AWID; + logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR; + logic [ 7:0] AWLEN; + logic [ 2:0] AWSIZE; + logic [ 1:0] AWBURST; + logic AWLOCK; + logic [ 3:0] AWCACHE; + logic [ 2:0] AWPROT; + logic [ 3:0] AWREGION; + logic [ AXI4_USER_WIDTH-1:0] AWUSER; + logic [ 3:0] AWQOS; + logic AWVALID; + logic AWREADY; + // -------------------- + // AXI write data bus + // -------------------- + logic [1:0][31:0] WDATA; // from FIFO + logic [AXI_NUMBYTES-1:0] WSTRB; // from FIFO + logic WLAST; // from FIFO + logic [AXI4_USER_WIDTH-1:0] WUSER; // from FIFO + logic WVALID; // from FIFO + logic WREADY; // TO FIFO + // -------------------- + // AXI write 
response bus + // -------------------- + logic [AXI4_ID_WIDTH-1:0] BID; + logic [ 1:0] BRESP; + logic BVALID; + logic [AXI4_USER_WIDTH-1:0] BUSER; + logic BREADY; + // -------------------- + // AXI read address bus + // -------------------- + logic [AXI4_ID_WIDTH-1:0] ARID; + logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR; + logic [ 7:0] ARLEN; + logic [ 2:0] ARSIZE; + logic [ 1:0] ARBURST; + logic ARLOCK; + logic [ 3:0] ARCACHE; + logic [ 2:0] ARPROT; + logic [ 3:0] ARREGION; + logic [ AXI4_USER_WIDTH-1:0] ARUSER; + logic [ 3:0] ARQOS; + logic ARVALID; + logic ARREADY; + // -------------------- + // AXI read data bus + // -------------------- + logic [AXI4_ID_WIDTH-1:0] RID; + logic [1:0][31:0] RDATA; + logic [ 1:0] RRESP; + logic RLAST; + logic [AXI4_USER_WIDTH-1:0] RUSER; + logic RVALID; + logic RREADY; + + enum logic [3:0] { IDLE, + SINGLE_RD, SINGLE_RD_64, + BURST_RD_1, BURST_RD, BURST_RD_64, + BURST_WR, BURST_WR_64, + SINGLE_WR,SINGLE_WR_64, + WAIT_R_PREADY, WAIT_W_PREADY + } CS, NS; + + logic W_word_sel; + + logic [APB_ADDR_WIDTH-1:0] address; + + logic read_req; + logic write_req; + + logic sample_AR; + logic [8:0] ARLEN_Q; + logic decr_ARLEN; + + logic sample_AW; + logic [8:0] AWLEN_Q; + logic decr_AWLEN; + + logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR_Q; + logic incr_ARADDR; + + logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR_Q; + logic incr_AWADDR; + + logic sample_RDATA_0; // sample the first 32 bit chunk to be aggregated in 64 bit rdata + logic sample_RDATA_1; // sample the second 32 bit chunk to be aggregated in 64 bit rdata + logic [31:0] RDATA_Q_0; + logic [31:0] RDATA_Q_1; + + assign PENABLE = write_req | read_req; + assign PWRITE = write_req; + assign PADDR = address[APB_ADDR_WIDTH-1:0]; + + assign PWDATA = WDATA[W_word_sel]; + assign PSEL = 1'b1; + + // AXI WRITE ADDRESS CHANNEL BUFFER + axi_aw_buffer #( + .ID_WIDTH ( AXI4_ID_WIDTH ), + .ADDR_WIDTH ( AXI4_ADDRESS_WIDTH ), + .USER_WIDTH ( AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_aw_buffer_i ( + 
.clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + .slave_valid_i ( AWVALID_i ), + .slave_addr_i ( AWADDR_i ), + .slave_prot_i ( AWPROT_i ), + .slave_region_i ( AWREGION_i ), + .slave_len_i ( AWLEN_i ), + .slave_size_i ( AWSIZE_i ), + .slave_burst_i ( AWBURST_i ), + .slave_lock_i ( AWLOCK_i ), + .slave_cache_i ( AWCACHE_i ), + .slave_qos_i ( AWQOS_i ), + .slave_id_i ( AWID_i ), + .slave_user_i ( AWUSER_i ), + .slave_ready_o ( AWREADY_o ), + .master_valid_o ( AWVALID ), + .master_addr_o ( AWADDR ), + .master_prot_o ( AWPROT ), + .master_region_o ( AWREGION ), + .master_len_o ( AWLEN ), + .master_size_o ( AWSIZE ), + .master_burst_o ( AWBURST ), + .master_lock_o ( AWLOCK ), + .master_cache_o ( AWCACHE ), + .master_qos_o ( AWQOS ), + .master_id_o ( AWID ), + .master_user_o ( AWUSER ), + .master_ready_i ( AWREADY ) + ); + // AXI WRITE ADDRESS CHANNEL BUFFER + axi_ar_buffer #( + .ID_WIDTH ( AXI4_ID_WIDTH ), + .ADDR_WIDTH ( AXI4_ADDRESS_WIDTH ), + .USER_WIDTH ( AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_ar_buffer_i ( + .clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + .slave_valid_i ( ARVALID_i ), + .slave_addr_i ( ARADDR_i ), + .slave_prot_i ( ARPROT_i ), + .slave_region_i ( ARREGION_i ), + .slave_len_i ( ARLEN_i ), + .slave_size_i ( ARSIZE_i ), + .slave_burst_i ( ARBURST_i ), + .slave_lock_i ( ARLOCK_i ), + .slave_cache_i ( ARCACHE_i ), + .slave_qos_i ( ARQOS_i ), + .slave_id_i ( ARID_i ), + .slave_user_i ( ARUSER_i ), + .slave_ready_o ( ARREADY_o ), + .master_valid_o ( ARVALID ), + .master_addr_o ( ARADDR ), + .master_prot_o ( ARPROT ), + .master_region_o ( ARREGION ), + .master_len_o ( ARLEN ), + .master_size_o ( ARSIZE ), + .master_burst_o ( ARBURST ), + .master_lock_o ( ARLOCK ), + .master_cache_o ( ARCACHE ), + .master_qos_o ( ARQOS ), + .master_id_o ( ARID ), + .master_user_o ( ARUSER ), + .master_ready_i ( ARREADY ) + ); + axi_w_buffer #( + .DATA_WIDTH ( AXI4_WDATA_WIDTH ), + .USER_WIDTH ( 
AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_w_buffer_i ( + .clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + .slave_valid_i ( WVALID_i ), + .slave_data_i ( WDATA_i ), + .slave_strb_i ( WSTRB_i ), + .slave_user_i ( WUSER_i ), + .slave_last_i ( WLAST_i ), + .slave_ready_o ( WREADY_o ), + .master_valid_o ( WVALID ), + .master_data_o ( WDATA ), + .master_strb_o ( WSTRB ), + .master_user_o ( WUSER ), + .master_last_o ( WLAST ), + .master_ready_i ( WREADY ) + ); + axi_r_buffer #( + .ID_WIDTH ( AXI4_ID_WIDTH ), + .DATA_WIDTH ( AXI4_RDATA_WIDTH ), + .USER_WIDTH ( AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_r_buffer_i ( + .clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + .slave_valid_i ( RVALID ), + .slave_data_i ( RDATA ), + .slave_resp_i ( RRESP ), + .slave_user_i ( RUSER ), + .slave_id_i ( RID ), + .slave_last_i ( RLAST ), + .slave_ready_o ( RREADY ), + .master_valid_o ( RVALID_o ), + .master_data_o ( RDATA_o ), + .master_resp_o ( RRESP_o ), + .master_user_o ( RUSER_o ), + .master_id_o ( RID_o ), + .master_last_o ( RLAST_o ), + .master_ready_i ( RREADY_i ) + ); + + axi_b_buffer #( + .ID_WIDTH ( AXI4_ID_WIDTH ), + .USER_WIDTH ( AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_b_buffer_i ( + .clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + + .slave_valid_i ( BVALID ), + .slave_resp_i ( BRESP ), + .slave_id_i ( BID ), + .slave_user_i ( BUSER ), + .slave_ready_o ( BREADY ), + + .master_valid_o ( BVALID_o ), + .master_resp_o ( BRESP_o ), + .master_id_o ( BID_o ), + .master_user_o ( BUSER_o ), + .master_ready_i ( BREADY_i ) + ); + + always_comb begin + read_req = 1'b0; + write_req = 1'b0; + W_word_sel = 1'b0; // Write Word Selector + + sample_AW = 1'b0; + decr_AWLEN = 1'b0; + sample_AR = 1'b0; + decr_ARLEN = 1'b0; + + incr_AWADDR = 1'b0; + incr_ARADDR = 1'b0; + + sample_RDATA_0 = 1'b0; + sample_RDATA_1 = 1'b0; + + ARREADY = 1'b0; + AWREADY = 1'b0; + WREADY 
= 1'b0; + RDATA = '0; + + BVALID = 1'b0; + BRESP = `OKAY; + BID = AWID; + BUSER = AWUSER; + + RVALID = 1'b0; + RLAST = 1'b0; + RID = ARID; + RUSER = ARUSER; + RRESP = `OKAY; + + case(CS) + + WAIT_R_PREADY: begin + sample_AR = 1'b0; + read_req = 1'b1; + address = ARADDR; + + if (PREADY == 1'b1) begin// APB is READY --> RDATA is AVAILABLE + if (ARLEN == 0) begin + case (ARSIZE) + 3'h3: begin + NS = SINGLE_RD_64; + if (ARADDR[2:0] == 3'h4) + sample_RDATA_1 = 1'b1; + else sample_RDATA_0 = 1'b1; + end + + default: begin + NS = SINGLE_RD; + if (ARADDR[2:0] == 3'h4) + sample_RDATA_1 = 1'b1; + else + sample_RDATA_0 = 1'b1; + end + endcase + end else begin // ARLEN > 0 --> BURST + NS = BURST_RD_64; + sample_RDATA_0 = 1'b1; + decr_ARLEN = 1'b1; + incr_ARADDR = 1'b1; + end + end else begin // APB not ready + NS = WAIT_R_PREADY; + end + end + + WAIT_W_PREADY: begin + address = AWADDR; + write_req = 1'b1; + + if (AWADDR[2:0] == 3'h4) + W_word_sel = 1'b1; + else + W_word_sel = 1'b0; + + // There is a Pending WRITE!! 
+ if (PREADY == 1'b1) begin // APB is READY --> WDATA is LAtched + if (AWLEN == 0) begin // single write + case (AWSIZE) + 3'h3: NS = SINGLE_WR_64; + default: NS = SINGLE_WR; + endcase + end else begin // BURST WRITE + sample_AW = 1'b1; + NS = BURST_WR_64; + end + end else begin // APB not READY + NS = WAIT_W_PREADY; + end + end + + IDLE: begin + if (ARVALID == 1'b1) begin + sample_AR = 1'b1; + read_req = 1'b1; + address = ARADDR; + + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + if (ARLEN == 0) begin + case (ARSIZE) + 3'h3: begin + NS = SINGLE_RD_64; + if (ARADDR[2:0] == 4) + sample_RDATA_1 = 1'b1; + else + sample_RDATA_0 = 1'b1; + end + default: begin + NS = SINGLE_RD; + if (ARADDR[2:0] == 4) + sample_RDATA_1 = 1'b1; + else + sample_RDATA_0 = 1'b1; + end + endcase end else begin //ARLEN > 0 --> BURST + NS = BURST_RD_64; + sample_RDATA_0 = 1'b1; + end + end else begin // APB not ready + NS = WAIT_R_PREADY; + end + end else begin + + if (AWVALID) begin //: _VALID_AW_REQ_ + if (WVALID) begin // : _VALID_W_REQ_ + write_req = 1'b1; + address = AWADDR; + + if (AWADDR[2:0] == 3'h4) + W_word_sel = 1'b1; + else + W_word_sel = 1'b0; + + // There is a Pending WRITE!! 
+ if (PREADY == 1'b1) begin// APB is READY --> WDATA is LAtched _APB_SLAVE_READY_ + if(AWLEN == 0) begin //: _SINGLE_WRITE_ + case(AWSIZE) + 3'h3: NS = SINGLE_WR_64; + default: NS = SINGLE_WR; + endcase + end else begin // BURST WRITE + sample_AW = 1'b1; + if ((AWADDR[2:0] == 3'h4) && (WSTRB[7:4] == 0)) + incr_AWADDR = 1'b0; + else + incr_AWADDR = 1'b1; + NS = BURST_WR_64; + end + end else begin// APB not READY + NS = WAIT_W_PREADY; + end + end else begin // GOT ADDRESS WRITE, not DATA + write_req = 1'b0; + address = '0; + NS = IDLE; + end + end else begin// No requests + NS = IDLE; + address = '0; + end + end + end + + SINGLE_WR_64: begin + address = AWADDR + 4; + W_word_sel = 1'b1; // write the Second data chunk + write_req = WVALID; + if (WVALID) begin + if (PREADY == 1'b1) + NS = SINGLE_WR; + else + NS = SINGLE_WR_64; + end else begin + NS = SINGLE_WR_64; + end + end + + SINGLE_WR: begin + BVALID = 1'b1; + address = '0; + if (BREADY) begin + NS = IDLE; + AWREADY = 1'b1; + WREADY = 1'b1; + end else begin + NS = SINGLE_WR; + end + end + + BURST_WR_64: begin + W_word_sel = 1'b1; // write the Second data chunk first + write_req = WVALID & (|WSTRB[7:4]); + address = AWADDR_Q; // second Chunk, Fixzed Burst + + if (WVALID) begin + if (&WSTRB[7:4]) begin + if(PREADY == 1'b1) begin + NS = BURST_WR; + WREADY = 1'b1; // pop onother data from the WDATA fifo + decr_AWLEN = 1'b1; // decrement the remaining BURST beat + incr_AWADDR = 1'b1; // increment address + end else begin + NS = BURST_WR_64; + end + end else begin + NS = BURST_WR; + WREADY = 1'b1; // pop onother data from the WDATA fifo + decr_AWLEN = 1'b1; // decrement the remaining BURST beat + incr_AWADDR = 1'b1; // increment address + end + end else begin + NS = BURST_WR_64; + end + end + + BURST_WR: begin + address = AWADDR_Q; // second Chunk, Fixzed Burst + if (AWLEN_Q == 0) begin // last : _BURST_COMPLETED_ + BVALID = 1'b1; + if (BREADY) begin + NS = IDLE; + AWREADY = 1'b1; + end else + NS = BURST_WR; + end else 
begin //: _BUSRST_NOT_COMPLETED_ + W_word_sel = 1'b0; // write the Second data chunk first + write_req = WVALID & (&WSTRB[3:0]); + if (WVALID) begin + if (PREADY == 1'b1) begin + NS = BURST_WR_64; + incr_AWADDR = 1'b1; + decr_AWLEN = 1'b1; //decrement the remaining BURST beat + end else + NS = BURST_WR; + end else begin + NS = BURST_WR_64; + end + end + end + + BURST_RD_64: begin + read_req = 1'b1; + address = ARADDR_Q; + + if (ARLEN_Q == 0) begin // burst completed + NS = IDLE; + ARREADY = 1'b1; + end else begin + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + decr_ARLEN = 1'b1; + sample_RDATA_1 = 1'b1; + NS = BURST_RD; + + if (ARADDR_Q[2:0] == 3'h4) + incr_ARADDR = 1'b1; + else + incr_ARADDR = 1'b0; + end + else begin + NS = BURST_RD_64; + end + end + end + + BURST_RD: begin + RVALID = 1'b1; + RDATA[0] = RDATA_Q_0; + RDATA[1] = RDATA_Q_1; + RLAST = (ARLEN_Q == 0) ? 1'b1 : 1'b0; + address = ARADDR_Q; + + if (RREADY) begin // ready to send back the rdata + if (ARLEN_Q == 0) begin // burst completed + NS = IDLE; + ARREADY = 1'b1; + end else begin //: _READ_BUSRST_NOT_COMPLETED_ + read_req = 1'b1; + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + sample_RDATA_0 = 1'b1; + NS = BURST_RD_64; + incr_ARADDR = 1'b1; + decr_ARLEN = 1'b1; + end else begin + NS = BURST_RD_1; + end + end + end else begin // NOT ready to send back the rdata + NS = BURST_RD; + end + end + + BURST_RD_1: begin + read_req = 1'b1; + address = ARADDR_Q; + + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + sample_RDATA_0 = 1'b1; + NS = BURST_RD_64; + incr_ARADDR = 1'b1; + decr_ARLEN = 1'b1; + end else begin + NS = BURST_RD_1; + end + end + + SINGLE_RD: begin + RVALID = 1'b1; + RDATA[0] = RDATA_Q_0; + RDATA[1] = RDATA_Q_1; + RLAST = 1; + address = '0; + + if (RREADY) begin // ready to send back the rdata + NS = IDLE; + ARREADY = 1'b1; + end else begin // NOT ready to send back the rdata + NS = SINGLE_RD; + end + end + + SINGLE_RD_64: begin + 
read_req = 1'b1; + address = ARADDR + 4; + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + NS = SINGLE_RD; + if(ARADDR[2:0] == 3'h4) + sample_RDATA_0 = 1'b1; + else + sample_RDATA_1 = 1'b1; + end else begin + NS = SINGLE_RD_64; + end + end + + default: begin + NS = IDLE; + address = '0; + end + endcase + end + + // ----------- + // Registers + // ----------- + always_ff @(posedge ACLK, negedge ARESETn) begin + if (ARESETn == 1'b0) begin + CS <= IDLE; + //Read Channel + ARLEN_Q <= '0; + AWADDR_Q <= '0; + //Write Channel + AWLEN_Q <= '0; + RDATA_Q_0 <= '0; + RDATA_Q_1 <= '0; + ARADDR_Q <= '0; + end else begin + CS <= NS; + + if (sample_AR) begin + ARLEN_Q <= {ARLEN,1'b0} + 2; + end else if (decr_ARLEN) begin + ARLEN_Q <= ARLEN_Q - 1; + end + + if (sample_RDATA_0) + RDATA_Q_0 <= PRDATA; + + if (sample_RDATA_1) + RDATA_Q_1 <= PRDATA; + + case ({sample_AW, decr_AWLEN}) + 2'b00: AWLEN_Q <= AWLEN_Q; + 2'b01: AWLEN_Q <= AWLEN_Q - 1; + 2'b10: AWLEN_Q <= {AWLEN, 1'b0} + 1; + 2'b11: AWLEN_Q <= {AWLEN, 1'b0}; + endcase + + case ({sample_AW, incr_AWADDR}) + 2'b00: AWADDR_Q <= AWADDR_Q; + 2'b01: AWADDR_Q <= AWADDR_Q + 4; + 2'b10: AWADDR_Q <= {AWADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000}; + 2'b11: AWADDR_Q <= {AWADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000} + 4; + endcase + + case({sample_AR, incr_ARADDR}) + 2'b00: ARADDR_Q <= ARADDR_Q; + 2'b01: ARADDR_Q <= ARADDR_Q + 4; + 2'b10: ARADDR_Q <= {ARADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000}; + 2'b11: ARADDR_Q <= {ARADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000} + 4; + endcase + end + end +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv new file mode 100644 index 0000000..e133693 --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv @@ -0,0 +1,74 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi
+// AR channel buffer: concatenates all read-address fields into one word and passes it through axi_single_slice.
+module axi_ar_buffer #(
+    parameter int ID_WIDTH     = -1,
+    parameter int ADDR_WIDTH   = -1,
+    parameter int USER_WIDTH   = -1,
+    parameter int BUFFER_DEPTH = -1
+)(
+    // Clock, reset and test-mode inputs, forwarded unchanged to axi_single_slice
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+    input  logic                  test_en_i,
+    // Slave side: AR request entering the buffer
+    input  logic                  slave_valid_i,
+    input  logic [ADDR_WIDTH-1:0] slave_addr_i,
+    input  logic [2:0]            slave_prot_i,
+    input  logic [3:0]            slave_region_i,
+    input  logic [7:0]            slave_len_i,
+    input  logic [2:0]            slave_size_i,
+    input  logic [1:0]            slave_burst_i,
+    input  logic                  slave_lock_i,
+    input  logic [3:0]            slave_cache_i,
+    input  logic [3:0]            slave_qos_i,
+    input  logic [ID_WIDTH-1:0]   slave_id_i,
+    input  logic [USER_WIDTH-1:0] slave_user_i,
+    output logic                  slave_ready_o,
+    // Master side: AR request leaving the buffer
+    output logic                  master_valid_o,
+    output logic [ADDR_WIDTH-1:0] master_addr_o,
+    output logic [2:0]            master_prot_o,
+    output logic [3:0]            master_region_o,
+    output logic [7:0]            master_len_o,
+    output logic [2:0]            master_size_o,
+    output logic [1:0]            master_burst_o,
+    output logic                  master_lock_o,
+    output logic [3:0]            master_cache_o,
+    output logic [3:0]            master_qos_o,
+    output logic [ID_WIDTH-1:0]   master_id_o,
+    output logic [USER_WIDTH-1:0] master_user_o,
+    input  logic                  master_ready_i
+);
+    // Fixed-width fields: cache 4 + prot 3 + lock 1 + burst 2 + size 3 + len 8 + qos 4 + region 4 = 29 bits
+    localparam int PACKED_WIDTH = 29 + ADDR_WIDTH + USER_WIDTH + ID_WIDTH;
+    logic [PACKED_WIDTH-1:0] ar_word_in, ar_word_out;
+
+    assign {master_cache_o, master_prot_o, master_lock_o, master_burst_o, master_size_o, master_len_o, master_qos_o, master_region_o, master_addr_o, master_user_o, master_id_o} = ar_word_out;
+    assign ar_word_in = {slave_cache_i, slave_prot_i, slave_lock_i, slave_burst_i, slave_size_i, slave_len_i, slave_qos_i, slave_region_i, slave_addr_i, slave_user_i, slave_id_i};
+
+
+    // Pack and unpack use the same field order, so each field comes out on its matching master_* port
+    axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(PACKED_WIDTH)) i_axi_single_slice (
+        .clk_i      ( clk_i          ),
+        .rst_ni     ( rst_ni         ),
+        .testmode_i ( test_en_i      ),
+        .valid_i    ( slave_valid_i  ),
+        .ready_o    ( slave_ready_o  ),
+        .data_i     ( ar_word_in     ),
+        .ready_i    ( master_ready_i ),
+        .valid_o    ( master_valid_o ),
+        .data_o     ( ar_word_out    )
+    );
+
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv
new file mode 100644
index 0000000..15b9345
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv
@@ -0,0 +1,74 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi
+// AW channel buffer: concatenates all write-address fields into one word and passes it through axi_single_slice.
+module axi_aw_buffer #(
+    parameter int ID_WIDTH     = -1,
+    parameter int ADDR_WIDTH   = -1,
+    parameter int USER_WIDTH   = -1,
+    parameter int BUFFER_DEPTH = -1
+)(
+    // Clock, reset and test-mode inputs, forwarded unchanged to axi_single_slice
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+    input  logic                  test_en_i,
+    // Slave side: AW request entering the buffer
+    input  logic                  slave_valid_i,
+    input  logic [ADDR_WIDTH-1:0] slave_addr_i,
+    input  logic [2:0]            slave_prot_i,
+    input  logic [3:0]            slave_region_i,
+    input  logic [7:0]            slave_len_i,
+    input  logic [2:0]            slave_size_i,
+    input  logic [1:0]            slave_burst_i,
+    input  logic                  slave_lock_i,
+    input  logic [3:0]            slave_cache_i,
+    input  logic [3:0]            slave_qos_i,
+    input  logic [ID_WIDTH-1:0]   slave_id_i,
+    input  logic [USER_WIDTH-1:0] slave_user_i,
+    output logic                  slave_ready_o,
+    // Master side: AW request leaving the buffer
+    output logic                  master_valid_o,
+    output logic [ADDR_WIDTH-1:0] master_addr_o,
+    output logic [2:0]            master_prot_o,
+    output logic [3:0]            master_region_o,
+    output logic [7:0]            master_len_o,
+    output logic [2:0]            master_size_o,
+    output logic [1:0]            master_burst_o,
+    output logic                  master_lock_o,
+    output logic [3:0]            master_cache_o,
+    output logic [3:0]            master_qos_o,
+    output logic [ID_WIDTH-1:0]   master_id_o,
+    output logic [USER_WIDTH-1:0] master_user_o,
+    input  logic                  master_ready_i
+);
+    // Fixed-width fields: cache 4 + prot 3 + lock 1 + burst 2 + size 3 + len 8 + qos 4 + region 4 = 29 bits
+    localparam int PACKED_WIDTH = 29 + ADDR_WIDTH + USER_WIDTH + ID_WIDTH;
+    logic [PACKED_WIDTH-1:0] aw_word_in, aw_word_out;
+
+
+    // Pack and unpack use the same field order, so each field comes out on its matching master_* port
+    assign {master_cache_o, master_prot_o, master_lock_o, master_burst_o, master_size_o, master_len_o, master_qos_o, master_region_o, master_addr_o, master_user_o, master_id_o} = aw_word_out;
+    assign aw_word_in = {slave_cache_i, slave_prot_i, slave_lock_i, slave_burst_i, slave_size_i, slave_len_i, slave_qos_i, slave_region_i, slave_addr_i, slave_user_i, slave_id_i};
+
+    // The valid/ready handshake and storage are handled by axi_single_slice
+    axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(PACKED_WIDTH)) i_axi_single_slice (
+        .clk_i      ( clk_i          ),
+        .rst_ni     ( rst_ni         ),
+        .testmode_i ( test_en_i      ),
+        .valid_i    ( slave_valid_i  ),
+        .ready_o    ( slave_ready_o  ),
+        .data_i     ( aw_word_in     ),
+        .ready_i    ( master_ready_i ),
+        .valid_o    ( master_valid_o ),
+        .data_o     ( aw_word_out    )
+    );
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv
new file mode 100644
index 0000000..d2576bb
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv
@@ -0,0 +1,54 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi
+// B channel buffer: concatenates id, user and resp into one word and passes it through axi_single_slice.
+module axi_b_buffer #(
+    parameter int ID_WIDTH     = -1,
+    parameter int USER_WIDTH   = -1,
+    parameter int BUFFER_DEPTH = -1
+)(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+    input  logic                  test_en_i,
+    // Slave side: write response entering the buffer
+    input  logic                  slave_valid_i,
+    input  logic [1:0]            slave_resp_i,
+    input  logic [ID_WIDTH-1:0]   slave_id_i,
+    input  logic [USER_WIDTH-1:0] slave_user_i,
+    output logic                  slave_ready_o,
+    // Master side: write response leaving the buffer
+    output logic                  master_valid_o,
+    output logic [1:0]            master_resp_o,
+    output logic [ID_WIDTH-1:0]   master_id_o,
+    output logic [USER_WIDTH-1:0] master_user_o,
+    input  logic                  master_ready_i
+);
+    // resp contributes the 2 fixed bits; id and user are parameterised
+    localparam int PACKED_WIDTH = 2 + USER_WIDTH + ID_WIDTH;
+    logic [PACKED_WIDTH-1:0] b_word_in, b_word_out;
+
+    assign {master_id_o, master_user_o, master_resp_o} = b_word_out;
+    assign b_word_in = {slave_id_i, slave_user_i, slave_resp_i};
+
+    // The valid/ready handshake and storage are handled by axi_single_slice
+    axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(PACKED_WIDTH)) i_axi_single_slice (
+        .clk_i      ( clk_i          ),
+        .rst_ni     ( rst_ni         ),
+        .testmode_i ( test_en_i      ),
+        .valid_i    ( slave_valid_i  ),
+        .ready_o    ( slave_ready_o  ),
+        .data_i     ( b_word_in      ),
+        .ready_i    ( master_ready_i ),
+        .valid_o    ( master_valid_o ),
+        .data_o     ( b_word_out     )
+    );
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv
new file mode 100644
index 0000000..3c92b25
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv
@@ -0,0 +1,60 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi
+// R channel buffer: concatenates id, user, data, resp and last into one word and passes it through axi_single_slice.
+module axi_r_buffer #(
+    parameter ID_WIDTH     = 4,
+    parameter DATA_WIDTH   = 64,
+    parameter USER_WIDTH   = 6,
+    parameter BUFFER_DEPTH = 8,
+    parameter STRB_WIDTH   = DATA_WIDTH/8   // DO NOT OVERRIDE (not referenced inside this module)
+)(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+    input  logic                  test_en_i,
+    // Slave side: read data beat entering the buffer
+    input  logic                  slave_valid_i,
+    input  logic [DATA_WIDTH-1:0] slave_data_i,
+    input  logic [1:0]            slave_resp_i,
+    input  logic [USER_WIDTH-1:0] slave_user_i,
+    input  logic [ID_WIDTH-1:0]   slave_id_i,
+    input  logic                  slave_last_i,
+    output logic                  slave_ready_o,
+    // Master side: read data beat leaving the buffer
+    output logic                  master_valid_o,
+    output logic [DATA_WIDTH-1:0] master_data_o,
+    output logic [1:0]            master_resp_o,
+    output logic [USER_WIDTH-1:0] master_user_o,
+    output logic [ID_WIDTH-1:0]   master_id_o,
+    output logic                  master_last_o,
+    input  logic                  master_ready_i
+);
+    // resp (2 bits) and last (1 bit) are the fixed fields; the vectors and the slice share this width
+    localparam PACKED_WIDTH = 3 + DATA_WIDTH + USER_WIDTH + ID_WIDTH;
+    logic [PACKED_WIDTH-1:0] r_word_in, r_word_out;
+
+    // Same field order on both sides, so each field comes out on its matching master_* port
+    assign {master_id_o, master_user_o, master_data_o, master_resp_o, master_last_o} = r_word_out;
+    assign r_word_in = {slave_id_i, slave_user_i, slave_data_i, slave_resp_i, slave_last_i};
+    // The valid/ready handshake and storage are handled by axi_single_slice
+    axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(PACKED_WIDTH)) i_axi_single_slice (
+        .clk_i      ( clk_i          ),
+        .rst_ni     ( rst_ni         ),
+        .testmode_i ( test_en_i      ),
+        .valid_i    ( slave_valid_i  ),
+        .ready_o    ( slave_ready_o  ),
+        .data_i     ( r_word_in      ),
+        .ready_i    ( master_ready_i ),
+        .valid_o    ( master_valid_o ),
+        .data_o     ( r_word_out     )
+    );
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv
new file mode 100644
index 0000000..fe7fbbc
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv
@@ -0,0 +1,51 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Valid/ready wrapper around the generic fifo: ready while not full, valid while not empty.
+module axi_single_slice #(
+    parameter int BUFFER_DEPTH = -1,
+    parameter int DATA_WIDTH   = -1
+) (
+    input  logic                  clk_i,      // Clock
+    input  logic                  rst_ni,     // Asynchronous reset active low
+    input  logic                  testmode_i,
+    input  logic                  valid_i,
+    output logic                  ready_o,
+    input  logic [DATA_WIDTH-1:0] data_i,
+
+    input  logic                  ready_i,
+    output logic                  valid_o,
+    output logic [DATA_WIDTH-1:0] data_o
+);
+
+    logic fifo_full, fifo_empty;
+
+    assign valid_o = ~fifo_empty;
+    assign ready_o = ~fifo_full;
+
+    fifo #(
+        .DEPTH        ( BUFFER_DEPTH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .FALL_THROUGH ( 1'b0         )
+    ) i_fifo (
+        .clk_i       ( clk_i             ),
+        .rst_ni      ( rst_ni            ),
+        .testmode_i  ( testmode_i        ),
+        .flush_i     ( 1'b0              ),
+        .threshold_o (                   ), // NC
+        .data_i      ( data_i            ),
+        .push_i      ( valid_i & ready_o ),
+        .full_o      ( fifo_full         ),
+        .data_o      ( data_o            ),
+        .pop_i       ( ready_i & valid_o ),
+        .empty_o     ( fifo_empty        )
+    );
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv
new file mode 100644
index 0000000..91072d6
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv
@@ -0,0 +1,311
@@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi
+// axi_slice: inserts one buffer of depth SLICE_DEPTH on each of the five AXI4 channels (AW, AR, W, R, B).
+module axi_slice
+#(
+    parameter AXI_ADDR_WIDTH = 32,
+    parameter AXI_DATA_WIDTH = 64,
+    parameter AXI_USER_WIDTH = 6,
+    parameter AXI_ID_WIDTH   = 3,
+    parameter SLICE_DEPTH    = 2,               // passed as BUFFER_DEPTH to every channel buffer
+    parameter AXI_STRB_WIDTH = AXI_DATA_WIDTH/8 // only sizes the w_strb ports of this module
+)
+(
+    input logic                       clk_i,
+    input logic                       rst_ni,
+    input logic                       test_en_i,
+
+    // AXI4 SLAVE
+    //***************************************
+    // WRITE ADDRESS CHANNEL
+    input  logic                      axi_slave_aw_valid_i,
+    input  logic [AXI_ADDR_WIDTH-1:0] axi_slave_aw_addr_i,
+    input  logic [2:0]                axi_slave_aw_prot_i,
+    input  logic [3:0]                axi_slave_aw_region_i,
+    input  logic [7:0]                axi_slave_aw_len_i,
+    input  logic [2:0]                axi_slave_aw_size_i,
+    input  logic [1:0]                axi_slave_aw_burst_i,
+    input  logic                      axi_slave_aw_lock_i,
+    input  logic [3:0]                axi_slave_aw_cache_i,
+    input  logic [3:0]                axi_slave_aw_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]   axi_slave_aw_id_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_slave_aw_user_i,
+    output logic                      axi_slave_aw_ready_o,
+
+    // READ ADDRESS CHANNEL
+    input  logic                      axi_slave_ar_valid_i,
+    input  logic [AXI_ADDR_WIDTH-1:0] axi_slave_ar_addr_i,
+    input  logic [2:0]                axi_slave_ar_prot_i,
+    input  logic [3:0]                axi_slave_ar_region_i,
+    input  logic [7:0]                axi_slave_ar_len_i,
+    input  logic [2:0]                axi_slave_ar_size_i,
+    input  logic [1:0]                axi_slave_ar_burst_i,
+    input  logic                      axi_slave_ar_lock_i,
+    input  logic [3:0]                axi_slave_ar_cache_i,
+    input  logic [3:0]                axi_slave_ar_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]   axi_slave_ar_id_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_slave_ar_user_i,
+    output logic                      axi_slave_ar_ready_o,
+
+    // WRITE DATA CHANNEL
+    input  logic                      axi_slave_w_valid_i,
+    input  logic [AXI_DATA_WIDTH-1:0] axi_slave_w_data_i,
+    input  logic [AXI_STRB_WIDTH-1:0] axi_slave_w_strb_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_slave_w_user_i,
+    input  logic                      axi_slave_w_last_i,
+    output logic                      axi_slave_w_ready_o,
+
+    // READ DATA CHANNEL
+    output logic                      axi_slave_r_valid_o,
+    output logic [AXI_DATA_WIDTH-1:0] axi_slave_r_data_o,
+    output logic [1:0]                axi_slave_r_resp_o,
+    output logic                      axi_slave_r_last_o,
+    output logic [AXI_ID_WIDTH-1:0]   axi_slave_r_id_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_slave_r_user_o,
+    input  logic                      axi_slave_r_ready_i,
+
+    // WRITE RESPONSE CHANNEL
+    output logic                      axi_slave_b_valid_o,
+    output logic [1:0]                axi_slave_b_resp_o,
+    output logic [AXI_ID_WIDTH-1:0]   axi_slave_b_id_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_slave_b_user_o,
+    input  logic                      axi_slave_b_ready_i,
+
+    // AXI4 MASTER
+    //***************************************
+    // WRITE ADDRESS CHANNEL
+    output logic                      axi_master_aw_valid_o,
+    output logic [AXI_ADDR_WIDTH-1:0] axi_master_aw_addr_o,
+    output logic [2:0]                axi_master_aw_prot_o,
+    output logic [3:0]                axi_master_aw_region_o,
+    output logic [7:0]                axi_master_aw_len_o,
+    output logic [2:0]                axi_master_aw_size_o,
+    output logic [1:0]                axi_master_aw_burst_o,
+    output logic                      axi_master_aw_lock_o,
+    output logic [3:0]                axi_master_aw_cache_o,
+    output logic [3:0]                axi_master_aw_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]   axi_master_aw_id_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_master_aw_user_o,
+    input  logic                      axi_master_aw_ready_i,
+
+    // READ ADDRESS CHANNEL
+    output logic                      axi_master_ar_valid_o,
+    output logic [AXI_ADDR_WIDTH-1:0] axi_master_ar_addr_o,
+    output logic [2:0]                axi_master_ar_prot_o,
+    output logic [3:0]                axi_master_ar_region_o,
+    output logic [7:0]                axi_master_ar_len_o,
+    output logic [2:0]                axi_master_ar_size_o,
+    output logic [1:0]                axi_master_ar_burst_o,
+    output logic                      axi_master_ar_lock_o,
+    output logic [3:0]                axi_master_ar_cache_o,
+    output logic [3:0]                axi_master_ar_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]   axi_master_ar_id_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_master_ar_user_o,
+    input  logic                      axi_master_ar_ready_i,
+
+    // WRITE DATA CHANNEL
+    output logic                      axi_master_w_valid_o,
+    output logic [AXI_DATA_WIDTH-1:0] axi_master_w_data_o,
+    output logic [AXI_STRB_WIDTH-1:0] axi_master_w_strb_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_master_w_user_o,
+    output logic                      axi_master_w_last_o,
+    input  logic                      axi_master_w_ready_i,
+
+    // READ DATA CHANNEL
+    input  logic                      axi_master_r_valid_i,
+    input  logic [AXI_DATA_WIDTH-1:0] axi_master_r_data_i,
+    input  logic [1:0]                axi_master_r_resp_i,
+    input  logic                      axi_master_r_last_i,
+    input  logic [AXI_ID_WIDTH-1:0]   axi_master_r_id_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_master_r_user_i,
+    output logic                      axi_master_r_ready_o,
+
+    // WRITE RESPONSE CHANNEL
+    input  logic                      axi_master_b_valid_i,
+    input  logic [1:0]                axi_master_b_resp_i,
+    input  logic [AXI_ID_WIDTH-1:0]   axi_master_b_id_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_master_b_user_i,
+    output logic                      axi_master_b_ready_o
+);
+
+    // AXI WRITE ADDRESS CHANNEL BUFFER
+    axi_aw_buffer
+    #(
+        .ID_WIDTH     (AXI_ID_WIDTH),
+        .ADDR_WIDTH   (AXI_ADDR_WIDTH),
+        .USER_WIDTH   (AXI_USER_WIDTH),
+        .BUFFER_DEPTH (SLICE_DEPTH)
+    )
+    aw_buffer_i
+    (
+        .clk_i           ( clk_i                  ),
+        .rst_ni          ( rst_ni                 ),
+        .test_en_i       ( test_en_i              ),
+
+        .slave_valid_i   ( axi_slave_aw_valid_i   ),
+        .slave_addr_i    ( axi_slave_aw_addr_i    ),
+        .slave_prot_i    ( axi_slave_aw_prot_i    ),
+        .slave_region_i  ( axi_slave_aw_region_i  ),
+        .slave_len_i     ( axi_slave_aw_len_i     ),
+        .slave_size_i    ( axi_slave_aw_size_i    ),
+        .slave_burst_i   ( axi_slave_aw_burst_i   ),
+        .slave_lock_i    ( axi_slave_aw_lock_i    ),
+        .slave_cache_i   ( axi_slave_aw_cache_i   ),
+        .slave_qos_i     ( axi_slave_aw_qos_i     ),
+        .slave_id_i      ( axi_slave_aw_id_i      ),
+        .slave_user_i    ( axi_slave_aw_user_i    ),
+        .slave_ready_o   ( axi_slave_aw_ready_o   ),
+
+        .master_valid_o  ( axi_master_aw_valid_o  ),
+        .master_addr_o   ( axi_master_aw_addr_o   ),
+        .master_prot_o   ( axi_master_aw_prot_o   ),
+        .master_region_o ( axi_master_aw_region_o ),
+        .master_len_o    ( axi_master_aw_len_o    ),
+        .master_size_o   ( axi_master_aw_size_o   ),
+        .master_burst_o  ( axi_master_aw_burst_o  ),
+        .master_lock_o   ( axi_master_aw_lock_o   ),
+        .master_cache_o  ( axi_master_aw_cache_o  ),
+        .master_qos_o    ( axi_master_aw_qos_o    ),
+        .master_id_o     ( axi_master_aw_id_o     ),
+        .master_user_o   ( axi_master_aw_user_o   ),
+        .master_ready_i  ( axi_master_aw_ready_i  )
+    );
+
+    // AXI READ ADDRESS CHANNEL BUFFER
+    axi_ar_buffer
+    #(
+        .ID_WIDTH     (AXI_ID_WIDTH),
+        .ADDR_WIDTH   (AXI_ADDR_WIDTH),
+        .USER_WIDTH   (AXI_USER_WIDTH),
+        .BUFFER_DEPTH (SLICE_DEPTH)
+    )
+    ar_buffer_i
+    (
+        .clk_i           ( clk_i                  ),
+        .rst_ni          ( rst_ni                 ),
+        .test_en_i       ( test_en_i              ),
+
+        .slave_valid_i   ( axi_slave_ar_valid_i   ),
+        .slave_addr_i    ( axi_slave_ar_addr_i    ),
+        .slave_prot_i    ( axi_slave_ar_prot_i    ),
+        .slave_region_i  ( axi_slave_ar_region_i  ),
+        .slave_len_i     ( axi_slave_ar_len_i     ),
+        .slave_size_i    ( axi_slave_ar_size_i    ),
+        .slave_burst_i   ( axi_slave_ar_burst_i   ),
+        .slave_lock_i    ( axi_slave_ar_lock_i    ),
+        .slave_cache_i   ( axi_slave_ar_cache_i   ),
+        .slave_qos_i     ( axi_slave_ar_qos_i     ),
+        .slave_id_i      ( axi_slave_ar_id_i      ),
+        .slave_user_i    ( axi_slave_ar_user_i    ),
+        .slave_ready_o   ( axi_slave_ar_ready_o   ),
+
+        .master_valid_o  ( axi_master_ar_valid_o  ),
+        .master_addr_o   ( axi_master_ar_addr_o   ),
+        .master_prot_o   ( axi_master_ar_prot_o   ),
+        .master_region_o ( axi_master_ar_region_o ),
+        .master_len_o    ( axi_master_ar_len_o    ),
+        .master_size_o   ( axi_master_ar_size_o   ),
+        .master_burst_o  ( axi_master_ar_burst_o  ),
+        .master_lock_o   ( axi_master_ar_lock_o   ),
+        .master_cache_o  ( axi_master_ar_cache_o  ),
+        .master_qos_o    ( axi_master_ar_qos_o    ),
+        .master_id_o     ( axi_master_ar_id_o     ),
+        .master_user_o   ( axi_master_ar_user_o   ),
+        .master_ready_i  ( axi_master_ar_ready_i  )
+    );
+
+    // WRITE DATA CHANNEL BUFFER
+    axi_w_buffer
+    #(
+        .DATA_WIDTH   (AXI_DATA_WIDTH),
+        .USER_WIDTH   (AXI_USER_WIDTH),
+        .BUFFER_DEPTH (SLICE_DEPTH) // STRB_WIDTH is not overridden here; AXI_STRB_WIDTH is not forwarded
+    )
+    w_buffer_i
+    (
+        .clk_i          ( clk_i                ),
+        .rst_ni         ( rst_ni               ),
+        .test_en_i      ( test_en_i            ),
+
+        .slave_valid_i  ( axi_slave_w_valid_i  ),
+        .slave_data_i   ( axi_slave_w_data_i   ),
+        .slave_strb_i   ( axi_slave_w_strb_i   ),
+        .slave_user_i   ( axi_slave_w_user_i   ),
+        .slave_last_i   ( axi_slave_w_last_i   ),
+        .slave_ready_o  ( axi_slave_w_ready_o  ),
+
+        .master_valid_o ( axi_master_w_valid_o ),
+        .master_data_o  ( axi_master_w_data_o  ),
+        .master_strb_o  ( axi_master_w_strb_o  ),
+        .master_user_o  ( axi_master_w_user_o  ),
+        .master_last_o  ( axi_master_w_last_o  ),
+        .master_ready_i ( axi_master_w_ready_i )
+    );
+
+    // READ DATA CHANNEL BUFFER
+    axi_r_buffer
+    #(
+        .ID_WIDTH     (AXI_ID_WIDTH),
+        .DATA_WIDTH   (AXI_DATA_WIDTH),
+        .USER_WIDTH   (AXI_USER_WIDTH),
+        .BUFFER_DEPTH (SLICE_DEPTH)
+    )
+    r_buffer_i
+    (
+        .clk_i          ( clk_i                ),
+        .rst_ni         ( rst_ni               ),
+        .test_en_i      ( test_en_i            ),
+        // Direction is reversed here: the buffer's slave_* inputs take the axi_master_r_* signals
+        .slave_valid_i  ( axi_master_r_valid_i ),
+        .slave_data_i   ( axi_master_r_data_i  ),
+        .slave_resp_i   ( axi_master_r_resp_i  ),
+        .slave_user_i   ( axi_master_r_user_i  ),
+        .slave_id_i     ( axi_master_r_id_i    ),
+        .slave_last_i   ( axi_master_r_last_i  ),
+        .slave_ready_o  ( axi_master_r_ready_o ),
+
+        .master_valid_o ( axi_slave_r_valid_o  ),
+        .master_data_o  ( axi_slave_r_data_o   ),
+        .master_resp_o  ( axi_slave_r_resp_o   ),
+        .master_user_o  ( axi_slave_r_user_o   ),
+        .master_id_o    ( axi_slave_r_id_o     ),
+        .master_last_o  ( axi_slave_r_last_o   ),
+        .master_ready_i ( axi_slave_r_ready_i  )
+    );
+
+    // WRITE RESPONSE CHANNEL BUFFER
+    axi_b_buffer
+    #(
+        .ID_WIDTH     (AXI_ID_WIDTH),
+        .USER_WIDTH   (AXI_USER_WIDTH),
+        .BUFFER_DEPTH (SLICE_DEPTH)
+    )
+    b_buffer_i
+    (
+        .clk_i          ( clk_i                ),
+        .rst_ni         ( rst_ni               ),
+        .test_en_i      ( test_en_i            ),
+        // Direction is reversed here: the buffer's slave_* inputs take the axi_master_b_* signals
+        .slave_valid_i  ( axi_master_b_valid_i ),
+        .slave_resp_i   ( axi_master_b_resp_i  ),
+        .slave_id_i     ( axi_master_b_id_i    ),
+        .slave_user_i   ( axi_master_b_user_i  ),
+        .slave_ready_o  ( axi_master_b_ready_o ),
+
+        .master_valid_o ( axi_slave_b_valid_o  ),
+        .master_resp_o  ( axi_slave_b_resp_o   ),
+        .master_id_o    ( axi_slave_b_id_o     ),
+        .master_user_o  ( axi_slave_b_user_o   ),
+        .master_ready_i ( axi_slave_b_ready_i  )
+    );
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv
new file mode 100644
index 0000000..2e643a4
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv
@@ -0,0 +1,115 @@
+module axi_slice_wrap #( // Adapter: exposes axi_slice through two AXI_BUS interface ports
+    parameter AXI_ADDR_WIDTH = 32,
+    parameter AXI_DATA_WIDTH = 64,
+    parameter AXI_USER_WIDTH = 6,
+    parameter AXI_ID_WIDTH   = 3,
+    parameter SLICE_DEPTH    = 2,
+    parameter AXI_STRB_WIDTH = AXI_DATA_WIDTH/8
+)(
+    input logic    clk_i,  // Clock
+    input logic    rst_ni, // Asynchronous reset active low
+    input logic    test_en_i,
+    AXI_BUS.Slave  axi_slave,
+    AXI_BUS.Master axi_master
+);
+
+    axi_slice #(
+        .AXI_ADDR_WIDTH ( AXI_ADDR_WIDTH ),
+        .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+        .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
+        .AXI_ID_WIDTH   ( AXI_ID_WIDTH   ),
+        .SLICE_DEPTH    ( SLICE_DEPTH    ),
+        .AXI_STRB_WIDTH ( AXI_STRB_WIDTH )
+    ) i_axi_slice (
+        .axi_slave_aw_valid_i   ( axi_slave.aw_valid   ),
+        .axi_slave_aw_addr_i    ( axi_slave.aw_addr    ),
+        .axi_slave_aw_prot_i    ( axi_slave.aw_prot    ),
+        .axi_slave_aw_region_i  ( axi_slave.aw_region  ),
+        .axi_slave_aw_len_i     ( axi_slave.aw_len     ),
+        .axi_slave_aw_size_i    ( axi_slave.aw_size    ),
+        .axi_slave_aw_burst_i   ( axi_slave.aw_burst   ),
+        .axi_slave_aw_lock_i    ( axi_slave.aw_lock    ),
+        .axi_slave_aw_cache_i   ( axi_slave.aw_cache   ),
+        .axi_slave_aw_qos_i     ( axi_slave.aw_qos     ),
+        .axi_slave_aw_id_i      ( axi_slave.aw_id      ),
+        .axi_slave_aw_user_i    ( axi_slave.aw_user    ),
+        .axi_slave_aw_ready_o   ( axi_slave.aw_ready   ),
+        .axi_slave_ar_valid_i   ( axi_slave.ar_valid   ),
+        .axi_slave_ar_addr_i    ( axi_slave.ar_addr    ),
+        .axi_slave_ar_prot_i    ( axi_slave.ar_prot    ),
+        .axi_slave_ar_region_i  ( axi_slave.ar_region  ),
+        .axi_slave_ar_len_i     ( axi_slave.ar_len     ),
+        .axi_slave_ar_size_i    ( axi_slave.ar_size    ),
+        .axi_slave_ar_burst_i   ( axi_slave.ar_burst   ),
+        .axi_slave_ar_lock_i    ( axi_slave.ar_lock    ),
+        .axi_slave_ar_cache_i   ( axi_slave.ar_cache   ),
+        .axi_slave_ar_qos_i     ( axi_slave.ar_qos     ),
+        .axi_slave_ar_id_i      ( axi_slave.ar_id      ),
+        .axi_slave_ar_user_i    ( axi_slave.ar_user    ),
+        .axi_slave_ar_ready_o   ( axi_slave.ar_ready   ),
+        .axi_slave_w_valid_i    ( axi_slave.w_valid    ),
+        .axi_slave_w_data_i     ( axi_slave.w_data     ),
+        .axi_slave_w_strb_i     ( axi_slave.w_strb     ),
+        .axi_slave_w_user_i     ( axi_slave.w_user     ),
+        .axi_slave_w_last_i     ( axi_slave.w_last     ),
+        .axi_slave_w_ready_o    ( axi_slave.w_ready    ),
+        .axi_slave_r_valid_o    ( axi_slave.r_valid    ),
+        .axi_slave_r_data_o     ( axi_slave.r_data     ),
+        .axi_slave_r_resp_o     ( axi_slave.r_resp     ),
+        .axi_slave_r_last_o     ( axi_slave.r_last     ),
+        .axi_slave_r_id_o       ( axi_slave.r_id       ),
+        .axi_slave_r_user_o     ( axi_slave.r_user     ),
+        .axi_slave_r_ready_i    ( axi_slave.r_ready    ),
+        .axi_slave_b_valid_o    ( axi_slave.b_valid    ),
+        .axi_slave_b_resp_o     ( axi_slave.b_resp     ),
+        .axi_slave_b_id_o       ( axi_slave.b_id       ),
+        .axi_slave_b_user_o     ( axi_slave.b_user     ),
+        .axi_slave_b_ready_i    ( axi_slave.b_ready    ),
+        .axi_master_aw_valid_o  ( axi_master.aw_valid  ),
+        .axi_master_aw_addr_o   ( axi_master.aw_addr   ),
+        .axi_master_aw_prot_o   ( axi_master.aw_prot   ),
+        .axi_master_aw_region_o ( axi_master.aw_region ),
+        .axi_master_aw_len_o    ( axi_master.aw_len    ),
+        .axi_master_aw_size_o   ( axi_master.aw_size   ),
+        .axi_master_aw_burst_o  ( axi_master.aw_burst  ),
+        .axi_master_aw_lock_o   ( axi_master.aw_lock   ),
+        .axi_master_aw_cache_o  ( axi_master.aw_cache  ),
+        .axi_master_aw_qos_o    ( axi_master.aw_qos    ),
+        .axi_master_aw_id_o     ( axi_master.aw_id     ),
+        .axi_master_aw_user_o   ( axi_master.aw_user   ),
+        .axi_master_aw_ready_i  ( axi_master.aw_ready  ),
+        .axi_master_ar_valid_o  ( axi_master.ar_valid  ),
+        .axi_master_ar_addr_o   ( axi_master.ar_addr   ),
+        .axi_master_ar_prot_o   ( axi_master.ar_prot   ),
+        .axi_master_ar_region_o ( axi_master.ar_region ),
+        .axi_master_ar_len_o    ( axi_master.ar_len    ),
+        .axi_master_ar_size_o   ( axi_master.ar_size   ),
+        .axi_master_ar_burst_o  ( axi_master.ar_burst  ),
+        .axi_master_ar_lock_o   ( axi_master.ar_lock   ),
+        .axi_master_ar_cache_o  ( axi_master.ar_cache  ),
+        .axi_master_ar_qos_o    ( axi_master.ar_qos    ),
+        .axi_master_ar_id_o     ( axi_master.ar_id     ),
+        .axi_master_ar_user_o   ( axi_master.ar_user   ),
+        .axi_master_ar_ready_i  ( axi_master.ar_ready  ),
+        .axi_master_w_valid_o   ( axi_master.w_valid   ),
+        .axi_master_w_data_o    ( axi_master.w_data    ),
+        .axi_master_w_strb_o    ( axi_master.w_strb    ),
+        .axi_master_w_user_o    ( axi_master.w_user    ),
+        .axi_master_w_last_o    ( axi_master.w_last    ),
+        .axi_master_w_ready_i   ( axi_master.w_ready   ),
+        .axi_master_r_valid_i   ( axi_master.r_valid   ),
+        .axi_master_r_data_i    ( axi_master.r_data    ),
+        .axi_master_r_resp_i    ( axi_master.r_resp    ),
+        .axi_master_r_last_i    ( axi_master.r_last    ),
+        .axi_master_r_id_i      ( axi_master.r_id      ),
+        .axi_master_r_user_i    ( axi_master.r_user    ),
+        .axi_master_r_ready_o   ( axi_master.r_ready   ),
+        .axi_master_b_valid_i   ( axi_master.b_valid   ),
+        .axi_master_b_resp_i    ( axi_master.b_resp    ),
+        .axi_master_b_id_i      ( axi_master.b_id      ),
+        .axi_master_b_user_i    ( axi_master.b_user    ),
+        .axi_master_b_ready_o   ( axi_master.b_ready   ),
+        .* // implicit by-name hookup of the ports not listed above: clk_i, rst_ni, test_en_i
+    );
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv
new file mode 100644
index 0000000..0e89a47
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi
+// W channel buffer: concatenates user, strb, data and last into one word and passes it through axi_single_slice.
+module axi_w_buffer #(
+    parameter int DATA_WIDTH   = -1,
+    parameter int USER_WIDTH   = -1,
+    parameter int BUFFER_DEPTH = -1,
+    parameter int STRB_WIDTH   = DATA_WIDTH/8   // DO NOT OVERRIDE
+)(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+    input  logic                  test_en_i,
+    // Slave side: write data beat entering the buffer
+    input  logic                  slave_valid_i,
+    input  logic [DATA_WIDTH-1:0] slave_data_i,
+    input  logic [STRB_WIDTH-1:0] slave_strb_i,
+    input  logic [USER_WIDTH-1:0] slave_user_i,
+    input  logic                  slave_last_i,
+    output logic                  slave_ready_o,
+    // Master side: write data beat leaving the buffer
+    output logic                  master_valid_o,
+    output logic [DATA_WIDTH-1:0] master_data_o,
+    output logic [STRB_WIDTH-1:0] master_strb_o,
+    output logic [USER_WIDTH-1:0] master_user_o,
+    output logic                  master_last_o,
+    input  logic                  master_ready_i
+);
+    // last contributes the single fixed bit; the vectors and the slice share this width
+    localparam int PACKED_WIDTH = 1 + DATA_WIDTH + STRB_WIDTH + USER_WIDTH;
+    logic [PACKED_WIDTH-1:0] w_word_in, w_word_out;
+
+    assign { master_user_o, master_strb_o, master_data_o, master_last_o } = w_word_out;
+    assign w_word_in = { slave_user_i, slave_strb_i, slave_data_i, slave_last_i };
+    // The valid/ready handshake and storage are handled by axi_single_slice
+    axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(PACKED_WIDTH)) i_axi_single_slice (
+        .clk_i      ( clk_i          ),
+        .rst_ni     ( rst_ni         ),
+        .testmode_i ( test_en_i      ),
+        .valid_i    ( slave_valid_i  ),
+        .ready_o    ( slave_ready_o  ),
+        .data_i     ( w_word_in      ),
+        .ready_i    ( master_ready_i ),
+        .valid_o    ( master_valid_o ),
+        .data_o     ( w_word_out     )
+    );
+endmodule
diff --git a/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh
b/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh new file mode 100644 index 0000000..30c44ac --- /dev/null +++ b/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh @@ -0,0 +1,46 @@ +// Copyright (c) 2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Florian Zaruba +/// Macros to assign register bus request/response signals field by field.
+ +`ifndef REGISTER_INTERFACE_ASSIGN_SVH_ +`define REGISTER_INTERFACE_ASSIGN_SVH_ +// TO_REQ: drive lhs with an assignment pattern built from rhs.addr/write/wdata/wstrb/valid. +`define REG_BUS_ASSIGN_TO_REQ(lhs, rhs) \ + assign lhs = '{ \ + addr: rhs.addr, \ + write: rhs.write, \ + wdata: rhs.wdata, \ + wstrb: rhs.wstrb, \ + valid: rhs.valid \ + }; +// FROM_REQ: drive each request member of lhs from the same-named member of rhs. +`define REG_BUS_ASSIGN_FROM_REQ(lhs, rhs) \ + assign lhs.addr = rhs.addr; \ + assign lhs.write = rhs.write; \ + assign lhs.wdata = rhs.wdata; \ + assign lhs.wstrb = rhs.wstrb; \ + assign lhs.valid = rhs.valid; +// TO_RSP: drive lhs with an assignment pattern built from rhs.rdata/error/ready. +`define REG_BUS_ASSIGN_TO_RSP(lhs, rhs) \ + assign lhs = '{ \ + rdata: rhs.rdata, \ + error: rhs.error, \ + ready: rhs.ready \ + }; +// FROM_RSP: drive each response member of lhs from the same-named member of rhs. +`define REG_BUS_ASSIGN_FROM_RSP(lhs, rhs) \ + assign lhs.rdata = rhs.rdata; \ + assign lhs.error = rhs.error; \ + assign lhs.ready = rhs.ready; + +`endif \ No newline at end of file diff --git a/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh b/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh new file mode 100644 index 0000000..350d79e --- /dev/null +++ b/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh @@ -0,0 +1,38 @@ +// Copyright (c) 2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Florian Zaruba +/// Macros to define register bus request/response structs.
+ +`ifndef REGISTER_INTERFACE_TYPEDEF_SVH_ +`define REGISTER_INTERFACE_TYPEDEF_SVH_ +// REQ: declare packed struct req_t with members addr, write, wdata, wstrb, valid (addr first). +`define REG_BUS_TYPEDEF_REQ(req_t, addr_t, data_t, strb_t) \ + typedef struct packed { \ + addr_t addr; \ + logic write; \ + data_t wdata; \ + strb_t wstrb; \ + logic valid; \ + } req_t; +// RSP: declare packed struct rsp_t with members rdata, error, ready (rdata first). +`define REG_BUS_TYPEDEF_RSP(rsp_t, data_t) \ + typedef struct packed { \ + data_t rdata; \ + logic error; \ + logic ready; \ + } rsp_t; +// ALL: expand both macros above, naming the types <name>_req_t and <name>_rsp_t. +`define REG_BUS_TYPEDEF_ALL(name, addr_t, data_t, strb_t) \ + `REG_BUS_TYPEDEF_REQ(name``_req_t, addr_t, data_t, strb_t) \ + `REG_BUS_TYPEDEF_RSP(name``_rsp_t, data_t) + +`endif diff --git a/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv b/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv new file mode 100644 index 0000000..f2f14dc --- /dev/null +++ b/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv @@ -0,0 +1,39 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Florian Zaruba + +module apb_to_reg ( // purely combinational mapping of APB signals onto a REG_BUS master port + input logic clk_i, // not referenced in this module body + input logic rst_ni, // not referenced in this module body + + input logic penable_i, + input logic pwrite_i, + input logic [31:0] paddr_i, + input logic psel_i, + input logic [31:0] pwdata_i, + output logic [31:0] prdata_o, + output logic pready_o, + output logic pslverr_o, + + REG_BUS.out reg_o +); + + always_comb begin + reg_o.addr = paddr_i; + reg_o.write = pwrite_i; + reg_o.wdata = pwdata_i; + reg_o.wstrb = '1; // every byte strobe set: all byte lanes are always enabled + reg_o.valid = psel_i & penable_i; // request is valid only while both psel and penable are high + pready_o = reg_o.ready; + pslverr_o = reg_o.error; + prdata_o = reg_o.rdata; + end +endmodule diff --git a/test/type_param/corev_apu/register_interface/src/reg_intf.sv b/test/type_param/corev_apu/register_interface/src/reg_intf.sv new file mode 100644 index 0000000..5923ae3 --- /dev/null +++ b/test/type_param/corev_apu/register_interface/src/reg_intf.sv @@ -0,0 +1,43 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki + +/// A simple register interface. +/// +/// This is pretty much as simple as it gets. Transactions consist of only one +/// phase. The master sets the address, write, write data, and write strobe +/// signals and pulls valid high. Once pulled high, valid must remain high and +/// none of the signals may change. The transaction completes when both valid +/// and ready are high. Valid must not depend on ready.
The slave presents the +/// read data and error signals. These signals must be constant while valid and +/// ready are both high. +interface REG_BUS #( + /// The width of the address. + parameter int ADDR_WIDTH = -1, + /// The width of the data. + parameter int DATA_WIDTH = -1 +)( + input logic clk_i // not referenced inside the interface body +); + + logic [ADDR_WIDTH-1:0] addr; + logic write; // 0=read, 1=write + logic [DATA_WIDTH-1:0] rdata; // read data, an output of modport in + logic [DATA_WIDTH-1:0] wdata; // write data, an output of modport out + logic [DATA_WIDTH/8-1:0] wstrb; // byte-wise strobe + logic error; // 0=ok, 1=error + logic valid; // request handshake, an output of modport out + logic ready; // response handshake, an output of modport in + + modport in (input addr, write, wdata, wstrb, valid, output rdata, error, ready); // receives requests, drives rdata/error/ready + modport out (output addr, write, wdata, wstrb, valid, input rdata, error, ready); // drives requests, receives rdata/error/ready + +endinterface diff --git a/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv b/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv new file mode 100644 index 0000000..0299db6 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv @@ -0,0 +1,66 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License.
+ * + * File: $filename.v + * + * Description: Auto-generated bootrom + */ + +// Auto-generated code +module debug_rom ( + input logic clk_i, + input logic req_i, + input logic [63:0] addr_i, + output logic [63:0] rdata_o +); + + localparam int unsigned RomSize = 19; // number of 64-bit words in mem + + logic [RomSize-1:0][63:0] mem; + assign mem = { // packed concatenation: the last literal listed lands in mem[0] + 64'h00000000_7b200073, + 64'h7b202473_7b302573, + 64'h10852423_f1402473, + 64'ha85ff06f_7b202473, + 64'h7b302573_10052223, + 64'h00100073_7b202473, + 64'h7b302573_10052623, + 64'h00c51513_00c55513, + 64'h00000517_fd5ff06f, + 64'hfa041ce3_00247413, + 64'h40044403_00a40433, + 64'hf1402473_02041c63, + 64'h00147413_40044403, + 64'h00a40433_10852023, + 64'hf1402473_00c51513, + 64'h00c55513_00000517, + 64'h7b351073_7b241073, + 64'h0ff0000f_04c0006f, + 64'h07c0006f_00c0006f + }; + + logic [$clog2(RomSize)-1:0] addr_q; // registered 64-bit word index + + always_ff @(posedge clk_i) begin // no reset: addr_q only changes on a request + if (req_i) begin + addr_q <= addr_i[$clog2(RomSize)-1+3:3]; // drop the 3 byte-offset bits + end + end + + // this prevents spurious Xes from propagating into + // the speculative fetch stage of the core + always_comb begin : p_outmux + rdata_o = '0; + if (addr_q < $clog2(RomSize)'(RomSize)) begin // indices >= RomSize read as zero + rdata_o = mem[addr_q]; + end + end + +endmodule diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv new file mode 100644 index 0000000..45e8878 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv @@ -0,0 +1,634 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: dm_csrs.sv + * Author: Florian Zaruba + * Date: 30.6.2018 + * + * Description: Debug CSRs. Communication over Debug Transport Module (DTM) + */ + +module dm_csrs #( + parameter int unsigned NrHarts = 1, // number of harts handled by this module + parameter int unsigned BusWidth = 32, // width of the sbaddress_*/sbdata_* ports + parameter logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}} // mask ANDed into havereset_q on every update +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic testmode_i, + input logic dmi_rst_ni, // Debug Module Intf reset active-low + input logic dmi_req_valid_i, + output logic dmi_req_ready_o, + input dm::dmi_req_t dmi_req_i, + // every request needs a response one cycle later + output logic dmi_resp_valid_o, + input logic dmi_resp_ready_i, + output dm::dmi_resp_t dmi_resp_o, + // global ctrl + output logic ndmreset_o, // non-debug module reset active-high + output logic dmactive_o, // 1 -> debug-module is active, + // 0 -> synchronous re-set + // hart status + input dm::hartinfo_t [NrHarts-1:0] hartinfo_i, // static hartinfo + input logic [NrHarts-1:0] halted_i, // hart is halted + input logic [NrHarts-1:0] unavailable_i, // e.g.: powered down + input logic [NrHarts-1:0] resumeack_i, // hart acknowledged resume request + // hart control + output logic [19:0] hartsel_o, // hartselect to ctrl module + output logic [NrHarts-1:0] haltreq_o, // request to halt a hart + output logic [NrHarts-1:0] resumereq_o, // request hart to resume + output logic clear_resumeack_o, + + output logic cmd_valid_o, // debugger writing to cmd field + output dm::command_t cmd_o, // abstract command + input logic cmderror_valid_i, // an error occurred + input dm::cmderr_e cmderror_i,
// this error occurred + input logic cmdbusy_i, // cmd is currently busy executing + + output logic [dm::ProgBufSize-1:0][31:0] progbuf_o, // to system bus + output logic [dm::DataCount-1:0][31:0] data_o, + + input logic [dm::DataCount-1:0][31:0] data_i, + input logic data_valid_i, + // system bus access module (SBA) + output logic [BusWidth-1:0] sbaddress_o, + input logic [BusWidth-1:0] sbaddress_i, + output logic sbaddress_write_valid_o, + // control signals in + output logic sbreadonaddr_o, + output logic sbautoincrement_o, + output logic [2:0] sbaccess_o, + // data out + output logic sbreadondata_o, + output logic [BusWidth-1:0] sbdata_o, + output logic sbdata_read_valid_o, + output logic sbdata_write_valid_o, + // read data in + input logic [BusWidth-1:0] sbdata_i, + input logic sbdata_valid_i, + // control signals + input logic sbbusy_i, + input logic sberror_valid_i, // bus error occurred + input logic [2:0] sberror_i // bus error occurred +); + // the amount of bits we need to represent all harts + localparam int unsigned HartSelLen = (NrHarts == 1) ?
1 : $clog2(NrHarts); + localparam int unsigned NrHartsAligned = 2**HartSelLen; + + dm::dtm_op_e dtm_op; + assign dtm_op = dm::dtm_op_e'(dmi_req_i.op); + + logic resp_queue_full; + logic resp_queue_empty; + logic resp_queue_push; + logic resp_queue_pop; + logic [31:0] resp_queue_data; + + localparam dm::dm_csr_e DataEnd = dm::dm_csr_e'(dm::Data0 + {4'h0, dm::DataCount} - 8'h1); + localparam dm::dm_csr_e ProgBufEnd = dm::dm_csr_e'(dm::ProgBuf0 + {4'h0, dm::ProgBufSize} - 8'h1); + + logic [31:0] haltsum0, haltsum1, haltsum2, haltsum3; + logic [((NrHarts-1)/2**5 + 1) * 32 - 1 : 0] halted; + logic [(NrHarts-1)/2**5:0][31:0] halted_reshaped0; + logic [(NrHarts-1)/2**10:0][31:0] halted_reshaped1; + logic [(NrHarts-1)/2**15:0][31:0] halted_reshaped2; + logic [((NrHarts-1)/2**10+1)*32-1:0] halted_flat1; + logic [((NrHarts-1)/2**15+1)*32-1:0] halted_flat2; + logic [31:0] halted_flat3; + + // haltsum0 + logic [14:0] hartsel_idx0; + always_comb begin : p_haltsum0 + halted = '0; + haltsum0 = '0; + hartsel_idx0 = hartsel_o[19:5]; + halted[NrHarts-1:0] = halted_i; + halted_reshaped0 = halted; + if (hartsel_idx0 < 15'((NrHarts-1)/2**5+1)) begin + haltsum0 = halted_reshaped0[hartsel_idx0]; + end + end + + // haltsum1 + logic [9:0] hartsel_idx1; + always_comb begin : p_reduction1 + halted_flat1 = '0; + haltsum1 = '0; + hartsel_idx1 = hartsel_o[19:10]; + + for (int unsigned k = 0; k < (NrHarts-1)/2**5+1; k++) begin + halted_flat1[k] = |halted_reshaped0[k]; + end + halted_reshaped1 = halted_flat1; + + if (hartsel_idx1 < 10'(((NrHarts-1)/2**10+1))) begin + haltsum1 = halted_reshaped1[hartsel_idx1]; + end + end + + // haltsum2 + logic [4:0] hartsel_idx2; + always_comb begin : p_reduction2 + halted_flat2 = '0; + haltsum2 = '0; + hartsel_idx2 = hartsel_o[19:15]; + + for (int unsigned k = 0; k < (NrHarts-1)/2**10+1; k++) begin + halted_flat2[k] = |halted_reshaped1[k]; + end + halted_reshaped2 = halted_flat2; + + if (hartsel_idx2 < 5'(((NrHarts-1)/2**15+1))) begin + haltsum2 =
halted_reshaped2[hartsel_idx2]; + end + end + + // haltsum3 + always_comb begin : p_reduction3 + halted_flat3 = '0; + for (int unsigned k = 0; k < (NrHarts-1)/2**15+1; k++) begin // bound equals the declared word count of halted_reshaped2 + halted_flat3[k] = |halted_reshaped2[k]; + end + haltsum3 = halted_flat3; + end + + + dm::dmstatus_t dmstatus; + dm::dmcontrol_t dmcontrol_d, dmcontrol_q; + dm::abstractcs_t abstractcs; + dm::cmderr_e cmderr_d, cmderr_q; + dm::command_t command_d, command_q; + logic cmd_valid_d, cmd_valid_q; + dm::abstractauto_t abstractauto_d, abstractauto_q; + dm::sbcs_t sbcs_d, sbcs_q; + logic [63:0] sbaddr_d, sbaddr_q; + logic [63:0] sbdata_d, sbdata_q; + + logic [NrHarts-1:0] havereset_d, havereset_q; + // program buffer + logic [dm::ProgBufSize-1:0][31:0] progbuf_d, progbuf_q; + logic [dm::DataCount-1:0][31:0] data_d, data_q; + + logic [HartSelLen-1:0] selected_hart; + + // a successful response returns zero + assign dmi_resp_o.resp = dm::DTM_SUCCESS; + assign dmi_resp_valid_o = ~resp_queue_empty; + assign dmi_req_ready_o = ~resp_queue_full; + assign resp_queue_push = dmi_req_valid_i & dmi_req_ready_o; + // SBA + assign sbautoincrement_o = sbcs_q.sbautoincrement; + assign sbreadonaddr_o = sbcs_q.sbreadonaddr; + assign sbreadondata_o = sbcs_q.sbreadondata; + assign sbaccess_o = sbcs_q.sbaccess; + assign sbdata_o = sbdata_q[BusWidth-1:0]; + assign sbaddress_o = sbaddr_q[BusWidth-1:0]; + + assign hartsel_o = {dmcontrol_q.hartselhi, dmcontrol_q.hartsello}; + + // needed to avoid lint warnings + logic [NrHartsAligned-1:0] havereset_d_aligned, havereset_q_aligned, + resumeack_aligned, unavailable_aligned, + halted_aligned; + assign resumeack_aligned = NrHartsAligned'(resumeack_i); + assign unavailable_aligned = NrHartsAligned'(unavailable_i); + assign halted_aligned = NrHartsAligned'(halted_i); + + assign havereset_d = NrHarts'(havereset_d_aligned); + assign havereset_q_aligned = NrHartsAligned'(havereset_q); + + dm::hartinfo_t [NrHartsAligned-1:0] hartinfo_aligned; + always_comb begin : p_hartinfo_align +
hartinfo_aligned = '0; + hartinfo_aligned[NrHarts-1:0] = hartinfo_i; + end + + // helper variables + dm::dm_csr_e dm_csr_addr; + dm::sbcs_t sbcs; + dm::abstractcs_t a_abstractcs; + logic [3:0] autoexecdata_idx; // 0 == Data0 ... 11 == Data11 + + // Get the data index, i.e. 0 for dm::Data0 up to 11 for dm::Data11 + assign dm_csr_addr = dm::dm_csr_e'({1'b0, dmi_req_i.addr}); + // Xilinx Vivado 2020.1 does not allow subtraction of two enums; do the subtraction with logic + // types instead. + assign autoexecdata_idx = 4'({dm_csr_addr} - {dm::Data0}); + + always_comb begin : csr_read_write + // -------------------- + // Static Values (R/O) + // -------------------- + // dmstatus + dmstatus = '0; + dmstatus.version = dm::DbgVersion013; + // no authentication implemented + dmstatus.authenticated = 1'b1; + // we do not support halt-on-reset sequence + dmstatus.hasresethaltreq = 1'b0; + // TODO(zarubaf) things need to change here if we implement the array mask + dmstatus.allhavereset = havereset_q_aligned[selected_hart]; + dmstatus.anyhavereset = havereset_q_aligned[selected_hart]; + + dmstatus.allresumeack = resumeack_aligned[selected_hart]; + dmstatus.anyresumeack = resumeack_aligned[selected_hart]; + + dmstatus.allunavail = unavailable_aligned[selected_hart]; + dmstatus.anyunavail = unavailable_aligned[selected_hart]; + + // as soon as we are out of the legal Hart region tell the debugger + // that there are only non-existent harts + dmstatus.allnonexistent = logic'(32'(hartsel_o) > (NrHarts - 1)); + dmstatus.anynonexistent = logic'(32'(hartsel_o) > (NrHarts - 1)); + + // We are not allowed to be in multiple states at once. This is to + // make the running/halted and unavailable states exclusive.
+ dmstatus.allhalted = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart]; + dmstatus.anyhalted = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart]; + + dmstatus.allrunning = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart]; + dmstatus.anyrunning = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart]; + + // abstractcs + abstractcs = '0; + abstractcs.datacount = dm::DataCount; + abstractcs.progbufsize = dm::ProgBufSize; + abstractcs.busy = cmdbusy_i; + abstractcs.cmderr = cmderr_q; + + // abstractautoexec + abstractauto_d = abstractauto_q; + abstractauto_d.zero0 = '0; + + // default assignments + havereset_d_aligned = NrHartsAligned'(havereset_q); + dmcontrol_d = dmcontrol_q; + cmderr_d = cmderr_q; + command_d = command_q; + progbuf_d = progbuf_q; + data_d = data_q; + sbcs_d = sbcs_q; + sbaddr_d = 64'(sbaddress_i); + sbdata_d = sbdata_q; + + resp_queue_data = 32'h0; + cmd_valid_d = 1'b0; + sbaddress_write_valid_o = 1'b0; + sbdata_read_valid_o = 1'b0; + sbdata_write_valid_o = 1'b0; + clear_resumeack_o = 1'b0; + + // helper variables + sbcs = '0; + a_abstractcs = '0; + + // reads + if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) begin + unique case (dm_csr_addr) inside + [(dm::Data0):DataEnd]: begin + resp_queue_data = data_q[$clog2(dm::DataCount)'(autoexecdata_idx)]; + if (!cmdbusy_i) begin + // check whether we need to re-execute the command (just give a cmd_valid) + cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx]; + // An abstract command was executing while one of the data registers was read + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::DMControl: resp_queue_data = dmcontrol_q; + dm::DMStatus: resp_queue_data = dmstatus; + dm::Hartinfo: resp_queue_data = hartinfo_aligned[selected_hart]; + dm::AbstractCS: resp_queue_data = abstractcs; + dm::AbstractAuto: resp_queue_data = abstractauto_q; + // command is read-only +
dm::Command: resp_queue_data = '0; + [(dm::ProgBuf0):ProgBufEnd]: begin + resp_queue_data = progbuf_q[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]]; + if (!cmdbusy_i) begin + // check whether we need to re-execute the command (just give a cmd_valid) + // range of autoexecprogbuf is 31:16 + cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}]; + + // An abstract command was executing while one of the progbuf registers was read + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::HaltSum0: resp_queue_data = haltsum0; + dm::HaltSum1: resp_queue_data = haltsum1; + dm::HaltSum2: resp_queue_data = haltsum2; + dm::HaltSum3: resp_queue_data = haltsum3; + dm::SBCS: begin + resp_queue_data = sbcs_q; + end + dm::SBAddress0: begin + resp_queue_data = sbaddr_q[31:0]; + end + dm::SBAddress1: begin + resp_queue_data = sbaddr_q[63:32]; + end + dm::SBData0: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbdata_read_valid_o = (sbcs_q.sberror == '0); + resp_queue_data = sbdata_q[31:0]; + end + end + dm::SBData1: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + resp_queue_data = sbdata_q[63:32]; + end + end + default:; + endcase + end + + // write + if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_WRITE) begin + unique case (dm_csr_addr) inside + [(dm::Data0):DataEnd]: begin + if (dm::DataCount > 0) begin + // attempts to write them while busy is set does not change their value + if (!cmdbusy_i) begin + data_d[dmi_req_i.addr[$clog2(dm::DataCount)-1:0]] = dmi_req_i.data; + // check whether we need to re-execute the command (just give a cmd_valid) + cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx]; + //An abstract command was executing while one of the data registers was written + end else if (cmderr_q == dm::CmdErrNone) begin +
cmderr_d = dm::CmdErrBusy; + end + end + end + dm::DMControl: begin + dmcontrol_d = dmi_req_i.data; + // clear the havereset of the selected hart + if (dmcontrol_d.ackhavereset) begin + havereset_d_aligned[selected_hart] = 1'b0; + end + end + dm::DMStatus:; // write are ignored to R/O register + dm::Hartinfo:; // hartinfo is R/O + // only command error is write-able + dm::AbstractCS: begin // W1C + // Gets set if an abstract command fails. The bits in this + // field remain set until they are cleared by writing 1 to + // them. No abstract command is started until the value is + // reset to 0. + a_abstractcs = dm::abstractcs_t'(dmi_req_i.data); + // writes during abstract command execution are not allowed + if (!cmdbusy_i) begin + cmderr_d = dm::cmderr_e'(~a_abstractcs.cmderr & cmderr_q); + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::Command: begin + // writes are ignored if a command is already busy + if (!cmdbusy_i) begin + cmd_valid_d = 1'b1; + command_d = dm::command_t'(dmi_req_i.data); + // if there was an attempt to write during a busy execution + // and the cmderror field is zero set the busy error + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::AbstractAuto: begin + // this field can only be written legally when there is no command executing + if (!cmdbusy_i) begin + abstractauto_d = 32'h0; + abstractauto_d.autoexecdata = 12'(dmi_req_i.data[dm::DataCount-1:0]); + abstractauto_d.autoexecprogbuf = 16'(dmi_req_i.data[dm::ProgBufSize-1+16:16]); + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + [(dm::ProgBuf0):ProgBufEnd]: begin + // attempts to write them while busy is set does not change their value + if (!cmdbusy_i) begin + progbuf_d[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]] = dmi_req_i.data; + // check whether we need to re-execute the command (just give a cmd_valid) + // this should probably throw an error if executed
during another command + // was busy + // range of autoexecprogbuf is 31:16 + cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}]; + //An abstract command was executing while one of the progbuf registers was written + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::SBCS: begin + // access while the SBA was busy + if (sbbusy_i) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbcs = dm::sbcs_t'(dmi_req_i.data); + sbcs_d = sbcs; + // R/W1C + sbcs_d.sbbusyerror = sbcs_q.sbbusyerror & (~sbcs.sbbusyerror); + sbcs_d.sberror = sbcs_q.sberror & (~sbcs.sberror); + end + end + dm::SBAddress0: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbaddr_d[31:0] = dmi_req_i.data; + sbaddress_write_valid_o = (sbcs_q.sberror == '0); + end + end + dm::SBAddress1: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbaddr_d[63:32] = dmi_req_i.data; + end + end + dm::SBData0: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbdata_d[31:0] = dmi_req_i.data; + sbdata_write_valid_o = (sbcs_q.sberror == '0); + end + end + dm::SBData1: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbdata_d[63:32] = dmi_req_i.data; + end + end + default:; + endcase + end + // hart threw a command error and has precedence over bus writes + if (cmderror_valid_i) begin + cmderr_d = cmderror_i; + end + + // update data registers + if (data_valid_i) begin + data_d = data_i; + end + + // set the havereset flag when we did a ndmreset + if (ndmreset_o) begin + havereset_d_aligned[NrHarts-1:0] = '1; + end + // ------------- + // System Bus + // ------------- + // set bus error + if (sberror_valid_i) begin +
sbcs_d.sberror = sberror_i; + end + // update read data + if (sbdata_valid_i) begin + sbdata_d = 64'(sbdata_i); + end + + // dmcontrol + // TODO(zarubaf) we currently do not implement the hartarray mask + dmcontrol_d.hasel = 1'b0; + // we do not support resetting an individual hart + dmcontrol_d.hartreset = 1'b0; + dmcontrol_d.setresethaltreq = 1'b0; + dmcontrol_d.clrresethaltreq = 1'b0; + dmcontrol_d.zero1 = '0; + dmcontrol_d.zero0 = '0; + // Non-writeable, clear only + dmcontrol_d.ackhavereset = 1'b0; + if (!dmcontrol_q.resumereq && dmcontrol_d.resumereq) begin + clear_resumeack_o = 1'b1; + end + if (dmcontrol_q.resumereq && resumeack_i) begin + dmcontrol_d.resumereq = 1'b0; + end + // static values for sbcs + sbcs_d.sbversion = 3'd1; + sbcs_d.sbbusy = sbbusy_i; + sbcs_d.sbasize = $bits(sbcs_d.sbasize)'(BusWidth); + sbcs_d.sbaccess128 = 1'b0; + sbcs_d.sbaccess64 = logic'(BusWidth == 32'd64); + sbcs_d.sbaccess32 = logic'(BusWidth == 32'd32); + sbcs_d.sbaccess16 = 1'b0; + sbcs_d.sbaccess8 = 1'b0; + sbcs_d.sbaccess = (BusWidth == 32'd64) ?
3'd3 : 3'd2; + end + + // output multiplexer + always_comb begin : p_outmux + selected_hart = hartsel_o[HartSelLen-1:0]; + // default assignment + haltreq_o = '0; + resumereq_o = '0; + if (selected_hart <= HartSelLen'(NrHarts-1)) begin + haltreq_o[selected_hart] = dmcontrol_q.haltreq; + resumereq_o[selected_hart] = dmcontrol_q.resumereq; + end + end + + assign dmactive_o = dmcontrol_q.dmactive; + assign cmd_o = command_q; + assign cmd_valid_o = cmd_valid_q; + assign progbuf_o = progbuf_q; + assign data_o = data_q; + + assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty; + + assign ndmreset_o = dmcontrol_q.ndmreset; + + // response FIFO + fifo_v2 #( + .dtype ( logic [31:0] ), + .DEPTH ( 2 ) + ) i_fifo ( + .clk_i ( clk_i ), + .rst_ni ( dmi_rst_ni ), // reset only when system is re-set + .flush_i ( 1'b0 ), // we do not need to flush this queue + .testmode_i ( testmode_i ), + .full_o ( resp_queue_full ), + .empty_o ( resp_queue_empty ), + .alm_full_o ( ), + .alm_empty_o ( ), + .data_i ( resp_queue_data ), + .push_i ( resp_queue_push ), + .data_o ( dmi_resp_o.data ), + .pop_i ( resp_queue_pop ) + ); + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + // PoR + if (!rst_ni) begin + dmcontrol_q <= '0; + // this is the only write-able bit during reset + cmderr_q <= dm::CmdErrNone; + command_q <= '0; + cmd_valid_q <= '0; + abstractauto_q <= '0; + progbuf_q <= '0; + data_q <= '0; + sbcs_q <= '0; + sbaddr_q <= '0; + sbdata_q <= '0; + havereset_q <= '1; + end else begin + havereset_q <= SelectableHarts & havereset_d; + // synchronous re-set of debug module, active-low, except for dmactive + if (!dmcontrol_q.dmactive) begin + dmcontrol_q.haltreq <= '0; + dmcontrol_q.resumereq <= '0; + dmcontrol_q.hartreset <= '0; + dmcontrol_q.ackhavereset <= '0; + dmcontrol_q.zero1 <= '0; + dmcontrol_q.hasel <= '0; + dmcontrol_q.hartsello <= '0; + dmcontrol_q.hartselhi <= '0; + dmcontrol_q.zero0 <= '0; + dmcontrol_q.setresethaltreq <= '0; + dmcontrol_q.clrresethaltreq <=
'0; + dmcontrol_q.ndmreset <= '0; + // this is the only write-able bit during reset + dmcontrol_q.dmactive <= dmcontrol_d.dmactive; + cmderr_q <= dm::CmdErrNone; + command_q <= '0; + cmd_valid_q <= '0; + abstractauto_q <= '0; + progbuf_q <= '0; + data_q <= '0; + sbcs_q <= '0; + sbaddr_q <= '0; + sbdata_q <= '0; + end else begin + dmcontrol_q <= dmcontrol_d; + cmderr_q <= cmderr_d; + command_q <= command_d; + cmd_valid_q <= cmd_valid_d; + abstractauto_q <= abstractauto_d; + progbuf_q <= progbuf_d; + data_q <= data_d; + sbcs_q <= sbcs_d; + sbaddr_q <= sbaddr_d; + sbdata_q <= sbdata_d; + end + end + end + +endmodule : dm_csrs diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv new file mode 100755 index 0000000..178259f --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv @@ -0,0 +1,523 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. +* Copyright and related rights are licensed under the Solderpad Hardware +* License, Version 0.51 (the “License”); you may not use this file except in +* compliance with the License. You may obtain a copy of the License at +* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +* or agreed to in writing, software, hardware and materials distributed under +* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License.
+*
+* File: dm_mem.sv
+* Author: Florian Zaruba
+* Date: 11.7.2018
+*
+* Description: Memory module for execution-based debug clients
+*
+*/
+
+// dm_mem: memory-mapped window of the debug module as seen by the harts.
+//
+// Harts report their state by writing HaltedAddr / GoingAddr / ResumingAddr /
+// ExceptionAddr and fetch code and data by reading WhereToAddr, the abstract
+// command slots, the program buffer, the data registers and the flag bytes.
+// Reads whose low DbgAddressBits are >= dm::HaltAddress are answered by the
+// debug ROM instance, all other reads by the registered rdata_q.
+//
+// Address map (low 12 bits), derived from the localparams below with the
+// values currently in package dm (DataAddr = 'h380, DataCount = 2,
+// ProgBufSize = 8):
+//   'h100..'h10C  Halted / Going / Resuming / Exception (write only)
+//   'h300         WhereTo (read only, returns a jal instruction)
+//   'h338..'h35F  abstract command slots
+//   'h360..'h37F  program buffer
+//   'h380..'h387  data registers
+//   'h400..'h7FF  per-hart flags
+module dm_mem #(
+  parameter int unsigned        NrHarts         =  1,
+  parameter int unsigned        BusWidth        = 32,
+  parameter logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}},
+  parameter int unsigned        DmBaseAddress   = '0
+) (
+  input  logic                             clk_i,       // Clock
+  input  logic                             rst_ni,      // debug module reset
+
+  output logic [NrHarts-1:0]               debug_req_o,
+  input  logic [19:0]                      hartsel_i,
+  // from Ctrl and Status register
+  input  logic [NrHarts-1:0]               haltreq_i,
+  input  logic [NrHarts-1:0]               resumereq_i,
+  input  logic                             clear_resumeack_i,
+
+  // state bits
+  output logic [NrHarts-1:0]               halted_o,    // hart acknowledge halt
+  output logic [NrHarts-1:0]               resuming_o,  // hart is resuming
+
+  input  logic [dm::ProgBufSize-1:0][31:0] progbuf_i,    // program buffer to expose
+
+  input  logic [dm::DataCount-1:0][31:0]   data_i,       // data in
+  output logic [dm::DataCount-1:0][31:0]   data_o,       // data out
+  output logic                             data_valid_o, // data out is valid
+  // abstract command interface
+  input  logic                             cmd_valid_i,
+  input  dm::command_t                     cmd_i,
+  output logic                             cmderror_valid_o,
+  output dm::cmderr_e                      cmderror_o,
+  output logic                             cmdbusy_o,
+  // data interface
+
+  // SRAM interface
+  input  logic                             req_i,
+  input  logic                             we_i,
+  input  logic [BusWidth-1:0]              addr_i,
+  input  logic [BusWidth-1:0]              wdata_i,
+  input  logic [BusWidth/8-1:0]            be_i,
+  output logic [BusWidth-1:0]              rdata_o
+);
+  localparam int unsigned DbgAddressBits = 12;
+  localparam int unsigned HartSelLen     = (NrHarts == 1) ? 1 : $clog2(NrHarts);
+  // hart vectors are padded to a power of two so that any HartSelLen-bit index is in range
+  localparam int unsigned NrHartsAligned = 2**HartSelLen;
+  // abstract register accesses are only generated for aarsize < MaxAar
+  localparam int unsigned MaxAar         = (BusWidth == 64) ? 4 : 3;
+  localparam bit          HasSndScratch  = (DmBaseAddress != 0);
+  // Depending on whether we are at the zero page or not we either use `x0` or `x10/a0`
+  localparam logic [4:0]  LoadBaseAddr   = (DmBaseAddress == 0) ? 5'd0 : 5'd10;
+
+  localparam logic [DbgAddressBits-1:0] DataBaseAddr        = (dm::DataAddr);
+  localparam logic [DbgAddressBits-1:0] DataEndAddr         = (dm::DataAddr + 4*dm::DataCount - 1);
+  localparam logic [DbgAddressBits-1:0] ProgBufBaseAddr     = (dm::DataAddr - 4*dm::ProgBufSize);
+  localparam logic [DbgAddressBits-1:0] ProgBufEndAddr      = (dm::DataAddr - 1);
+  localparam logic [DbgAddressBits-1:0] AbstractCmdBaseAddr = (ProgBufBaseAddr - 4*10);
+  localparam logic [DbgAddressBits-1:0] AbstractCmdEndAddr  = (ProgBufBaseAddr - 1);
+
+  localparam logic [DbgAddressBits-1:0] WhereToAddr   = 'h300;
+  localparam logic [DbgAddressBits-1:0] FlagsBaseAddr = 'h400;
+  localparam logic [DbgAddressBits-1:0] FlagsEndAddr  = 'h7FF;
+
+  localparam logic [DbgAddressBits-1:0] HaltedAddr    = 'h100;
+  localparam logic [DbgAddressBits-1:0] GoingAddr     = 'h104;
+  localparam logic [DbgAddressBits-1:0] ResumingAddr  = 'h108;
+  localparam logic [DbgAddressBits-1:0] ExceptionAddr = 'h10C;
+
+  logic [dm::ProgBufSize/2-1:0][63:0] progbuf;
+  logic [7:0][63:0]                   abstract_cmd;
+  logic [NrHarts-1:0]                 halted_d, halted_q;
+  logic [NrHarts-1:0]                 resuming_d, resuming_q;
+  logic                               resume, go, going;
+
+  logic exception;
+  logic unsupported_command;
+
+  logic [63:0] rom_rdata;
+  logic [63:0] rdata_d, rdata_q;
+  logic        word_enable32_q;
+
+  // this is needed to avoid lint warnings related to array indexing
+  // resize hartsel to valid range
+  logic [HartSelLen-1:0] hartsel, wdata_hartsel;
+
+  assign hartsel       = hartsel_i[HartSelLen-1:0];
+  assign wdata_hartsel = wdata_i[HartSelLen-1:0];
+
+  logic [NrHartsAligned-1:0] resumereq_aligned, haltreq_aligned,
+                             halted_d_aligned, halted_q_aligned,
+                             halted_aligned, resumereq_wdata_aligned,
+                             resuming_d_aligned, resuming_q_aligned;
+
+  assign resumereq_aligned       = NrHartsAligned'(resumereq_i);
+  assign haltreq_aligned         = NrHartsAligned'(haltreq_i);
+  // same source as resumereq_aligned; it is indexed with wdata_hartsel instead of hartsel
+  assign resumereq_wdata_aligned = NrHartsAligned'(resumereq_i);
+
+  assign halted_q_aligned        = NrHartsAligned'(halted_q);
+  assign halted_d                = NrHarts'(halted_d_aligned);
+  assign resuming_q_aligned      = NrHartsAligned'(resuming_q);
+  assign resuming_d              = NrHarts'(resuming_d_aligned);
+
+  // distinguish whether we need to forward data from the ROM or the FSM
+  // latch the address for this
+  logic fwd_rom_d, fwd_rom_q;
+  dm::ac_ar_cmd_t ac_ar;
+
+  // Abstract Command Access Register
+  assign ac_ar       = dm::ac_ar_cmd_t'(cmd_i.control);
+  assign debug_req_o = haltreq_i;
+  assign halted_o    = halted_q;
+  assign resuming_o  = resuming_q;
+
+  // reshape progbuf: ProgBufSize 32-bit words are viewed as ProgBufSize/2 64-bit words
+  assign progbuf = progbuf_i;
+
+  typedef enum logic [1:0] { Idle, Go, Resume, CmdExecuting } state_e;
+  state_e state_d, state_q;
+
+  // hart ctrl queue
+  // Sequences one abstract command or one resume request for the selected hart:
+  //   Idle -> Go -> CmdExecuting -> Idle   (command)
+  //   Idle -> Resume -> Idle               (resume)
+  // cmdbusy_o is low only in Idle.
+  always_comb begin : p_hart_ctrl_queue
+    cmderror_valid_o = 1'b0;
+    cmderror_o       = dm::CmdErrNone;
+    state_d          = state_q;
+    go               = 1'b0;
+    resume           = 1'b0;
+    cmdbusy_o        = 1'b1;
+
+    unique case (state_q)
+      Idle: begin
+        cmdbusy_o = 1'b0;
+        if (cmd_valid_i && halted_q_aligned[hartsel] && !unsupported_command) begin
+          // give the go signal
+          state_d = Go;
+        end else if (cmd_valid_i) begin
+          // hart must be halted for all requests
+          cmderror_valid_o = 1'b1;
+          cmderror_o = dm::CmdErrorHaltResume;
+        end
+        // CSRs want to resume, the request is ignored when the hart is
+        // requested to halt or it didn't clear the resuming_q bit before
+        // (assigned after the command check, so a resume wins over a command
+        // that becomes valid in the same cycle)
+        if (resumereq_aligned[hartsel] && !resuming_q_aligned[hartsel] &&
+            !haltreq_aligned[hartsel] && halted_q_aligned[hartsel]) begin
+          state_d = Resume;
+        end
+      end
+
+      Go: begin
+        // we are already busy here since we scheduled the execution of a program
+        cmdbusy_o = 1'b1;
+        go        = 1'b1;
+        // the thread is now executing the command, track its state
+        if (going) begin
+          state_d = CmdExecuting;
+        end
+      end
+
+      Resume: begin
+        cmdbusy_o = 1'b1;
+        resume = 1'b1;
+        if (resuming_q_aligned[hartsel]) begin
+          state_d = Idle;
+        end
+      end
+
+      CmdExecuting: begin
+        cmdbusy_o = 1'b1;
+        go        = 1'b0;
+        // wait until the hart has halted again
+        if (halted_aligned[hartsel]) begin
+          state_d = Idle;
+        end
+      end
+
+      default: ;
+    endcase
+
+    // only signal once that cmd is unsupported so that we can clear cmderr
+    // in subsequent writes to abstractcs
+    // (this overrides the CmdErrorHaltResume assigned in Idle above)
+    if (unsupported_command && cmd_valid_i) begin
+      cmderror_valid_o = 1'b1;
+      cmderror_o = dm::CmdErrNotSupported;
+    end
+
+    // last assignment: an exception reported by the hart overrides the errors above
+    if (exception) begin
+      cmderror_valid_o = 1'b1;
+      cmderror_o = dm::CmdErrorException;
+    end
+  end
+
+  // word mux for 32bit and 64bit buses
+  logic [63:0] word_mux;
+  assign word_mux = (fwd_rom_q) ? rom_rdata : rdata_q;
+
+  if (BusWidth == 64) begin : gen_word_mux64
+    assign rdata_o = word_mux;
+  end else begin : gen_word_mux32
+    // word_enable32_q holds addr_i[2] of the previous cycle and picks the 32-bit half
+    assign rdata_o = (word_enable32_q) ? word_mux[32 +: 32] : word_mux[0 +: 32];
+  end
+
+  // read/write logic
+  logic [63:0]      data_bits;
+  logic [7:0][7:0]  rdata;
+
+  // Byte offset, relative to DataBaseAddr, of the bus word that addr_i points to.
+  // Masking with the bus width in bytes yields 0 for BusWidth == 64 and 0 or 4
+  // for BusWidth == 32 while addr_i is inside [DataBaseAddr:DataEndAddr]; the
+  // value is only consumed in that address range.
+  logic [DbgAddressBits-1:0] data_wr_offset;
+  assign data_wr_offset = (addr_i[DbgAddressBits-1:0] - DataBaseAddr) &
+                          ~DbgAddressBits'(BusWidth/8 - 1);
+
+  always_comb begin : p_rw_logic
+
+    halted_d_aligned   = NrHartsAligned'(halted_q);
+    resuming_d_aligned = NrHartsAligned'(resuming_q);
+    rdata_d            = rdata_q;
+    // convert the data in bits representation
+    data_bits          = data_i;
+    rdata              = '0;
+
+    // write data in csr register
+    data_valid_o   = 1'b0;
+    exception      = 1'b0;
+    halted_aligned = '0;
+    going          = 1'b0;
+
+    // The resume ack signal is lowered when the resume request is deasserted
+    if (clear_resumeack_i) begin
+      resuming_d_aligned[hartsel] = 1'b0;
+    end
+    // we've got a new request
+    if (req_i) begin
+      // this is a write
+      if (we_i) begin
+        unique case (addr_i[DbgAddressBits-1:0]) inside
+          // the writing hart identifies itself through the low bits of wdata_i
+          HaltedAddr: begin
+            halted_aligned[wdata_hartsel] = 1'b1;
+            halted_d_aligned[wdata_hartsel] = 1'b1;
+          end
+          GoingAddr: begin
+            going = 1'b1;
+          end
+          ResumingAddr: begin
+            // clear the halted flag as the hart resumed execution
+            halted_d_aligned[wdata_hartsel] = 1'b0;
+            // set the resuming flag which needs to be cleared by the debugger
+            resuming_d_aligned[wdata_hartsel] = 1'b1;
+          end
+          // an exception occurred during execution
+          ExceptionAddr: exception = 1'b1;
+          // core can write data registers
+          [DataBaseAddr:DataEndAddr]: begin
+            data_valid_o = 1'b1;
+            for (int i = 0; i < $bits(be_i); i++) begin
+              if (be_i[i]) begin
+                // Place bus byte lane i at the byte of the data window it
+                // addresses. Without the offset a 32-bit store to DataAddr+4
+                // would overwrite data0 instead of data1.
+                // NOTE(review): data_bits is 64 bits wide, so only the first
+                // two data registers are reachable here - confirm if
+                // dm::DataCount is ever raised above 2.
+                data_bits[(int'(data_wr_offset) + i)*8+:8] = wdata_i[i*8+:8];
+              end
+            end
+          end
+          default ;
+        endcase
+
+      // this is a read
+      end else begin
+        unique case (addr_i[DbgAddressBits-1:0]) inside
+          // variable ROM content
+          WhereToAddr: begin
+            // variable jump to abstract cmd, program_buffer or resume
+            // NOTE(review): indexed with the wdata_i-derived hart id although
+            // this is the read path - confirm what the bus master drives on
+            // wdata_i during reads.
+            if (resumereq_wdata_aligned[wdata_hartsel]) begin
+              rdata_d = {32'b0, dm::jal('0, 21'(dm::ResumeAddress[11:0])-21'(WhereToAddr))};
+            end
+
+            // there is a command active so jump there
+            // (assigned last, so it overrides the resume jump above)
+            if (cmdbusy_o) begin
+              // transfer not set is shortcut to the program buffer if postexec is set
+              // keep this statement narrow to not catch invalid commands
+              if (cmd_i.cmdtype == dm::AccessRegister &&
+                  !ac_ar.transfer && ac_ar.postexec) begin
+                rdata_d = {32'b0, dm::jal('0, 21'(ProgBufBaseAddr)-21'(WhereToAddr))};
+              // this is a legit abstract cmd -> execute it
+              end else begin
+                rdata_d = {32'b0, dm::jal('0, 21'(AbstractCmdBaseAddr)-21'(WhereToAddr))};
+              end
+            end
+          end
+
+          // NOTE(review): the index is the 64-bit word offset and is sized with
+          // $clog2(dm::ProgBufSize); this selects {data1, data0} for the single
+          // 64-bit word available with DataCount = 2 - revisit if DataCount grows.
+          [DataBaseAddr:DataEndAddr]: begin
+            rdata_d = {
+                      data_i[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+                          DataBaseAddr[DbgAddressBits-1:3] + 1'b1)],
+                      data_i[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+                          DataBaseAddr[DbgAddressBits-1:3])]
+                      };
+          end
+
+          [ProgBufBaseAddr:ProgBufEndAddr]: begin
+            rdata_d = progbuf[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+                          ProgBufBaseAddr[DbgAddressBits-1:3])];
+          end
+
+          // two slots for abstract command
+          [AbstractCmdBaseAddr:AbstractCmdEndAddr]: begin
+            // return the correct address index
+            rdata_d = abstract_cmd[3'(addr_i[DbgAddressBits-1:3] -
+                          AbstractCmdBaseAddr[DbgAddressBits-1:3])];
+          end
+          // harts are polling for flags here
+          [FlagsBaseAddr:FlagsEndAddr]: begin
+            // release the corresponding hart
+            // (only the 8-byte word containing the selected hart's flag byte
+            // carries {resume, go}; every other flag word reads as zero)
+            if (({addr_i[DbgAddressBits-1:3], 3'b0} - FlagsBaseAddr[DbgAddressBits-1:0]) ==
+              (DbgAddressBits'(hartsel) & {{(DbgAddressBits-3){1'b1}}, 3'b0})) begin
+              rdata[DbgAddressBits'(hartsel) & DbgAddressBits'(3'b111)] = {6'b0, resume, go};
+            end
+            rdata_d = rdata;
+          end
+          default: ;
+        endcase
+      end
+    end
+
+    data_o = data_bits;
+  end
+
+  // Builds the instruction sequence served from the abstract command slots.
+  // Each abstract_cmd entry holds two 32-bit instructions (low half first).
+  always_comb begin : p_abstract_cmd_rom
+    // this abstract command is currently unsupported
+    unsupported_command = 1'b0;
+    // default memory
+    // if ac_ar.transfer is not set then we can take a shortcut to the program buffer
+    abstract_cmd[0][31:0]  = dm::illegal();
+    // load debug module base address into a0, this is shared among all commands
+    abstract_cmd[0][63:32] = HasSndScratch ? dm::auipc(5'd10, '0) : dm::nop();
+    // clr lowest 12b -> DM base offset
+    abstract_cmd[1][31:0]  = HasSndScratch ? dm::srli(5'd10, 5'd10, 6'd12) : dm::nop();
+    abstract_cmd[1][63:32] = HasSndScratch ? dm::slli(5'd10, 5'd10, 6'd12) : dm::nop();
+    abstract_cmd[2][31:0]  = dm::nop();
+    abstract_cmd[2][63:32] = dm::nop();
+    abstract_cmd[3][31:0]  = dm::nop();
+    abstract_cmd[3][63:32] = dm::nop();
+    abstract_cmd[4][31:0]  = HasSndScratch ? dm::csrr(dm::CSR_DSCRATCH1, 5'd10) : dm::nop();
+    abstract_cmd[4][63:32] = dm::ebreak();
+    abstract_cmd[7:5]      = '0;
+
+    // this depends on the command being executed
+    unique case (cmd_i.cmdtype)
+      // --------------------
+      // Access Register
+      // --------------------
+      dm::AccessRegister: begin
+        if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && ac_ar.write) begin
+          // store a0 in dscratch1
+          abstract_cmd[0][31:0] = HasSndScratch ? dm::csrw(dm::CSR_DSCRATCH1, 5'd10) : dm::nop();
+          // this range is reserved
+          if (ac_ar.regno[15:14] != '0) begin
+            abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+            unsupported_command = 1'b1;
+          // A0 access needs to be handled separately, as we use A0 to load
+          // the DM address offset need to access DSCRATCH1 in this case
+          end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) &&
+                      (ac_ar.regno[4:0] == 5'd10)) begin
+            // store s0 in dscratch
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // load from data register
+            abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // and store it in the corresponding CSR
+            abstract_cmd[3][31:0]  = dm::csrw(dm::CSR_DSCRATCH1, 5'd8);
+            // restore s0 again from dscratch
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          // GPR/FPR access
+          end else if (ac_ar.regno[12]) begin
+            // determine whether we want to access the floating point register or not
+            if (ac_ar.regno[5]) begin
+              abstract_cmd[2][31:0] =
+                  dm::float_load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end else begin
+              abstract_cmd[2][31:0] =
+                  dm::load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end
+          // CSR access
+          end else begin
+            // data register to CSR
+            // store s0 in dscratch
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // load from data register
+            abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // and store it in the corresponding CSR
+            abstract_cmd[3][31:0]  = dm::csrw(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8);
+            // restore s0 again from dscratch
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          end
+        end else if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && !ac_ar.write) begin
+          // store a0 in dscratch1
+          abstract_cmd[0][31:0]  = HasSndScratch ?
+                                   dm::csrw(dm::CSR_DSCRATCH1, LoadBaseAddr) :
+                                   dm::nop();
+          // this range is reserved
+          if (ac_ar.regno[15:14] != '0) begin
+              abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+              unsupported_command = 1'b1;
+          // A0 access needs to be handled separately, as we use A0 to load
+          // the DM address offset need to access DSCRATCH1 in this case
+          end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) &&
+                      (ac_ar.regno[4:0] == 5'd10)) begin
+            // store s0 in dscratch
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // read value from CSR into s0
+            abstract_cmd[2][63:32] = dm::csrr(dm::CSR_DSCRATCH1, 5'd8);
+            // and store s0 into data section
+            abstract_cmd[3][31:0]  = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // restore s0 again from dscratch
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          // GPR/FPR access
+          end else if (ac_ar.regno[12]) begin
+            // determine whether we want to access the floating point register or not
+            if (ac_ar.regno[5]) begin
+              abstract_cmd[2][31:0] =
+                  dm::float_store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end else begin
+              abstract_cmd[2][31:0] =
+                  dm::store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end
+          // CSR access
+          end else begin
+            // CSR register to data
+            // store s0 in dscratch
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // read value from CSR into s0
+            abstract_cmd[2][63:32] = dm::csrr(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8);
+            // and store s0 into data section
+            abstract_cmd[3][31:0]  = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // restore s0 again from dscratch
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          end
+        end else if (32'(ac_ar.aarsize) >= MaxAar || ac_ar.aarpostincrement == 1'b1) begin
+          // this should happen when e.g. ac_ar.aarsize >= MaxAar
+          // Openocd will try to do an access with aarsize=64 bits
+          // first before falling back to 32 bits.
+          abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+          unsupported_command = 1'b1;
+        end
+
+        // Check whether we need to execute the program buffer. When we
+        // get an unsupported command we really should abort instead of
+        // still trying to execute the program buffer, makes it easier
+        // for the debugger to recover
+        if (ac_ar.postexec && !unsupported_command) begin
+          // issue a nop, we will automatically run into the program buffer
+          abstract_cmd[4][63:32] = dm::nop();
+        end
+      end
+      // not supported at the moment
+      // dm::QuickAccess:;
+      // dm::AccessMemory:;
+      default: begin
+        abstract_cmd[0][31:0] = dm::ebreak();
+        unsupported_command = 1'b1;
+      end
+    endcase
+  end
+
+  logic [63:0] rom_addr;
+  assign rom_addr = 64'(addr_i);
+
+  // Depending on whether the debug module is located
+  // at the zero page we can instantiate a simplified version
+  // which only requires one scratch register per hart.
+  // For all other cases we need to set aside
+  // two registers per hart, hence we also need
+  // two scratch registers.
+  if (HasSndScratch) begin : gen_rom_snd_scratch
+    debug_rom i_debug_rom (
+      .clk_i,
+      .req_i,
+      .addr_i  ( rom_addr  ),
+      .rdata_o ( rom_rdata )
+    );
+  end else begin : gen_rom_one_scratch
+    // It uses the zero register (`x0`) as the base
+    // for its loads. The zero register does not need to
+    // be saved.
+    debug_rom_one_scratch i_debug_rom (
+      .clk_i,
+      .req_i,
+      .addr_i  ( rom_addr  ),
+      .rdata_o ( rom_rdata )
+    );
+  end
+
+  // ROM starts at the HaltAddress of the core e.g.: it immediately jumps to
+  // the ROM base address
+  assign fwd_rom_d = logic'(addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      fwd_rom_q       <= 1'b0;
+      rdata_q         <= '0;
+      state_q         <= Idle;
+      word_enable32_q <= 1'b0;
+    end else begin
+      fwd_rom_q       <= fwd_rom_d;
+      rdata_q         <= rdata_d;
+      state_q         <= state_d;
+      word_enable32_q <= addr_i[2];
+    end
+  end
+
+  // per-hart state; bits of harts that are not in SelectableHarts are forced to zero
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      halted_q   <= 1'b0;
+      resuming_q <= 1'b0;
+    end else begin
+      halted_q   <= SelectableHarts & halted_d;
+      resuming_q <= SelectableHarts & resuming_d;
+    end
+  end
+
+endmodule : dm_mem
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv
new file mode 100644
index 0000000..971f128
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv
@@ -0,0 +1,436 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: dm_pkg.sv
+ * Author: Florian Zaruba
+ * Date: 30.6.2018
+ *
+ * Description: Debug-module package, contains common system definitions.
+ *
+ */
+
+// Package dm: constants, register layouts and RISC-V instruction encoders
+// shared by the debug module sources (dm_csrs, dm_mem, dm_sba, dm_top).
+package dm;
+  localparam logic [3:0] DbgVersion013 = 4'h2;
+  // size of program buffer in chunks of 32-bit words
+  localparam logic [4:0] ProgBufSize   = 5'h8;
+
+  // amount of data count registers implemented
+  localparam logic [3:0] DataCount     = 4'h2;
+
+  // address to which a hart should jump when it was requested to halt
+  localparam logic [63:0] HaltAddress      = 64'h800;
+  localparam logic [63:0] ResumeAddress    = HaltAddress + 4;
+  localparam logic [63:0] ExceptionAddress = HaltAddress + 8;
+
+  // address where data0-15 is shadowed or if shadowed in a CSR
+  // address of the first CSR used for shadowing the data
+  localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here
+
+  // debug registers
+  typedef enum logic [7:0] {
+    Data0        = 8'h04,
+    Data1        = 8'h05,
+    Data2        = 8'h06,
+    Data3        = 8'h07,
+    Data4        = 8'h08,
+    Data5        = 8'h09,
+    Data6        = 8'h0A,
+    Data7        = 8'h0B,
+    Data8        = 8'h0C,
+    Data9        = 8'h0D,
+    Data10       = 8'h0E,
+    Data11       = 8'h0F,
+    DMControl    = 8'h10,
+    DMStatus     = 8'h11, // r/o
+    Hartinfo     = 8'h12,
+    HaltSum1     = 8'h13,
+    HAWindowSel  = 8'h14,
+    HAWindow     = 8'h15,
+    AbstractCS   = 8'h16,
+    Command      = 8'h17,
+    AbstractAuto = 8'h18,
+    DevTreeAddr0 = 8'h19,
+    DevTreeAddr1 = 8'h1A,
+    DevTreeAddr2 = 8'h1B,
+    DevTreeAddr3 = 8'h1C,
+    NextDM       = 8'h1D,
+    ProgBuf0     = 8'h20,
+    ProgBuf1     = 8'h21,
+    ProgBuf2     = 8'h22,
+    ProgBuf3     = 8'h23,
+    ProgBuf4     = 8'h24,
+    ProgBuf5     = 8'h25,
+    ProgBuf6     = 8'h26,
+    ProgBuf7     = 8'h27,
+    ProgBuf8     = 8'h28,
+    ProgBuf9     = 8'h29,
+    ProgBuf10    = 8'h2A,
+    ProgBuf11    = 8'h2B,
+    ProgBuf12    = 8'h2C,
+    ProgBuf13    = 8'h2D,
+    ProgBuf14    = 8'h2E,
+    ProgBuf15    = 8'h2F,
+    AuthData     = 8'h30,
+    HaltSum2     = 8'h34,
+    HaltSum3     = 8'h35,
+    SBAddress3   = 8'h37,
+    SBCS         = 8'h38,
+    SBAddress0   = 8'h39,
+    SBAddress1   = 8'h3A,
+    SBAddress2   = 8'h3B,
+    SBData0      = 8'h3C,
+    SBData1      = 8'h3D,
+    SBData2      = 8'h3E,
+    SBData3      = 8'h3F,
+    HaltSum0     = 8'h40
+  } dm_csr_e;
+
+  // debug causes
+  localparam logic [2:0] CauseBreakpoint = 3'h1;
+  localparam logic [2:0] CauseTrigger    = 3'h2;
+  localparam logic [2:0] CauseRequest    = 3'h3;
+  localparam logic [2:0] CauseSingleStep = 3'h4;
+
+  // dmstatus register layout (32 bit, fields listed from bit 31 downwards)
+  typedef struct packed {
+    logic [31:23] zero1;
+    logic         impebreak;
+    logic [21:20] zero0;
+    logic         allhavereset;
+    logic         anyhavereset;
+    logic         allresumeack;
+    logic         anyresumeack;
+    logic         allnonexistent;
+    logic         anynonexistent;
+    logic         allunavail;
+    logic         anyunavail;
+    logic         allrunning;
+    logic         anyrunning;
+    logic         allhalted;
+    logic         anyhalted;
+    logic         authenticated;
+    logic         authbusy;
+    logic         hasresethaltreq;
+    logic         devtreevalid;
+    logic [3:0]   version;
+  } dmstatus_t;
+
+  // dmcontrol register layout (32 bit, fields listed from bit 31 downwards)
+  typedef struct packed {
+    logic         haltreq;
+    logic         resumereq;
+    logic         hartreset;
+    logic         ackhavereset;
+    logic         zero1;
+    logic         hasel;
+    logic [25:16] hartsello;
+    logic [15:6]  hartselhi;
+    logic [5:4]   zero0;
+    logic         setresethaltreq;
+    logic         clrresethaltreq;
+    logic         ndmreset;
+    logic         dmactive;
+  } dmcontrol_t;
+
+  // hartinfo register layout (32 bit)
+  typedef struct packed {
+    logic [31:24] zero1;
+    logic [23:20] nscratch;
+    logic [19:17] zero0;
+    logic         dataaccess;
+    logic [15:12] datasize;
+    logic [11:0]  dataaddr;
+  } hartinfo_t;
+
+  // abstract command error codes; values count up from 0, CmdErrorOther is pinned to 7
+  typedef enum logic [2:0] {
+    CmdErrNone, CmdErrBusy, CmdErrNotSupported,
+    CmdErrorException, CmdErrorHaltResume,
+    CmdErrorBus, CmdErrorOther = 7
+  } cmderr_e;
+
+  // abstractcs register layout (32 bit)
+  typedef struct packed {
+    logic [31:29] zero3;
+    logic [28:24] progbufsize;
+    logic [23:13] zero2;
+    logic         busy;
+    logic         zero1;
+    cmderr_e      cmderr;
+    logic [7:4]   zero0;
+    logic [3:0]   datacount;
+  } abstractcs_t;
+
+  typedef enum logic [7:0] {
+    AccessRegister = 8'h0,
+    QuickAccess    = 8'h1,
+    AccessMemory   = 8'h2
+  } cmd_e;
+
+  // command register: 8-bit command type plus 24 type-specific control bits
+  typedef struct packed {
+    cmd_e        cmdtype;
+    logic [23:0] control;
+  } command_t;
+
+  typedef struct packed {
+    logic [31:16] autoexecprogbuf;
+    logic [15:12] zero0;
+    logic [11:0]  autoexecdata;
+  } abstractauto_t;
+
+  // layout of command_t.control for cmdtype == AccessRegister (24 bit)
+  typedef struct packed {
+    logic         zero1;
+    logic [22:20] aarsize;
+    logic         aarpostincrement;
+    logic         postexec;
+    logic         transfer;
+    logic         write;
+    logic [15:0]  regno;
+  } ac_ar_cmd_t;
+
+  // DTM
+  typedef enum logic [1:0] {
+    DTM_NOP   = 2'h0,
+    DTM_READ  = 2'h1,
+    DTM_WRITE = 2'h2
+  } dtm_op_e;
+
+  // sbcs register layout (32 bit)
+  typedef struct packed {
+    logic [31:29] sbversion;
+    logic [28:23] zero0;
+    logic         sbbusyerror;
+    logic         sbbusy;
+    logic         sbreadonaddr;
+    logic [19:17] sbaccess;
+    logic         sbautoincrement;
+    logic         sbreadondata;
+    logic [14:12] sberror;
+    logic [11:5]  sbasize;
+    logic         sbaccess128;
+    logic         sbaccess64;
+    logic         sbaccess32;
+    logic         sbaccess16;
+    logic         sbaccess8;
+  } sbcs_t;
+
+  localparam logic [1:0] DTM_SUCCESS = 2'h0;
+
+  // DMI request: 7-bit register address, operation and 32-bit write data
+  typedef struct packed {
+    logic [6:0]  addr;
+    dtm_op_e     op;
+    logic [31:0] data;
+  } dmi_req_t;
+
+  // DMI response: 32-bit read data and 2-bit response code
+  typedef struct packed  {
+    logic [31:0] data;
+    logic [1:0]  resp;
+  } dmi_resp_t;
+
+  // privilege levels
+  typedef enum logic[1:0] {
+    PRIV_LVL_M = 2'b11,
+    PRIV_LVL_S = 2'b01,
+    PRIV_LVL_U = 2'b00
+  } priv_lvl_t;
+
+  // debugregs in core
+  typedef struct packed {
+    logic [31:28] xdebugver;
+    logic [27:16] zero2;
+    logic         ebreakm;
+    logic         zero1;
+    logic         ebreaks;
+    logic         ebreaku;
+    logic         stepie;
+    logic         stopcount;
+    logic         stoptime;
+    logic [8:6]   cause;
+    logic         zero0;
+    logic         mprven;
+    logic         nmip;
+    logic         step;
+    priv_lvl_t    prv;
+  } dcsr_t;
+
+  // CSRs
+  typedef enum logic [11:0] {
+    // Floating-Point CSRs
+    CSR_FFLAGS     = 12'h001,
+    CSR_FRM        = 12'h002,
+    CSR_FCSR       = 12'h003,
+    CSR_FTRAN      = 12'h800,
+    // Supervisor Mode CSRs
+    CSR_SSTATUS    = 12'h100,
+    CSR_SIE        = 12'h104,
+    CSR_STVEC      = 12'h105,
+    CSR_SCOUNTEREN = 12'h106,
+    CSR_SSCRATCH   = 12'h140,
+    CSR_SEPC       = 12'h141,
+    CSR_SCAUSE     = 12'h142,
+    CSR_STVAL      = 12'h143,
+    CSR_SIP        = 12'h144,
+    CSR_SATP       = 12'h180,
+    // Machine Mode CSRs
+    CSR_MSTATUS    = 12'h300,
+    CSR_MISA       = 12'h301,
+    CSR_MEDELEG    = 12'h302,
+    CSR_MIDELEG    = 12'h303,
+    CSR_MIE        = 12'h304,
+    CSR_MTVEC      = 12'h305,
+    CSR_MCOUNTEREN = 12'h306,
+    CSR_MSCRATCH   = 12'h340,
+    CSR_MEPC       = 12'h341,
+    CSR_MCAUSE     = 12'h342,
+    CSR_MTVAL      = 12'h343,
+    CSR_MIP        = 12'h344,
+    CSR_PMPCFG0    = 12'h3A0,
+    CSR_PMPADDR0   = 12'h3B0,
+    CSR_MVENDORID  = 12'hF11,
+    CSR_MARCHID    = 12'hF12,
+    CSR_MIMPID     = 12'hF13,
+    CSR_MHARTID    = 12'hF14,
+    CSR_MCYCLE     = 12'hB00,
+    CSR_MINSTRET   = 12'hB02,
+    CSR_DCACHE     = 12'h701,
+    CSR_ICACHE     = 12'h700,
+
+    CSR_TSELECT    = 12'h7A0,
+    CSR_TDATA1     = 12'h7A1,
+    CSR_TDATA2     = 12'h7A2,
+    CSR_TDATA3     = 12'h7A3,
+    CSR_TINFO      = 12'h7A4,
+
+    // Debug CSR
+    CSR_DCSR       = 12'h7b0,
+    CSR_DPC        = 12'h7b1,
+    CSR_DSCRATCH0  = 12'h7b2, // optional
+    CSR_DSCRATCH1  = 12'h7b3, // optional
+
+    // Counters and Timers
+    CSR_CYCLE      = 12'hC00,
+    CSR_TIME       = 12'hC01,
+    CSR_INSTRET    = 12'hC02
+  } csr_reg_t;
+
+  // SBA state
+  typedef enum logic [2:0] {
+    Idle,
+    Read,
+    Write,
+    WaitRead,
+    WaitWrite
+  } sba_state_e;
+
+  // Instruction Generation Helpers
+  // Each helper returns one 32-bit RISC-V instruction word assembled from its
+  // arguments; none of them has side effects.
+
+  // jal rd, imm: imm is the byte offset, bit 0 is dropped by the J-type encoding
+  function automatic logic [31:0] jal (logic [4:0]  rd,
+                                       logic [20:0] imm);
+    // OpCode Jal
+    return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h6f};
+  endfunction
+
+  // jalr rd, offset(rs1)
+  function automatic logic [31:0] jalr (logic [4:0]  rd,
+                                        logic [4:0]  rs1,
+                                        logic [11:0] offset);
+    // OpCode Jalr
+    return {offset[11:0], rs1, 3'b0, rd, 7'h67};
+  endfunction
+
+  function automatic logic [31:0] andi (logic [4:0]  rd,
+                                        logic [4:0]  rs1,
+                                        logic [11:0] imm);
+    // OpCode andi
+    return {imm[11:0], rs1, 3'h7, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] slli (logic [4:0] rd,
+                                        logic [4:0] rs1,
+                                        logic [5:0] shamt);
+    // OpCode slli
+    return {6'b0, shamt[5:0], rs1, 3'h1, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] srli (logic [4:0] rd,
+                                        logic [4:0] rs1,
+                                        logic [5:0] shamt);
+    // OpCode srli
+    return {6'b0, shamt[5:0], rs1, 3'h5, rd, 7'h13};
+  endfunction
+
+  // size is placed in the funct3 field of the load
+  function automatic logic [31:0] load (logic [2:0]  size,
+                                        logic [4:0]  dest,
+                                        logic [4:0]  base,
+                                        logic [11:0] offset);
+    // OpCode Load
+    return {offset[11:0], base, size, dest, 7'h03};
+  endfunction
+
+  // auipc rd, imm: AUIPC is a U-type instruction, so the 20-bit upper
+  // immediate imm[19:0] goes verbatim into instruction bits [31:12].
+  // imm[20] is ignored; the 21-bit argument is kept so existing callers
+  // compile unchanged. A zero immediate encodes exactly as before.
+  function automatic logic [31:0] auipc (logic [4:0]  rd,
+                                         logic [20:0] imm);
+    // OpCode Auipc
+    return {imm[19:0], rd, 7'h17};
+  endfunction
+
+  // size is placed in the funct3 field of the store
+  function automatic logic [31:0] store (logic [2:0]  size,
+                                         logic [4:0]  src,
+                                         logic [4:0]  base,
+                                         logic [11:0] offset);
+    // OpCode Store
+    return {offset[11:5], src, base, size, offset[4:0], 7'h23};
+  endfunction
+
+  function automatic logic [31:0] float_load (logic [2:0]  size,
+                                              logic [4:0]  dest,
+                                              logic [4:0]  base,
+                                              logic [11:0] offset);
+    // OpCode Load
+    return {offset[11:0], base, size, dest, 7'b00_001_11};
+  endfunction
+
+  function automatic logic [31:0] float_store (logic [2:0]  size,
+                                               logic [4:0]  src,
+                                               logic [4:0]  base,
+                                               logic [11:0] offset);
+    // OpCode Store
+    return {offset[11:5], src, base, size, offset[4:0], 7'b01_001_11};
+  endfunction
+
+  // write rs1 to csr; the destination register field is x0
+  function automatic logic [31:0] csrw (csr_reg_t   csr,
+                                        logic [4:0] rs1);
+    // CSRRW, rd, OpCode System
+    return {csr, rs1, 3'h1, 5'h0, 7'h73};
+  endfunction
+
+  // read csr into dest; the source register field is x0
+  function automatic logic [31:0] csrr (csr_reg_t   csr,
+                                        logic [4:0] dest);
+    // rs1, CSRRS, rd, OpCode System
+    return {csr, 5'h0, 3'h2, dest, 7'h73};
+  endfunction
+
+  // NOTE(review): offset[11] lands in instruction bit 31 and offset[10] in
+  // bit 7, i.e. offset looks like the branch displacement in units of two
+  // bytes - confirm against callers.
+  function automatic logic [31:0] branch(logic [4:0]  src2,
+                                         logic [4:0]  src1,
+                                         logic [2:0]  funct3,
+                                         logic [11:0] offset);
+    // OpCode Branch
+    return {offset[11], offset[9:4], src2, src1, funct3,
+        offset[3:0], offset[10], 7'b11_000_11};
+  endfunction
+
+  function automatic logic [31:0] ebreak ();
+    return 32'h00100073;
+  endfunction
+
+  function automatic logic [31:0] wfi ();
+    return 32'h10500073;
+  endfunction
+
+  function automatic logic [31:0] nop ();
+    return 32'h00000013;
+  endfunction
+
+  // all-zero word, used as a placeholder that is not a valid instruction
+  function automatic logic [31:0] illegal ();
+    return 32'h00000000;
+  endfunction
+
+endpackage : dm
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv
new file mode 100644
index 0000000..98c586c
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv
@@ -0,0 +1,170 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.
You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File: dm_sba.sv
+* Author: Florian Zaruba
+* Date: 1.8.2018
+*
+* Description: System Bus Access Module
+*
+*/
+// Turns the debugger's system-bus-access requests into single transfers on
+// the master port. One transfer is in flight at a time; sbbusy_o is high
+// whenever the state machine is not in dm::Idle.
+module dm_sba #(
+  parameter int unsigned BusWidth       = 32,
+  parameter bit          ReadByteEnable = 1
+) (
+  input  logic                   clk_i,       // Clock
+  input  logic                   rst_ni,
+  input  logic                   dmactive_i,  // synchronous reset active low
+
+  output logic                   master_req_o,
+  output logic [BusWidth-1:0]    master_add_o,
+  output logic                   master_we_o,
+  output logic [BusWidth-1:0]    master_wdata_o,
+  output logic [BusWidth/8-1:0]  master_be_o,
+  input  logic                   master_gnt_i,
+  input  logic                   master_r_valid_i,
+  input  logic [BusWidth-1:0]    master_r_rdata_i,
+
+  input  logic [BusWidth-1:0]    sbaddress_i,
+  input  logic                   sbaddress_write_valid_i,
+  // control signals in
+  input  logic                   sbreadonaddr_i,
+  output logic [BusWidth-1:0]    sbaddress_o,
+  input  logic                   sbautoincrement_i,
+  input  logic [2:0]             sbaccess_i,
+  // data in
+  input  logic                   sbreadondata_i,
+  input  logic [BusWidth-1:0]    sbdata_i,
+  input  logic                   sbdata_read_valid_i,
+  input  logic                   sbdata_write_valid_i,
+  // read data out
+  output logic [BusWidth-1:0]    sbdata_o,
+  output logic                   sbdata_valid_o,
+  // control signals
+  output logic                   sbbusy_o,
+  output logic                   sberror_valid_o, // bus error occurred
+  output logic [2:0]             sberror_o // bus error occurred
+);
+
+  // number of address bits that pick a byte lane inside one bus word
+  localparam int unsigned LaneBits = $clog2(BusWidth/8);
+
+  dm::sba_state_e fsm_q, fsm_d;
+
+  logic                  bus_req;
+  logic                  bus_we;
+  logic [BusWidth/8-1:0] bus_be;
+  logic [BusWidth/8-1:0] lane_mask;
+  logic [LaneBits-1:0]   lane_idx;
+
+  // byte lane addressed by the low address bits
+  assign lane_idx = sbaddress_i[LaneBits-1:0];
+
+  // anything but Idle means a transfer is pending or in flight
+  assign sbbusy_o = logic'(fsm_q != dm::Idle);
+
+  // Byte-enable mask for the requested access size (sbaccess_i = log2 of the
+  // size in bytes). Sizes above 3'b011 leave the mask at zero.
+  always_comb begin : p_be_mask
+    lane_mask = '0;
+
+    if (sbaccess_i == 3'b000) begin
+      // single byte
+      lane_mask[lane_idx] = '1;
+    end else if (sbaccess_i == 3'b001) begin
+      // half word, aligned to two bytes
+      lane_mask[int'({lane_idx[$high(lane_idx):1], 1'b0}) +: 2] = '1;
+    end else if (sbaccess_i == 3'b010) begin
+      // word: upper or lower half of a 64-bit bus, all lanes otherwise
+      if (BusWidth == 32'd64) lane_mask[int'({lane_idx[$high(lane_idx)], 2'h0}) +: 4] = '1;
+      else                    lane_mask = '1;
+    end else if (sbaccess_i == 3'b011) begin
+      lane_mask = '1;
+    end
+  end
+
+  always_comb begin : p_fsm
+    // defaults: bus idle, no error, address register unchanged, hold the state
+    bus_req         = 1'b0;
+    bus_we          = 1'b0;
+    bus_be          = '0;
+    sberror_valid_o = 1'b0;
+    sberror_o       = '0;
+    sbaddress_o     = sbaddress_i;
+    fsm_d           = fsm_q;
+
+    unique case (fsm_q)
+      dm::Idle: begin
+        // priority: read-on-data, then write, then read-on-address
+        if (sbdata_read_valid_i && sbreadondata_i) begin
+          fsm_d = dm::Read;
+        end else if (sbdata_write_valid_i) begin
+          fsm_d = dm::Write;
+        end else if (sbaddress_write_valid_i && sbreadonaddr_i) begin
+          fsm_d = dm::Read;
+        end
+      end
+
+      dm::Read: begin
+        bus_req = 1'b1;
+        // byte enables are only driven on reads when ReadByteEnable is set
+        if (ReadByteEnable) bus_be = lane_mask;
+        if (master_gnt_i) fsm_d = dm::WaitRead;
+      end
+
+      dm::Write: begin
+        bus_req = 1'b1;
+        bus_we  = 1'b1;
+        bus_be  = lane_mask;
+        if (master_gnt_i) fsm_d = dm::WaitWrite;
+      end
+
+      // both wait states finish on the response-valid strobe
+      dm::WaitRead, dm::WaitWrite: begin
+        if (master_r_valid_i) begin
+          fsm_d = dm::Idle;
+          // auto-increment address by the access size in bytes
+          if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i);
+        end
+      end
+
+      default: fsm_d = dm::Idle; // catch parasitic state
+    endcase
+
+    // unsupported access size: drop the request, return to Idle, flag error 3
+    if (sbaccess_i > 3 && fsm_q != dm::Idle) begin
+      bus_req         = 1'b0;
+      fsm_d           = dm::Idle;
+      sberror_valid_o = 1'b1;
+      sberror_o       = 3'd3;
+    end
+    // further error handling should go here ...
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      fsm_q <= dm::Idle;
+    end else begin
+      fsm_q <= fsm_d;
+    end
+  end
+
+  // master port: address and write data are passed through unchanged
+  assign master_req_o   = bus_req;
+  assign master_add_o   = sbaddress_i;
+  assign master_we_o    = bus_we;
+  assign master_wdata_o = sbdata_i;
+  assign master_be_o    = bus_be;
+
+  // response path back to the CSRs
+  assign sbdata_valid_o = master_r_valid_i;
+  assign sbdata_o       = master_r_rdata_i;
+
+endmodule : dm_sba
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv
new file mode 100644
index 0000000..9887aef
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv
@@ -0,0 +1,218 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License. You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File: dm_top.sv
+* Author: Florian Zaruba
+* Date: 30.6.2018
+*
+* Description: Top-level of debug module (DM). This is an AXI-Slave.
+* DTM protocol is equal to SiFives debug protocol to leverage
+* SW infrastructure re-use. As of version 0.13
+*/
+
+module dm_top #(
+  parameter int unsigned NrHarts = 1,
+  parameter int unsigned BusWidth = 32,
+  parameter int unsigned DmBaseAddress = 'h1000, // default to non-zero page
+  // Bitmask to select physically available harts for systems
+  // that don't use hart numbers in a contiguous fashion.
+ parameter logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}}, + parameter bit ReadByteEnable = 1 // toggle new behavior to drive master_be_o during a read +) ( + input logic clk_i, // clock + input logic rst_ni, // asynchronous reset active low, connect PoR here, not the system reset + input logic testmode_i, + output logic ndmreset_o, // non-debug module reset + output logic dmactive_o, // debug module is active + output logic [NrHarts-1:0] debug_req_o, // async debug request + input logic [NrHarts-1:0] unavailable_i, // communicate whether the hart is unavailable (e.g.: power down) + input dm::hartinfo_t [NrHarts-1:0] hartinfo_i, + + input logic slave_req_i, + input logic slave_we_i, + input logic [BusWidth-1:0] slave_addr_i, + input logic [BusWidth/8-1:0] slave_be_i, + input logic [BusWidth-1:0] slave_wdata_i, + output logic [BusWidth-1:0] slave_rdata_o, + + output logic master_req_o, + output logic [BusWidth-1:0] master_add_o, + output logic master_we_o, + output logic [BusWidth-1:0] master_wdata_o, + output logic [BusWidth/8-1:0] master_be_o, + input logic master_gnt_i, + input logic master_r_valid_i, + input logic [BusWidth-1:0] master_r_rdata_i, + + // Connection to DTM - compatible to RocketChip Debug Module + input logic dmi_rst_ni, + input logic dmi_req_valid_i, + output logic dmi_req_ready_o, + input dm::dmi_req_t dmi_req_i, + + output logic dmi_resp_valid_o, + input logic dmi_resp_ready_i, + output dm::dmi_resp_t dmi_resp_o +); + + // Debug CSRs + logic [NrHarts-1:0] halted; + // logic [NrHarts-1:0] running; + logic [NrHarts-1:0] resumeack; + logic [NrHarts-1:0] haltreq; + logic [NrHarts-1:0] resumereq; + logic clear_resumeack; + logic cmd_valid; + dm::command_t cmd; + + logic cmderror_valid; + dm::cmderr_e cmderror; + logic cmdbusy; + logic [dm::ProgBufSize-1:0][31:0] progbuf; + logic [dm::DataCount-1:0][31:0] data_csrs_mem; + logic [dm::DataCount-1:0][31:0] data_mem_csrs; + logic data_valid; + logic [19:0] hartsel; + // System Bus Access Module + 
logic [BusWidth-1:0] sbaddress_csrs_sba; + logic [BusWidth-1:0] sbaddress_sba_csrs; + logic sbaddress_write_valid; + logic sbreadonaddr; + logic sbautoincrement; + logic [2:0] sbaccess; + logic sbreadondata; + logic [BusWidth-1:0] sbdata_write; + logic sbdata_read_valid; + logic sbdata_write_valid; + logic [BusWidth-1:0] sbdata_read; + logic sbdata_valid; + logic sbbusy; + logic sberror_valid; + logic [2:0] sberror; + + + dm_csrs #( + .NrHarts(NrHarts), + .BusWidth(BusWidth), + .SelectableHarts(SelectableHarts) + ) i_dm_csrs ( + .clk_i, + .rst_ni, + .testmode_i, + .dmi_rst_ni, + .dmi_req_valid_i, + .dmi_req_ready_o, + .dmi_req_i, + .dmi_resp_valid_o, + .dmi_resp_ready_i, + .dmi_resp_o, + .ndmreset_o, + .dmactive_o, + .hartsel_o ( hartsel ), + .hartinfo_i, + .halted_i ( halted ), + .unavailable_i, + .resumeack_i ( resumeack ), + .haltreq_o ( haltreq ), + .resumereq_o ( resumereq ), + .clear_resumeack_o ( clear_resumeack ), + .cmd_valid_o ( cmd_valid ), + .cmd_o ( cmd ), + .cmderror_valid_i ( cmderror_valid ), + .cmderror_i ( cmderror ), + .cmdbusy_i ( cmdbusy ), + .progbuf_o ( progbuf ), + .data_i ( data_mem_csrs ), + .data_valid_i ( data_valid ), + .data_o ( data_csrs_mem ), + .sbaddress_o ( sbaddress_csrs_sba ), + .sbaddress_i ( sbaddress_sba_csrs ), + .sbaddress_write_valid_o ( sbaddress_write_valid ), + .sbreadonaddr_o ( sbreadonaddr ), + .sbautoincrement_o ( sbautoincrement ), + .sbaccess_o ( sbaccess ), + .sbreadondata_o ( sbreadondata ), + .sbdata_o ( sbdata_write ), + .sbdata_read_valid_o ( sbdata_read_valid ), + .sbdata_write_valid_o ( sbdata_write_valid ), + .sbdata_i ( sbdata_read ), + .sbdata_valid_i ( sbdata_valid ), + .sbbusy_i ( sbbusy ), + .sberror_valid_i ( sberror_valid ), + .sberror_i ( sberror ) + ); + + dm_sba #( + .BusWidth(BusWidth), + .ReadByteEnable(ReadByteEnable) + ) i_dm_sba ( + .clk_i, + .rst_ni, + .dmactive_i ( dmactive_o ), + + .master_req_o, + .master_add_o, + .master_we_o, + .master_wdata_o, + .master_be_o, + .master_gnt_i, + 
.master_r_valid_i, + .master_r_rdata_i, + + .sbaddress_i ( sbaddress_csrs_sba ), + .sbaddress_o ( sbaddress_sba_csrs ), + .sbaddress_write_valid_i ( sbaddress_write_valid ), + .sbreadonaddr_i ( sbreadonaddr ), + .sbautoincrement_i ( sbautoincrement ), + .sbaccess_i ( sbaccess ), + .sbreadondata_i ( sbreadondata ), + .sbdata_i ( sbdata_write ), + .sbdata_read_valid_i ( sbdata_read_valid ), + .sbdata_write_valid_i ( sbdata_write_valid ), + .sbdata_o ( sbdata_read ), + .sbdata_valid_o ( sbdata_valid ), + .sbbusy_o ( sbbusy ), + .sberror_valid_o ( sberror_valid ), + .sberror_o ( sberror ) + ); + + dm_mem #( + .NrHarts(NrHarts), + .BusWidth(BusWidth), + .SelectableHarts(SelectableHarts), + .DmBaseAddress(DmBaseAddress) + ) i_dm_mem ( + .clk_i, + .rst_ni, + .debug_req_o, + .hartsel_i ( hartsel ), + .haltreq_i ( haltreq ), + .resumereq_i ( resumereq ), + .clear_resumeack_i ( clear_resumeack ), + .halted_o ( halted ), + .resuming_o ( resumeack ), + .cmd_valid_i ( cmd_valid ), + .cmd_i ( cmd ), + .cmderror_valid_o ( cmderror_valid ), + .cmderror_o ( cmderror ), + .cmdbusy_o ( cmdbusy ), + .progbuf_i ( progbuf ), + .data_i ( data_csrs_mem ), + .data_o ( data_mem_csrs ), + .data_valid_o ( data_valid ), + .req_i ( slave_req_i ), + .we_i ( slave_we_i ), + .addr_i ( slave_addr_i ), + .wdata_i ( slave_wdata_i ), + .be_i ( slave_be_i ), + .rdata_o ( slave_rdata_o ) + ); + +endmodule : dm_top diff --git a/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv new file mode 100644 index 0000000..4665c91 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv @@ -0,0 +1,73 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. +* Copyright and related rights are licensed under the Solderpad Hardware +* License, Version 0.51 (the “License”); you may not use this file except in +* compliance with the License. You may obtain a copy of the License at +* http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +* or agreed to in writing, software, hardware and materials distributed under +* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +* +* File: dmi_cdc.sv +* Author: Andreas Traber +* Author: Florian Zaruba +* +* Description: Clock domain crossings for JTAG to DMI very heavily based +* on previous work by Andreas Traber for the PULP project. +* This is mainly a wrapper around the existing CDCs. +*/ +module dmi_cdc ( + // JTAG side (master side) + input logic tck_i, + input logic trst_ni, + + input dm::dmi_req_t jtag_dmi_req_i, + output logic jtag_dmi_ready_o, + input logic jtag_dmi_valid_i, + + output dm::dmi_resp_t jtag_dmi_resp_o, + output logic jtag_dmi_valid_o, + input logic jtag_dmi_ready_i, + + // core side (slave side) + input logic clk_i, + input logic rst_ni, + + output dm::dmi_req_t core_dmi_req_o, + output logic core_dmi_valid_o, + input logic core_dmi_ready_i, + + input dm::dmi_resp_t core_dmi_resp_i, + output logic core_dmi_ready_o, + input logic core_dmi_valid_i +); + + cdc_2phase #(.T(dm::dmi_req_t)) i_cdc_req ( + .src_rst_ni ( trst_ni ), + .src_clk_i ( tck_i ), + .src_data_i ( jtag_dmi_req_i ), + .src_valid_i ( jtag_dmi_valid_i ), + .src_ready_o ( jtag_dmi_ready_o ), + + .dst_rst_ni ( rst_ni ), + .dst_clk_i ( clk_i ), + .dst_data_o ( core_dmi_req_o ), + .dst_valid_o ( core_dmi_valid_o ), + .dst_ready_i ( core_dmi_ready_i ) + ); + + cdc_2phase #(.T(dm::dmi_resp_t)) i_cdc_resp ( + .src_rst_ni ( rst_ni ), + .src_clk_i ( clk_i ), + .src_data_i ( core_dmi_resp_i ), + .src_valid_i ( core_dmi_valid_i ), + .src_ready_o ( core_dmi_ready_o ), + + .dst_rst_ni ( trst_ni ), + .dst_clk_i ( tck_i ), + .dst_data_o ( jtag_dmi_resp_o ), + .dst_valid_o ( jtag_dmi_valid_o ), + .dst_ready_i ( jtag_dmi_ready_i ) + ); + +endmodule : dmi_cdc diff
--git a/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv new file mode 100644 index 0000000..c4c7b52 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv @@ -0,0 +1,271 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. +* Copyright and related rights are licensed under the Solderpad Hardware +* License, Version 0.51 (the “License”); you may not use this file except in +* compliance with the License. You may obtain a copy of the License at +* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +* or agreed to in writing, software, hardware and materials distributed under +* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +* +* File: dmi_jtag.sv +* Author: Florian Zaruba +* Date: 19.7.2018 +* +* Description: JTAG DMI (debug module interface) +* +*/ + +module dmi_jtag #( + parameter logic [31:0] IdcodeValue = 32'h00000001 +) ( + input logic clk_i, // DMI Clock + input logic rst_ni, // Asynchronous reset active low + input logic testmode_i, + + output logic dmi_rst_no, // hard reset + output dm::dmi_req_t dmi_req_o, + output logic dmi_req_valid_o, + input logic dmi_req_ready_i, + + input dm::dmi_resp_t dmi_resp_i, + output logic dmi_resp_ready_o, + input logic dmi_resp_valid_i, + + input logic tck_i, // JTAG test clock pad + input logic tms_i, // JTAG test mode select pad + input logic trst_ni, // JTAG test reset pad + input logic td_i, // JTAG test data input pad + output logic td_o, // JTAG test data output pad + output logic tdo_oe_o // Data out output enable +); + assign dmi_rst_no = rst_ni; + + logic test_logic_reset; + logic shift_dr; + logic update_dr; + logic capture_dr; + logic dmi_access; + logic dtmcs_select; + logic dmi_reset; + logic dmi_tdi; + logic dmi_tdo; +
+ dm::dmi_req_t dmi_req; + logic dmi_req_ready; + logic dmi_req_valid; + + dm::dmi_resp_t dmi_resp; + logic dmi_resp_valid; + logic dmi_resp_ready; + + typedef struct packed { + logic [6:0] address; + logic [31:0] data; + logic [1:0] op; + } dmi_t; + + typedef enum logic [1:0] { + DMINoError = 2'h0, DMIReservedError = 2'h1, + DMIOPFailed = 2'h2, DMIBusy = 2'h3 + } dmi_error_e; + + typedef enum logic [2:0] { Idle, Read, WaitReadValid, Write, WaitWriteValid } state_e; + state_e state_d, state_q; + + logic [$bits(dmi_t)-1:0] dr_d, dr_q; + logic [6:0] address_d, address_q; + logic [31:0] data_d, data_q; + + dmi_t dmi; + assign dmi = dmi_t'(dr_q); + assign dmi_req.addr = address_q; + assign dmi_req.data = data_q; + assign dmi_req.op = (state_q == Write) ? dm::DTM_WRITE : dm::DTM_READ; + // we will always be ready to accept the data we requested + assign dmi_resp_ready = 1'b1; + + logic error_dmi_busy; + dmi_error_e error_d, error_q; + + always_comb begin : p_fsm + error_dmi_busy = 1'b0; + // default assignments + state_d = state_q; + address_d = address_q; + data_d = data_q; + error_d = error_q; + + dmi_req_valid = 1'b0; + + unique case (state_q) + Idle: begin + // make sure that no error is sticky + if (dmi_access && update_dr && (error_q == DMINoError)) begin + // save address and value + address_d = dmi.address; + data_d = dmi.data; + if (dm::dtm_op_e'(dmi.op) == dm::DTM_READ) begin + state_d = Read; + end else if (dm::dtm_op_e'(dmi.op) == dm::DTM_WRITE) begin + state_d = Write; + end + // else this is a nop and we can stay here + end + end + + Read: begin + dmi_req_valid = 1'b1; + if (dmi_req_ready) begin + state_d = WaitReadValid; + end + end + + WaitReadValid: begin + // load data into register and shift out + if (dmi_resp_valid) begin + data_d = dmi_resp.data; + state_d = Idle; + end + end + + Write: begin + dmi_req_valid = 1'b1; + // request sent, wait for response before going back to idle + if (dmi_req_ready) begin + state_d = WaitWriteValid; + end + end + +
WaitWriteValid: begin + // got a valid answer go back to idle + if (dmi_resp_valid) begin + state_d = Idle; + end + end + + default: begin + // just wait for idle here + if (dmi_resp_valid) begin + state_d = Idle; + end + end + endcase + + // update_dr means we got another request but we didn't finish + // the one in progress, this state is sticky + if (update_dr && state_q != Idle) begin + error_dmi_busy = 1'b1; + end + + // if capture_dr goes high while we are in the read state + // or in the corresponding wait state we are not giving back a valid word + // -> throw an error + if (capture_dr && state_q inside {Read, WaitReadValid}) begin + error_dmi_busy = 1'b1; + end + + if (error_dmi_busy) begin + error_d = DMIBusy; + end + // clear sticky error flag + if (update_dr && dmi_reset && dtmcs_select) begin + error_d = DMINoError; + end + end + + // shift register + assign dmi_tdo = dr_q[0]; + + always_comb begin : p_shift + dr_d = dr_q; + + if (capture_dr) begin + if (dmi_access) begin + if (error_q == DMINoError && !error_dmi_busy) begin + dr_d = {address_q, data_q, DMINoError}; + // DMI was busy, report an error + end else if (error_q == DMIBusy || error_dmi_busy) begin + dr_d = {address_q, data_q, DMIBusy}; + end + end + end + + if (shift_dr) begin + if (dmi_access) begin + dr_d = {dmi_tdi, dr_q[$bits(dr_q)-1:1]}; + end + end + + if (test_logic_reset) begin + dr_d = '0; + end + end + + always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs + if (!trst_ni) begin + dr_q <= '0; + state_q <= Idle; + address_q <= '0; + data_q <= '0; + error_q <= DMINoError; + end else begin + dr_q <= dr_d; + state_q <= state_d; + address_q <= address_d; + data_q <= data_d; + error_q <= error_d; + end + end + + // --------- + // TAP + // --------- + dmi_jtag_tap #( + .IrLength (5), + .IdcodeValue(IdcodeValue) + ) i_dmi_jtag_tap ( + .tck_i, + .tms_i, + .trst_ni, + .td_i, + .td_o, + .tdo_oe_o, + .testmode_i, + .test_logic_reset_o ( test_logic_reset ), + .shift_dr_o ( shift_dr ), + 
.update_dr_o ( update_dr ), + .capture_dr_o ( capture_dr ), + .dmi_access_o ( dmi_access ), + .dtmcs_select_o ( dtmcs_select ), + .dmi_reset_o ( dmi_reset ), + .dmi_error_i ( error_q ), + .dmi_tdi_o ( dmi_tdi ), + .dmi_tdo_i ( dmi_tdo ) + ); + + // --------- + // CDC + // --------- + dmi_cdc i_dmi_cdc ( + // JTAG side (master side) + .tck_i, + .trst_ni, + .jtag_dmi_req_i ( dmi_req ), + .jtag_dmi_ready_o ( dmi_req_ready ), + .jtag_dmi_valid_i ( dmi_req_valid ), + .jtag_dmi_resp_o ( dmi_resp ), + .jtag_dmi_valid_o ( dmi_resp_valid ), + .jtag_dmi_ready_i ( dmi_resp_ready ), + // core side + .clk_i, + .rst_ni, + .core_dmi_req_o ( dmi_req_o ), + .core_dmi_valid_o ( dmi_req_valid_o ), + .core_dmi_ready_i ( dmi_req_ready_i ), + .core_dmi_resp_i ( dmi_resp_i ), + .core_dmi_ready_o ( dmi_resp_ready_o ), + .core_dmi_valid_i ( dmi_resp_valid_i ) + ); + +endmodule : dmi_jtag diff --git a/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv new file mode 100644 index 0000000..c2e8d6e --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv @@ -0,0 +1,349 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ * + * File: dmi_jtag_tap.sv + * Author: Florian Zaruba + * Date: 19.7.2018 + * + * Description: JTAG TAP for DMI (according to debug spec 0.13) + * + */ + +module dmi_jtag_tap #( + parameter int unsigned IrLength = 5, + // JTAG IDCODE Value + parameter logic [31:0] IdcodeValue = 32'h00000001 + // xxxx version + // xxxxxxxxxxxxxxxx part number + // xxxxxxxxxxx manufacturer id + // 1 required by standard +) ( + input logic tck_i, // JTAG test clock pad + input logic tms_i, // JTAG test mode select pad + input logic trst_ni, // JTAG test reset pad + input logic td_i, // JTAG test data input pad + output logic td_o, // JTAG test data output pad + output logic tdo_oe_o, // Data out output enable + input logic testmode_i, + output logic test_logic_reset_o, + output logic shift_dr_o, + output logic update_dr_o, + output logic capture_dr_o, + + // we want to access DMI register + output logic dmi_access_o, + // JTAG is interested in writing the DTM CSR register + output logic dtmcs_select_o, + // clear error state + output logic dmi_reset_o, + input logic [1:0] dmi_error_i, + // test data to submodule + output logic dmi_tdi_o, + // test data in from submodule + input logic dmi_tdo_i +); + + // to submodule + assign dmi_tdi_o = td_i; + + typedef enum logic [3:0] { + TestLogicReset, RunTestIdle, SelectDrScan, + CaptureDr, ShiftDr, Exit1Dr, PauseDr, Exit2Dr, + UpdateDr, SelectIrScan, CaptureIr, ShiftIr, + Exit1Ir, PauseIr, Exit2Ir, UpdateIr + } tap_state_e; + + tap_state_e tap_state_q, tap_state_d; + + typedef enum logic [IrLength-1:0] { + BYPASS0 = 'h0, + IDCODE = 'h1, + DTMCSR = 'h10, + DMIACCESS = 'h11, + BYPASS1 = 'h1f + } ir_reg_e; + + typedef struct packed { + logic [31:18] zero1; + logic dmihardreset; + logic dmireset; + logic zero0; + logic [14:12] idle; + logic [11:10] dmistat; + logic [9:4] abits; + logic [3:0] version; + } dtmcs_t; + + // ---------------- + // IR logic + // ---------------- + + // shift register + logic [IrLength-1:0] jtag_ir_shift_d, 
jtag_ir_shift_q; + // IR register -> this gets captured from shift register upon update_ir + ir_reg_e jtag_ir_d, jtag_ir_q; + logic capture_ir, shift_ir, update_ir; // pause_ir + + always_comb begin : p_jtag + jtag_ir_shift_d = jtag_ir_shift_q; + jtag_ir_d = jtag_ir_q; + + // IR shift register + if (shift_ir) begin + jtag_ir_shift_d = {td_i, jtag_ir_shift_q[IrLength-1:1]}; + end + + // capture IR register + if (capture_ir) begin + jtag_ir_shift_d = IrLength'(4'b0101); + end + + // update IR register + if (update_ir) begin + jtag_ir_d = ir_reg_e'(jtag_ir_shift_q); + end + + // synchronous test-logic reset + if (test_logic_reset_o) begin + jtag_ir_shift_d = '0; + jtag_ir_d = IDCODE; + end + end + + always_ff @(posedge tck_i, negedge trst_ni) begin : p_jtag_ir_reg + if (!trst_ni) begin + jtag_ir_shift_q <= '0; + jtag_ir_q <= IDCODE; + end else begin + jtag_ir_shift_q <= jtag_ir_shift_d; + jtag_ir_q <= jtag_ir_d; + end + end + + // ---------------- + // TAP DR Regs + // ---------------- + // - Bypass + // - IDCODE + // - DTM CS + logic [31:0] idcode_d, idcode_q; + logic idcode_select; + logic bypass_select; + dtmcs_t dtmcs_d, dtmcs_q; + logic bypass_d, bypass_q; // this is a 1-bit register + + assign dmi_reset_o = dtmcs_q.dmireset; + + always_comb begin + idcode_d = idcode_q; + bypass_d = bypass_q; + dtmcs_d = dtmcs_q; + + if (capture_dr_o) begin + if (idcode_select) idcode_d = IdcodeValue; + if (bypass_select) bypass_d = 1'b0; + if (dtmcs_select_o) begin + dtmcs_d = '{ + zero1 : '0, + dmihardreset : 1'b0, + dmireset : 1'b0, + zero0 : '0, + idle : 3'd1, // 1: Enter Run-Test/Idle and leave it immediately + dmistat : dmi_error_i, // 0: No error, 2: Op failed, 3: too fast + abits : 6'd7, // The size of address in dmi + version : 4'd1 // Version described in spec version 0.13 (and later?) 
+ }; + end + end + + if (shift_dr_o) begin + if (idcode_select) idcode_d = {td_i, 31'(idcode_q >> 1)}; + if (bypass_select) bypass_d = td_i; + if (dtmcs_select_o) dtmcs_d = {td_i, 31'(dtmcs_q >> 1)}; + end + + if (test_logic_reset_o) begin + idcode_d = IdcodeValue; + bypass_d = 1'b0; + end + end + + // ---------------- + // Data reg select + // ---------------- + always_comb begin : p_data_reg_sel + dmi_access_o = 1'b0; + dtmcs_select_o = 1'b0; + idcode_select = 1'b0; + bypass_select = 1'b0; + unique case (jtag_ir_q) + BYPASS0: bypass_select = 1'b1; + IDCODE: idcode_select = 1'b1; + DTMCSR: dtmcs_select_o = 1'b1; + DMIACCESS: dmi_access_o = 1'b1; + BYPASS1: bypass_select = 1'b1; + default: bypass_select = 1'b1; + endcase + end + + // ---------------- + // Output select + // ---------------- + logic tdo_mux; + + always_comb begin : p_out_sel + // we are shifting out the IR register + if (shift_ir) begin + tdo_mux = jtag_ir_shift_q[0]; + // here we are shifting the DR register + end else begin + unique case (jtag_ir_q) + IDCODE: tdo_mux = idcode_q[0]; // Reading ID code + DTMCSR: tdo_mux = dtmcs_q.version[0]; + DMIACCESS: tdo_mux = dmi_tdo_i; // Read from DMI TDO + default: tdo_mux = bypass_q; // BYPASS instruction + endcase + end + end + + // ---------------- + // DFT + // ---------------- + logic tck_n, tck_ni; + + cluster_clock_inverter i_tck_inv ( + .clk_i ( tck_i ), + .clk_o ( tck_ni ) + ); + + pulp_clock_mux2 i_dft_tck_mux ( + .clk0_i ( tck_ni ), + .clk1_i ( tck_i ), // bypass the inverted clock for testing + .clk_sel_i ( testmode_i ), + .clk_o ( tck_n ) + ); + + // TDO changes state at negative edge of TCK + always_ff @(posedge tck_n, negedge trst_ni) begin : p_tdo_regs + if (!trst_ni) begin + td_o <= 1'b0; + tdo_oe_o <= 1'b0; + end else begin + td_o <= tdo_mux; + tdo_oe_o <= (shift_ir | shift_dr_o); + end + end + // ---------------- + // TAP FSM + // ---------------- + // Determination of next state; purely combinatorial + always_comb begin : p_tap_fsm + + 
test_logic_reset_o = 1'b0; + + capture_dr_o = 1'b0; + shift_dr_o = 1'b0; + update_dr_o = 1'b0; + + capture_ir = 1'b0; + shift_ir = 1'b0; + // pause_ir = 1'b0; unused + update_ir = 1'b0; + + unique case (tap_state_q) + TestLogicReset: begin + tap_state_d = (tms_i) ? TestLogicReset : RunTestIdle; + test_logic_reset_o = 1'b1; + end + RunTestIdle: begin + tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle; + end + // DR Path + SelectDrScan: begin + tap_state_d = (tms_i) ? SelectIrScan : CaptureDr; + end + CaptureDr: begin + capture_dr_o = 1'b1; + tap_state_d = (tms_i) ? Exit1Dr : ShiftDr; + end + ShiftDr: begin + shift_dr_o = 1'b1; + tap_state_d = (tms_i) ? Exit1Dr : ShiftDr; + end + Exit1Dr: begin + tap_state_d = (tms_i) ? UpdateDr : PauseDr; + end + PauseDr: begin + tap_state_d = (tms_i) ? Exit2Dr : PauseDr; + end + Exit2Dr: begin + tap_state_d = (tms_i) ? UpdateDr : ShiftDr; + end + UpdateDr: begin + update_dr_o = 1'b1; + tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle; + end + // IR Path + SelectIrScan: begin + tap_state_d = (tms_i) ? TestLogicReset : CaptureIr; + end + // In this controller state, the shift register bank in the + // Instruction Register parallel loads a pattern of fixed values on + // the rising edge of TCK. The last two significant bits must always + // be "01". + CaptureIr: begin + capture_ir = 1'b1; + tap_state_d = (tms_i) ? Exit1Ir : ShiftIr; + end + // In this controller state, the instruction register gets connected + // between TDI and TDO, and the captured pattern gets shifted on + // each rising edge of TCK. The instruction available on the TDI + // pin is also shifted in to the instruction register. + ShiftIr: begin + shift_ir = 1'b1; + tap_state_d = (tms_i) ? Exit1Ir : ShiftIr; + end + Exit1Ir: begin + tap_state_d = (tms_i) ? UpdateIr : PauseIr; + end + PauseIr: begin + // pause_ir = 1'b1; // unused + tap_state_d = (tms_i) ? Exit2Ir : PauseIr; + end + Exit2Ir: begin + tap_state_d = (tms_i) ? 
UpdateIr : ShiftIr; + end + // In this controller state, the instruction in the instruction + // shift register is latched to the latch bank of the Instruction + // Register on every falling edge of TCK. This instruction becomes + // the current instruction once it is latched. + UpdateIr: begin + update_ir = 1'b1; + tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle; + end + default: ; // can't actually happen since case is full + endcase + end + + always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs + if (!trst_ni) begin + tap_state_q <= RunTestIdle; + idcode_q <= IdcodeValue; + bypass_q <= 1'b0; + dtmcs_q <= '0; + end else begin + tap_state_q <= tap_state_d; + idcode_q <= idcode_d; + bypass_q <= bypass_d; + dtmcs_q <= dtmcs_d; + end + end + +endmodule : dmi_jtag_tap diff --git a/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv b/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv new file mode 100644 index 0000000..bbf6f94 --- /dev/null +++ b/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv @@ -0,0 +1,357 @@ +// Do not edit - auto-generated +module plic_regs #( + parameter type reg_req_t = logic, + parameter type reg_rsp_t = logic +)( + input logic [30:0][2:0] prio_i, + output logic [30:0][2:0] prio_o, + output logic [30:0] prio_we_o, + output logic [30:0] prio_re_o, + input logic [0:0][30:0] ip_i, + output logic [0:0] ip_re_o, + input logic [1:0][30:0] ie_i, + output logic [1:0][30:0] ie_o, + output logic [1:0] ie_we_o, + output logic [1:0] ie_re_o, + input logic [1:0][2:0] threshold_i, + output logic [1:0][2:0] threshold_o, + output logic [1:0] threshold_we_o, + output logic [1:0] threshold_re_o, + input logic [1:0][4:0] cc_i, + output logic [1:0][4:0] cc_o, + output logic [1:0] cc_we_o, + output logic [1:0] cc_re_o, + // Bus Interface + input reg_req_t req_i, + output reg_rsp_t resp_o +); +always_comb begin + resp_o.ready = 1'b1; + resp_o.rdata = '0; + resp_o.error = '0; + prio_o = '0; + prio_we_o = '0; + prio_re_o = '0; + ie_o = '0; + ie_we_o 
= '0; + ie_re_o = '0; + threshold_o = '0; + threshold_we_o = '0; + threshold_re_o = '0; + cc_o = '0; + cc_we_o = '0; + cc_re_o = '0; + if (req_i.valid) begin + if (req_i.write) begin + unique case(req_i.addr) + 32'hc000000: begin + prio_o[0][2:0] = req_i.wdata[2:0]; + prio_we_o[0] = 1'b1; + end + 32'hc000004: begin + prio_o[1][2:0] = req_i.wdata[2:0]; + prio_we_o[1] = 1'b1; + end + 32'hc000008: begin + prio_o[2][2:0] = req_i.wdata[2:0]; + prio_we_o[2] = 1'b1; + end + 32'hc00000c: begin + prio_o[3][2:0] = req_i.wdata[2:0]; + prio_we_o[3] = 1'b1; + end + 32'hc000010: begin + prio_o[4][2:0] = req_i.wdata[2:0]; + prio_we_o[4] = 1'b1; + end + 32'hc000014: begin + prio_o[5][2:0] = req_i.wdata[2:0]; + prio_we_o[5] = 1'b1; + end + 32'hc000018: begin + prio_o[6][2:0] = req_i.wdata[2:0]; + prio_we_o[6] = 1'b1; + end + 32'hc00001c: begin + prio_o[7][2:0] = req_i.wdata[2:0]; + prio_we_o[7] = 1'b1; + end + 32'hc000020: begin + prio_o[8][2:0] = req_i.wdata[2:0]; + prio_we_o[8] = 1'b1; + end + 32'hc000024: begin + prio_o[9][2:0] = req_i.wdata[2:0]; + prio_we_o[9] = 1'b1; + end + 32'hc000028: begin + prio_o[10][2:0] = req_i.wdata[2:0]; + prio_we_o[10] = 1'b1; + end + 32'hc00002c: begin + prio_o[11][2:0] = req_i.wdata[2:0]; + prio_we_o[11] = 1'b1; + end + 32'hc000030: begin + prio_o[12][2:0] = req_i.wdata[2:0]; + prio_we_o[12] = 1'b1; + end + 32'hc000034: begin + prio_o[13][2:0] = req_i.wdata[2:0]; + prio_we_o[13] = 1'b1; + end + 32'hc000038: begin + prio_o[14][2:0] = req_i.wdata[2:0]; + prio_we_o[14] = 1'b1; + end + 32'hc00003c: begin + prio_o[15][2:0] = req_i.wdata[2:0]; + prio_we_o[15] = 1'b1; + end + 32'hc000040: begin + prio_o[16][2:0] = req_i.wdata[2:0]; + prio_we_o[16] = 1'b1; + end + 32'hc000044: begin + prio_o[17][2:0] = req_i.wdata[2:0]; + prio_we_o[17] = 1'b1; + end + 32'hc000048: begin + prio_o[18][2:0] = req_i.wdata[2:0]; + prio_we_o[18] = 1'b1; + end + 32'hc00004c: begin + prio_o[19][2:0] = req_i.wdata[2:0]; + prio_we_o[19] = 1'b1; + end + 32'hc000050: begin + 
prio_o[20][2:0] = req_i.wdata[2:0]; + prio_we_o[20] = 1'b1; + end + 32'hc000054: begin + prio_o[21][2:0] = req_i.wdata[2:0]; + prio_we_o[21] = 1'b1; + end + 32'hc000058: begin + prio_o[22][2:0] = req_i.wdata[2:0]; + prio_we_o[22] = 1'b1; + end + 32'hc00005c: begin + prio_o[23][2:0] = req_i.wdata[2:0]; + prio_we_o[23] = 1'b1; + end + 32'hc000060: begin + prio_o[24][2:0] = req_i.wdata[2:0]; + prio_we_o[24] = 1'b1; + end + 32'hc000064: begin + prio_o[25][2:0] = req_i.wdata[2:0]; + prio_we_o[25] = 1'b1; + end + 32'hc000068: begin + prio_o[26][2:0] = req_i.wdata[2:0]; + prio_we_o[26] = 1'b1; + end + 32'hc00006c: begin + prio_o[27][2:0] = req_i.wdata[2:0]; + prio_we_o[27] = 1'b1; + end + 32'hc000070: begin + prio_o[28][2:0] = req_i.wdata[2:0]; + prio_we_o[28] = 1'b1; + end + 32'hc000074: begin + prio_o[29][2:0] = req_i.wdata[2:0]; + prio_we_o[29] = 1'b1; + end + 32'hc000078: begin + prio_o[30][2:0] = req_i.wdata[2:0]; + prio_we_o[30] = 1'b1; + end + 32'hc002000: begin + ie_o[0][30:0] = req_i.wdata[30:0]; + ie_we_o[0] = 1'b1; + end + 32'hc002080: begin + ie_o[1][30:0] = req_i.wdata[30:0]; + ie_we_o[1] = 1'b1; + end + 32'hc200000: begin + threshold_o[0][2:0] = req_i.wdata[2:0]; + threshold_we_o[0] = 1'b1; + end + 32'hc201000: begin + threshold_o[1][2:0] = req_i.wdata[2:0]; + threshold_we_o[1] = 1'b1; + end + 32'hc200004: begin + cc_o[0][4:0] = req_i.wdata[4:0]; + cc_we_o[0] = 1'b1; + end + 32'hc201004: begin + cc_o[1][4:0] = req_i.wdata[4:0]; + cc_we_o[1] = 1'b1; + end + default: resp_o.error = 1'b1; + endcase + end else begin + unique case(req_i.addr) + 32'hc000000: begin + resp_o.rdata[2:0] = prio_i[0][2:0]; + prio_re_o[0] = 1'b1; + end + 32'hc000004: begin + resp_o.rdata[2:0] = prio_i[1][2:0]; + prio_re_o[1] = 1'b1; + end + 32'hc000008: begin + resp_o.rdata[2:0] = prio_i[2][2:0]; + prio_re_o[2] = 1'b1; + end + 32'hc00000c: begin + resp_o.rdata[2:0] = prio_i[3][2:0]; + prio_re_o[3] = 1'b1; + end + 32'hc000010: begin + resp_o.rdata[2:0] = prio_i[4][2:0]; + prio_re_o[4] = 
1'b1; + end + 32'hc000014: begin + resp_o.rdata[2:0] = prio_i[5][2:0]; + prio_re_o[5] = 1'b1; + end + 32'hc000018: begin + resp_o.rdata[2:0] = prio_i[6][2:0]; + prio_re_o[6] = 1'b1; + end + 32'hc00001c: begin + resp_o.rdata[2:0] = prio_i[7][2:0]; + prio_re_o[7] = 1'b1; + end + 32'hc000020: begin + resp_o.rdata[2:0] = prio_i[8][2:0]; + prio_re_o[8] = 1'b1; + end + 32'hc000024: begin + resp_o.rdata[2:0] = prio_i[9][2:0]; + prio_re_o[9] = 1'b1; + end + 32'hc000028: begin + resp_o.rdata[2:0] = prio_i[10][2:0]; + prio_re_o[10] = 1'b1; + end + 32'hc00002c: begin + resp_o.rdata[2:0] = prio_i[11][2:0]; + prio_re_o[11] = 1'b1; + end + 32'hc000030: begin + resp_o.rdata[2:0] = prio_i[12][2:0]; + prio_re_o[12] = 1'b1; + end + 32'hc000034: begin + resp_o.rdata[2:0] = prio_i[13][2:0]; + prio_re_o[13] = 1'b1; + end + 32'hc000038: begin + resp_o.rdata[2:0] = prio_i[14][2:0]; + prio_re_o[14] = 1'b1; + end + 32'hc00003c: begin + resp_o.rdata[2:0] = prio_i[15][2:0]; + prio_re_o[15] = 1'b1; + end + 32'hc000040: begin + resp_o.rdata[2:0] = prio_i[16][2:0]; + prio_re_o[16] = 1'b1; + end + 32'hc000044: begin + resp_o.rdata[2:0] = prio_i[17][2:0]; + prio_re_o[17] = 1'b1; + end + 32'hc000048: begin + resp_o.rdata[2:0] = prio_i[18][2:0]; + prio_re_o[18] = 1'b1; + end + 32'hc00004c: begin + resp_o.rdata[2:0] = prio_i[19][2:0]; + prio_re_o[19] = 1'b1; + end + 32'hc000050: begin + resp_o.rdata[2:0] = prio_i[20][2:0]; + prio_re_o[20] = 1'b1; + end + 32'hc000054: begin + resp_o.rdata[2:0] = prio_i[21][2:0]; + prio_re_o[21] = 1'b1; + end + 32'hc000058: begin + resp_o.rdata[2:0] = prio_i[22][2:0]; + prio_re_o[22] = 1'b1; + end + 32'hc00005c: begin + resp_o.rdata[2:0] = prio_i[23][2:0]; + prio_re_o[23] = 1'b1; + end + 32'hc000060: begin + resp_o.rdata[2:0] = prio_i[24][2:0]; + prio_re_o[24] = 1'b1; + end + 32'hc000064: begin + resp_o.rdata[2:0] = prio_i[25][2:0]; + prio_re_o[25] = 1'b1; + end + 32'hc000068: begin + resp_o.rdata[2:0] = prio_i[26][2:0]; + prio_re_o[26] = 1'b1; + end + 32'hc00006c: 
begin + resp_o.rdata[2:0] = prio_i[27][2:0]; + prio_re_o[27] = 1'b1; + end + 32'hc000070: begin + resp_o.rdata[2:0] = prio_i[28][2:0]; + prio_re_o[28] = 1'b1; + end + 32'hc000074: begin + resp_o.rdata[2:0] = prio_i[29][2:0]; + prio_re_o[29] = 1'b1; + end + 32'hc000078: begin + resp_o.rdata[2:0] = prio_i[30][2:0]; + prio_re_o[30] = 1'b1; + end + 32'hc001000: begin + resp_o.rdata[30:0] = ip_i[0][30:0]; + ip_re_o[0] = 1'b1; + end + 32'hc002000: begin + resp_o.rdata[30:0] = ie_i[0][30:0]; + ie_re_o[0] = 1'b1; + end + 32'hc002080: begin + resp_o.rdata[30:0] = ie_i[1][30:0]; + ie_re_o[1] = 1'b1; + end + 32'hc200000: begin + resp_o.rdata[2:0] = threshold_i[0][2:0]; + threshold_re_o[0] = 1'b1; + end + 32'hc201000: begin + resp_o.rdata[2:0] = threshold_i[1][2:0]; + threshold_re_o[1] = 1'b1; + end + 32'hc200004: begin + resp_o.rdata[4:0] = cc_i[0][4:0]; + cc_re_o[0] = 1'b1; + end + 32'hc201004: begin + resp_o.rdata[4:0] = cc_i[1][4:0]; + cc_re_o[1] = 1'b1; + end + default: resp_o.error = 1'b1; + endcase + end + end +end +endmodule + diff --git a/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv b/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv new file mode 100644 index 0000000..2a32102 --- /dev/null +++ b/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv @@ -0,0 +1,157 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//
+// Author: Florian Zaruba
+//
+// Description: Platform level interrupt controller
+//   Top level that wires rv_plic_gateway, one rv_plic_target per target and plic_regs together.
+module plic_top #(
+  parameter int N_SOURCE = 30, // number of interrupt sources; source IDs start at 1, ID 0 means "none"
+  parameter int N_TARGET = 2, // number of interrupt targets
+  parameter int MAX_PRIO = 7, // highest priority value; determines PRIOW below
+  parameter int SRCW = $clog2(N_SOURCE+1), // width of a source ID (claim_id / complete_id)
+  parameter type reg_req_t = logic, // register-bus request type, passed on to plic_regs
+  parameter type reg_rsp_t = logic // register-bus response type, passed on to plic_regs
+) (
+  input logic clk_i, // Clock
+  input logic rst_ni, // Asynchronous reset active low
+  // Bus Interface
+  input reg_req_t req_i,
+  output reg_rsp_t resp_o,
+  input logic [N_SOURCE-1:0] le_i, // 0:level 1:edge
+  // Interrupt Sources
+  input logic [N_SOURCE-1:0] irq_sources_i,
+  // Interrupt notification to targets
+  output logic [N_TARGET-1:0] eip_targets_o
+);
+  localparam PRIOW = $clog2(MAX_PRIO+1); // bits per priority / threshold value
+
+  logic [N_SOURCE-1:0] ip; // interrupt pending, one bit per source (driven by the gateway)
+
+  logic [N_TARGET-1:0][PRIOW-1:0] threshold_q;
+
+  logic [N_TARGET-1:0] claim_re; //Target read indicator
+  logic [N_TARGET-1:0][SRCW-1:0] claim_id;
+  logic [N_SOURCE-1:0] claim; //Converted from claim_re/claim_id
+
+  logic [N_TARGET-1:0] complete_we; //Target write indicator
+  logic [N_TARGET-1:0][SRCW-1:0] complete_id;
+  logic [N_SOURCE-1:0] complete; //Converted from complete_we/complete_id
+
+  logic [N_SOURCE-1:0][PRIOW-1:0] prio_q;
+  logic [N_TARGET-1:0][N_SOURCE-1:0] ie_q;
+  // Decode per-target claim/complete IDs into per-source bit vectors: ID 0 is ignored, ID k sets bit k-1.
+  always_comb begin
+    claim = '0;
+    complete = '0;
+    for (int i = 0 ; i < N_TARGET ; i++) begin
+      if (claim_re[i] && claim_id[i] != 0) claim[claim_id[i]-1] = 1'b1;
+      if (complete_we[i] && complete_id[i] != 0) complete[complete_id[i]-1] = 1'b1;
+    end
+  end
+
+  // Gateways
+  rv_plic_gateway #(
+    .N_SOURCE (N_SOURCE)
+  ) i_rv_plic_gateway (
+    .clk_i,
+    .rst_ni,
+    .src(irq_sources_i),
+    .le(le_i),
+    .claim(claim),
+    .complete(complete),
+    .ip(ip)
+  );
+
+  // Target interrupt notification
+  for (genvar i = 0 ; i < N_TARGET; i++) begin : gen_target
+    rv_plic_target #(
+      .N_SOURCE ( N_SOURCE ),
+      .MAX_PRIO ( MAX_PRIO ),
+      .ALGORITHM ( "SEQUENTIAL" )
+    ) i_target (
+      .clk_i,
+      .rst_ni,
+      .ip(ip),
+      .ie(ie_q[i]),
+      .prio(prio_q),
+      .threshold(threshold_q[i]),
+      .irq(eip_targets_o[i]),
+      .irq_id(claim_id[i]) // the ID selected by the target is what the claim read returns (via cc_i)
+    );
+  end
+
+  logic [N_TARGET-1:0] threshold_we_o;
+  logic [N_TARGET-1:0][PRIOW-1:0] threshold_o;
+
+  logic [N_SOURCE:0][PRIOW-1:0] prio_i, prio_o; // index 0 is the non-existent source 0
+  logic [N_SOURCE:0] prio_we_o;
+
+  // TODO(zarubaf): This needs more graceful handling
+  // it will break if the number of sources is larger than 32
+  logic [N_TARGET-1:0][N_SOURCE:0] ie_i, ie_o;
+  logic [N_TARGET-1:0] ie_we_o;
+  // NOTE(review): plic_regs is defined in another file; presumably generated for the default N_SOURCE/N_TARGET - verify.
+  plic_regs #(
+    .reg_req_t ( reg_req_t ),
+    .reg_rsp_t ( reg_rsp_t )
+  ) i_plic_regs (
+    .prio_i(prio_i),
+    .prio_o(prio_o),
+    .prio_we_o(prio_we_o),
+    .prio_re_o(), // don't care
+    // source zero is always zero
+    .ip_i({ip, 1'b0}),
+    .ip_re_o(), // don't care
+    .ie_i(ie_i),
+    .ie_o(ie_o),
+    .ie_we_o(ie_we_o),
+    .ie_re_o(), // don't care
+    .threshold_i(threshold_q),
+    .threshold_o(threshold_o),
+    .threshold_we_o(threshold_we_o),
+    .threshold_re_o(), // don't care
+    .cc_i(claim_id), // read data of the claim/complete register
+    .cc_o(complete_id), // presumably the ID written by the target on completion - verify in plic_regs
+    .cc_we_o(complete_we), // write strobe: used above as "complete"
+    .cc_re_o(claim_re), // read strobe: used above as "claim"
+    .req_i,
+    .resp_o
+  );
+
+  assign prio_i[0] = '0;
+  // Read-back views for plic_regs are shifted up by one so that bit/index 0 (source 0) always reads as zero.
+  for (genvar i = 0; i < N_TARGET; i++) begin
+    assign ie_i[i] = {ie_q[i][N_SOURCE-1:0], 1'b0};
+  end
+
+  for (genvar i = 1; i < N_SOURCE + 1; i++) begin
+    assign prio_i[i] = prio_q[i - 1];
+  end
+
+  // registers
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      prio_q <= '0;
+      ie_q <= '0;
+      threshold_q <= '0;
+    end else begin
+      // source zero is 0
+      for (int i = 0; i < N_SOURCE; i++) begin
+        prio_q[i] <= prio_we_o[i + 1] ? prio_o[i + 1] : prio_q[i]; // register index i+1 holds source i
+      end
+      for (int i = 0; i < N_TARGET; i++) begin
+        threshold_q[i] <= threshold_we_o[i] ? threshold_o[i] : threshold_q[i];
+        ie_q[i] <= ie_we_o[i] ? ie_o[i][N_SOURCE:1] : ie_q[i]; // written bit 0 (source 0) is dropped
+      end
+
+    end
+  end
+endmodule
diff --git a/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv
new file mode 100644
index 0000000..c68f78c
--- /dev/null
+++ b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv
@@ -0,0 +1,60 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// RISC-V Platform-Level Interrupt Gateways module
+// Keeps one pending bit (ip) and one active bit (ia) per source.
+module rv_plic_gateway #(
+  parameter int N_SOURCE = 32
+) (
+  input clk_i,
+  input rst_ni,
+
+  input [N_SOURCE-1:0] src,
+  input [N_SOURCE-1:0] le, // Level0 Edge1
+
+  input [N_SOURCE-1:0] claim, // $onehot0(claim)
+  input [N_SOURCE-1:0] complete, // $onehot0(complete)
+
+  output logic [N_SOURCE-1:0] ip
+);
+
+logic [N_SOURCE-1:0] ia; // Interrupt Active
+
+logic [N_SOURCE-1:0] set; // Set: (le) ? src & ~src_d : src ;
+logic [N_SOURCE-1:0] src_d; // src delayed by one clock, used for rising-edge detection
+
+always_ff @(posedge clk_i, negedge rst_ni) begin
+  if (!rst_ni) src_d <= '0;
+  else src_d <= src;
+end
+
+always_comb begin
+  for (int i = 0 ; i < N_SOURCE; i++) begin
+    set[i] = (le[i]) ? src[i] & ~src_d[i] : src[i] ; // edge mode: rising edge of src; level mode: src itself
+  end
+end
+
+// Interrupt pending is set by source (depends on le), cleared by claim.
+// Until interrupt is claimed, set doesn't affect ip.
+// RISC-V PLIC spec mentioned it can have counter for edge triggered
+// But skipped the feature as counter consumes substantial logic size.
+always_ff @(posedge clk_i, negedge rst_ni) begin
+  if (!rst_ni) begin
+    ip <= '0;
+  end else begin
+    ip <= (ip | (set & ~ia & ~ip)) & (~claim);
+  end
+end
+
+// Interrupt active is to control ip. If ip is set then until completed
+// by target, ip shouldn't be set by source even claim can clear ip.
+always_ff @(posedge clk_i, negedge rst_ni) begin
+  if (!rst_ni) begin
+    ia <= '0;
+  end else begin
+    ia <= (ia | (set & ~ia)) & (~complete);
+  end
+end
+
+endmodule
diff --git a/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv
new file mode 100644
index 0000000..26bd69c
--- /dev/null
+++ b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv
@@ -0,0 +1,125 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// RISC-V Platform-Level Interrupt Generator for Target
+//
+// This module basically does IE & IP, gated by priority and threshold, and registers the result.
+// Keep in mind that increasing MAX_PRIO affects logic size a lot.
+
+module rv_plic_target #(
+  parameter int N_SOURCE = 32,
+  parameter int MAX_PRIO = 7,
+  parameter ALGORITHM = "SEQUENTIAL", // SEQUENTIAL | MATRIX
+
+  // Local param (Do not change this through parameter)
+  parameter int unsigned SRCW = $clog2(N_SOURCE+1),
+  parameter int unsigned PRIOW = $clog2(MAX_PRIO+1) // Bits to represent MAX_PRIO
+) (
+  input clk_i,
+  input rst_ni,
+
+  input [N_SOURCE-1:0] ip, // interrupt pending, one bit per source
+  input [N_SOURCE-1:0] ie, // interrupt enable, one bit per source
+
+  input [N_SOURCE-1:0][PRIOW-1:0] prio, // priority of each source
+  input [PRIOW-1:0] threshold, // only priorities strictly greater than this raise irq
+
+  output logic irq, // registered: a qualifying interrupt exists
+  output logic [SRCW-1:0] irq_id // registered: source index + 1 of the selected interrupt, 0 = none
+);
+
+
+//always_ff @(posedge clk_i, negedge rst_ni) begin
+//  if (!rst_ni) begin
+//    gt_th <= '0;
+//  end else begin
+//    for (int i = 0 ; i < N_SOURCE ; i++) begin
+//      gt_th[i] = (prio[i] > threshold) ? 1'b1 : 1'b0 ;
+//    end
+//  end
+//end
+
+// Exactly one generate branch below drives irq/irq_id; any other ALGORITHM value leaves them undriven.
+if (ALGORITHM == "SEQUENTIAL") begin : gen_sequential
+  // Let first implementation be brute-force
+  // As N_SOURCE increasing logic depth increases O(logN)
+  // This approach slows down the simulation.
+  logic [PRIOW:0] max_prio; // one bit wider than prio so that threshold+1 cannot wrap around to 0
+  logic irq_next;
+  logic [SRCW-1:0] irq_id_next;
+  always_comb begin
+    max_prio = {1'b0, threshold} + 1'b1; // Priority strictly greater than threshold
+    irq_id_next = '0; // default: No Interrupt
+    irq_next = 1'b0;
+    for (int i = N_SOURCE-1 ; i >= 0 ; i--) begin // descending with >=, so the lowest index wins a tie
+      if ((ip[i] & ie[i]) == 1'b1 && {1'b0, prio[i]} >= max_prio) begin
+        max_prio = {1'b0, prio[i]};
+        irq_id_next = SRCW'(i+1);
+        irq_next = 1'b1;
+      end
+    end // for i
+  end
+
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (!rst_ni) begin
+      irq <= 1'b0;
+      irq_id <= '0;
+    end else begin
+      irq <= irq_next;
+      irq_id <= irq_id_next;
+    end
+  end
+end else if (ALGORITHM == "MATRIX") begin : gen_mat
+  // Second trial : N X N matrix
+  // Set mat[i][j] to 1 if prio[i] >= prio[j] and ip[i] & ie[i] & ip[j] & ie[j]
+  // Comparator depth is just 1 then logN AND gate then Leading One detector
+  // It is to find the max value of priority
+  //
+  // This uses a lot of comparators: (N x (N-1))/2.
+  // So if above approach(ALGORITHM 1) meets timing, don't use this algorithm.
+  logic [N_SOURCE-1:0] is;
+
+  logic [N_SOURCE-1:0][N_SOURCE-1:0] mat;
+  logic [N_SOURCE-1:0] merged_row;
+
+  assign is = ip & ie;
+  always_comb begin
+    merged_row[N_SOURCE-1] = is[N_SOURCE-1] & (prio[N_SOURCE-1] > threshold);
+    for (int i = 0 ; i < N_SOURCE-1 ; i++) begin
+      merged_row[i] = 1'b1;
+      for (int j = i+1 ; j < N_SOURCE ; j++) begin
+        mat[i][j] = (prio[i] <= threshold) ? 1'b0 : // No compare if less than TH
+                    (is[i] & is[j]) ? prio[i] >= prio[j] :
+                    (is[i]) ? 1'b 1 : 1'b 0 ;
+        merged_row[i] = merged_row[i] & mat[i][j]; // all should be 1
+      end // for j
+    end // for i
+  end // always_comb
+
+  // Leading One detector
+  logic [N_SOURCE-1:0] lod;
+  assign lod = merged_row & (~merged_row + 1'b1); // isolates the lowest set bit of merged_row
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (!rst_ni) begin
+      irq <= 1'b0;
+      irq_id <= '0; // No interrupt
+    end else if (|lod) begin
+      // as $onehot0(lod), at most one bit set.
+      // so, safely run for loop
+      for (int i = N_SOURCE-1 ; i >= 0 ; i--) begin
+        if (lod[i] == 1'b1) begin
+          irq <= 1'b 1;
+          irq_id <= SRCW'(i + 1);
+        end
+      end // for
+    end else begin
+      // No pending interrupt
+      irq <= 1'b0;
+      irq_id <= '0;
+    end
+  end // always_ff
+end // ALGORITHM
+
+endmodule
+
diff --git a/test/type_param/corev_apu/src/ariane.sv b/test/type_param/corev_apu/src/ariane.sv
new file mode 100644
index 0000000..1ec15ef
--- /dev/null
+++ b/test/type_param/corev_apu/src/ariane.sv
@@ -0,0 +1,86 @@
+// Copyright 2017-2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.03.2017
+// Description: Ariane Top-level module
+//   Thin wrapper: instantiates the cva6 core and, when CVA6Cfg.CvxifEn is set,
+//   the example coprocessor connected to the core's cvxif_req / cvxif_resp signals.
+module ariane import ariane_pkg::*; #(
+  parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+  parameter bit IsRVFI = bit'(0),
+  parameter type rvfi_probes_t = logic,
+  parameter int unsigned AxiAddrWidth = ariane_axi::AddrWidth, // not referenced inside this module
+  parameter int unsigned AxiDataWidth = ariane_axi::DataWidth, // not referenced inside this module
+  parameter int unsigned AxiIdWidth = ariane_axi::IdWidth, // not referenced inside this module
+  parameter type axi_ar_chan_t = ariane_axi::ar_chan_t,
+  parameter type axi_aw_chan_t = ariane_axi::aw_chan_t,
+  parameter type axi_w_chan_t = ariane_axi::w_chan_t,
+  parameter type noc_req_t = ariane_axi::req_t,
+  parameter type noc_resp_t = ariane_axi::resp_t
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Core ID, Cluster ID and boot address are considered more or less static
+  input logic [riscv::VLEN-1:0] boot_addr_i, // reset boot address
+  input logic [riscv::XLEN-1:0] hart_id_i, // hart id in a multicore environment (reflected in a CSR)
+
+  // Interrupt inputs
+  input logic [1:0] irq_i, // level sensitive IR lines, mip & sip (async)
+  input logic ipi_i, // inter-processor interrupts (async)
+  // Timer facilities
+  input logic time_irq_i, // timer interrupt in (async)
+  input logic debug_req_i, // debug request (async)
+  // RISC-V formal interface port (`rvfi`):
+  // Can be left open when formal tracing is not needed.
+  output rvfi_probes_t rvfi_probes_o,
+  // memory side
+  output noc_req_t noc_req_o,
+  input noc_resp_t noc_resp_i
+);
+  // Request/response pair between the core and the optional example coprocessor.
+  cvxif_pkg::cvxif_req_t cvxif_req;
+  cvxif_pkg::cvxif_resp_t cvxif_resp;
+
+  cva6 #(
+    .CVA6Cfg ( CVA6Cfg ),
+    .IsRVFI ( IsRVFI ),
+    .rvfi_probes_t ( rvfi_probes_t ),
+    .axi_ar_chan_t (axi_ar_chan_t),
+    .axi_aw_chan_t (axi_aw_chan_t),
+    .axi_w_chan_t (axi_w_chan_t),
+    .noc_req_t (noc_req_t),
+    .noc_resp_t (noc_resp_t)
+  ) i_cva6 (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .boot_addr_i ( boot_addr_i ),
+    .hart_id_i ( hart_id_i ),
+    .irq_i ( irq_i ),
+    .ipi_i ( ipi_i ),
+    .time_irq_i ( time_irq_i ),
+    .debug_req_i ( debug_req_i ),
+    .rvfi_probes_o ( rvfi_probes_o ),
+    .cvxif_req_o ( cvxif_req ),
+    .cvxif_resp_i ( cvxif_resp ),
+    .noc_req_o ( noc_req_o ),
+    .noc_resp_i ( noc_resp_i )
+  );
+  // NOTE(review): cvxif_resp has no driver when CVA6Cfg.CvxifEn is 0 - confirm cva6 ignores it in that case.
+  if (CVA6Cfg.CvxifEn) begin : gen_example_coprocessor
+    cvxif_example_coprocessor i_cvxif_coprocessor (
+      .clk_i ( clk_i ),
+      .rst_ni ( rst_ni ),
+      .cvxif_req_i ( cvxif_req ),
+      .cvxif_resp_o ( cvxif_resp )
+    );
+  end
+
+endmodule // ariane
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv
new file mode 100644
index 0000000..b3c5615
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv
@@ -0,0 +1,93 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI Reservation Table: one reserved address per AXI ID, with clear, set and check request ports.
+module axi_res_tbl #(
+  parameter int unsigned AXI_ADDR_WIDTH = 0, // must be overridden (> 0, see validate_params)
+  parameter int unsigned AXI_ID_WIDTH = 0 // must be overridden (> 0); the table has 2**AXI_ID_WIDTH entries
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  input logic [AXI_ADDR_WIDTH-1:0] clr_addr_i, // every entry equal to this address is cleared
+  input logic clr_req_i,
+  output logic clr_gnt_o,
+  input logic [AXI_ADDR_WIDTH-1:0] set_addr_i, // address stored into entry set_id_i
+  input logic [AXI_ID_WIDTH-1:0] set_id_i,
+  input logic set_req_i,
+  output logic set_gnt_o,
+  input logic [AXI_ADDR_WIDTH-1:0] check_addr_i, // compared against entry check_id_i
+  input logic [AXI_ID_WIDTH-1:0] check_id_i,
+  output logic check_res_o, // 1 when the entry matches; only meaningful while check_gnt_o is 1
+  input logic check_req_i,
+  output logic check_gnt_o
+);
+
+  localparam integer N_IDS = 2**AXI_ID_WIDTH;
+
+  // Declarations of Signals and Types
+  logic [N_IDS-1:0][AXI_ADDR_WIDTH-1:0] tbl_d, tbl_q; // next / current reserved address per ID
+  logic clr,
+        set;
+  // Next-state logic per entry. NOTE(review): a cleared entry is stored as address 0, so a check for address 0 matches it.
+  generate for (genvar i = 0; i < N_IDS; ++i) begin: gen_tbl
+    always_comb begin
+      tbl_d[i] = tbl_q[i];
+      if (set && i == set_id_i) begin
+        tbl_d[i] = set_addr_i;
+      end else if (clr && tbl_q[i] == clr_addr_i) begin
+        tbl_d[i] = '0;
+      end
+    end
+  end endgenerate
+
+  // Table-Managing Logic
+  always_comb begin
+    clr = 1'b0;
+    set = 1'b0;
+    clr_gnt_o = 1'b0;
+    set_gnt_o = 1'b0;
+    check_res_o = 1'b0;
+    check_gnt_o = 1'b0;
+    // Fixed priority: clear, then set, then check. At most one request is granted per cycle.
+    if (clr_req_i) begin
+      clr = 1'b1;
+      clr_gnt_o = 1'b1;
+    end else if (set_req_i) begin
+      set = 1'b1;
+      set_gnt_o = 1'b1;
+    end else if (check_req_i) begin
+      check_res_o = (tbl_q[check_id_i] == check_addr_i); // combinational read of the current table
+      check_gnt_o = 1'b1;
+    end
+  end
+
+  // Registers
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (~rst_ni) begin
+      tbl_q <= '0;
+    end else begin
+      tbl_q <= tbl_d;
+    end
+  end
+
+  // Validate parameters.
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: validate_params
+    assert (AXI_ADDR_WIDTH > 0)
+      else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!");
+    assert (AXI_ID_WIDTH > 0)
+      else $fatal(1, "AXI_ID_WIDTH must be greater than 0!");
+  end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv
new file mode 100644
index 0000000..fafdb4f
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv
@@ -0,0 +1,1004 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI RISC-V Atomic Operations (AMOs) Adapter
+//
+// This adapter implements atomic memory operations in accordance with the RVWMO memory consistency
+// model.
+//
+// Interface notes:
+// - This module has combinational paths between AXI inputs and outputs for minimum latency. Add
+//   slices upstream or downstream or in both directions if combinatorial paths become too long.
+//   The module adheres to the AXI ready/valid dependency specification to prevent combinatorial
+//   loops.
+ +module axi_riscv_amos #( + // AXI Parameters + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + // Maximum number of AXI write transactions outstanding at the same time + parameter int unsigned AXI_MAX_WRITE_TXNS = 0, + // Word width of the widest RISC-V processor that can issue requests to this module. + // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are + // supported if `aw_strb` is set correctly. + parameter int unsigned RISCV_WORD_WIDTH = 0, + /// Derived Parameters (do NOT change manually!) + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + + /// Slave Interface + input logic [AXI_ADDR_WIDTH-1:0] slv_aw_addr_i, + input logic [2:0] slv_aw_prot_i, + input logic [3:0] slv_aw_region_i, + input logic [5:0] slv_aw_atop_i, + input logic [7:0] slv_aw_len_i, + input logic [2:0] slv_aw_size_i, + input logic [1:0] slv_aw_burst_i, + input logic slv_aw_lock_i, + input logic [3:0] slv_aw_cache_i, + input logic [3:0] slv_aw_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_aw_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_aw_user_i, + output logic slv_aw_ready_o, + input logic slv_aw_valid_i, + + input logic [AXI_ADDR_WIDTH-1:0] slv_ar_addr_i, + input logic [2:0] slv_ar_prot_i, + input logic [3:0] slv_ar_region_i, + input logic [7:0] slv_ar_len_i, + input logic [2:0] slv_ar_size_i, + input logic [1:0] slv_ar_burst_i, + input logic slv_ar_lock_i, + input logic [3:0] slv_ar_cache_i, + input logic [3:0] slv_ar_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_ar_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_ar_user_i, + output logic slv_ar_ready_o, + input logic slv_ar_valid_i, + + input logic [AXI_DATA_WIDTH-1:0] slv_w_data_i, + input logic [AXI_STRB_WIDTH-1:0] slv_w_strb_i, + input logic [AXI_USER_WIDTH-1:0] slv_w_user_i, + input logic slv_w_last_i, + output 
logic slv_w_ready_o, + input logic slv_w_valid_i, + + output logic [AXI_DATA_WIDTH-1:0] slv_r_data_o, + output logic [1:0] slv_r_resp_o, + output logic slv_r_last_o, + output logic [AXI_ID_WIDTH-1:0] slv_r_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_r_user_o, + input logic slv_r_ready_i, + output logic slv_r_valid_o, + + output logic [1:0] slv_b_resp_o, + output logic [AXI_ID_WIDTH-1:0] slv_b_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_b_user_o, + input logic slv_b_ready_i, + output logic slv_b_valid_o, + + /// Master Interface + output logic [AXI_ADDR_WIDTH-1:0] mst_aw_addr_o, + output logic [2:0] mst_aw_prot_o, + output logic [3:0] mst_aw_region_o, + output logic [5:0] mst_aw_atop_o, + output logic [7:0] mst_aw_len_o, + output logic [2:0] mst_aw_size_o, + output logic [1:0] mst_aw_burst_o, + output logic mst_aw_lock_o, + output logic [3:0] mst_aw_cache_o, + output logic [3:0] mst_aw_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_aw_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_aw_user_o, + input logic mst_aw_ready_i, + output logic mst_aw_valid_o, + + output logic [AXI_ADDR_WIDTH-1:0] mst_ar_addr_o, + output logic [2:0] mst_ar_prot_o, + output logic [3:0] mst_ar_region_o, + output logic [7:0] mst_ar_len_o, + output logic [2:0] mst_ar_size_o, + output logic [1:0] mst_ar_burst_o, + output logic mst_ar_lock_o, + output logic [3:0] mst_ar_cache_o, + output logic [3:0] mst_ar_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_ar_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_ar_user_o, + input logic mst_ar_ready_i, + output logic mst_ar_valid_o, + + output logic [AXI_DATA_WIDTH-1:0] mst_w_data_o, + output logic [AXI_STRB_WIDTH-1:0] mst_w_strb_o, + output logic [AXI_USER_WIDTH-1:0] mst_w_user_o, + output logic mst_w_last_o, + input logic mst_w_ready_i, + output logic mst_w_valid_o, + + input logic [AXI_DATA_WIDTH-1:0] mst_r_data_i, + input logic [1:0] mst_r_resp_i, + input logic mst_r_last_i, + input logic [AXI_ID_WIDTH-1:0] mst_r_id_i, + input logic [AXI_USER_WIDTH-1:0] 
mst_r_user_i, + output logic mst_r_ready_o, + input logic mst_r_valid_i, + + input logic [1:0] mst_b_resp_i, + input logic [AXI_ID_WIDTH-1:0] mst_b_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_b_user_i, + output logic mst_b_ready_o, + input logic mst_b_valid_i +); + + localparam int unsigned OUTSTND_BURSTS_WIDTH = $clog2(AXI_MAX_WRITE_TXNS+1); + localparam int unsigned AXI_ALU_RATIO = AXI_DATA_WIDTH/RISCV_WORD_WIDTH; + + // State types + typedef enum logic [1:0] { FEEDTHROUGH_AW, WAIT_RESULT_AW, SEND_AW } aw_state_t; + aw_state_t aw_state_d, aw_state_q; + + typedef enum logic [2:0] { FEEDTHROUGH_W, WAIT_DATA_W, WAIT_RESULT_W, WAIT_CHANNEL_W, SEND_W } w_state_t; + w_state_t w_state_d, w_state_q; + + typedef enum logic [1:0] { FEEDTHROUGH_B, WAIT_COMPLETE_B, WAIT_CHANNEL_B, SEND_B } b_state_t; + b_state_t b_state_d, b_state_q; + + typedef enum logic [1:0] { FEEDTHROUGH_AR, WAIT_CHANNEL_AR, SEND_AR } ar_state_t; + ar_state_t ar_state_d, ar_state_q; + + typedef enum logic [1:0] { FEEDTHROUGH_R, WAIT_DATA_R, WAIT_CHANNEL_R, SEND_R } r_state_t; + r_state_t r_state_d, r_state_q; + + typedef enum logic [1:0] { NONE, INVALID, LOAD, STORE } atop_req_t; + atop_req_t atop_valid_d, atop_valid_q; + + // Signal declarations + // Transaction FF + logic [AXI_ADDR_WIDTH-1:0] addr_d, addr_q; + logic [AXI_ID_WIDTH-1:0] id_d, id_q; + logic [AXI_STRB_WIDTH-1:0] strb_d, strb_q; + logic [2:0] size_d, size_q; + logic [5:0] atop_d, atop_q; + logic [3:0] cache_d, cache_q; + logic [2:0] prot_d, prot_q; + logic [3:0] qos_d, qos_q; + logic [3:0] region_d, region_q; + logic [1:0] r_resp_d, r_resp_q; + logic [AXI_USER_WIDTH-1:0] aw_user_d, aw_user_q, + w_user_d, w_user_q, + r_user_d, r_user_q; + // Data FF + logic [AXI_DATA_WIDTH-1:0] w_data_d, w_data_q; // AMO operand + logic [AXI_DATA_WIDTH-1:0] r_data_d, r_data_q; // Data from memory + logic [AXI_DATA_WIDTH-1:0] result_d, result_q; // Result of AMO operation + logic w_d_valid_d, w_d_valid_q, // AMO operand valid + r_d_valid_d, r_d_valid_q; // 
Data from memory valid + // Counters + logic [OUTSTND_BURSTS_WIDTH-1:0] w_cnt_d, w_cnt_q; // Outstanding W beats + logic [OUTSTND_BURSTS_WIDTH-1:0] w_cnt_req_d, w_cnt_req_q; // W beats until AMO can read W + logic [OUTSTND_BURSTS_WIDTH-1:0] w_cnt_inj_d, w_cnt_inj_q; // W beats until AMO can insert its W + // States + logic adapter_ready; + logic transaction_collision; + logic aw_valid, aw_ready, aw_free, + w_valid, w_ready, w_free, + b_valid, b_ready, b_free, + ar_valid, ar_ready, ar_free, + r_valid, r_ready, r_free; + // ALU Signals + logic [RISCV_WORD_WIDTH-1:0] alu_operand_a; + logic [RISCV_WORD_WIDTH-1:0] alu_operand_b; + logic [RISCV_WORD_WIDTH-1:0] alu_result; + logic [AXI_DATA_WIDTH-1:0] alu_result_ext; + logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] op_a; + logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] op_b; + logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] op_a_sign_ext; + logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] op_b_sign_ext; + logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] res; + logic [AXI_STRB_WIDTH-1:0][7:0] strb_ext; + logic sign_a; + logic sign_b; + + /** + * Calculate ready signals and channel states + */ + + // Check if all state machines are ready for the next atomic request + assign adapter_ready = (aw_state_q == FEEDTHROUGH_AW) && + ( w_state_q == FEEDTHROUGH_W ) && + ( b_state_q == FEEDTHROUGH_B ) && + (ar_state_q == FEEDTHROUGH_AR) && + ( r_state_q == FEEDTHROUGH_R ); + + // Calculate if the channels are free + assign aw_free = ~aw_valid | aw_ready; + assign w_free = ~ w_valid | w_ready; + assign b_free = ~ b_valid | b_ready; + assign ar_free = ~ar_valid | ar_ready; + assign r_free = ~ r_valid | r_ready; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + aw_valid <= 0; + aw_ready <= 0; + w_valid <= 0; + w_ready <= 0; + b_valid <= 0; + b_ready <= 0; + ar_valid <= 0; + ar_ready <= 0; + r_valid <= 0; + r_ready <= 0; + end else begin + aw_valid <= mst_aw_valid_o; + aw_ready <= mst_aw_ready_i; + w_valid <= 
mst_w_valid_o; + w_ready <= mst_w_ready_i; + b_valid <= slv_b_valid_o; + b_ready <= slv_b_ready_i; + ar_valid <= mst_ar_valid_o; + ar_ready <= mst_ar_ready_i; + r_valid <= slv_r_valid_o; + r_ready <= slv_r_ready_i; + end + end + + // Calculate if the request interferes with the ongoing atomic transaction + // The protected bytes go from addr_q up to addr_q + (1 << size_q) - 1 + // TODO Bursts need special treatment + assign transaction_collision = (slv_aw_addr_i < ( addr_q + (8'h01 << size_q))) & + ( addr_q < (slv_aw_addr_i + (8'h01 << slv_aw_size_i))); + + always_comb begin : calc_atop_valid + atop_valid_d = atop_valid_q; + if (adapter_ready) begin + atop_valid_d = NONE; + if (slv_aw_valid_i && slv_aw_atop_i) begin + // Default is invalid request + atop_valid_d = INVALID; + // Valid load operation + if ((slv_aw_atop_i == axi_pkg::ATOP_ATOMICSWAP) || + (slv_aw_atop_i[5:3] == {axi_pkg::ATOP_ATOMICLOAD , axi_pkg::ATOP_LITTLE_END})) begin + atop_valid_d = LOAD; + end + // Valid store operation + if (slv_aw_atop_i[5:3] == {axi_pkg::ATOP_ATOMICSTORE, axi_pkg::ATOP_LITTLE_END}) begin + atop_valid_d = STORE; + end + // Invalidate valid request if control signals do not match + // Burst or exclusive access + if (slv_aw_len_i | slv_aw_lock_i) begin + atop_valid_d = INVALID; + end + // Unsupported size + if (slv_aw_size_i > $clog2(RISCV_WORD_WIDTH/8)) begin + atop_valid_d = INVALID; + end + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : proc_atop_valid + if(~rst_ni) begin + atop_valid_q <= NONE; + end else begin + atop_valid_q <= atop_valid_d; + end + end + + /** + * Write Channel: AW, W, B + */ + + /*==================================================================== + = AW = + ====================================================================*/ + always_comb begin : axi_aw_channel + // Defaults AXI Bus + mst_aw_id_o = slv_aw_id_i; + mst_aw_addr_o = slv_aw_addr_i; + mst_aw_len_o = slv_aw_len_i; + mst_aw_size_o = slv_aw_size_i; + mst_aw_burst_o = 
slv_aw_burst_i; + mst_aw_lock_o = slv_aw_lock_i; + mst_aw_cache_o = slv_aw_cache_i; + mst_aw_prot_o = slv_aw_prot_i; + mst_aw_qos_o = slv_aw_qos_i; + mst_aw_region_o = slv_aw_region_i; + mst_aw_atop_o = 6'b0; + mst_aw_user_o = slv_aw_user_i; + // Defaults FF + addr_d = addr_q; + id_d = id_q; + size_d = size_q; + atop_d = atop_q; + cache_d = cache_q; + prot_d = prot_q; + qos_d = qos_q; + region_d = region_q; + aw_user_d = aw_user_q; + w_cnt_inj_d = w_cnt_inj_q; + // State Machine + aw_state_d = aw_state_q; + + // Default control: Block AW channel if... + if (slv_aw_valid_i && slv_aw_atop_i) begin + // Block if atomic request + mst_aw_valid_o = 1'b0; + slv_aw_ready_o = 1'b0; + end else if (w_cnt_q == AXI_MAX_WRITE_TXNS) begin + // Block if counter is overflowing + mst_aw_valid_o = 1'b0; + slv_aw_ready_o = 1'b0; + end else if (slv_aw_valid_i && transaction_collision && !adapter_ready) begin + // Block requests to the same address as current atomic transaction + mst_aw_valid_o = 1'b0; + slv_aw_ready_o = 1'b0; + end else begin + // Forward + mst_aw_valid_o = slv_aw_valid_i; + slv_aw_ready_o = mst_aw_ready_i; + end + + // Count W burst to know when to inject the W data + if (w_cnt_inj_q && mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin + w_cnt_inj_d = w_cnt_inj_q - 1; + end + + unique case (aw_state_q) + + FEEDTHROUGH_AW: begin + // Feedthrough slave to master until atomic operation is detected + if (slv_aw_valid_i && slv_aw_atop_i && adapter_ready) begin + // Acknowledge atomic transaction + slv_aw_ready_o = 1'b1; + // Remember request + atop_d = slv_aw_atop_i; + addr_d = slv_aw_addr_i; + id_d = slv_aw_id_i; + size_d = slv_aw_size_i; + cache_d = slv_aw_cache_i; + prot_d = slv_aw_prot_i; + qos_d = slv_aw_qos_i; + region_d = slv_aw_region_i; + aw_user_d = slv_aw_user_i; + // If valid AMO --> wait for result + if (atop_valid_d != INVALID) begin + aw_state_d = WAIT_RESULT_AW; + end + end + + end // FEEDTHROUGH_AW + + WAIT_RESULT_AW, SEND_AW: begin + // If the result 
is ready and the channel is free --> inject AW request + if ((r_d_valid_q && w_d_valid_q && aw_free) || (aw_state_q == SEND_AW)) begin + // Block + slv_aw_ready_o = 1'b0; + // Make write request + mst_aw_valid_o = 1'b1; + mst_aw_addr_o = addr_q; + mst_aw_len_o = 8'h00; + mst_aw_id_o = id_q; + mst_aw_size_o = size_q; + mst_aw_burst_o = 2'b00; + mst_aw_lock_o = 1'b0; + mst_aw_cache_o = cache_q; + mst_aw_prot_o = prot_q; + mst_aw_qos_o = qos_q; + mst_aw_region_o = region_q; + mst_aw_user_o = aw_user_q; + // Check if request is acknowledged + if (mst_aw_ready_i) begin + aw_state_d = FEEDTHROUGH_AW; + end else begin + aw_state_d = SEND_AW; + end + // Remember outstanding W beats before injected request + if (aw_state_q == WAIT_RESULT_AW) begin + if (w_cnt_q && mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin + w_cnt_inj_d = w_cnt_q - 1; + end else begin + w_cnt_inj_d = w_cnt_q; + end + end + end + end // WAIT_RESULT_AW, SEND_AW + + default: aw_state_d = FEEDTHROUGH_AW; + + endcase + end // axi_aw_channel + + /*==================================================================== + = W = + ====================================================================*/ + always_comb begin : axi_w_channel + // Defaults AXI Bus + mst_w_data_o = slv_w_data_i; + mst_w_strb_o = slv_w_strb_i; + mst_w_last_o = slv_w_last_i; + mst_w_user_o = slv_w_user_i; + // Defaults FF + strb_d = strb_q; + w_user_d = w_user_q; + w_data_d = w_data_q; + result_d = result_q; + w_d_valid_d = w_d_valid_q; + w_cnt_req_d = w_cnt_req_q; + // State Machine + w_state_d = w_state_q; + + // Default control + // Make sure no data is sent without knowing if it's atomic + if (w_cnt_q == 0) begin + // Stall W as it precedes the AW request + slv_w_ready_o = 1'b0; + mst_w_valid_o = 1'b0; + end else begin + mst_w_valid_o = slv_w_valid_i; + slv_w_ready_o = mst_w_ready_i; + end + + unique case (w_state_q) + + FEEDTHROUGH_W: begin + if (adapter_ready) begin + // Reset read flag + w_d_valid_d = 1'b0; + result_d = '0; + + 
if (atop_valid_d != NONE) begin + // Check if data is also available and does not belong to previous request + if (w_cnt_q == 0) begin + // Block downstream + mst_w_valid_o = 1'b0; + // Fetch data and wait for all data + slv_w_ready_o = 1'b1; + if (slv_w_valid_i) begin + if (atop_valid_d != INVALID) begin + w_data_d = slv_w_data_i; + strb_d = slv_w_strb_i; + w_user_d = slv_w_user_i; + w_d_valid_d = 1'b1; + w_state_d = WAIT_RESULT_W; + end + end else begin + w_cnt_req_d = '0; + w_state_d = WAIT_DATA_W; + end + end else begin + // Remember the amount of outstanding bursts and count down + if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin + w_cnt_req_d = w_cnt_q - 1; + end else begin + w_cnt_req_d = w_cnt_q; + end + w_state_d = WAIT_DATA_W; + end + end + end + end // FEEDTHROUGH_W + + WAIT_DATA_W: begin + // Count W beats until data arrives that belongs to the AMO request + if (w_cnt_req_q == 0) begin + // Block downstream + mst_w_valid_o = 1'b0; + // Ready upstream + slv_w_ready_o = 1'b1; + + if (slv_w_valid_i) begin + if (atop_valid_q == INVALID) begin + w_state_d = FEEDTHROUGH_W; + end else begin + w_data_d = slv_w_data_i; + strb_d = slv_w_strb_i; + w_user_d = slv_w_user_i; + w_d_valid_d = 1'b1; + w_state_d = WAIT_RESULT_W; + end + end + end else if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin + w_cnt_req_d = w_cnt_req_q - 1; + end + end // WAIT_DATA_W + + WAIT_RESULT_W: begin + // If the result is ready, try to write it + if (r_d_valid_q && w_d_valid_q && aw_free) begin + // Check if W channel is free and make sure data is not interleaved + result_d = alu_result_ext; + if (w_free && w_cnt_q == 0) begin + // Block + slv_w_ready_o = 1'b0; + // Send write data + mst_w_valid_o = 1'b1; + mst_w_data_o = alu_result_ext; + mst_w_last_o = 1'b1; + mst_w_strb_o = strb_q; + mst_w_user_o = w_user_q; + if (mst_w_ready_i) begin + w_state_d = FEEDTHROUGH_W; + end else begin + w_state_d = SEND_W; + end + end else begin + w_state_d = WAIT_CHANNEL_W; + end + end + 
end // WAIT_RESULT_W + + WAIT_CHANNEL_W, SEND_W: begin + // Wait to not interleave the data + if ((w_free && w_cnt_inj_q == 0) || (w_state_q == SEND_W)) begin + // Block + slv_w_ready_o = 1'b0; + // Send write data + mst_w_valid_o = 1'b1; + mst_w_data_o = result_q; + mst_w_last_o = 1'b1; + mst_w_strb_o = strb_q; + mst_w_user_o = w_user_q; + if (mst_w_ready_i) begin + w_state_d = FEEDTHROUGH_W; + end else begin + w_state_d = SEND_W; + end + end + end // WAIT_CHANNEL_W, SEND_W + + default: w_state_d = FEEDTHROUGH_W; + + endcase + end // axi_w_channel + + /*==================================================================== + = B = + ====================================================================*/ + always_comb begin : axi_b_channel + // Defaults AXI Bus + mst_b_ready_o = slv_b_ready_i; + slv_b_id_o = mst_b_id_i; + slv_b_resp_o = mst_b_resp_i; + slv_b_user_o = mst_b_user_i; + slv_b_valid_o = mst_b_valid_i; + // State Machine + b_state_d = b_state_q; + + unique case (b_state_q) + + FEEDTHROUGH_B: begin + if (adapter_ready) begin + if (atop_valid_d == LOAD || atop_valid_d == STORE) begin + // Wait until write is complete + b_state_d = WAIT_COMPLETE_B; + end else if (atop_valid_d == INVALID) begin + // Inject B error resp once the channel is free + if (b_free) begin + // Block downstream + mst_b_ready_o = 1'b0; + // Write B response + slv_b_valid_o = 1'b1; + slv_b_id_o = slv_aw_id_i; + slv_b_resp_o = axi_pkg::RESP_SLVERR; + slv_b_user_o = '0; + if (!slv_b_ready_i) begin + b_state_d = SEND_B; + end + end else begin + b_state_d = WAIT_CHANNEL_B; + end + end + end + end // FEEDTHROUGH_B + + WAIT_CHANNEL_B, SEND_B: begin + if (b_free || (b_state_q == SEND_B)) begin + // Block downstream + mst_b_ready_o = 1'b0; + // Write B response + slv_b_valid_o = 1'b1; + slv_b_id_o = id_q; + slv_b_resp_o = axi_pkg::RESP_SLVERR; + slv_b_user_o = '0; + if (slv_b_ready_i) begin + b_state_d = FEEDTHROUGH_B; + end else begin + b_state_d = SEND_B; + end + end + end // WAIT_CHANNEL_B, 
SEND_B + + WAIT_COMPLETE_B: begin + if (mst_b_valid_i && (mst_b_id_i == id_q)) begin + b_state_d = FEEDTHROUGH_B; + end + end // WAIT_COMPLETE_B + + default: b_state_d = FEEDTHROUGH_B; + + endcase + end // axi_b_channel + + // Keep track of outstanding downstream write bursts and responses. + always_comb begin + w_cnt_d = w_cnt_q; + if (mst_aw_valid_o && mst_aw_ready_i) begin + w_cnt_d += 1; + end + if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin + w_cnt_d -= 1; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : axi_write_channel_ff + if(~rst_ni) begin + aw_state_q <= FEEDTHROUGH_AW; + w_state_q <= FEEDTHROUGH_W; + b_state_q <= FEEDTHROUGH_B; + w_cnt_q <= '0; + w_cnt_req_q <= '0; + w_cnt_inj_q <= '0; + addr_q <= '0; + id_q <= '0; + size_q <= '0; + strb_q <= '0; + cache_q <= '0; + prot_q <= '0; + qos_q <= '0; + region_q <= '0; + aw_user_q <= '0; + w_user_q <= '0; + w_data_q <= '0; + result_q <= '0; + w_d_valid_q <= '0; + atop_q <= 6'b0; + end else begin + aw_state_q <= aw_state_d; + w_state_q <= w_state_d; + b_state_q <= b_state_d; + w_cnt_q <= w_cnt_d; + w_cnt_req_q <= w_cnt_req_d; + w_cnt_inj_q <= w_cnt_inj_d; + addr_q <= addr_d; + id_q <= id_d; + size_q <= size_d; + strb_q <= strb_d; + cache_q <= cache_d; + prot_q <= prot_d; + qos_q <= qos_d; + region_q <= region_d; + aw_user_q <= aw_user_d; + w_user_q <= w_user_d; + w_data_q <= w_data_d; + result_q <= result_d; + w_d_valid_q <= w_d_valid_d; + atop_q <= atop_d; + end + end + + /** + * Read Channel: AR, R + */ + + /*==================================================================== + = AR = + ====================================================================*/ + always_comb begin : axi_ar_channel + // Defaults AXI Bus + mst_ar_id_o = slv_ar_id_i; + mst_ar_addr_o = slv_ar_addr_i; + mst_ar_len_o = slv_ar_len_i; + mst_ar_size_o = slv_ar_size_i; + mst_ar_burst_o = slv_ar_burst_i; + mst_ar_lock_o = slv_ar_lock_i; + mst_ar_cache_o = slv_ar_cache_i; + mst_ar_prot_o = slv_ar_prot_i; + 
mst_ar_qos_o = slv_ar_qos_i; + mst_ar_region_o = slv_ar_region_i; + mst_ar_user_o = slv_ar_user_i; + mst_ar_valid_o = 1'b0; + slv_ar_ready_o = 1'b0; + // State Machine + ar_state_d = ar_state_q; + + unique case (ar_state_q) + + FEEDTHROUGH_AR: begin + // Feed through + mst_ar_valid_o = slv_ar_valid_i; + slv_ar_ready_o = mst_ar_ready_i; + + if (adapter_ready) begin + if (atop_valid_d == LOAD | atop_valid_d == STORE) begin + if (ar_free) begin + // Acquire channel + slv_ar_ready_o = 1'b0; + // Immediately start read request + mst_ar_valid_o = 1'b1; + mst_ar_addr_o = slv_aw_addr_i; + mst_ar_id_o = slv_aw_id_i; + mst_ar_len_o = 8'h00; + mst_ar_size_o = slv_aw_size_i; + mst_ar_burst_o = 2'b00; + mst_ar_lock_o = 1'h0; + mst_ar_cache_o = slv_aw_cache_i; + mst_ar_prot_o = slv_aw_prot_i; + mst_ar_qos_o = slv_aw_qos_i; + mst_ar_region_o = slv_aw_region_i; + mst_ar_user_o = slv_aw_user_i; + if (!mst_ar_ready_i) begin + // Hold read request but do not depend on AW + ar_state_d = SEND_AR; + end + end else begin + // Wait until AR is free + ar_state_d = WAIT_CHANNEL_AR; + end + end + end + end // FEEDTHROUGH_AR + + WAIT_CHANNEL_AR, SEND_AR: begin + // Issue read request + if (ar_free || (ar_state_q == SEND_AR)) begin + // Inject read request + mst_ar_valid_o = 1'b1; + mst_ar_addr_o = addr_q; + mst_ar_id_o = id_q; + mst_ar_len_o = 8'h00; + mst_ar_size_o = size_q; + mst_ar_burst_o = 2'b00; + mst_ar_lock_o = 1'h0; + mst_ar_cache_o = cache_q; + mst_ar_prot_o = prot_q; + mst_ar_qos_o = qos_q; + mst_ar_region_o = region_q; + mst_ar_user_o = aw_user_q; + if (mst_ar_ready_i) begin + // Request acknowledged + ar_state_d = FEEDTHROUGH_AR; + end else begin + // Hold read request + ar_state_d = SEND_AR; + end + end else begin + // Wait until AR is free + mst_ar_valid_o = slv_ar_valid_i; + slv_ar_ready_o = mst_ar_ready_i; + end + end // WAIT_CHANNEL_AR, SEND_AR + + default: ar_state_d = FEEDTHROUGH_AR; + + endcase + end // axi_ar_channel + + 
/*==================================================================== + = R = + ====================================================================*/ + always_comb begin : axi_r_channel + // Defaults AXI Bus + mst_r_ready_o = slv_r_ready_i; + slv_r_id_o = mst_r_id_i; + slv_r_data_o = mst_r_data_i; + slv_r_resp_o = mst_r_resp_i; + slv_r_last_o = mst_r_last_i; + slv_r_user_o = mst_r_user_i; + slv_r_valid_o = mst_r_valid_i; + // Defaults FF + r_data_d = r_data_q; + r_resp_d = r_resp_q; + r_user_d = r_user_q; + r_d_valid_d = r_d_valid_q; + // State Machine + r_state_d = r_state_q; + + unique case (r_state_q) + + FEEDTHROUGH_R: begin + if (adapter_ready) begin + // Reset read flag + r_d_valid_d = 1'b0; + + if (atop_valid_d == LOAD || atop_valid_d == STORE) begin + // Wait for R response to read data + r_state_d = WAIT_DATA_R; + end else if (atop_valid_d == INVALID) begin + // Send R response once channel is free + if (r_free) begin + // Acquire the R channel + // Block downstream + mst_r_ready_o = 1'b0; + // Send R error response + slv_r_valid_o = 1'b1; + slv_r_data_o = '0; + slv_r_id_o = slv_aw_id_i; + slv_r_last_o = 1'b1; + slv_r_resp_o = axi_pkg::RESP_SLVERR; + slv_r_user_o = '0; + if (!slv_r_ready_i) begin + // Hold R response + r_state_d = SEND_R; + end + end else begin + r_state_d = WAIT_CHANNEL_R; + end + end + end + end // FEEDTHROUGH_R + + WAIT_DATA_R: begin + // Read data + if (mst_r_valid_i && (mst_r_id_i == id_q)) begin + // Acknowledge downstream and block upstream + mst_r_ready_o = 1'b1; + slv_r_valid_o = 1'b0; + // Store data + r_data_d = mst_r_data_i; + r_resp_d = mst_r_resp_i; + r_user_d = mst_r_user_i; + r_d_valid_d = 1'b1; + if (atop_valid_q == STORE) begin + r_state_d = FEEDTHROUGH_R; + end else begin + // Wait for B resp before injecting R + r_state_d = WAIT_CHANNEL_R; + end + end + end // WAIT_DATA_R + + WAIT_CHANNEL_R, SEND_R: begin + // Wait for the R channel to become free and B response to be valid + // TODO: Use b_state_d to be one cycle 
quicker + if ((r_free && (b_state_q != WAIT_COMPLETE_B)) || (r_state_q == SEND_R)) begin + // Block downstream + mst_r_ready_o = 1'b0; + // Send R response + slv_r_valid_o = 1'b1; + slv_r_data_o = r_data_q; + slv_r_id_o = id_q; + slv_r_last_o = 1'b1; + slv_r_resp_o = r_resp_q; + slv_r_user_o = r_user_q; + if (atop_valid_q == INVALID) begin + slv_r_data_o = '0; + slv_r_resp_o = axi_pkg::RESP_SLVERR; + slv_r_user_o = '0; + end + if (slv_r_ready_i) begin + r_state_d = FEEDTHROUGH_R; + end else begin + r_state_d = SEND_R; + end + end + end // WAIT_CHANNEL_R, SEND_R + + default: r_state_d = FEEDTHROUGH_R; + + endcase + end // axi_r_channel + + always_ff @(posedge clk_i or negedge rst_ni) begin : axi_read_channel_ff + if(~rst_ni) begin + ar_state_q <= FEEDTHROUGH_AR; + r_state_q <= FEEDTHROUGH_R; + r_data_q <= '0; + r_resp_q <= '0; + r_user_q <= '0; + r_d_valid_q <= 1'b0; + end else begin + ar_state_q <= ar_state_d; + r_state_q <= r_state_d; + r_data_q <= r_data_d; + r_resp_q <= r_resp_d; + r_user_q <= r_user_d; + r_d_valid_q <= r_d_valid_d; + end + end + + /** + * ALU + */ + + assign op_a = r_data_q & strb_ext; + assign op_b = w_data_q & strb_ext; + assign sign_a = |(op_a & ~(strb_ext >> 1)); + assign sign_b = |(op_b & ~(strb_ext >> 1)); + assign alu_result_ext = res; + + generate + if (AXI_ALU_RATIO == 1 && RISCV_WORD_WIDTH == 32) begin + assign alu_operand_a = op_a; + assign alu_operand_b = op_b; + assign res = alu_result; + end else if (AXI_ALU_RATIO == 1 && RISCV_WORD_WIDTH == 64) begin + assign res = alu_result; + always_comb begin + op_a_sign_ext = op_a | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_a}} & ~strb_ext); + op_b_sign_ext = op_b | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_b}} & ~strb_ext); + + if (atop_q[2:0] == axi_pkg::ATOP_SMAX || atop_q[2:0] == axi_pkg::ATOP_SMIN) begin + // Sign extend + alu_operand_a = op_a_sign_ext; + alu_operand_b = op_b_sign_ext; + end else begin + // No sign extension necessary + alu_operand_a = op_a; + alu_operand_b = op_b; + end + end 
+ end else begin + always_comb begin + op_a_sign_ext = op_a | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_a}} & ~strb_ext); + op_b_sign_ext = op_b | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_b}} & ~strb_ext); + + if (atop_q[2:0] == axi_pkg::ATOP_SMAX || atop_q[2:0] == axi_pkg::ATOP_SMIN) begin + // Sign extend + alu_operand_a = op_a_sign_ext[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]]; + alu_operand_b = op_b_sign_ext[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]]; + end else begin + // No sign extension necessary + alu_operand_a = op_a[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]]; + alu_operand_b = op_b[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]]; + end + res = '0; + res[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]] = alu_result; + end + end + endgenerate + + generate + for (genvar i = 0; i < AXI_STRB_WIDTH; i++) begin + always_comb begin + if (strb_q[i]) begin + strb_ext[i] = 8'hFF; + end else begin + strb_ext[i] = 8'h00; + end + end + end + endgenerate + + axi_riscv_amos_alu #( + .DATA_WIDTH ( RISCV_WORD_WIDTH ) + ) i_amo_alu ( + .amo_op_i ( atop_q ), + .amo_operand_a_i ( alu_operand_a ), + .amo_operand_b_i ( alu_operand_b ), + .amo_result_o ( alu_result ) + ); + + // Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (AXI_ADDR_WIDTH > 0) + else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!"); + assert (AXI_DATA_WIDTH > 0) + else $fatal(1, "AXI_DATA_WIDTH must be greater than 0!"); + assert (AXI_ID_WIDTH > 0) + else $fatal(1, "AXI_ID_WIDTH must be greater than 0!"); + assert (AXI_MAX_WRITE_TXNS > 0) + else $fatal(1, "AXI_MAX_WRITE_TXNS must be greater than 0!"); + assert (RISCV_WORD_WIDTH == 32 || RISCV_WORD_WIDTH == 64) + else $fatal(1, "RISCV_WORD_WIDTH must be 32 or 64!"); + assert (RISCV_WORD_WIDTH <= AXI_DATA_WIDTH) + else $fatal(1, "RISCV_WORD_WIDTH must not be greater than AXI_DATA_WIDTH!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv new file mode 100644 index 0000000..40a52b0 --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv @@ -0,0 +1,78 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// AXI RISC-V Atomic Operations (AMOs) ALU +module axi_riscv_amos_alu # ( + parameter int unsigned DATA_WIDTH = 0 +) ( + input logic [5:0] amo_op_i, + input logic [DATA_WIDTH-1:0] amo_operand_a_i, + input logic [DATA_WIDTH-1:0] amo_operand_b_i, + output logic [DATA_WIDTH-1:0] amo_result_o +); + + logic [DATA_WIDTH:0] adder_sum; + logic [DATA_WIDTH:0] adder_operand_a, adder_operand_b; + + assign adder_sum = adder_operand_a + adder_operand_b; + + always_comb begin + + adder_operand_a = $signed(amo_operand_a_i); + adder_operand_b = $signed(amo_operand_b_i); + + amo_result_o = amo_operand_a_i; + + if (amo_op_i == axi_pkg::ATOP_ATOMICSWAP) begin + // Swap operation + amo_result_o = amo_operand_b_i; + end else if ((amo_op_i[5:4] == axi_pkg::ATOP_ATOMICLOAD) | (amo_op_i[5:4] == axi_pkg::ATOP_ATOMICSTORE)) begin + // Load operation + unique case (amo_op_i[2:0]) + // the default is to output operand_a + axi_pkg::ATOP_ADD: amo_result_o = adder_sum[DATA_WIDTH-1:0]; + axi_pkg::ATOP_CLR: amo_result_o = amo_operand_a_i & (~amo_operand_b_i); + axi_pkg::ATOP_SET: amo_result_o = amo_operand_a_i | amo_operand_b_i; + axi_pkg::ATOP_EOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i; + axi_pkg::ATOP_SMAX: begin + adder_operand_b = -$signed(amo_operand_b_i); + amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_b_i : amo_operand_a_i; + end + axi_pkg::ATOP_SMIN: begin + adder_operand_b = -$signed(amo_operand_b_i); + amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_a_i : amo_operand_b_i; + end + axi_pkg::ATOP_UMAX: begin + adder_operand_a = $unsigned(amo_operand_a_i); + adder_operand_b = -$unsigned(amo_operand_b_i); + amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_b_i : amo_operand_a_i; + end + axi_pkg::ATOP_UMIN: begin + adder_operand_a = $unsigned(amo_operand_a_i); + adder_operand_b = -$unsigned(amo_operand_b_i); + amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_a_i : amo_operand_b_i; + end + default: amo_result_o = '0; + endcase + end + end + + // Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (DATA_WIDTH > 0) + else $fatal(1, "DATA_WIDTH must be greater than 0!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv new file mode 100644 index 0000000..f9b72d3 --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv @@ -0,0 +1,400 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// AXI RISC-V Atomics ("A" Extension) Adapter +// +// This AXI adapter implements the RISC-V "A" extension and adheres to the RVWMO memory consistency +// model. +// +// Maintainer: Andreas Kurth + +module axi_riscv_atomics #( + /// AXI Parameters + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + // Maximum number of AXI write bursts outstanding at the same time + parameter int unsigned AXI_MAX_WRITE_TXNS = 0, + // Word width of the widest RISC-V processor that can issue requests to this module. + // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are + // supported if `aw_strb` is set correctly. 
+ parameter int unsigned RISCV_WORD_WIDTH = 0, + /// Derived Parameters (do NOT change manually!) + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + + /// Slave Interface + input logic [AXI_ADDR_WIDTH-1:0] slv_aw_addr_i, + input logic [2:0] slv_aw_prot_i, + input logic [3:0] slv_aw_region_i, + input logic [5:0] slv_aw_atop_i, + input logic [7:0] slv_aw_len_i, + input logic [2:0] slv_aw_size_i, + input logic [1:0] slv_aw_burst_i, + input logic slv_aw_lock_i, + input logic [3:0] slv_aw_cache_i, + input logic [3:0] slv_aw_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_aw_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_aw_user_i, + output logic slv_aw_ready_o, + input logic slv_aw_valid_i, + + input logic [AXI_ADDR_WIDTH-1:0] slv_ar_addr_i, + input logic [2:0] slv_ar_prot_i, + input logic [3:0] slv_ar_region_i, + input logic [7:0] slv_ar_len_i, + input logic [2:0] slv_ar_size_i, + input logic [1:0] slv_ar_burst_i, + input logic slv_ar_lock_i, + input logic [3:0] slv_ar_cache_i, + input logic [3:0] slv_ar_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_ar_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_ar_user_i, + output logic slv_ar_ready_o, + input logic slv_ar_valid_i, + + input logic [AXI_DATA_WIDTH-1:0] slv_w_data_i, + input logic [AXI_STRB_WIDTH-1:0] slv_w_strb_i, + input logic [AXI_USER_WIDTH-1:0] slv_w_user_i, + input logic slv_w_last_i, + output logic slv_w_ready_o, + input logic slv_w_valid_i, + + output logic [AXI_DATA_WIDTH-1:0] slv_r_data_o, + output logic [1:0] slv_r_resp_o, + output logic slv_r_last_o, + output logic [AXI_ID_WIDTH-1:0] slv_r_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_r_user_o, + input logic slv_r_ready_i, + output logic slv_r_valid_o, + + output logic [1:0] slv_b_resp_o, + output logic [AXI_ID_WIDTH-1:0] slv_b_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_b_user_o, + input logic slv_b_ready_i, + output logic slv_b_valid_o, + + /// Master Interface + output logic [AXI_ADDR_WIDTH-1:0] 
mst_aw_addr_o, + output logic [2:0] mst_aw_prot_o, + output logic [3:0] mst_aw_region_o, + output logic [5:0] mst_aw_atop_o, + output logic [7:0] mst_aw_len_o, + output logic [2:0] mst_aw_size_o, + output logic [1:0] mst_aw_burst_o, + output logic mst_aw_lock_o, + output logic [3:0] mst_aw_cache_o, + output logic [3:0] mst_aw_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_aw_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_aw_user_o, + input logic mst_aw_ready_i, + output logic mst_aw_valid_o, + + output logic [AXI_ADDR_WIDTH-1:0] mst_ar_addr_o, + output logic [2:0] mst_ar_prot_o, + output logic [3:0] mst_ar_region_o, + output logic [7:0] mst_ar_len_o, + output logic [2:0] mst_ar_size_o, + output logic [1:0] mst_ar_burst_o, + output logic mst_ar_lock_o, + output logic [3:0] mst_ar_cache_o, + output logic [3:0] mst_ar_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_ar_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_ar_user_o, + input logic mst_ar_ready_i, + output logic mst_ar_valid_o, + + output logic [AXI_DATA_WIDTH-1:0] mst_w_data_o, + output logic [AXI_STRB_WIDTH-1:0] mst_w_strb_o, + output logic [AXI_USER_WIDTH-1:0] mst_w_user_o, + output logic mst_w_last_o, + input logic mst_w_ready_i, + output logic mst_w_valid_o, + + input logic [AXI_DATA_WIDTH-1:0] mst_r_data_i, + input logic [1:0] mst_r_resp_i, + input logic mst_r_last_i, + input logic [AXI_ID_WIDTH-1:0] mst_r_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_r_user_i, + output logic mst_r_ready_o, + input logic mst_r_valid_i, + + input logic [1:0] mst_b_resp_i, + input logic [AXI_ID_WIDTH-1:0] mst_b_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_b_user_i, + output logic mst_b_ready_o, + input logic mst_b_valid_i +); + + // Make the entire address range exclusively accessible. Since the AMO adapter does not support + // address ranges, it would not make sense to expose the address range as a parameter of this + // module. 
+ localparam longint unsigned ADDR_BEGIN = '0; + localparam longint unsigned ADDR_END = {AXI_ADDR_WIDTH{1'b1}}; + + logic [AXI_ADDR_WIDTH-1:0] int_axi_aw_addr; + logic [2:0] int_axi_aw_prot; + logic [3:0] int_axi_aw_region; + logic [5:0] int_axi_aw_atop; + logic [7:0] int_axi_aw_len; + logic [2:0] int_axi_aw_size; + logic [1:0] int_axi_aw_burst; + logic int_axi_aw_lock; + logic [3:0] int_axi_aw_cache; + logic [3:0] int_axi_aw_qos; + logic [AXI_ID_WIDTH-1:0] int_axi_aw_id; + logic [AXI_USER_WIDTH-1:0] int_axi_aw_user; + logic int_axi_aw_ready; + logic int_axi_aw_valid; + + logic [AXI_ADDR_WIDTH-1:0] int_axi_ar_addr; + logic [2:0] int_axi_ar_prot; + logic [3:0] int_axi_ar_region; + logic [7:0] int_axi_ar_len; + logic [2:0] int_axi_ar_size; + logic [1:0] int_axi_ar_burst; + logic int_axi_ar_lock; + logic [3:0] int_axi_ar_cache; + logic [3:0] int_axi_ar_qos; + logic [AXI_ID_WIDTH-1:0] int_axi_ar_id; + logic [AXI_USER_WIDTH-1:0] int_axi_ar_user; + logic int_axi_ar_ready; + logic int_axi_ar_valid; + + logic [AXI_DATA_WIDTH-1:0] int_axi_w_data; + logic [AXI_STRB_WIDTH-1:0] int_axi_w_strb; + logic [AXI_USER_WIDTH-1:0] int_axi_w_user; + logic int_axi_w_last; + logic int_axi_w_ready; + logic int_axi_w_valid; + + logic [AXI_DATA_WIDTH-1:0] int_axi_r_data; + logic [1:0] int_axi_r_resp; + logic int_axi_r_last; + logic [AXI_ID_WIDTH-1:0] int_axi_r_id; + logic [AXI_USER_WIDTH-1:0] int_axi_r_user; + logic int_axi_r_ready; + logic int_axi_r_valid; + + logic [1:0] int_axi_b_resp; + logic [AXI_ID_WIDTH-1:0] int_axi_b_id; + logic [AXI_USER_WIDTH-1:0] int_axi_b_user; + logic int_axi_b_ready; + logic int_axi_b_valid; + + axi_riscv_amos #( + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH), + .AXI_USER_WIDTH (AXI_USER_WIDTH), + .AXI_MAX_WRITE_TXNS (AXI_MAX_WRITE_TXNS), + .RISCV_WORD_WIDTH (RISCV_WORD_WIDTH) + ) i_amos ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_aw_addr_i ( slv_aw_addr_i ), + .slv_aw_prot_i ( slv_aw_prot_i ), + 
.slv_aw_region_i ( slv_aw_region_i ), + .slv_aw_atop_i ( slv_aw_atop_i ), + .slv_aw_len_i ( slv_aw_len_i ), + .slv_aw_size_i ( slv_aw_size_i ), + .slv_aw_burst_i ( slv_aw_burst_i ), + .slv_aw_lock_i ( slv_aw_lock_i ), + .slv_aw_cache_i ( slv_aw_cache_i ), + .slv_aw_qos_i ( slv_aw_qos_i ), + .slv_aw_id_i ( slv_aw_id_i ), + .slv_aw_user_i ( slv_aw_user_i ), + .slv_aw_ready_o ( slv_aw_ready_o ), + .slv_aw_valid_i ( slv_aw_valid_i ), + .slv_ar_addr_i ( slv_ar_addr_i ), + .slv_ar_prot_i ( slv_ar_prot_i ), + .slv_ar_region_i ( slv_ar_region_i ), + .slv_ar_len_i ( slv_ar_len_i ), + .slv_ar_size_i ( slv_ar_size_i ), + .slv_ar_burst_i ( slv_ar_burst_i ), + .slv_ar_lock_i ( slv_ar_lock_i ), + .slv_ar_cache_i ( slv_ar_cache_i ), + .slv_ar_qos_i ( slv_ar_qos_i ), + .slv_ar_id_i ( slv_ar_id_i ), + .slv_ar_user_i ( slv_ar_user_i ), + .slv_ar_ready_o ( slv_ar_ready_o ), + .slv_ar_valid_i ( slv_ar_valid_i ), + .slv_w_data_i ( slv_w_data_i ), + .slv_w_strb_i ( slv_w_strb_i ), + .slv_w_user_i ( slv_w_user_i ), + .slv_w_last_i ( slv_w_last_i ), + .slv_w_ready_o ( slv_w_ready_o ), + .slv_w_valid_i ( slv_w_valid_i ), + .slv_r_data_o ( slv_r_data_o ), + .slv_r_resp_o ( slv_r_resp_o ), + .slv_r_last_o ( slv_r_last_o ), + .slv_r_id_o ( slv_r_id_o ), + .slv_r_user_o ( slv_r_user_o ), + .slv_r_ready_i ( slv_r_ready_i ), + .slv_r_valid_o ( slv_r_valid_o ), + .slv_b_resp_o ( slv_b_resp_o ), + .slv_b_id_o ( slv_b_id_o ), + .slv_b_user_o ( slv_b_user_o ), + .slv_b_ready_i ( slv_b_ready_i ), + .slv_b_valid_o ( slv_b_valid_o ), + .mst_aw_addr_o ( int_axi_aw_addr ), + .mst_aw_prot_o ( int_axi_aw_prot ), + .mst_aw_region_o ( int_axi_aw_region ), + .mst_aw_atop_o ( int_axi_aw_atop ), + .mst_aw_len_o ( int_axi_aw_len ), + .mst_aw_size_o ( int_axi_aw_size ), + .mst_aw_burst_o ( int_axi_aw_burst ), + .mst_aw_lock_o ( int_axi_aw_lock ), + .mst_aw_cache_o ( int_axi_aw_cache ), + .mst_aw_qos_o ( int_axi_aw_qos ), + .mst_aw_id_o ( int_axi_aw_id ), + .mst_aw_user_o ( int_axi_aw_user ), + .mst_aw_ready_i ( 
int_axi_aw_ready ), + .mst_aw_valid_o ( int_axi_aw_valid ), + .mst_ar_addr_o ( int_axi_ar_addr ), + .mst_ar_prot_o ( int_axi_ar_prot ), + .mst_ar_region_o ( int_axi_ar_region ), + .mst_ar_len_o ( int_axi_ar_len ), + .mst_ar_size_o ( int_axi_ar_size ), + .mst_ar_burst_o ( int_axi_ar_burst ), + .mst_ar_lock_o ( int_axi_ar_lock ), + .mst_ar_cache_o ( int_axi_ar_cache ), + .mst_ar_qos_o ( int_axi_ar_qos ), + .mst_ar_id_o ( int_axi_ar_id ), + .mst_ar_user_o ( int_axi_ar_user ), + .mst_ar_ready_i ( int_axi_ar_ready ), + .mst_ar_valid_o ( int_axi_ar_valid ), + .mst_w_data_o ( int_axi_w_data ), + .mst_w_strb_o ( int_axi_w_strb ), + .mst_w_user_o ( int_axi_w_user ), + .mst_w_last_o ( int_axi_w_last ), + .mst_w_ready_i ( int_axi_w_ready ), + .mst_w_valid_o ( int_axi_w_valid ), + .mst_r_data_i ( int_axi_r_data ), + .mst_r_resp_i ( int_axi_r_resp ), + .mst_r_last_i ( int_axi_r_last ), + .mst_r_id_i ( int_axi_r_id ), + .mst_r_user_i ( int_axi_r_user ), + .mst_r_ready_o ( int_axi_r_ready ), + .mst_r_valid_i ( int_axi_r_valid ), + .mst_b_resp_i ( int_axi_b_resp ), + .mst_b_id_i ( int_axi_b_id ), + .mst_b_user_i ( int_axi_b_user ), + .mst_b_ready_o ( int_axi_b_ready ), + .mst_b_valid_i ( int_axi_b_valid ) + ); + + axi_riscv_lrsc #( + .ADDR_BEGIN (ADDR_BEGIN), + .ADDR_END (ADDR_END), + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH), + .AXI_USER_WIDTH (AXI_USER_WIDTH) + ) i_lrsc ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_aw_addr_i ( int_axi_aw_addr ), + .slv_aw_prot_i ( int_axi_aw_prot ), + .slv_aw_region_i ( int_axi_aw_region ), + .slv_aw_atop_i ( int_axi_aw_atop ), + .slv_aw_len_i ( int_axi_aw_len ), + .slv_aw_size_i ( int_axi_aw_size ), + .slv_aw_burst_i ( int_axi_aw_burst ), + .slv_aw_lock_i ( int_axi_aw_lock ), + .slv_aw_cache_i ( int_axi_aw_cache ), + .slv_aw_qos_i ( int_axi_aw_qos ), + .slv_aw_id_i ( int_axi_aw_id ), + .slv_aw_user_i ( int_axi_aw_user ), + .slv_aw_ready_o ( int_axi_aw_ready ), + .slv_aw_valid_i ( 
int_axi_aw_valid ), + .slv_ar_addr_i ( int_axi_ar_addr ), + .slv_ar_prot_i ( int_axi_ar_prot ), + .slv_ar_region_i ( int_axi_ar_region ), + .slv_ar_len_i ( int_axi_ar_len ), + .slv_ar_size_i ( int_axi_ar_size ), + .slv_ar_burst_i ( int_axi_ar_burst ), + .slv_ar_lock_i ( int_axi_ar_lock ), + .slv_ar_cache_i ( int_axi_ar_cache ), + .slv_ar_qos_i ( int_axi_ar_qos ), + .slv_ar_id_i ( int_axi_ar_id ), + .slv_ar_user_i ( int_axi_ar_user ), + .slv_ar_ready_o ( int_axi_ar_ready ), + .slv_ar_valid_i ( int_axi_ar_valid ), + .slv_w_data_i ( int_axi_w_data ), + .slv_w_strb_i ( int_axi_w_strb ), + .slv_w_user_i ( int_axi_w_user ), + .slv_w_last_i ( int_axi_w_last ), + .slv_w_ready_o ( int_axi_w_ready ), + .slv_w_valid_i ( int_axi_w_valid ), + .slv_r_data_o ( int_axi_r_data ), + .slv_r_resp_o ( int_axi_r_resp ), + .slv_r_last_o ( int_axi_r_last ), + .slv_r_id_o ( int_axi_r_id ), + .slv_r_user_o ( int_axi_r_user ), + .slv_r_ready_i ( int_axi_r_ready ), + .slv_r_valid_o ( int_axi_r_valid ), + .slv_b_resp_o ( int_axi_b_resp ), + .slv_b_id_o ( int_axi_b_id ), + .slv_b_user_o ( int_axi_b_user ), + .slv_b_ready_i ( int_axi_b_ready ), + .slv_b_valid_o ( int_axi_b_valid ), + .mst_aw_addr_o ( mst_aw_addr_o ), + .mst_aw_prot_o ( mst_aw_prot_o ), + .mst_aw_region_o ( mst_aw_region_o ), + .mst_aw_atop_o ( mst_aw_atop_o ), + .mst_aw_len_o ( mst_aw_len_o ), + .mst_aw_size_o ( mst_aw_size_o ), + .mst_aw_burst_o ( mst_aw_burst_o ), + .mst_aw_lock_o ( mst_aw_lock_o ), + .mst_aw_cache_o ( mst_aw_cache_o ), + .mst_aw_qos_o ( mst_aw_qos_o ), + .mst_aw_id_o ( mst_aw_id_o ), + .mst_aw_user_o ( mst_aw_user_o ), + .mst_aw_ready_i ( mst_aw_ready_i ), + .mst_aw_valid_o ( mst_aw_valid_o ), + .mst_ar_addr_o ( mst_ar_addr_o ), + .mst_ar_prot_o ( mst_ar_prot_o ), + .mst_ar_region_o ( mst_ar_region_o ), + .mst_ar_len_o ( mst_ar_len_o ), + .mst_ar_size_o ( mst_ar_size_o ), + .mst_ar_burst_o ( mst_ar_burst_o ), + .mst_ar_lock_o ( mst_ar_lock_o ), + .mst_ar_cache_o ( mst_ar_cache_o ), + .mst_ar_qos_o ( 
mst_ar_qos_o ), + .mst_ar_id_o ( mst_ar_id_o ), + .mst_ar_user_o ( mst_ar_user_o ), + .mst_ar_ready_i ( mst_ar_ready_i ), + .mst_ar_valid_o ( mst_ar_valid_o ), + .mst_w_data_o ( mst_w_data_o ), + .mst_w_strb_o ( mst_w_strb_o ), + .mst_w_user_o ( mst_w_user_o ), + .mst_w_last_o ( mst_w_last_o ), + .mst_w_ready_i ( mst_w_ready_i ), + .mst_w_valid_o ( mst_w_valid_o ), + .mst_r_data_i ( mst_r_data_i ), + .mst_r_resp_i ( mst_r_resp_i ), + .mst_r_last_i ( mst_r_last_i ), + .mst_r_id_i ( mst_r_id_i ), + .mst_r_user_i ( mst_r_user_i ), + .mst_r_ready_o ( mst_r_ready_o ), + .mst_r_valid_i ( mst_r_valid_i ), + .mst_b_resp_i ( mst_b_resp_i ), + .mst_b_id_i ( mst_b_id_i ), + .mst_b_user_i ( mst_b_user_i ), + .mst_b_ready_o ( mst_b_ready_o ), + .mst_b_valid_i ( mst_b_valid_i ) + ); + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv new file mode 100644 index 0000000..ad3505c --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv @@ -0,0 +1,151 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Wrapper for the AXI RISC-V Atomics Adapter that exposes AXI SystemVerilog interfaces. +// +// See the header of `axi_riscv_atomics` for a description. 
+// +// Maintainer: Andreas Kurth + +module axi_riscv_atomics_wrap #( + /// AXI Parameters + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + /// Maximum number of AXI bursts outstanding at the same time + parameter int unsigned AXI_MAX_WRITE_TXNS = 0, + // Word width of the widest RISC-V processor that can issue requests to this module. + // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are + // supported if `aw_strb` is set correctly. + parameter int unsigned RISCV_WORD_WIDTH = 0, + /// Derived Parameters (do NOT change manually!) + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + AXI_BUS.Master mst, + AXI_BUS.Slave slv +); + + axi_riscv_atomics #( + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH), + .AXI_USER_WIDTH (AXI_USER_WIDTH), + .AXI_MAX_WRITE_TXNS (AXI_MAX_WRITE_TXNS), + .RISCV_WORD_WIDTH (RISCV_WORD_WIDTH) + ) i_atomics ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_aw_addr_i ( slv.aw_addr ), + .slv_aw_prot_i ( slv.aw_prot ), + .slv_aw_region_i ( slv.aw_region ), + .slv_aw_atop_i ( slv.aw_atop ), + .slv_aw_len_i ( slv.aw_len ), + .slv_aw_size_i ( slv.aw_size ), + .slv_aw_burst_i ( slv.aw_burst ), + .slv_aw_lock_i ( slv.aw_lock ), + .slv_aw_cache_i ( slv.aw_cache ), + .slv_aw_qos_i ( slv.aw_qos ), + .slv_aw_id_i ( slv.aw_id ), + .slv_aw_user_i ( slv.aw_user ), + .slv_aw_ready_o ( slv.aw_ready ), + .slv_aw_valid_i ( slv.aw_valid ), + .slv_ar_addr_i ( slv.ar_addr ), + .slv_ar_prot_i ( slv.ar_prot ), + .slv_ar_region_i ( slv.ar_region ), + .slv_ar_len_i ( slv.ar_len ), + .slv_ar_size_i ( slv.ar_size ), + .slv_ar_burst_i ( slv.ar_burst ), + .slv_ar_lock_i ( slv.ar_lock ), + .slv_ar_cache_i ( slv.ar_cache ), + .slv_ar_qos_i ( slv.ar_qos ), + .slv_ar_id_i ( slv.ar_id ), + 
.slv_ar_user_i ( slv.ar_user ), + .slv_ar_ready_o ( slv.ar_ready ), + .slv_ar_valid_i ( slv.ar_valid ), + .slv_w_data_i ( slv.w_data ), + .slv_w_strb_i ( slv.w_strb ), + .slv_w_user_i ( slv.w_user ), + .slv_w_last_i ( slv.w_last ), + .slv_w_ready_o ( slv.w_ready ), + .slv_w_valid_i ( slv.w_valid ), + .slv_r_data_o ( slv.r_data ), + .slv_r_resp_o ( slv.r_resp ), + .slv_r_last_o ( slv.r_last ), + .slv_r_id_o ( slv.r_id ), + .slv_r_user_o ( slv.r_user ), + .slv_r_ready_i ( slv.r_ready ), + .slv_r_valid_o ( slv.r_valid ), + .slv_b_resp_o ( slv.b_resp ), + .slv_b_id_o ( slv.b_id ), + .slv_b_user_o ( slv.b_user ), + .slv_b_ready_i ( slv.b_ready ), + .slv_b_valid_o ( slv.b_valid ), + .mst_aw_addr_o ( mst.aw_addr ), + .mst_aw_prot_o ( mst.aw_prot ), + .mst_aw_region_o ( mst.aw_region ), + .mst_aw_atop_o ( mst.aw_atop ), + .mst_aw_len_o ( mst.aw_len ), + .mst_aw_size_o ( mst.aw_size ), + .mst_aw_burst_o ( mst.aw_burst ), + .mst_aw_lock_o ( mst.aw_lock ), + .mst_aw_cache_o ( mst.aw_cache ), + .mst_aw_qos_o ( mst.aw_qos ), + .mst_aw_id_o ( mst.aw_id ), + .mst_aw_user_o ( mst.aw_user ), + .mst_aw_ready_i ( mst.aw_ready ), + .mst_aw_valid_o ( mst.aw_valid ), + .mst_ar_addr_o ( mst.ar_addr ), + .mst_ar_prot_o ( mst.ar_prot ), + .mst_ar_region_o ( mst.ar_region ), + .mst_ar_len_o ( mst.ar_len ), + .mst_ar_size_o ( mst.ar_size ), + .mst_ar_burst_o ( mst.ar_burst ), + .mst_ar_lock_o ( mst.ar_lock ), + .mst_ar_cache_o ( mst.ar_cache ), + .mst_ar_qos_o ( mst.ar_qos ), + .mst_ar_id_o ( mst.ar_id ), + .mst_ar_user_o ( mst.ar_user ), + .mst_ar_ready_i ( mst.ar_ready ), + .mst_ar_valid_o ( mst.ar_valid ), + .mst_w_data_o ( mst.w_data ), + .mst_w_strb_o ( mst.w_strb ), + .mst_w_user_o ( mst.w_user ), + .mst_w_last_o ( mst.w_last ), + .mst_w_ready_i ( mst.w_ready ), + .mst_w_valid_o ( mst.w_valid ), + .mst_r_data_i ( mst.r_data ), + .mst_r_resp_i ( mst.r_resp ), + .mst_r_last_i ( mst.r_last ), + .mst_r_id_i ( mst.r_id ), + .mst_r_user_i ( mst.r_user ), + .mst_r_ready_o ( mst.r_ready ), + 
.mst_r_valid_i ( mst.r_valid ), + .mst_b_resp_i ( mst.b_resp ), + .mst_b_id_i ( mst.b_id ), + .mst_b_user_i ( mst.b_user ), + .mst_b_ready_o ( mst.b_ready ), + .mst_b_valid_i ( mst.b_valid ) + ); + + // Validate parameters. +// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (AXI_STRB_WIDTH == AXI_DATA_WIDTH/8) + else $fatal(1, "AXI_STRB_WIDTH must equal AXI_DATA_WIDTH/8!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv new file mode 100644 index 0000000..82c132c --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv @@ -0,0 +1,509 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// AXI RISC-V LR/SC Adapter +// +// This adapter adds support for AXI4 exclusive accesses to a slave that natively does not support +// exclusive accesses. It is to be placed between that slave and the upstream master port, so that +// the `mst` port of this module drives the slave and the `slv` port of this module is driven by +// the upstream master. +// +// Exclusive accesses are only enabled for a range of addresses specified through parameters. 
All +// addresses within that range are guaranteed to fulfill the constraints described in A7.2 of the +// AXI4 standard, both for normal and exclusive memory accesses. Addresses outside that range +// behave like a slave that does not support exclusive memory accesses (see AXI4, A7.2.5). +// +// Limitations: +// - The adapter allows at most one read and one write access to be outstanding at any given +// time. +// - The adapter does not support bursts in exclusive accesses. Only single words can be +// reserved. +// +// Maintainer: Andreas Kurth + +module axi_riscv_lrsc #( + /// Exclusively-accessible address range (closed interval from ADDR_BEGIN to ADDR_END) + parameter longint unsigned ADDR_BEGIN = 0, + parameter longint unsigned ADDR_END = 0, + /// AXI Parameters + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + /// Derived Parameters (do NOT change manually!)
+ localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + + /// Slave Interface + input logic [AXI_ADDR_WIDTH-1:0] slv_aw_addr_i, + input logic [2:0] slv_aw_prot_i, + input logic [3:0] slv_aw_region_i, + input logic [5:0] slv_aw_atop_i, + input logic [7:0] slv_aw_len_i, + input logic [2:0] slv_aw_size_i, + input logic [1:0] slv_aw_burst_i, + input logic slv_aw_lock_i, + input logic [3:0] slv_aw_cache_i, + input logic [3:0] slv_aw_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_aw_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_aw_user_i, + output logic slv_aw_ready_o, + input logic slv_aw_valid_i, + + input logic [AXI_ADDR_WIDTH-1:0] slv_ar_addr_i, + input logic [2:0] slv_ar_prot_i, + input logic [3:0] slv_ar_region_i, + input logic [7:0] slv_ar_len_i, + input logic [2:0] slv_ar_size_i, + input logic [1:0] slv_ar_burst_i, + input logic slv_ar_lock_i, + input logic [3:0] slv_ar_cache_i, + input logic [3:0] slv_ar_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_ar_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_ar_user_i, + output logic slv_ar_ready_o, + input logic slv_ar_valid_i, + + input logic [AXI_DATA_WIDTH-1:0] slv_w_data_i, + input logic [AXI_STRB_WIDTH-1:0] slv_w_strb_i, + input logic [AXI_USER_WIDTH-1:0] slv_w_user_i, + input logic slv_w_last_i, + output logic slv_w_ready_o, + input logic slv_w_valid_i, + + output logic [AXI_DATA_WIDTH-1:0] slv_r_data_o, + output logic [1:0] slv_r_resp_o, + output logic slv_r_last_o, + output logic [AXI_ID_WIDTH-1:0] slv_r_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_r_user_o, + input logic slv_r_ready_i, + output logic slv_r_valid_o, + + output logic [1:0] slv_b_resp_o, + output logic [AXI_ID_WIDTH-1:0] slv_b_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_b_user_o, + input logic slv_b_ready_i, + output logic slv_b_valid_o, + + /// Master Interface + output logic [AXI_ADDR_WIDTH-1:0] mst_aw_addr_o, + output logic [2:0] mst_aw_prot_o, + output logic [3:0] mst_aw_region_o, + output 
logic [5:0] mst_aw_atop_o, + output logic [7:0] mst_aw_len_o, + output logic [2:0] mst_aw_size_o, + output logic [1:0] mst_aw_burst_o, + output logic mst_aw_lock_o, + output logic [3:0] mst_aw_cache_o, + output logic [3:0] mst_aw_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_aw_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_aw_user_o, + input logic mst_aw_ready_i, + output logic mst_aw_valid_o, + + output logic [AXI_ADDR_WIDTH-1:0] mst_ar_addr_o, + output logic [2:0] mst_ar_prot_o, + output logic [3:0] mst_ar_region_o, + output logic [7:0] mst_ar_len_o, + output logic [2:0] mst_ar_size_o, + output logic [1:0] mst_ar_burst_o, + output logic mst_ar_lock_o, + output logic [3:0] mst_ar_cache_o, + output logic [3:0] mst_ar_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_ar_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_ar_user_o, + input logic mst_ar_ready_i, + output logic mst_ar_valid_o, + + output logic [AXI_DATA_WIDTH-1:0] mst_w_data_o, + output logic [AXI_STRB_WIDTH-1:0] mst_w_strb_o, + output logic [AXI_USER_WIDTH-1:0] mst_w_user_o, + output logic mst_w_last_o, + input logic mst_w_ready_i, + output logic mst_w_valid_o, + + input logic [AXI_DATA_WIDTH-1:0] mst_r_data_i, + input logic [1:0] mst_r_resp_i, + input logic mst_r_last_i, + input logic [AXI_ID_WIDTH-1:0] mst_r_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_r_user_i, + output logic mst_r_ready_o, + input logic mst_r_valid_i, + + input logic [1:0] mst_b_resp_i, + input logic [AXI_ID_WIDTH-1:0] mst_b_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_b_user_i, + output logic mst_b_ready_o, + input logic mst_b_valid_i +); + + // Declarations of Signals and Types + + logic [AXI_ID_WIDTH-1:0] art_check_id, + art_set_id, + w_id_d, w_id_q; + + logic [AXI_ADDR_WIDTH-1:0] art_check_addr, + art_clr_addr, + art_set_addr, + rd_clr_addr, + wr_clr_addr, + w_addr_d, w_addr_q; + + logic art_check_req, art_check_gnt, + art_clr_req, art_clr_gnt, + art_set_req, art_set_gnt, + rd_clr_req, rd_clr_gnt, + wr_clr_req, wr_clr_gnt; + + logic 
art_check_res; + + logic b_excl_d, b_excl_q, + r_excl_d, r_excl_q; + + typedef enum logic [1:0] {R_IDLE, R_WAIT_AR, R_WAIT_R} r_state_t; + r_state_t r_state_d, r_state_q; + + typedef enum logic [2:0] {AW_IDLE, W_FORWARD, W_BYPASS, W_WAIT_ART_CLR, W_DROP, B_FORWARD, + B_INJECT} w_state_t; + w_state_t w_state_d, w_state_q; + + // AR and R Channel + + // Time-Invariant Signal Assignments: AR and R payloads pass straight through, except the lock bit + assign mst_ar_addr_o = slv_ar_addr_i; + assign mst_ar_prot_o = slv_ar_prot_i; + assign mst_ar_region_o = slv_ar_region_i; + assign mst_ar_len_o = slv_ar_len_i; + assign mst_ar_size_o = slv_ar_size_i; + assign mst_ar_burst_o = slv_ar_burst_i; + assign mst_ar_lock_o = 1'b0; // the lock bit is never forwarded downstream + assign mst_ar_cache_o = slv_ar_cache_i; + assign mst_ar_qos_o = slv_ar_qos_i; + assign mst_ar_id_o = slv_ar_id_i; + assign mst_ar_user_o = slv_ar_user_i; + assign slv_r_data_o = mst_r_data_i; + assign slv_r_last_o = mst_r_last_i; + assign slv_r_id_o = mst_r_id_i; + assign slv_r_user_o = mst_r_user_i; + + // FSM for Time-Variant Signal Assignments (handshakes, read response and reservation requests) + always_comb begin + mst_ar_valid_o = 1'b0; + slv_ar_ready_o = 1'b0; + mst_r_ready_o = 1'b0; + slv_r_valid_o = 1'b0; + slv_r_resp_o = '0; + art_set_addr = '0; + art_set_id = '0; + art_set_req = 1'b0; + rd_clr_addr = '0; + rd_clr_req = 1'b0; // NOTE(review): no state of this FSM ever raises rd_clr_req + r_excl_d = r_excl_q; + r_state_d = r_state_q; + + case (r_state_q) + + R_IDLE: begin + if (slv_ar_valid_i) begin + if (slv_ar_addr_i >= ADDR_BEGIN && slv_ar_addr_i <= ADDR_END && slv_ar_lock_i && + slv_ar_len_i == 8'h00) begin + // Inside exclusively-accessible address range and exclusive access and no + // burst + art_set_addr = slv_ar_addr_i; + art_set_id = slv_ar_id_i; + art_set_req = 1'b1; // ask the reservation table to record this (address, ID) pair + r_excl_d = 1'b1; // remembered so that R_WAIT_R can mark the read response as exclusive + if (art_set_gnt) begin // the AR is only forwarded once the set request has been granted + mst_ar_valid_o = 1'b1; + if (mst_ar_ready_i) begin + slv_ar_ready_o = 1'b1; + r_state_d = R_WAIT_R; + end else begin + r_state_d = R_WAIT_AR; + end + end + end else begin + // Outside exclusively-accessible address range or regular access or burst + r_excl_d = 1'b0; + mst_ar_valid_o = 1'b1; + if 
(mst_ar_ready_i) begin + slv_ar_ready_o = 1'b1; + r_state_d = R_WAIT_R; + end else begin + r_state_d = R_WAIT_AR; + end + end + end + end + + R_WAIT_AR: begin + mst_ar_valid_o = slv_ar_valid_i; + slv_ar_ready_o = mst_ar_ready_i; + if (mst_ar_ready_i && mst_ar_valid_o) begin + r_state_d = R_WAIT_R; + end + end + + R_WAIT_R: begin + mst_r_ready_o = slv_r_ready_i; + slv_r_valid_o = mst_r_valid_i; + if (mst_r_resp_i[1] == 1'b0) begin + slv_r_resp_o = {1'b0, r_excl_q}; + end else begin + slv_r_resp_o = mst_r_resp_i; + end + if (mst_r_valid_i && mst_r_ready_o && mst_r_last_i) begin + r_excl_d = 1'b0; + r_state_d = R_IDLE; + end + end + + default: begin + r_state_d = R_IDLE; + end + endcase + end + + // AW, W and B Channel + + // Time-Invariant Signal Assignments + assign mst_aw_addr_o = slv_aw_addr_i; + assign mst_aw_prot_o = slv_aw_prot_i; + assign mst_aw_region_o = slv_aw_region_i; + assign mst_aw_atop_o = slv_aw_atop_i; + assign mst_aw_len_o = slv_aw_len_i; + assign mst_aw_size_o = slv_aw_size_i; + assign mst_aw_burst_o = slv_aw_burst_i; + assign mst_aw_lock_o = 1'b0; + assign mst_aw_cache_o = slv_aw_cache_i; + assign mst_aw_qos_o = slv_aw_qos_i; + assign mst_aw_id_o = slv_aw_id_i; + assign mst_aw_user_o = slv_aw_user_i; + assign mst_w_data_o = slv_w_data_i; + assign mst_w_strb_o = slv_w_strb_i; + assign mst_w_user_o = slv_w_user_i; + assign mst_w_last_o = slv_w_last_i; + + always_comb begin + w_addr_d = w_addr_q; + w_id_d = w_id_q; + if (slv_aw_valid_i && slv_aw_ready_o) begin + w_addr_d = slv_aw_addr_i; + w_id_d = slv_aw_id_i; + end + end + + // FSM for Time-Variant Signal Assignments + always_comb begin + mst_aw_valid_o = 1'b0; + slv_aw_ready_o = 1'b0; + mst_w_valid_o = 1'b0; + slv_w_ready_o = 1'b0; + slv_b_valid_o = 1'b0; + mst_b_ready_o = 1'b0; + slv_b_resp_o = '0; + slv_b_id_o = '0; + slv_b_user_o = '0; + art_check_addr = '0; + art_check_id = '0; + art_check_req = 1'b0; + wr_clr_addr = '0; + wr_clr_req = 1'b0; + b_excl_d = b_excl_q; + w_state_d = w_state_q; + + 
case (w_state_q) + + AW_IDLE: begin + if (slv_aw_valid_i) begin + // New AW, and W channel is idle + if (slv_aw_addr_i >= ADDR_BEGIN && slv_aw_addr_i <= ADDR_END) begin + // Inside exclusively-accessible address range + if (slv_aw_lock_i && slv_aw_len_i == 8'h00) begin + // Exclusive access and no burst, so check if reservation exists + art_check_addr = slv_aw_addr_i; + art_check_id = slv_aw_id_i; + art_check_req = 1'b1; + if (art_check_gnt) begin + if (art_check_res) begin + // Yes, so forward downstream + mst_aw_valid_o = 1'b1; + if (mst_aw_ready_i) begin + slv_aw_ready_o = 1'b1; + b_excl_d = 1'b1; + w_state_d = W_FORWARD; + end + end else begin + // No, drop in W channel. + slv_aw_ready_o = 1'b1; + w_state_d = W_DROP; + end + end + end else begin + // Non-exclusive access or burst, so forward downstream + mst_aw_valid_o = 1'b1; + if (mst_aw_ready_i) begin + slv_aw_ready_o = 1'b1; + w_state_d = W_FORWARD; + end + end + end else begin + // Outside exclusively-accessible address range, so bypass any + // modifications. 
+ mst_aw_valid_o = 1'b1; + slv_aw_ready_o = mst_aw_ready_i; + if (slv_aw_ready_o) begin + w_state_d = W_BYPASS; + end + end + end + end + + W_FORWARD: begin + mst_w_valid_o = slv_w_valid_i; + slv_w_ready_o = mst_w_ready_i; + if (slv_w_valid_i && slv_w_ready_o && slv_w_last_i) begin + wr_clr_addr = w_addr_q; + wr_clr_req = 1'b1; + if (wr_clr_gnt) begin + w_state_d = B_FORWARD; + end else begin + w_state_d = W_WAIT_ART_CLR; + end + end + end + + W_BYPASS: begin + mst_w_valid_o = slv_w_valid_i; + slv_w_ready_o = mst_w_ready_i; + if (slv_w_valid_i && slv_w_ready_o && slv_w_last_i) begin + w_state_d = B_FORWARD; + end + end + + W_WAIT_ART_CLR: begin + wr_clr_addr = w_addr_q; + wr_clr_req = 1'b1; + if (wr_clr_gnt) begin + w_state_d = B_FORWARD; + end + end + + W_DROP: begin + slv_w_ready_o = 1'b1; + if (slv_w_valid_i && slv_w_last_i) begin + w_state_d = B_INJECT; + end + end + + B_FORWARD: begin + mst_b_ready_o = slv_b_ready_i; + slv_b_valid_o = mst_b_valid_i; + slv_b_resp_o[1] = mst_b_resp_i[1]; + slv_b_resp_o[0] = (mst_b_resp_i[1] == 1'b0) ? 
b_excl_q : mst_b_resp_i[0]; + slv_b_user_o = mst_b_user_i; + slv_b_id_o = mst_b_id_i; + if (slv_b_valid_o && slv_b_ready_i) begin + b_excl_d = 1'b0; + w_state_d = AW_IDLE; + end + end + + B_INJECT: begin + slv_b_id_o = w_id_q; + slv_b_resp_o = 2'b00; + slv_b_valid_o = 1'b1; + if (slv_b_ready_i) begin + w_state_d = AW_IDLE; + end + end + + default: begin + w_state_d = AW_IDLE; + end + endcase + end + + // AXI Reservation Table: set by the read FSM, checked by the write FSM, cleared through the arbiter below + axi_res_tbl #( + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH) + ) i_art ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clr_addr_i (art_clr_addr), + .clr_req_i (art_clr_req), + .clr_gnt_o (art_clr_gnt), + .set_addr_i (art_set_addr), + .set_id_i (art_set_id), + .set_req_i (art_set_req), + .set_gnt_o (art_set_gnt), + .check_addr_i (art_check_addr), + .check_id_i (art_check_id), + .check_res_o (art_check_res), + .check_req_i (art_check_req), + .check_gnt_o (art_check_gnt) + ); + + // ART Clear Arbiter: merges the read-side and write-side clear requests onto the table's single clear port + stream_arbiter #( + .DATA_T (logic[AXI_ADDR_WIDTH-1:0]), + .N_INP (2) + ) i_non_excl_acc_arb ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .inp_data_i ({rd_clr_addr, wr_clr_addr}), + .inp_valid_i ({rd_clr_req, wr_clr_req}), + .inp_ready_o ({rd_clr_gnt, wr_clr_gnt}), + .oup_data_o (art_clr_addr), + .oup_valid_o (art_clr_req), + .oup_ready_i (art_clr_gnt) + ); + + // Registers: FSM states, exclusive flags and latched AW address/ID, with asynchronous active-low reset + always_ff @(posedge clk_i, negedge rst_ni) begin + if (~rst_ni) begin + b_excl_q <= 1'b0; + r_excl_q <= 1'b0; + r_state_q <= R_IDLE; + w_addr_q <= '0; + w_id_q <= '0; + w_state_q <= AW_IDLE; + end else begin + b_excl_q <= b_excl_d; + r_excl_q <= r_excl_d; + r_state_q <= r_state_d; + w_addr_q <= w_addr_d; + w_id_q <= w_id_d; + w_state_q <= w_state_d; + end + end + + // Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (ADDR_END > ADDR_BEGIN) + else $fatal(1, "ADDR_END must be greater than ADDR_BEGIN!"); + assert (AXI_ADDR_WIDTH > 0) + else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!"); + assert (AXI_DATA_WIDTH > 0) + else $fatal(1, "AXI_DATA_WIDTH must be greater than 0!"); + assert (AXI_ID_WIDTH > 0) + else $fatal(1, "AXI_ID_WIDTH must be greater than 0!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv new file mode 100644 index 0000000..3eb409b --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv @@ -0,0 +1,148 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Wrapper for the AXI RISC-V LR/SC Adapter that exposes AXI SystemVerilog interfaces. +// +// See the header of `axi_riscv_lrsc` for a description. 
+// +// Maintainer: Andreas Kurth + +module axi_riscv_lrsc_wrap #( + /// Exclusively-accessible address range (closed interval from ADDR_BEGIN to ADDR_END) + parameter longint unsigned ADDR_BEGIN = 0, + parameter longint unsigned ADDR_END = 0, + /// AXI Parameters + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + /// Derived Parameters (do NOT change manually!) + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + AXI_BUS.Master mst, + AXI_BUS.Slave slv +); + + axi_riscv_lrsc #( + .ADDR_BEGIN (ADDR_BEGIN), + .ADDR_END (ADDR_END), + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH), + .AXI_USER_WIDTH (AXI_USER_WIDTH) + ) i_lrsc ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_aw_addr_i ( slv.aw_addr ), + .slv_aw_prot_i ( slv.aw_prot ), + .slv_aw_region_i ( slv.aw_region ), + .slv_aw_atop_i ( slv.aw_atop ), + .slv_aw_len_i ( slv.aw_len ), + .slv_aw_size_i ( slv.aw_size ), + .slv_aw_burst_i ( slv.aw_burst ), + .slv_aw_lock_i ( slv.aw_lock ), + .slv_aw_cache_i ( slv.aw_cache ), + .slv_aw_qos_i ( slv.aw_qos ), + .slv_aw_id_i ( slv.aw_id ), + .slv_aw_user_i ( slv.aw_user ), + .slv_aw_ready_o ( slv.aw_ready ), + .slv_aw_valid_i ( slv.aw_valid ), + .slv_ar_addr_i ( slv.ar_addr ), + .slv_ar_prot_i ( slv.ar_prot ), + .slv_ar_region_i ( slv.ar_region ), + .slv_ar_len_i ( slv.ar_len ), + .slv_ar_size_i ( slv.ar_size ), + .slv_ar_burst_i ( slv.ar_burst ), + .slv_ar_lock_i ( slv.ar_lock ), + .slv_ar_cache_i ( slv.ar_cache ), + .slv_ar_qos_i ( slv.ar_qos ), + .slv_ar_id_i ( slv.ar_id ), + .slv_ar_user_i ( slv.ar_user ), + .slv_ar_ready_o ( slv.ar_ready ), + .slv_ar_valid_i ( slv.ar_valid ), + .slv_w_data_i ( slv.w_data ), + .slv_w_strb_i ( slv.w_strb ), + .slv_w_user_i ( slv.w_user ), + .slv_w_last_i ( slv.w_last ), + .slv_w_ready_o ( 
slv.w_ready ), + .slv_w_valid_i ( slv.w_valid ), + .slv_r_data_o ( slv.r_data ), + .slv_r_resp_o ( slv.r_resp ), + .slv_r_last_o ( slv.r_last ), + .slv_r_id_o ( slv.r_id ), + .slv_r_user_o ( slv.r_user ), + .slv_r_ready_i ( slv.r_ready ), + .slv_r_valid_o ( slv.r_valid ), + .slv_b_resp_o ( slv.b_resp ), + .slv_b_id_o ( slv.b_id ), + .slv_b_user_o ( slv.b_user ), + .slv_b_ready_i ( slv.b_ready ), + .slv_b_valid_o ( slv.b_valid ), + .mst_aw_addr_o ( mst.aw_addr ), + .mst_aw_prot_o ( mst.aw_prot ), + .mst_aw_region_o ( mst.aw_region ), + .mst_aw_atop_o ( mst.aw_atop ), + .mst_aw_len_o ( mst.aw_len ), + .mst_aw_size_o ( mst.aw_size ), + .mst_aw_burst_o ( mst.aw_burst ), + .mst_aw_lock_o ( mst.aw_lock ), + .mst_aw_cache_o ( mst.aw_cache ), + .mst_aw_qos_o ( mst.aw_qos ), + .mst_aw_id_o ( mst.aw_id ), + .mst_aw_user_o ( mst.aw_user ), + .mst_aw_ready_i ( mst.aw_ready ), + .mst_aw_valid_o ( mst.aw_valid ), + .mst_ar_addr_o ( mst.ar_addr ), + .mst_ar_prot_o ( mst.ar_prot ), + .mst_ar_region_o ( mst.ar_region ), + .mst_ar_len_o ( mst.ar_len ), + .mst_ar_size_o ( mst.ar_size ), + .mst_ar_burst_o ( mst.ar_burst ), + .mst_ar_lock_o ( mst.ar_lock ), + .mst_ar_cache_o ( mst.ar_cache ), + .mst_ar_qos_o ( mst.ar_qos ), + .mst_ar_id_o ( mst.ar_id ), + .mst_ar_user_o ( mst.ar_user ), + .mst_ar_ready_i ( mst.ar_ready ), + .mst_ar_valid_o ( mst.ar_valid ), + .mst_w_data_o ( mst.w_data ), + .mst_w_strb_o ( mst.w_strb ), + .mst_w_user_o ( mst.w_user ), + .mst_w_last_o ( mst.w_last ), + .mst_w_ready_i ( mst.w_ready ), + .mst_w_valid_o ( mst.w_valid ), + .mst_r_data_i ( mst.r_data ), + .mst_r_resp_i ( mst.r_resp ), + .mst_r_last_i ( mst.r_last ), + .mst_r_id_i ( mst.r_id ), + .mst_r_user_i ( mst.r_user ), + .mst_r_ready_o ( mst.r_ready ), + .mst_r_valid_i ( mst.r_valid ), + .mst_b_resp_i ( mst.b_resp ), + .mst_b_id_i ( mst.b_id ), + .mst_b_user_i ( mst.b_user ), + .mst_b_ready_o ( mst.b_ready ), + .mst_b_valid_i ( mst.b_valid ) + ); + + // Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (AXI_STRB_WIDTH == AXI_DATA_WIDTH/8) + else $fatal(1, "AXI_STRB_WIDTH must equal AXI_DATA_WIDTH/8!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/tb/ariane_axi_pkg.sv b/test/type_param/corev_apu/tb/ariane_axi_pkg.sv new file mode 100644 index 0000000..c750336 --- /dev/null +++ b/test/type_param/corev_apu/tb/ariane_axi_pkg.sv @@ -0,0 +1,109 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ * + * File: ariane_axi_pkg.sv + * Author: Florian Zaruba + * Date: 17.8.2018 + * + * Description: Contains Ariane's AXI channel, request and response struct types, including the user signals + */ + +package ariane_axi; + + localparam IdWidth = cva6_config_pkg::CVA6ConfigAxiIdWidth; // Recommended by AXI standard + localparam UserWidth = cva6_config_pkg::CVA6ConfigDataUserWidth; + localparam AddrWidth = cva6_config_pkg::CVA6ConfigAxiAddrWidth; + localparam DataWidth = cva6_config_pkg::CVA6ConfigAxiDataWidth; + localparam StrbWidth = DataWidth / 8; + + typedef logic [IdWidth-1:0] id_t; + typedef logic [AddrWidth-1:0] addr_t; + typedef logic [DataWidth-1:0] data_t; + typedef logic [StrbWidth-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; + + // AW Channel + typedef struct packed { + id_t id; + addr_t addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + axi_pkg::atop_t atop; + user_t user; + } aw_chan_t; + + // W Channel - AXI4 doesn't define a wid + typedef struct packed { + data_t data; + strb_t strb; + logic last; + user_t user; + } w_chan_t; + + // B Channel + typedef struct packed { + id_t id; + axi_pkg::resp_t resp; + user_t user; + } b_chan_t; + + // AR Channel + typedef struct packed { + id_t id; + addr_t addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + user_t user; + } ar_chan_t; + + // R Channel + typedef struct packed { + id_t id; + data_t data; + axi_pkg::resp_t resp; + logic last; + user_t user; + } r_chan_t; + + // Request/Response structs + typedef struct packed { + aw_chan_t aw; + logic aw_valid; + w_chan_t w; + logic w_valid; + logic b_ready; + ar_chan_t ar; + logic ar_valid; + logic r_ready; + } req_t; + + typedef struct packed { + logic aw_ready; + logic ar_ready; + logic w_ready; + logic b_valid; 
+ b_chan_t b; + logic r_valid; + r_chan_t r; + } resp_t; + +endpackage diff --git a/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv b/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv new file mode 100644 index 0000000..378b0d6 --- /dev/null +++ b/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv @@ -0,0 +1,102 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: ariane_axi_soc_pkg.sv + * Author: Florian Zaruba + * Date: 17.8.2018 + * + * Description: Contains Ariane's AXI struct types on the SoC side (with the wider slave ID), including the user signals + */ + +package ariane_axi_soc; + + localparam UserWidth = ariane_axi::UserWidth; + localparam AddrWidth = ariane_axi::AddrWidth; + localparam DataWidth = ariane_axi::DataWidth; + localparam StrbWidth = DataWidth / 8; + localparam IdWidth = ariane_axi::IdWidth; + localparam IdWidthSlave = IdWidth + $clog2(ariane_soc::NrSlaves); + + typedef logic [IdWidth-1:0] id_t; + typedef logic [IdWidthSlave-1:0] id_slv_t; + typedef logic [AddrWidth-1:0] addr_t; + typedef logic [DataWidth-1:0] data_t; + typedef logic [StrbWidth-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; + + // AW Channel - Slave + typedef struct packed { + id_slv_t id; + addr_t addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t 
region; + axi_pkg::atop_t atop; + user_t user; + } aw_chan_slv_t; + + // B Channel - Slave + typedef struct packed { + id_slv_t id; + axi_pkg::resp_t resp; + user_t user; + } b_chan_slv_t; + + // AR Channel - Slave + typedef struct packed { + id_slv_t id; + addr_t addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + user_t user; + } ar_chan_slv_t; + + // R Channel - Slave + typedef struct packed { + id_slv_t id; + data_t data; + axi_pkg::resp_t resp; + logic last; + user_t user; + } r_chan_slv_t; + + typedef struct packed { + aw_chan_slv_t aw; + logic aw_valid; + ariane_axi::w_chan_t w; + logic w_valid; + logic b_ready; + ar_chan_slv_t ar; + logic ar_valid; + logic r_ready; + } req_slv_t; + + typedef struct packed { + logic aw_ready; + logic ar_ready; + logic w_ready; + logic b_valid; + b_chan_slv_t b; + logic r_valid; + r_chan_slv_t r; + } resp_slv_t; + +endpackage diff --git a/test/type_param/corev_apu/tb/ariane_peripherals.sv b/test/type_param/corev_apu/tb/ariane_peripherals.sv new file mode 100644 index 0000000..9865af4 --- /dev/null +++ b/test/type_param/corev_apu/tb/ariane_peripherals.sv @@ -0,0 +1,619 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +`include "register_interface/assign.svh" +`include "register_interface/typedef.svh" + +// Xilinx Peripherals +module ariane_peripherals #( + parameter int AxiAddrWidth = -1, + parameter int AxiDataWidth = -1, + parameter int AxiIdWidth = -1, + parameter int AxiUserWidth = 1, + parameter bit InclUART = 1, + parameter bit InclSPI = 0, + parameter bit InclEthernet = 0, + parameter bit InclGPIO = 0, + parameter bit InclTimer = 1 +) ( + input logic clk_i , // Clock + input logic rst_ni , // Asynchronous reset active low + AXI_BUS.Slave plic , + AXI_BUS.Slave uart , + AXI_BUS.Slave spi , + AXI_BUS.Slave ethernet , + AXI_BUS.Slave timer , + output logic [1:0] irq_o , + // UART + input logic rx_i , + output logic tx_o , + // Ethernet + input wire eth_txck , + input wire eth_rxck , + input wire eth_rxctl , + input wire [3:0] eth_rxd , + output wire eth_rst_n , + output wire eth_tx_en , + output wire [3:0] eth_txd , + inout wire phy_mdio , + output logic eth_mdc , + // MDIO Interface + inout mdio , + output mdc , + // SPI + output logic spi_clk_o , + output logic spi_mosi , + input logic spi_miso , + output logic spi_ss +); + + // --------------- + // 1. 
PLIC + // --------------- + logic [ariane_soc::NumSources-1:0] irq_sources; + + // Unused interrupt sources + assign irq_sources[ariane_soc::NumSources-1:7] = '0; + + REG_BUS #( + .ADDR_WIDTH ( 32 ), + .DATA_WIDTH ( 32 ) + ) reg_bus (clk_i); + + logic plic_penable; + logic plic_pwrite; + logic [31:0] plic_paddr; + logic plic_psel; + logic [31:0] plic_pwdata; + logic [31:0] plic_prdata; + logic plic_pready; + logic plic_pslverr; + + axi2apb_64_32 #( + .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ), + .AXI4_RDATA_WIDTH ( AxiDataWidth ), + .AXI4_WDATA_WIDTH ( AxiDataWidth ), + .AXI4_ID_WIDTH ( AxiIdWidth ), + .AXI4_USER_WIDTH ( AxiUserWidth ), + .BUFF_DEPTH_SLAVE ( 2 ), + .APB_ADDR_WIDTH ( 32 ) + ) i_axi2apb_64_32_plic ( + .ACLK ( clk_i ), + .ARESETn ( rst_ni ), + .test_en_i ( 1'b0 ), + .AWID_i ( plic.aw_id ), + .AWADDR_i ( plic.aw_addr ), + .AWLEN_i ( plic.aw_len ), + .AWSIZE_i ( plic.aw_size ), + .AWBURST_i ( plic.aw_burst ), + .AWLOCK_i ( plic.aw_lock ), + .AWCACHE_i ( plic.aw_cache ), + .AWPROT_i ( plic.aw_prot ), + .AWREGION_i( plic.aw_region ), + .AWUSER_i ( plic.aw_user ), + .AWQOS_i ( plic.aw_qos ), + .AWVALID_i ( plic.aw_valid ), + .AWREADY_o ( plic.aw_ready ), + .WDATA_i ( plic.w_data ), + .WSTRB_i ( plic.w_strb ), + .WLAST_i ( plic.w_last ), + .WUSER_i ( plic.w_user ), + .WVALID_i ( plic.w_valid ), + .WREADY_o ( plic.w_ready ), + .BID_o ( plic.b_id ), + .BRESP_o ( plic.b_resp ), + .BVALID_o ( plic.b_valid ), + .BUSER_o ( plic.b_user ), + .BREADY_i ( plic.b_ready ), + .ARID_i ( plic.ar_id ), + .ARADDR_i ( plic.ar_addr ), + .ARLEN_i ( plic.ar_len ), + .ARSIZE_i ( plic.ar_size ), + .ARBURST_i ( plic.ar_burst ), + .ARLOCK_i ( plic.ar_lock ), + .ARCACHE_i ( plic.ar_cache ), + .ARPROT_i ( plic.ar_prot ), + .ARREGION_i( plic.ar_region ), + .ARUSER_i ( plic.ar_user ), + .ARQOS_i ( plic.ar_qos ), + .ARVALID_i ( plic.ar_valid ), + .ARREADY_o ( plic.ar_ready ), + .RID_o ( plic.r_id ), + .RDATA_o ( plic.r_data ), + .RRESP_o ( plic.r_resp ), + .RLAST_o ( plic.r_last ), + .RUSER_o 
( plic.r_user ), + .RVALID_o ( plic.r_valid ), + .RREADY_i ( plic.r_ready ), + .PENABLE ( plic_penable ), + .PWRITE ( plic_pwrite ), + .PADDR ( plic_paddr ), + .PSEL ( plic_psel ), + .PWDATA ( plic_pwdata ), + .PRDATA ( plic_prdata ), + .PREADY ( plic_pready ), + .PSLVERR ( plic_pslverr ) + ); + + apb_to_reg i_apb_to_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .penable_i ( plic_penable ), + .pwrite_i ( plic_pwrite ), + .paddr_i ( plic_paddr ), + .psel_i ( plic_psel ), + .pwdata_i ( plic_pwdata ), + .prdata_o ( plic_prdata ), + .pready_o ( plic_pready ), + .pslverr_o ( plic_pslverr ), + .reg_o ( reg_bus ) + ); + + // define reg type according to REG_BUS above + `REG_BUS_TYPEDEF_ALL(plic, logic[31:0], logic[31:0], logic[3:0]) + plic_req_t plic_req; + plic_rsp_t plic_rsp; + + // assign REG_BUS.out to (req_t, rsp_t) pair + `REG_BUS_ASSIGN_TO_REQ(plic_req, reg_bus) + `REG_BUS_ASSIGN_FROM_RSP(reg_bus, plic_rsp) + + plic_top #( + .N_SOURCE ( ariane_soc::NumSources ), + .N_TARGET ( ariane_soc::NumTargets ), + .MAX_PRIO ( ariane_soc::MaxPriority ), + .reg_req_t ( plic_req_t ), + .reg_rsp_t ( plic_rsp_t ) + ) i_plic ( + .clk_i, + .rst_ni, + .req_i ( plic_req ), + .resp_o ( plic_rsp ), + .le_i ( '0 ), // 0:level 1:edge + .irq_sources_i ( irq_sources ), + .eip_targets_o ( irq_o ) + ); + + // --------------- + // 2. 
UART + // --------------- + logic uart_penable; + logic uart_pwrite; + logic [31:0] uart_paddr; + logic uart_psel; + logic [31:0] uart_pwdata; + logic [31:0] uart_prdata; + logic uart_pready; + logic uart_pslverr; + + axi2apb_64_32 #( + .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ), + .AXI4_RDATA_WIDTH ( AxiDataWidth ), + .AXI4_WDATA_WIDTH ( AxiDataWidth ), + .AXI4_ID_WIDTH ( AxiIdWidth ), + .AXI4_USER_WIDTH ( AxiUserWidth ), + .BUFF_DEPTH_SLAVE ( 2 ), + .APB_ADDR_WIDTH ( 32 ) + ) i_axi2apb_64_32_uart ( + .ACLK ( clk_i ), + .ARESETn ( rst_ni ), + .test_en_i ( 1'b0 ), + .AWID_i ( uart.aw_id ), + .AWADDR_i ( uart.aw_addr ), + .AWLEN_i ( uart.aw_len ), + .AWSIZE_i ( uart.aw_size ), + .AWBURST_i ( uart.aw_burst ), + .AWLOCK_i ( uart.aw_lock ), + .AWCACHE_i ( uart.aw_cache ), + .AWPROT_i ( uart.aw_prot ), + .AWREGION_i( uart.aw_region ), + .AWUSER_i ( uart.aw_user ), + .AWQOS_i ( uart.aw_qos ), + .AWVALID_i ( uart.aw_valid ), + .AWREADY_o ( uart.aw_ready ), + .WDATA_i ( uart.w_data ), + .WSTRB_i ( uart.w_strb ), + .WLAST_i ( uart.w_last ), + .WUSER_i ( uart.w_user ), + .WVALID_i ( uart.w_valid ), + .WREADY_o ( uart.w_ready ), + .BID_o ( uart.b_id ), + .BRESP_o ( uart.b_resp ), + .BVALID_o ( uart.b_valid ), + .BUSER_o ( uart.b_user ), + .BREADY_i ( uart.b_ready ), + .ARID_i ( uart.ar_id ), + .ARADDR_i ( uart.ar_addr ), + .ARLEN_i ( uart.ar_len ), + .ARSIZE_i ( uart.ar_size ), + .ARBURST_i ( uart.ar_burst ), + .ARLOCK_i ( uart.ar_lock ), + .ARCACHE_i ( uart.ar_cache ), + .ARPROT_i ( uart.ar_prot ), + .ARREGION_i( uart.ar_region ), + .ARUSER_i ( uart.ar_user ), + .ARQOS_i ( uart.ar_qos ), + .ARVALID_i ( uart.ar_valid ), + .ARREADY_o ( uart.ar_ready ), + .RID_o ( uart.r_id ), + .RDATA_o ( uart.r_data ), + .RRESP_o ( uart.r_resp ), + .RLAST_o ( uart.r_last ), + .RUSER_o ( uart.r_user ), + .RVALID_o ( uart.r_valid ), + .RREADY_i ( uart.r_ready ), + .PENABLE ( uart_penable ), + .PWRITE ( uart_pwrite ), + .PADDR ( uart_paddr ), + .PSEL ( uart_psel ), + .PWDATA ( uart_pwdata ), + .PRDATA 
( uart_prdata ), + .PREADY ( uart_pready ), + .PSLVERR ( uart_pslverr ) + ); + + if (InclUART) begin : gen_uart + apb_uart i_apb_uart ( + .CLK ( clk_i ), + .RSTN ( rst_ni ), + .PSEL ( uart_psel ), + .PENABLE ( uart_penable ), + .PWRITE ( uart_pwrite ), + .PADDR ( uart_paddr[4:2] ), + .PWDATA ( uart_pwdata ), + .PRDATA ( uart_prdata ), + .PREADY ( uart_pready ), + .PSLVERR ( uart_pslverr ), + .INT ( irq_sources[0] ), + .OUT1N ( ), // keep open + .OUT2N ( ), // keep open + .RTSN ( ), // no flow control + .DTRN ( ), // no flow control + .CTSN ( 1'b0 ), + .DSRN ( 1'b0 ), + .DCDN ( 1'b0 ), + .RIN ( 1'b0 ), + .SIN ( rx_i ), + .SOUT ( tx_o ) + ); + end else begin + assign irq_sources[0] = 1'b0; + /* pragma translate_off */ + mock_uart i_mock_uart ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .penable_i ( uart_penable ), + .pwrite_i ( uart_pwrite ), + .paddr_i ( uart_paddr ), + .psel_i ( uart_psel ), + .pwdata_i ( uart_pwdata ), + .prdata_o ( uart_prdata ), + .pready_o ( uart_pready ), + .pslverr_o ( uart_pslverr ) + ); + /* pragma translate_on */ + end + + // --------------- + // 3. 
SPI + // --------------- + if (InclSPI) begin : gen_spi + logic [31:0] s_axi_spi_awaddr; + logic [7:0] s_axi_spi_awlen; + logic [2:0] s_axi_spi_awsize; + logic [1:0] s_axi_spi_awburst; + logic [0:0] s_axi_spi_awlock; + logic [3:0] s_axi_spi_awcache; + logic [2:0] s_axi_spi_awprot; + logic [3:0] s_axi_spi_awregion; + logic [3:0] s_axi_spi_awqos; + logic s_axi_spi_awvalid; + logic s_axi_spi_awready; + logic [31:0] s_axi_spi_wdata; + logic [3:0] s_axi_spi_wstrb; + logic s_axi_spi_wlast; + logic s_axi_spi_wvalid; + logic s_axi_spi_wready; + logic [1:0] s_axi_spi_bresp; + logic s_axi_spi_bvalid; + logic s_axi_spi_bready; + logic [31:0] s_axi_spi_araddr; + logic [7:0] s_axi_spi_arlen; + logic [2:0] s_axi_spi_arsize; + logic [1:0] s_axi_spi_arburst; + logic [0:0] s_axi_spi_arlock; + logic [3:0] s_axi_spi_arcache; + logic [2:0] s_axi_spi_arprot; + logic [3:0] s_axi_spi_arregion; + logic [3:0] s_axi_spi_arqos; + logic s_axi_spi_arvalid; + logic s_axi_spi_arready; + logic [31:0] s_axi_spi_rdata; + logic [1:0] s_axi_spi_rresp; + logic s_axi_spi_rlast; + logic s_axi_spi_rvalid; + logic s_axi_spi_rready; + + xlnx_axi_clock_converter i_xlnx_axi_clock_converter_spi ( + .s_axi_aclk ( clk_i ), + .s_axi_aresetn ( rst_ni ), + + .s_axi_awid ( spi.aw_id ), + .s_axi_awaddr ( spi.aw_addr[31:0] ), + .s_axi_awlen ( spi.aw_len ), + .s_axi_awsize ( spi.aw_size ), + .s_axi_awburst ( spi.aw_burst ), + .s_axi_awlock ( spi.aw_lock ), + .s_axi_awcache ( spi.aw_cache ), + .s_axi_awprot ( spi.aw_prot ), + .s_axi_awregion ( spi.aw_region ), + .s_axi_awqos ( spi.aw_qos ), + .s_axi_awvalid ( spi.aw_valid ), + .s_axi_awready ( spi.aw_ready ), + .s_axi_wdata ( spi.w_data ), + .s_axi_wstrb ( spi.w_strb ), + .s_axi_wlast ( spi.w_last ), + .s_axi_wvalid ( spi.w_valid ), + .s_axi_wready ( spi.w_ready ), + .s_axi_bid ( spi.b_id ), + .s_axi_bresp ( spi.b_resp ), + .s_axi_bvalid ( spi.b_valid ), + .s_axi_bready ( spi.b_ready ), + .s_axi_arid ( spi.ar_id ), + .s_axi_araddr ( spi.ar_addr[31:0] ), + .s_axi_arlen 
( spi.ar_len ), + .s_axi_arsize ( spi.ar_size ), + .s_axi_arburst ( spi.ar_burst ), + .s_axi_arlock ( spi.ar_lock ), + .s_axi_arcache ( spi.ar_cache ), + .s_axi_arprot ( spi.ar_prot ), + .s_axi_arregion ( spi.ar_region ), + .s_axi_arqos ( spi.ar_qos ), + .s_axi_arvalid ( spi.ar_valid ), + .s_axi_arready ( spi.ar_ready ), + .s_axi_rid ( spi.r_id ), + .s_axi_rdata ( spi.r_data ), + .s_axi_rresp ( spi.r_resp ), + .s_axi_rlast ( spi.r_last ), + .s_axi_rvalid ( spi.r_valid ), + .s_axi_rready ( spi.r_ready ), + + .m_axi_awaddr ( s_axi_spi_awaddr ), + .m_axi_awlen ( s_axi_spi_awlen ), + .m_axi_awsize ( s_axi_spi_awsize ), + .m_axi_awburst ( s_axi_spi_awburst ), + .m_axi_awlock ( s_axi_spi_awlock ), + .m_axi_awcache ( s_axi_spi_awcache ), + .m_axi_awprot ( s_axi_spi_awprot ), + .m_axi_awregion ( s_axi_spi_awregion ), + .m_axi_awqos ( s_axi_spi_awqos ), + .m_axi_awvalid ( s_axi_spi_awvalid ), + .m_axi_awready ( s_axi_spi_awready ), + .m_axi_wdata ( s_axi_spi_wdata ), + .m_axi_wstrb ( s_axi_spi_wstrb ), + .m_axi_wlast ( s_axi_spi_wlast ), + .m_axi_wvalid ( s_axi_spi_wvalid ), + .m_axi_wready ( s_axi_spi_wready ), + .m_axi_bresp ( s_axi_spi_bresp ), + .m_axi_bvalid ( s_axi_spi_bvalid ), + .m_axi_bready ( s_axi_spi_bready ), + .m_axi_araddr ( s_axi_spi_araddr ), + .m_axi_arlen ( s_axi_spi_arlen ), + .m_axi_arsize ( s_axi_spi_arsize ), + .m_axi_arburst ( s_axi_spi_arburst ), + .m_axi_arlock ( s_axi_spi_arlock ), + .m_axi_arcache ( s_axi_spi_arcache ), + .m_axi_arprot ( s_axi_spi_arprot ), + .m_axi_arregion ( s_axi_spi_arregion ), + .m_axi_arqos ( s_axi_spi_arqos ), + .m_axi_arvalid ( s_axi_spi_arvalid ), + .m_axi_arready ( s_axi_spi_arready ), + .m_axi_rdata ( s_axi_spi_rdata ), + .m_axi_rresp ( s_axi_spi_rresp ), + .m_axi_rlast ( s_axi_spi_rlast ), + .m_axi_rvalid ( s_axi_spi_rvalid ), + .m_axi_rready ( s_axi_spi_rready ) + ); + + xlnx_axi_quad_spi i_xlnx_axi_quad_spi ( + .ext_spi_clk ( clk_i ), + .s_axi4_aclk ( clk_i ), + .s_axi4_aresetn ( rst_ni ), + .s_axi4_awaddr ( 
s_axi_spi_awaddr[23:0] ), + .s_axi4_awlen ( s_axi_spi_awlen ), + .s_axi4_awsize ( s_axi_spi_awsize ), + .s_axi4_awburst ( s_axi_spi_awburst ), + .s_axi4_awlock ( s_axi_spi_awlock ), + .s_axi4_awcache ( s_axi_spi_awcache ), + .s_axi4_awprot ( s_axi_spi_awprot ), + .s_axi4_awvalid ( s_axi_spi_awvalid ), + .s_axi4_awready ( s_axi_spi_awready ), + .s_axi4_wdata ( s_axi_spi_wdata ), + .s_axi4_wstrb ( s_axi_spi_wstrb ), + .s_axi4_wlast ( s_axi_spi_wlast ), + .s_axi4_wvalid ( s_axi_spi_wvalid ), + .s_axi4_wready ( s_axi_spi_wready ), + .s_axi4_bresp ( s_axi_spi_bresp ), + .s_axi4_bvalid ( s_axi_spi_bvalid ), + .s_axi4_bready ( s_axi_spi_bready ), + .s_axi4_araddr ( s_axi_spi_araddr[23:0] ), + .s_axi4_arlen ( s_axi_spi_arlen ), + .s_axi4_arsize ( s_axi_spi_arsize ), + .s_axi4_arburst ( s_axi_spi_arburst ), + .s_axi4_arlock ( s_axi_spi_arlock ), + .s_axi4_arcache ( s_axi_spi_arcache ), + .s_axi4_arprot ( s_axi_spi_arprot ), + .s_axi4_arvalid ( s_axi_spi_arvalid ), + .s_axi4_arready ( s_axi_spi_arready ), + .s_axi4_rdata ( s_axi_spi_rdata ), + .s_axi4_rresp ( s_axi_spi_rresp ), + .s_axi4_rlast ( s_axi_spi_rlast ), + .s_axi4_rvalid ( s_axi_spi_rvalid ), + .s_axi4_rready ( s_axi_spi_rready ), + + .io0_i ( '0 ), + .io0_o ( spi_mosi ), + .io0_t ( '0 ), + .io1_i ( spi_miso ), + .io1_o ( ), + .io1_t ( '0 ), + .ss_i ( '0 ), + .ss_o ( spi_ss ), + .ss_t ( '0 ), + .sck_o ( spi_clk_o ), + .sck_i ( '0 ), + .sck_t ( ), + .ip2intc_irpt ( irq_sources[1] ) + // .ip2intc_irpt ( irq_sources[1] ) + ); + // assign irq_sources [1] = 1'b0; + end else begin + assign spi_clk_o = 1'b0; + assign spi_mosi = 1'b0; + assign spi_ss = 1'b0; + + assign irq_sources [1] = 1'b0; + assign spi.aw_ready = 1'b1; + assign spi.ar_ready = 1'b1; + assign spi.w_ready = 1'b1; + + assign spi.b_valid = spi.aw_valid; + assign spi.b_id = spi.aw_id; + assign spi.b_resp = axi_pkg::RESP_SLVERR; + assign spi.b_user = '0; + + assign spi.r_valid = spi.ar_valid; + assign spi.r_resp = axi_pkg::RESP_SLVERR; + assign spi.r_data = 
'hdeadbeef; + assign spi.r_last = 1'b1; + end + + + // --------------- + // 4. Ethernet + // --------------- + if (0) + begin + end + else + begin + assign irq_sources [2] = 1'b0; + assign ethernet.aw_ready = 1'b1; + assign ethernet.ar_ready = 1'b1; + assign ethernet.w_ready = 1'b1; + + assign ethernet.b_valid = ethernet.aw_valid; + assign ethernet.b_id = ethernet.aw_id; + assign ethernet.b_resp = axi_pkg::RESP_SLVERR; + assign ethernet.b_user = '0; + + assign ethernet.r_valid = ethernet.ar_valid; + assign ethernet.r_resp = axi_pkg::RESP_SLVERR; + assign ethernet.r_data = 'hdeadbeef; + assign ethernet.r_last = 1'b1; + end + + // --------------- + // 5. Timer + // --------------- + if (InclTimer) begin : gen_timer + logic timer_penable; + logic timer_pwrite; + logic [31:0] timer_paddr; + logic timer_psel; + logic [31:0] timer_pwdata; + logic [31:0] timer_prdata; + logic timer_pready; + logic timer_pslverr; + + axi2apb_64_32 #( + .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ), + .AXI4_RDATA_WIDTH ( AxiDataWidth ), + .AXI4_WDATA_WIDTH ( AxiDataWidth ), + .AXI4_ID_WIDTH ( AxiIdWidth ), + .AXI4_USER_WIDTH ( AxiUserWidth ), + .BUFF_DEPTH_SLAVE ( 2 ), + .APB_ADDR_WIDTH ( 32 ) + ) i_axi2apb_64_32_timer ( + .ACLK ( clk_i ), + .ARESETn ( rst_ni ), + .test_en_i ( 1'b0 ), + .AWID_i ( timer.aw_id ), + .AWADDR_i ( timer.aw_addr ), + .AWLEN_i ( timer.aw_len ), + .AWSIZE_i ( timer.aw_size ), + .AWBURST_i ( timer.aw_burst ), + .AWLOCK_i ( timer.aw_lock ), + .AWCACHE_i ( timer.aw_cache ), + .AWPROT_i ( timer.aw_prot ), + .AWREGION_i( timer.aw_region ), + .AWUSER_i ( timer.aw_user ), + .AWQOS_i ( timer.aw_qos ), + .AWVALID_i ( timer.aw_valid ), + .AWREADY_o ( timer.aw_ready ), + .WDATA_i ( timer.w_data ), + .WSTRB_i ( timer.w_strb ), + .WLAST_i ( timer.w_last ), + .WUSER_i ( timer.w_user ), + .WVALID_i ( timer.w_valid ), + .WREADY_o ( timer.w_ready ), + .BID_o ( timer.b_id ), + .BRESP_o ( timer.b_resp ), + .BVALID_o ( timer.b_valid ), + .BUSER_o ( timer.b_user ), + .BREADY_i ( timer.b_ready ), 
+        .ARID_i ( timer.ar_id ),
+        .ARADDR_i ( timer.ar_addr ),
+        .ARLEN_i ( timer.ar_len ),
+        .ARSIZE_i ( timer.ar_size ),
+        .ARBURST_i ( timer.ar_burst ),
+        .ARLOCK_i ( timer.ar_lock ),
+        .ARCACHE_i ( timer.ar_cache ),
+        .ARPROT_i ( timer.ar_prot ),
+        .ARREGION_i( timer.ar_region ),
+        .ARUSER_i ( timer.ar_user ),
+        .ARQOS_i ( timer.ar_qos ),
+        .ARVALID_i ( timer.ar_valid ),
+        .ARREADY_o ( timer.ar_ready ),
+        .RID_o ( timer.r_id ),
+        .RDATA_o ( timer.r_data ),
+        .RRESP_o ( timer.r_resp ),
+        .RLAST_o ( timer.r_last ),
+        .RUSER_o ( timer.r_user ),
+        .RVALID_o ( timer.r_valid ),
+        .RREADY_i ( timer.r_ready ),
+        .PENABLE ( timer_penable ),
+        .PWRITE ( timer_pwrite ),
+        .PADDR ( timer_paddr ),
+        .PSEL ( timer_psel ),
+        .PWDATA ( timer_pwdata ),
+        .PRDATA ( timer_prdata ),
+        .PREADY ( timer_pready ),
+        .PSLVERR ( timer_pslverr )
+    );
+
+    apb_timer #(
+        .APB_ADDR_WIDTH ( 32 ),
+        .TIMER_CNT ( 2 )
+    ) i_timer (
+        .HCLK ( clk_i ),
+        .HRESETn ( rst_ni ),
+        .PSEL ( timer_psel ),
+        .PENABLE ( timer_penable ),
+        .PWRITE ( timer_pwrite ),
+        .PADDR ( timer_paddr ),
+        .PWDATA ( timer_pwdata ),
+        .PRDATA ( timer_prdata ),
+        .PREADY ( timer_pready ),
+        .PSLVERR ( timer_pslverr ),
+        .irq_o ( irq_sources[6:3] )
+    );
+    end
+endmodule
diff --git a/test/type_param/corev_apu/tb/ariane_soc_pkg.sv b/test/type_param/corev_apu/tb/ariane_soc_pkg.sv
new file mode 100644
index 0000000..cc57f80
--- /dev/null
+++ b/test/type_param/corev_apu/tb/ariane_soc_pkg.sv
@@ -0,0 +1,68 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Description: Contains SoC information as constants
+package ariane_soc;
+  // M-Mode Hart, S-Mode Hart
+  localparam int unsigned NumTargets = 2; // forwarded to plic_top as N_TARGET
+  // PLIC interrupt sources: [0] UART, [1] SPI, [2] Ethernet, [6:3] timer, [NumSources-1:7] tied to '0 (see irq_sources)
+  localparam int unsigned NumSources = 30; // forwarded to plic_top as N_SOURCE
+  localparam int unsigned MaxPriority = 7; // forwarded to plic_top as MAX_PRIO
+
+  localparam NrSlaves = 2; // actually masters, but slaves on the crossbar
+
+  typedef enum int unsigned { // per-peripheral crossbar index: used as addr_map idx and to select master[...] in the testharness
+    DRAM = 0,
+    GPIO = 1,
+    Ethernet = 2,
+    SPI = 3,
+    Timer = 4,
+    UART = 5,
+    PLIC = 6,
+    CLINT = 7,
+    ROM = 8,
+    Debug = 9
+  } axi_slaves_t;
+
+  localparam NB_PERIPHERALS = Debug + 1; // enum values run 0..Debug, so this is the number of peripherals
+
+  // Size of each address region; the testharness address map uses <X>Base + <X>Length as end_addr
+  localparam logic[63:0] DebugLength = 64'h1000;
+  localparam logic[63:0] ROMLength = 64'h10000;
+  localparam logic[63:0] CLINTLength = 64'hC0000;
+  localparam logic[63:0] PLICLength = 64'h3FF_FFFF; // NOTE(review): only length that is not a round size - confirm whether 64'h400_0000 was intended
+  localparam logic[63:0] UARTLength = 64'h1000;
+  localparam logic[63:0] TimerLength = 64'h1000;
+  localparam logic[63:0] SPILength = 64'h800000;
+  localparam logic[63:0] EthernetLength = 64'h10000;
+  localparam logic[63:0] GPIOLength = 64'h1000;
+  localparam logic[63:0] DRAMLength = 64'h40000000; // 1GByte of DDR (split between two chips on Genesys2)
+  localparam logic[63:0] SRAMLength = 64'h1800000; // 24 MByte of SRAM
+  // Instantiate AXI protocol checkers
+  localparam bit GenProtocolChecker = 1'b0;
+
+  typedef enum logic [63:0] { // start address of each region (start_addr in the testharness address map)
+    DebugBase = 64'h0000_0000,
+    ROMBase = 64'h0001_0000,
+    CLINTBase = 64'h0200_0000,
+    PLICBase = 64'h0C00_0000,
+    UARTBase = 64'h1000_0000,
+    TimerBase = 64'h1800_0000,
+    SPIBase = 64'h2000_0000,
+    EthernetBase = 64'h3000_0000,
+    GPIOBase = 64'h4000_0000,
+    DRAMBase = 64'h8000_0000
+  } soc_bus_start_t;
+
+  localparam NrRegion = 1;
+  localparam logic [NrRegion-1:0][NB_PERIPHERALS-1:0] ValidRule = {{NrRegion * NB_PERIPHERALS}{1'b1}}; // one bit per (region, peripheral), all set
+
+endpackage
diff --git a/test/type_param/corev_apu/tb/ariane_testharness.sv b/test/type_param/corev_apu/tb/ariane_testharness.sv
new file mode 100644
index 0000000..3530da0
--- /dev/null
+++ b/test/type_param/corev_apu/tb/ariane_testharness.sv
@@ -0,0 +1,807 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.03.2017 +// Description: Test-harness for Ariane +// Instantiates an AXI-Bus and memories + +`include "axi/assign.svh" + +module ariane_testharness #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg, + parameter bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace), + // + parameter int unsigned AXI_USER_WIDTH = ariane_pkg::AXI_USER_WIDTH, + parameter int unsigned AXI_USER_EN = ariane_pkg::AXI_USER_EN, + parameter int unsigned AXI_ADDRESS_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter bit InclSimDTM = 1'b1, + parameter int unsigned NUM_WORDS = 2**25, // memory size + parameter bit StallRandomOutput = 1'b0, + parameter bit StallRandomInput = 1'b0 +) ( + input logic clk_i, + input logic rtc_i, + input logic rst_ni, + output logic [31:0] exit_o +); + + localparam [7:0] hart_id = '0; + + localparam type rvfi_instr_t = struct packed { + logic [config_pkg::NRET-1:0] valid; + logic [config_pkg::NRET*64-1:0] order; + logic [config_pkg::NRET*config_pkg::ILEN-1:0] insn; + logic [config_pkg::NRET-1:0] trap; + logic [config_pkg::NRET*riscv::XLEN-1:0] cause; + logic [config_pkg::NRET-1:0] halt; + logic [config_pkg::NRET-1:0] intr; + logic [config_pkg::NRET*2-1:0] mode; + logic [config_pkg::NRET*2-1:0] ixl; + logic [config_pkg::NRET*5-1:0] rs1_addr; + logic [config_pkg::NRET*5-1:0] rs2_addr; + logic [config_pkg::NRET*riscv::XLEN-1:0] rs1_rdata; + logic [config_pkg::NRET*riscv::XLEN-1:0] rs2_rdata; + logic [config_pkg::NRET*5-1:0] rd_addr; + logic [config_pkg::NRET*riscv::XLEN-1:0] rd_wdata; + logic [config_pkg::NRET*riscv::XLEN-1:0] pc_rdata; + logic [config_pkg::NRET*riscv::XLEN-1:0] pc_wdata; + logic [config_pkg::NRET*riscv::VLEN-1:0] mem_addr; + logic [config_pkg::NRET*riscv::PLEN-1:0] mem_paddr; + logic [config_pkg::NRET*(riscv::XLEN/8)-1:0] mem_rmask; + logic [config_pkg::NRET*(riscv::XLEN/8)-1:0] mem_wmask; + logic [config_pkg::NRET*riscv::XLEN-1:0] mem_rdata; + logic 
[config_pkg::NRET*riscv::XLEN-1:0] mem_wdata; + }; + + localparam type rvfi_probes_t = struct packed { + logic [ariane_pkg::TRANS_ID_BITS-1:0] issue_pointer; + logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] commit_pointer; + logic flush_unissued_instr; + logic decoded_instr_valid; + logic decoded_instr_ack; + logic flush; + logic issue_instr_ack; + logic fetch_entry_valid; + logic [31:0] instruction; + logic is_compressed; + riscv::xlen_t rs1_forwarding; + riscv::xlen_t rs2_forwarding; + ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr; + ariane_pkg::exception_t ex_commit; + riscv::priv_lvl_t priv_lvl; + ariane_pkg::lsu_ctrl_t lsu_ctrl; + logic [((CVA6Cfg.CvxifEn || CVA6Cfg.RVV) ? 5 : 4)-1:0][riscv::XLEN-1:0] wbdata; + logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack; + logic [riscv::PLEN-1:0] mem_paddr; + logic debug_mode; + logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata; + }; + + // disable test-enable + logic test_en; + logic ndmreset; + logic ndmreset_n; + logic debug_req_core; + + int jtag_enable; + logic init_done; + logic [31:0] jtag_exit, dmi_exit; + logic [31:0] rvfi_exit; + + logic jtag_TCK; + logic jtag_TMS; + logic jtag_TDI; + logic jtag_TRSTn; + logic jtag_TDO_data; + logic jtag_TDO_driven; + + logic debug_req_valid; + logic debug_req_ready; + logic debug_resp_valid; + logic debug_resp_ready; + + logic jtag_req_valid; + logic [6:0] jtag_req_bits_addr; + logic [1:0] jtag_req_bits_op; + logic [31:0] jtag_req_bits_data; + logic jtag_resp_ready; + logic jtag_resp_valid; + + logic dmi_req_valid; + logic dmi_resp_ready; + logic dmi_resp_valid; + + dm::dmi_req_t jtag_dmi_req; + dm::dmi_req_t dmi_req; + + dm::dmi_req_t debug_req; + dm::dmi_resp_t debug_resp; + + assign test_en = 1'b0; + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidth ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) slave[ariane_soc::NrSlaves-1:0](); + + AXI_BUS #( + 
.AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) master[ariane_soc::NB_PERIPHERALS-1:0](); + + rstgen i_rstgen_main ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni & (~ndmreset) ), + .test_mode_i ( test_en ), + .rst_no ( ndmreset_n ), + .init_no ( ) // keep open + ); + + // --------------- + // Debug + // --------------- + assign init_done = rst_ni; + + logic debug_enable; + initial begin + if (!$value$plusargs("jtag_rbb_enable=%b", jtag_enable)) jtag_enable = 'h0; + if ($test$plusargs("debug_disable")) debug_enable = 'h0; else debug_enable = 'h1; + if (riscv::XLEN != 32 & riscv::XLEN != 64) $error("XLEN different from 32 and 64"); + end + + // debug if MUX + assign debug_req_valid = (jtag_enable[0]) ? jtag_req_valid : dmi_req_valid; + assign debug_resp_ready = (jtag_enable[0]) ? jtag_resp_ready : dmi_resp_ready; + assign debug_req = (jtag_enable[0]) ? jtag_dmi_req : dmi_req; + if (ariane_pkg::RVFI) begin + assign exit_o = (jtag_enable[0]) ? jtag_exit : rvfi_exit; + end else begin + assign exit_o = (jtag_enable[0]) ? jtag_exit : dmi_exit; + end + assign jtag_resp_valid = (jtag_enable[0]) ? debug_resp_valid : 1'b0; + assign dmi_resp_valid = (jtag_enable[0]) ? 
1'b0 : debug_resp_valid; + + // SiFive's SimJTAG Module + // Converts to DPI calls + SimJTAG i_SimJTAG ( + .clock ( clk_i ), + .reset ( ~rst_ni ), + .enable ( jtag_enable[0] ), + .init_done ( init_done ), + .jtag_TCK ( jtag_TCK ), + .jtag_TMS ( jtag_TMS ), + .jtag_TDI ( jtag_TDI ), + .jtag_TRSTn ( jtag_TRSTn ), + .jtag_TDO_data ( jtag_TDO_data ), + .jtag_TDO_driven ( jtag_TDO_driven ), + .exit ( jtag_exit ) + ); + + dmi_jtag i_dmi_jtag ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .testmode_i ( test_en ), + .dmi_req_o ( jtag_dmi_req ), + .dmi_req_valid_o ( jtag_req_valid ), + .dmi_req_ready_i ( debug_req_ready ), + .dmi_resp_i ( debug_resp ), + .dmi_resp_ready_o ( jtag_resp_ready ), + .dmi_resp_valid_i ( jtag_resp_valid ), + .dmi_rst_no ( ), // not connected + .tck_i ( jtag_TCK ), + .tms_i ( jtag_TMS ), + .trst_ni ( jtag_TRSTn ), + .td_i ( jtag_TDI ), + .td_o ( jtag_TDO_data ), + .tdo_oe_o ( jtag_TDO_driven ) + ); + + // SiFive's SimDTM Module + // Converts to DPI calls + logic [1:0] debug_req_bits_op; + assign dmi_req.op = dm::dtm_op_e'(debug_req_bits_op); + + if (InclSimDTM) begin + SimDTM i_SimDTM ( + .clk ( clk_i ), + .reset ( ~rst_ni ), + .debug_req_valid ( dmi_req_valid ), + .debug_req_ready ( debug_req_ready ), + .debug_req_bits_addr ( dmi_req.addr ), + .debug_req_bits_op ( debug_req_bits_op ), + .debug_req_bits_data ( dmi_req.data ), + .debug_resp_valid ( dmi_resp_valid ), + .debug_resp_ready ( dmi_resp_ready ), + .debug_resp_bits_resp ( debug_resp.resp ), + .debug_resp_bits_data ( debug_resp.data ), + .exit ( dmi_exit ) + ); + end else begin + assign dmi_req_valid = '0; + assign debug_req_bits_op = '0; + assign dmi_exit = 1'b0; + end + + // this delay window allows the core to read and execute init code + // from the bootrom before the first debug request can interrupt + // core. this is needed in cases where an fsbl is involved that + // expects a0 and a1 to be initialized with the hart id and a + // pointer to the dev tree, respectively. 
+ localparam int unsigned DmiDelCycles = 500; + + logic debug_req_core_ungtd; + int dmi_del_cnt_d, dmi_del_cnt_q; + + assign dmi_del_cnt_d = (dmi_del_cnt_q) ? dmi_del_cnt_q - 1 : 0; + assign debug_req_core = (dmi_del_cnt_q) ? 1'b0 : + (!debug_enable) ? 1'b0 : debug_req_core_ungtd; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_dmi_del_cnt + if(!rst_ni) begin + dmi_del_cnt_q <= DmiDelCycles; + end else begin + dmi_del_cnt_q <= dmi_del_cnt_d; + end + end + + ariane_axi::req_t dm_axi_m_req; + ariane_axi::resp_t dm_axi_m_resp; + + logic dm_slave_req; + logic dm_slave_we; + logic [64-1:0] dm_slave_addr; + logic [64/8-1:0] dm_slave_be; + logic [64-1:0] dm_slave_wdata; + logic [64-1:0] dm_slave_rdata; + + logic dm_master_req; + logic [64-1:0] dm_master_add; + logic dm_master_we; + logic [64-1:0] dm_master_wdata; + logic [64/8-1:0] dm_master_be; + logic dm_master_gnt; + logic dm_master_r_valid; + logic [64-1:0] dm_master_r_rdata; + + // debug module + dm_top #( + .NrHarts ( 1 ), + .BusWidth ( AXI_DATA_WIDTH ), + .SelectableHarts ( 1'b1 ) + ) i_dm_top ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), // PoR + .testmode_i ( test_en ), + .ndmreset_o ( ndmreset ), + .dmactive_o ( ), // active debug session + .debug_req_o ( debug_req_core_ungtd ), + .unavailable_i ( '0 ), + .hartinfo_i ( {ariane_pkg::DebugHartInfo} ), + .slave_req_i ( dm_slave_req ), + .slave_we_i ( dm_slave_we ), + .slave_addr_i ( dm_slave_addr ), + .slave_be_i ( dm_slave_be ), + .slave_wdata_i ( dm_slave_wdata ), + .slave_rdata_o ( dm_slave_rdata ), + .master_req_o ( dm_master_req ), + .master_add_o ( dm_master_add ), + .master_we_o ( dm_master_we ), + .master_wdata_o ( dm_master_wdata ), + .master_be_o ( dm_master_be ), + .master_gnt_i ( dm_master_gnt ), + .master_r_valid_i ( dm_master_r_valid ), + .master_r_rdata_i ( dm_master_r_rdata ), + .dmi_rst_ni ( rst_ni ), + .dmi_req_valid_i ( debug_req_valid ), + .dmi_req_ready_o ( debug_req_ready ), + .dmi_req_i ( debug_req ), + .dmi_resp_valid_o ( 
debug_resp_valid ), + .dmi_resp_ready_i ( debug_resp_ready ), + .dmi_resp_o ( debug_resp ) + ); + + + axi2mem #( + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) i_dm_axi2mem ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slave ( master[ariane_soc::Debug] ), + .req_o ( dm_slave_req ), + .we_o ( dm_slave_we ), + .addr_o ( dm_slave_addr ), + .be_o ( dm_slave_be ), + .user_o ( ), + .data_o ( dm_slave_wdata ), + .user_i ( '0 ), + .data_i ( dm_slave_rdata ) + ); + + `AXI_ASSIGN_FROM_REQ(slave[1], dm_axi_m_req) + `AXI_ASSIGN_TO_RESP(dm_axi_m_resp, slave[1]) + + axi_adapter #( + .CVA6Cfg ( CVA6Cfg ), + .DATA_WIDTH ( AXI_DATA_WIDTH ), + .axi_req_t ( ariane_axi::req_t ), + .axi_rsp_t ( ariane_axi::resp_t ) + ) i_dm_axi_master ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( dm_master_req ), + .type_i ( ariane_pkg::SINGLE_REQ ), + .amo_i ( ariane_pkg::AMO_NONE ), + .gnt_o ( dm_master_gnt ), + .addr_i ( dm_master_add ), + .we_i ( dm_master_we ), + .wdata_i ( dm_master_wdata ), + .be_i ( dm_master_be ), + .size_i ( 2'b11 ), // always do 64bit here and use byte enables to gate + .id_i ( '0 ), + .valid_o ( dm_master_r_valid ), + .rdata_o ( dm_master_r_rdata ), + .id_o ( ), + .critical_word_o ( ), + .critical_word_valid_o ( ), + .axi_req_o ( dm_axi_m_req ), + .axi_resp_i ( dm_axi_m_resp ) + ); + + + // --------------- + // ROM + // --------------- + logic rom_req; + logic [AXI_ADDRESS_WIDTH-1:0] rom_addr; + logic [AXI_DATA_WIDTH-1:0] rom_rdata; + + axi2mem #( + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) i_axi2rom ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .slave ( master[ariane_soc::ROM] ), + .req_o ( rom_req ), + .we_o ( ), + .addr_o ( rom_addr ), + .be_o ( ), + .user_o ( ), + .data_o ( ), + .user_i ( '0 ), + .data_i ( 
rom_rdata ) + ); + + bootrom i_bootrom ( + .clk_i ( clk_i ), + .req_i ( rom_req ), + .addr_i ( rom_addr ), + .rdata_o ( rom_rdata ) + ); + + // ------------------------------ + // GPIO + // ------------------------------ + + // GPIO not implemented, adding an error slave here + + ariane_axi_soc::req_slv_t gpio_req; + ariane_axi_soc::resp_slv_t gpio_resp; + `AXI_ASSIGN_TO_REQ(gpio_req, master[ariane_soc::GPIO]) + `AXI_ASSIGN_FROM_RESP(master[ariane_soc::GPIO], gpio_resp) + axi_err_slv #( + .AxiIdWidth ( ariane_axi_soc::IdWidthSlave ), + .req_t ( ariane_axi_soc::req_slv_t ), + .resp_t ( ariane_axi_soc::resp_slv_t ) + ) i_gpio_err_slv ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .test_i ( test_en ), + .slv_req_i ( gpio_req ), + .slv_resp_o ( gpio_resp ) + ); + + + // ------------------------------ + // Memory + Exclusive Access + // ------------------------------ + AXI_BUS #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) dram(); + + logic req; + logic we; + logic [AXI_ADDRESS_WIDTH-1:0] addr; + logic [AXI_DATA_WIDTH/8-1:0] be; + logic [AXI_DATA_WIDTH-1:0] wdata; + logic [AXI_DATA_WIDTH-1:0] rdata; + logic [AXI_USER_WIDTH-1:0] wuser; + logic [AXI_USER_WIDTH-1:0] ruser; + + axi_riscv_atomics_wrap #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .AXI_MAX_WRITE_TXNS ( 1 ), + .RISCV_WORD_WIDTH ( 64 ) + ) i_axi_riscv_atomics ( + .clk_i, + .rst_ni ( ndmreset_n ), + .slv ( master[ariane_soc::DRAM] ), + .mst ( dram ) + ); + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) dram_delayed(); + + axi_delayer_intf #( + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH 
), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .STALL_RANDOM_INPUT ( StallRandomInput ), + .STALL_RANDOM_OUTPUT ( StallRandomOutput ), + .FIXED_DELAY_INPUT ( 0 ), + .FIXED_DELAY_OUTPUT ( 0 ) + ) i_axi_delayer ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .slv ( dram ), + .mst ( dram_delayed ) + ); + + axi2mem #( + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) i_axi2mem ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .slave ( dram_delayed ), + .req_o ( req ), + .we_o ( we ), + .addr_o ( addr ), + .be_o ( be ), + .user_o ( wuser ), + .data_o ( wdata ), + .user_i ( ruser ), + .data_i ( rdata ) + ); + + sram #( + .DATA_WIDTH ( AXI_DATA_WIDTH ), + .USER_WIDTH ( AXI_USER_WIDTH ), + .USER_EN ( AXI_USER_EN ), +`ifdef VERILATOR + .SIM_INIT ( "none" ), +`else + .SIM_INIT ( "zeros" ), +`endif + .NUM_WORDS ( NUM_WORDS ) + ) i_sram ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( req ), + .we_i ( we ), + .addr_i ( addr[$clog2(NUM_WORDS)-1+$clog2(AXI_DATA_WIDTH/8):$clog2(AXI_DATA_WIDTH/8)] ), + .wuser_i ( wuser ), + .wdata_i ( wdata ), + .be_i ( be ), + .ruser_o ( ruser ), + .rdata_o ( rdata ) + ); + + // --------------- + // AXI Xbar + // --------------- + + axi_pkg::xbar_rule_64_t [ariane_soc::NB_PERIPHERALS-1:0] addr_map; + + assign addr_map = '{ + '{ idx: ariane_soc::Debug, start_addr: ariane_soc::DebugBase, end_addr: ariane_soc::DebugBase + ariane_soc::DebugLength }, + '{ idx: ariane_soc::ROM, start_addr: ariane_soc::ROMBase, end_addr: ariane_soc::ROMBase + ariane_soc::ROMLength }, + '{ idx: ariane_soc::CLINT, start_addr: ariane_soc::CLINTBase, end_addr: ariane_soc::CLINTBase + ariane_soc::CLINTLength }, + '{ idx: ariane_soc::PLIC, start_addr: ariane_soc::PLICBase, end_addr: ariane_soc::PLICBase + ariane_soc::PLICLength }, + '{ idx: ariane_soc::UART, start_addr: ariane_soc::UARTBase, end_addr: 
ariane_soc::UARTBase + ariane_soc::UARTLength }, + '{ idx: ariane_soc::Timer, start_addr: ariane_soc::TimerBase, end_addr: ariane_soc::TimerBase + ariane_soc::TimerLength }, + '{ idx: ariane_soc::SPI, start_addr: ariane_soc::SPIBase, end_addr: ariane_soc::SPIBase + ariane_soc::SPILength }, + '{ idx: ariane_soc::Ethernet, start_addr: ariane_soc::EthernetBase, end_addr: ariane_soc::EthernetBase + ariane_soc::EthernetLength }, + '{ idx: ariane_soc::GPIO, start_addr: ariane_soc::GPIOBase, end_addr: ariane_soc::GPIOBase + ariane_soc::GPIOLength }, + '{ idx: ariane_soc::DRAM, start_addr: ariane_soc::DRAMBase, end_addr: ariane_soc::DRAMBase + ariane_soc::DRAMLength } + }; + + localparam axi_pkg::xbar_cfg_t AXI_XBAR_CFG = '{ + NoSlvPorts: unsigned'(ariane_soc::NrSlaves), + NoMstPorts: unsigned'(ariane_soc::NB_PERIPHERALS), + MaxMstTrans: unsigned'(1), // Probably requires update + MaxSlvTrans: unsigned'(1), // Probably requires update + FallThrough: 1'b0, + LatencyMode: axi_pkg::NO_LATENCY, + AxiIdWidthSlvPorts: unsigned'(ariane_axi_soc::IdWidth), + AxiIdUsedSlvPorts: unsigned'(ariane_axi_soc::IdWidth), + UniqueIds: 1'b0, + AxiAddrWidth: unsigned'(AXI_ADDRESS_WIDTH), + AxiDataWidth: unsigned'(AXI_DATA_WIDTH), + NoAddrRules: unsigned'(ariane_soc::NB_PERIPHERALS) + }; + + axi_xbar_intf #( + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .Cfg ( AXI_XBAR_CFG ), + .rule_t ( axi_pkg::xbar_rule_64_t ) + ) i_axi_xbar ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .test_i ( test_en ), + .slv_ports ( slave ), + .mst_ports ( master ), + .addr_map_i ( addr_map ), + .en_default_mst_port_i ( '0 ), + .default_mst_port_i ( '0 ) + ); + + // --------------- + // CLINT + // --------------- + logic ipi; + logic timer_irq; + + ariane_axi_soc::req_slv_t axi_clint_req; + ariane_axi_soc::resp_slv_t axi_clint_resp; + + clint #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .NR_CORES ( 1 ), + .axi_req_t ( 
ariane_axi_soc::req_slv_t ), + .axi_resp_t ( ariane_axi_soc::resp_slv_t ) + ) i_clint ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .testmode_i ( test_en ), + .axi_req_i ( axi_clint_req ), + .axi_resp_o ( axi_clint_resp ), + .rtc_i ( rtc_i ), + .timer_irq_o ( timer_irq ), + .ipi_o ( ipi ) + ); + + `AXI_ASSIGN_TO_REQ(axi_clint_req, master[ariane_soc::CLINT]) + `AXI_ASSIGN_FROM_RESP(master[ariane_soc::CLINT], axi_clint_resp) + + // --------------- + // Peripherals + // --------------- + logic tx, rx; + logic [1:0] irqs; + + ariane_peripherals #( + .AxiAddrWidth ( AXI_ADDRESS_WIDTH ), + .AxiDataWidth ( AXI_DATA_WIDTH ), + .AxiIdWidth ( ariane_axi_soc::IdWidthSlave ), + .AxiUserWidth ( AXI_USER_WIDTH ), +`ifndef VERILATOR + .InclUART ( 1'b1 ), +`else + .InclUART ( 1'b0 ), +`endif + .InclSPI ( 1'b0 ), + .InclEthernet ( 1'b0 ) + ) i_ariane_peripherals ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .plic ( master[ariane_soc::PLIC] ), + .uart ( master[ariane_soc::UART] ), + .spi ( master[ariane_soc::SPI] ), + .ethernet ( master[ariane_soc::Ethernet] ), + .timer ( master[ariane_soc::Timer] ), + .irq_o ( irqs ), + .rx_i ( rx ), + .tx_o ( tx ), + .eth_txck ( ), + .eth_rxck ( ), + .eth_rxctl ( ), + .eth_rxd ( ), + .eth_rst_n ( ), + .eth_tx_en ( ), + .eth_txd ( ), + .phy_mdio ( ), + .eth_mdc ( ), + .mdio ( ), + .mdc ( ), + .spi_clk_o ( ), + .spi_mosi ( ), + .spi_miso ( ), + .spi_ss ( ) + ); + + uart_bus #(.BAUD_RATE(115200), .PARITY_EN(0)) i_uart_bus (.rx(tx), .tx(rx), .rx_en(1'b1)); + + // --------------- + // Core + // --------------- + ariane_axi::req_t axi_ariane_req; + ariane_axi::resp_t axi_ariane_resp; + rvfi_probes_t rvfi_probes; + rvfi_instr_t [CVA6Cfg.NrCommitPorts-1:0] rvfi_instr; + + ariane #( + .CVA6Cfg ( CVA6Cfg ), + .IsRVFI ( IsRVFI ), + .rvfi_probes_t ( rvfi_probes_t ), + .noc_req_t ( ariane_axi::req_t ), + .noc_resp_t ( ariane_axi::resp_t ) + ) i_ariane ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .boot_addr_i ( ariane_soc::ROMBase ), // start 
fetching from ROM + .hart_id_i ( {56'h0, hart_id} ), + .irq_i ( irqs ), + .ipi_i ( ipi ), + .time_irq_i ( timer_irq ), + .rvfi_probes_o ( rvfi_probes ), +// Disable Debug when simulating with Spike +`ifdef SPIKE_TANDEM + .debug_req_i ( 1'b0 ), +`else + .debug_req_i ( debug_req_core ), +`endif + .noc_req_o ( axi_ariane_req ), + .noc_resp_i ( axi_ariane_resp ) + ); + + `AXI_ASSIGN_FROM_REQ(slave[0], axi_ariane_req) + `AXI_ASSIGN_TO_RESP(axi_ariane_resp, slave[0]) + + // ------------- + // Simulation Helper Functions + // ------------- + // check for response errors + always_ff @(posedge clk_i) begin : p_assert + if (axi_ariane_req.r_ready && + axi_ariane_resp.r_valid && + axi_ariane_resp.r.resp inside {axi_pkg::RESP_DECERR, axi_pkg::RESP_SLVERR}) begin + $warning("R Response Errored"); + end + if (axi_ariane_req.b_ready && + axi_ariane_resp.b_valid && + axi_ariane_resp.b.resp inside {axi_pkg::RESP_DECERR, axi_pkg::RESP_SLVERR}) begin + $warning("B Response Errored"); + end + end + + cva6_rvfi #( + .CVA6Cfg (CVA6Cfg), + .rvfi_instr_t(rvfi_instr_t), + .rvfi_probes_t(rvfi_probes_t) + ) i_cva6_rvfi ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .rvfi_probes_i(rvfi_probes), + .rvfi_o(rvfi_instr) + ); + + rvfi_tracer #( + .CVA6Cfg(CVA6Cfg), + .rvfi_instr_t(rvfi_instr_t), + // + .HART_ID(hart_id), + .DEBUG_START(0), + .DEBUG_STOP(0) + ) i_rvfi_tracer ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .rvfi_i(rvfi_instr), + .end_of_test_o(rvfi_exit) + ); + +`ifdef SPIKE_TANDEM + spike #( + .CVA6Cfg ( CVA6Cfg ), + .rvfi_instr_t(rvfi_instr_t) + ) i_spike ( + .clk_i, + .rst_ni, + .clint_tick_i ( rtc_i ), + .rvfi_i ( rvfi_instr ) + ); + initial begin + $display("Running binary in tandem mode"); + end +`endif + + +`ifdef AXI_SVA + // AXI 4 Assertion IP integration - You will need to get your own copy of this IP if you want + // to use it + Axi4PC #( + .DATA_WIDTH(ariane_axi_soc::DataWidth), + .WID_WIDTH(ariane_axi_soc::IdWidthSlave), + .RID_WIDTH(ariane_axi_soc::IdWidthSlave), + 
.AWUSER_WIDTH(ariane_axi_soc::UserWidth), + .WUSER_WIDTH(ariane_axi_soc::UserWidth), + .BUSER_WIDTH(ariane_axi_soc::UserWidth), + .ARUSER_WIDTH(ariane_axi_soc::UserWidth), + .RUSER_WIDTH(ariane_axi_soc::UserWidth), + .ADDR_WIDTH(ariane_axi_soc::AddrWidth) + ) i_Axi4PC ( + .ACLK(clk_i), + .ARESETn(ndmreset_n), + .AWID(dram.aw_id), + .AWADDR(dram.aw_addr), + .AWLEN(dram.aw_len), + .AWSIZE(dram.aw_size), + .AWBURST(dram.aw_burst), + .AWLOCK(dram.aw_lock), + .AWCACHE(dram.aw_cache), + .AWPROT(dram.aw_prot), + .AWQOS(dram.aw_qos), + .AWREGION(dram.aw_region), + .AWUSER(dram.aw_user), + .AWVALID(dram.aw_valid), + .AWREADY(dram.aw_ready), + .WLAST(dram.w_last), + .WDATA(dram.w_data), + .WSTRB(dram.w_strb), + .WUSER(dram.w_user), + .WVALID(dram.w_valid), + .WREADY(dram.w_ready), + .BID(dram.b_id), + .BRESP(dram.b_resp), + .BUSER(dram.b_user), + .BVALID(dram.b_valid), + .BREADY(dram.b_ready), + .ARID(dram.ar_id), + .ARADDR(dram.ar_addr), + .ARLEN(dram.ar_len), + .ARSIZE(dram.ar_size), + .ARBURST(dram.ar_burst), + .ARLOCK(dram.ar_lock), + .ARCACHE(dram.ar_cache), + .ARPROT(dram.ar_prot), + .ARQOS(dram.ar_qos), + .ARREGION(dram.ar_region), + .ARUSER(dram.ar_user), + .ARVALID(dram.ar_valid), + .ARREADY(dram.ar_ready), + .RID(dram.r_id), + .RLAST(dram.r_last), + .RDATA(dram.r_data), + .RRESP(dram.r_resp), + .RUSER(dram.r_user), + .RVALID(dram.r_valid), + .RREADY(dram.r_ready), + .CACTIVE('0), + .CSYSREQ('0), + .CSYSACK('0) + ); +`endif +endmodule diff --git a/test/type_param/corev_apu/tb/axi_intf.sv b/test/type_param/corev_apu/tb/axi_intf.sv new file mode 100644 index 0000000..41d4b16 --- /dev/null +++ b/test/type_param/corev_apu/tb/axi_intf.sv @@ -0,0 +1,311 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki +// +// This file defines the interfaces we support. + + + +/// An AXI4 interface: bundles the AW, W, B, AR and R channel signals of one bus. +interface AXI_BUS #( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1, + parameter AXI_ID_WIDTH = -1, + parameter AXI_USER_WIDTH = -1 +); + + import axi_pkg::*; + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; // one strobe bit per 8 data bits + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + typedef logic [5:0] atop_t; + + id_t aw_id; + addr_t aw_addr; + logic [7:0] aw_len; + logic [2:0] aw_size; + burst_t aw_burst; + logic aw_lock; + cache_t aw_cache; + prot_t aw_prot; + qos_t aw_qos; + atop_t aw_atop; + region_t aw_region; + user_t aw_user; + logic aw_valid; + logic aw_ready; + + data_t w_data; + strb_t w_strb; + logic w_last; + user_t w_user; + logic w_valid; + logic w_ready; + + id_t b_id; + resp_t b_resp; + user_t b_user; + logic b_valid; + logic b_ready; + + id_t ar_id; + addr_t ar_addr; + logic [7:0] ar_len; + logic [2:0] ar_size; + burst_t ar_burst; + logic ar_lock; + cache_t ar_cache; + prot_t ar_prot; + qos_t ar_qos; + region_t ar_region; + user_t ar_user; + logic ar_valid; + logic ar_ready; + + id_t r_id; + data_t r_data; + resp_t r_resp; + logic r_last; + user_t r_user; + logic r_valid; + logic r_ready; + + modport Master ( // drives AW/W/AR payload plus b_ready/r_ready; samples B/R payload plus aw/w/ar ready + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_valid, input aw_ready, +
output w_data, w_strb, w_last, w_user, w_valid, input w_ready, + input b_id, b_resp, b_user, b_valid, output b_ready, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, input ar_ready, + input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready + ); + + modport Slave ( // mirror image of Master: every signal direction is reversed + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_valid, output aw_ready, + input w_data, w_strb, w_last, w_user, w_valid, output w_ready, + output b_id, b_resp, b_user, b_valid, input b_ready, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, output ar_ready, + output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready + ); + +endinterface + + +/// An asynchronous AXI4 interface: each channel carries writetoken/readpointer vectors (BUFFER_WIDTH bits) in place of valid/ready. +interface AXI_BUS_ASYNC +#( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1, + parameter AXI_ID_WIDTH = -1, + parameter AXI_USER_WIDTH = -1, + parameter BUFFER_WIDTH = -1 +); + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; // one strobe bit per 8 data bits + + + logic [AXI_ID_WIDTH-1:0] aw_id; + logic [AXI_ADDR_WIDTH-1:0] aw_addr; + logic [7:0] aw_len; + logic [2:0] aw_size; + logic [1:0] aw_burst; + logic aw_lock; + logic [3:0] aw_cache; + logic [2:0] aw_prot; + logic [3:0] aw_qos; + logic [5:0] aw_atop; + logic [3:0] aw_region; + logic [AXI_USER_WIDTH-1:0] aw_user; + logic [BUFFER_WIDTH-1:0] aw_writetoken; + logic [BUFFER_WIDTH-1:0] aw_readpointer; + + logic [AXI_DATA_WIDTH-1:0] w_data; + logic [AXI_STRB_WIDTH-1:0] w_strb; + logic w_last; + logic [AXI_USER_WIDTH-1:0] w_user; + logic [BUFFER_WIDTH-1:0] w_writetoken; + logic [BUFFER_WIDTH-1:0] w_readpointer; + + logic [AXI_ID_WIDTH-1:0] b_id; + logic [1:0] b_resp; + logic [AXI_USER_WIDTH-1:0] b_user; + logic [BUFFER_WIDTH-1:0] b_writetoken; + logic [BUFFER_WIDTH-1:0] b_readpointer; + + logic [AXI_ID_WIDTH-1:0] ar_id; + logic [AXI_ADDR_WIDTH-1:0] ar_addr; + logic [7:0]
ar_len; + logic [2:0] ar_size; + logic [1:0] ar_burst; + logic ar_lock; + logic [3:0] ar_cache; + logic [2:0] ar_prot; + logic [3:0] ar_qos; + logic [3:0] ar_region; + logic [AXI_USER_WIDTH-1:0] ar_user; + logic [BUFFER_WIDTH-1:0] ar_writetoken; + logic [BUFFER_WIDTH-1:0] ar_readpointer; + + logic [AXI_ID_WIDTH-1:0] r_id; + logic [AXI_DATA_WIDTH-1:0] r_data; + logic [1:0] r_resp; + logic r_last; + logic [AXI_USER_WIDTH-1:0] r_user; + logic [BUFFER_WIDTH-1:0] r_writetoken; + logic [BUFFER_WIDTH-1:0] r_readpointer; + + modport Master ( // drives AW/W/AR payload and writetokens; on B/R it receives the writetoken and drives the readpointer + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_writetoken, input aw_readpointer, + output w_data, w_strb, w_last, w_user, w_writetoken, input w_readpointer, + input b_id, b_resp, b_user, b_writetoken, output b_readpointer, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_writetoken, input ar_readpointer, + input r_id, r_data, r_resp, r_last, r_user, r_writetoken, output r_readpointer + ); + + modport Slave ( // mirror image of Master: every signal direction is reversed + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_writetoken, output aw_readpointer, + input w_data, w_strb, w_last, w_user, w_writetoken, output w_readpointer, + output b_id, b_resp, b_user, b_writetoken, input b_readpointer, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_writetoken, output ar_readpointer, + output r_id, r_data, r_resp, r_last, r_user, r_writetoken, input r_readpointer + ); + +endinterface + + +/// An AXI4-Lite interface.
+interface AXI_LITE #( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1 +); + + import axi_pkg::*; + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; // one strobe bit per 8 data bits + + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + + // AW channel + addr_t aw_addr; + prot_t aw_prot; + logic aw_valid; + logic aw_ready; + + data_t w_data; // W channel + strb_t w_strb; + logic w_valid; + logic w_ready; + + resp_t b_resp; // B channel + logic b_valid; + logic b_ready; + + addr_t ar_addr; // AR channel + prot_t ar_prot; + logic ar_valid; + logic ar_ready; + + data_t r_data; // R channel + resp_t r_resp; + logic r_valid; + logic r_ready; + + modport Master ( // drives AW/W/AR and b_ready/r_ready; samples B/R and the aw/w/ar ready signals + output aw_addr, aw_prot, aw_valid, input aw_ready, + output w_data, w_strb, w_valid, input w_ready, + input b_resp, b_valid, output b_ready, + output ar_addr, ar_prot, ar_valid, input ar_ready, + input r_data, r_resp, r_valid, output r_ready + ); + + modport Slave ( // mirror image of Master: every signal direction is reversed + input aw_addr, aw_prot, aw_valid, output aw_ready, + input w_data, w_strb, w_valid, output w_ready, + output b_resp, b_valid, input b_ready, + input ar_addr, ar_prot, ar_valid, output ar_ready, + output r_data, r_resp, r_valid, input r_ready + ); + + /// The interface as an output (issuing requests, initiator, master). Unlike Master, aw_prot and ar_prot are not part of this modport. + modport out ( + output aw_addr, aw_valid, input aw_ready, + output w_data, w_strb, w_valid, input w_ready, + input b_resp, b_valid, output b_ready, + output ar_addr, ar_valid, input ar_ready, + input r_data, r_resp, r_valid, output r_ready + ); + + /// The interface as an input (accepting requests, target, slave). Unlike Slave, aw_prot and ar_prot are not part of this modport. + modport in ( + input aw_addr, aw_valid, output aw_ready, + input w_data, w_strb, w_valid, output w_ready, + output b_resp, b_valid, input b_ready, + input ar_addr, ar_valid, output ar_ready, + output r_data, r_resp, r_valid, input r_ready + ); + +endinterface + + +/// An AXI routing table. +/// +/// For each slave, multiple rules can be defined. Each rule consists of an +/// address mask and a base.
Addresses are masked and then compared against the +/// base to decide where transfers need to go. +interface AXI_ROUTING_RULES #( + /// The address width. + parameter int AXI_ADDR_WIDTH = -1, + /// The number of slaves in the routing table. + parameter int NUM_SLAVE = -1, + /// The number of rules in the routing table. + parameter int NUM_RULES = -1 +); + + struct packed { + logic enabled; + logic [AXI_ADDR_WIDTH-1:0] mask; + logic [AXI_ADDR_WIDTH-1:0] base; + } [NUM_RULES-1:0] rules [NUM_SLAVE]; // NUM_RULES packed entries for each of the NUM_SLAVE slaves + + modport xbar(input rules); // read-only view of the table + modport cfg(output rules); // view that drives the table + +endinterface + + +/// An AXI arbitration interface. +interface AXI_ARBITRATION #( + /// The number of requestors. + parameter int NUM_REQ = -1 +); + + // Incoming requests. + logic [NUM_REQ-1:0] in_req; + logic [NUM_REQ-1:0] in_ack; + + // Outgoing request. + logic out_req; + logic out_ack; + logic [$clog2(NUM_REQ)-1:0] out_sel; // NOTE(review): for NUM_REQ == 1 this range becomes [-1:0] + + // The arbiter side of the interface. + modport arb(input in_req, out_ack, output out_req, out_sel, in_ack); + + // The requestor side of the interface. + modport req(output in_req, out_ack, input out_req, out_sel, in_ack); + +endinterface diff --git a/test/type_param/corev_apu/tb/common/mock_uart.sv b/test/type_param/corev_apu/tb/common/mock_uart.sv new file mode 100644 index 0000000..6a14904 --- /dev/null +++ b/test/type_param/corev_apu/tb/common/mock_uart.sv @@ -0,0 +1,120 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 28/09/2018 +// Description: Mock replacement for UART in testbench (not synthesizable!) + +module mock_uart ( + input logic clk_i, + input logic rst_ni, + input logic penable_i, + input logic pwrite_i, + input logic [31:0] paddr_i, + input logic psel_i, + input logic [31:0] pwdata_i, + output logic [31:0] prdata_o, + output logic pready_o, + output logic pslverr_o +); + localparam RBR = 0; // register indices; the case statements match them against (paddr_i >> 2) & 7 + localparam THR = 0; + localparam IER = 1; + localparam IIR = 2; + localparam FCR = 2; + localparam LCR = 3; + localparam MCR = 4; + localparam LSR = 5; + localparam MSR = 6; + localparam SCR = 7; + localparam DLL = 0; + localparam DLM = 1; + + localparam THRE = 5; // LSR bit position: transmit holding register empty + localparam TEMT = 6; // LSR bit position: transmitter empty + + byte lcr = 0; + byte dlm = 0; + byte dll = 0; + byte mcr = 0; + byte lsr = 0; + byte ier = 0; + byte msr = 0; + byte scr = 0; + logic fifo_enabled = 1'b0; + + assign pready_o = 1'b1; // every access completes without wait states + assign pslverr_o = 1'b0; // no access ever reports an error + + function void uart_tx(byte ch); // "transmits" by printing the character to the simulator console + $write("%c", ch); + endfunction : uart_tx + +/* verilator lint_off WIDTHTRUNC */ +/* verilator lint_off WIDTHEXPAND */ +/* verilator lint_off WIDTHCONCAT */ + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (rst_ni) begin // NOTE(review): no reset branch - registers keep their values while rst_ni is low + if (psel_i & penable_i & pwrite_i) begin + case ((paddr_i >> 'h2) & 'h7) + THR: begin + if (lcr & 'h80) dll <= byte'(pwdata_i[7:0]); // LCR bit 7 set: index 0 writes dll instead of transmitting + else uart_tx(byte'(pwdata_i[7:0])); + end + IER: begin + if (lcr & 'h80) dlm <= byte'(pwdata_i[7:0]); // LCR bit 7 set: index 1 writes dlm instead of ier + else ier <= byte'(pwdata_i[7:0] & 'hF); + end + FCR: begin + if (pwdata_i[0]) fifo_enabled <= 1'b1; + else fifo_enabled <= 1'b0; + end + LCR: lcr <= byte'(pwdata_i[7:0]); + MCR: mcr <= byte'(pwdata_i[7:0] & 'h1F); + LSR: lsr <= byte'(pwdata_i[7:0]); + MSR: msr <= byte'(pwdata_i[7:0]); + SCR: scr <= byte'(pwdata_i[7:0]); + default:; + endcase + end + end + end +
+ always_comb begin + prdata_o = '0; // default read value, also returned for index 0 while LCR bit 7 is clear + if (psel_i & penable_i & ~pwrite_i) begin + case ((paddr_i >> 'h2) & 'h7) + THR: begin + if (lcr & 'h80) prdata_o = {24'b0, dll}; + end + IER: begin + if (lcr & 'h80) prdata_o = {24'b0, dlm}; + else prdata_o = {24'b0, ier}; + end + IIR: begin + if (fifo_enabled) prdata_o = {24'b0, 8'hc0}; // 0xC0 while the last FCR write had bit 0 set + else prdata_o = {24'b0, 8'b0}; + end + LCR: prdata_o = {24'b0, lcr}; + MCR: prdata_o = {24'b0, mcr}; + LSR: prdata_o = {24'b0, (lsr | (1 << THRE) | (1 << TEMT))}; // THRE and TEMT always read as 1 + MSR: prdata_o = {24'b0, msr}; + SCR: prdata_o = {24'b0, scr}; + default:; + endcase + end + end + +/* verilator lint_on WIDTHTRUNC */ +/* verilator lint_on WIDTHEXPAND */ +/* verilator lint_on WIDTHCONCAT */ + +endmodule diff --git a/test/type_param/corev_apu/tb/common/uart.sv b/test/type_param/corev_apu/tb/common/uart.sv new file mode 100644 index 0000000..d45f39a --- /dev/null +++ b/test/type_param/corev_apu/tb/common/uart.sv @@ -0,0 +1,104 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Author: Unknown +// Date: Unknown +// Description: This module takes data over UART and prints them to the console +// A string is printed to the console as soon as a '\n' character is found + +interface uart_bus #( + parameter int unsigned BAUD_RATE = 115200, + parameter int unsigned PARITY_EN = 0 +)( + input logic rx, + output logic tx, + input logic rx_en +); + +/* pragma translate_off */ +`ifndef VERILATOR + localparam time BIT_PERIOD = (1000000000 / BAUD_RATE) * 1ns; // duration of one bit, in whole nanoseconds + + logic [7:0] character; + logic [256*8-1:0] stringa = '0; // line buffer; cleared up front so the first printed line holds no X bytes + logic parity; + integer charnum = 0; // write position in stringa; must start at 0, an X index would drop every store of the first line + integer file; + + initial begin + tx = 1'bZ; + file = $fopen("uart", "w"); // NOTE(review): this handle is never closed explicitly in this interface + end + + always begin + if (rx_en) begin + @(negedge rx); // wait for the falling edge that starts a frame + #(BIT_PERIOD/2); // move to the middle of the start bit so later samples land mid-bit + for (int i = 0; i <= 7; i++) begin + #BIT_PERIOD character[i] = rx; + end + + if (PARITY_EN == 1) begin + // check parity + #BIT_PERIOD parity = rx; + + for (int i=7;i>=0;i--) begin + parity = character[i] ^ parity; + end + + if (parity == 1'b1) begin + $display("Parity error detected"); + end + end + + // STOP BIT + #BIT_PERIOD; + + $fwrite(file, "%c", character); + stringa[(255-charnum)*8 +: 8] = character; + if (character == 8'h0A || charnum == 254) begin // line feed or max.
chars reached + if (character == 8'h0A) begin + stringa[(255-charnum)*8 +: 8] = 8'h0; // null terminate string, replace line feed + end else begin + stringa[(255-charnum-1)*8 +: 8] = 8'h0; // null terminate string + end + + $write("[UART]: %s\n", stringa); + charnum = 0; + stringa = ""; + end else begin + charnum = charnum + 1; + end + end else begin + charnum = 0; // receiver disabled: drop any partially received line + stringa = ""; + #10; + end + end + + task send_char(input logic [7:0] c); // drives one frame on tx: start bit, 8 data bits LSB first, stop bit + int i; + + // start bit + tx = 1'b0; + + for (i = 0; i < 8; i++) begin + #(BIT_PERIOD); + tx = c[i]; + end + + // stop bit + #(BIT_PERIOD); + tx = 1'b1; + #(BIT_PERIOD); + endtask +`endif +/* pragma translate_on */ +endinterface diff --git a/test/type_param/corev_apu/tb/rvfi_tracer.sv b/test/type_param/corev_apu/tb/rvfi_tracer.sv new file mode 100644 index 0000000..75f68be --- /dev/null +++ b/test/type_param/corev_apu/tb/rvfi_tracer.sv @@ -0,0 +1,134 @@ +// Copyright 2020 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Jean-Roch COULON - Thales + +module rvfi_tracer #( // simulation-only tracer: logs the records on rvfi_i to a file and drives end_of_test_o + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type rvfi_instr_t = logic, + // + parameter logic [7:0] HART_ID = '0, + parameter int unsigned DEBUG_START = 0, + parameter int unsigned DEBUG_STOP = 0 +)( + input logic clk_i, + input logic rst_ni, + input rvfi_instr_t[CVA6Cfg.NrCommitPorts-1:0] rvfi_i, + output logic[31:0] end_of_test_o +); + + logic[riscv::PLEN-1:0] TOHOST_ADDR; // taken from +tohost_addr=<hex>; 0 disables the tohost check below + int f; + int unsigned SIM_FINISH; // cycle limit taken from +time_out=<n>, 2000000 when the plusarg is absent + initial begin + f = $fopen($sformatf("trace_rvfi_hart_%h.dasm", HART_ID), "w"); + if (!$value$plusargs("time_out=%d", SIM_FINISH)) SIM_FINISH = 2000000; + if (!$value$plusargs("tohost_addr=%h", TOHOST_ADDR)) TOHOST_ADDR = '0; + if (TOHOST_ADDR == '0) begin + $display("*** [rvf_tracer] WARNING: No valid address of 'tohost' (tohost == 0x%h), termination possible only by timeout or Ctrl-C!\n", TOHOST_ADDR); + $fwrite(f, "*** [rvfi_tracer] WARNING No valid address of 'tohost' (tohost == 0x%h), termination possible only by timeout or Ctrl-C!\n", TOHOST_ADDR); + end + end + + final $fclose(f); + + logic [31:0] cycles; + // Generate the trace based on RVFI + logic [63:0] pc64; + string cause; + logic[31:0] end_of_test_q; + logic[31:0] end_of_test_d; + + assign end_of_test_o = end_of_test_d; + always_ff @(posedge clk_i) begin + end_of_test_q = (rst_ni && (end_of_test_d[0] == 1'b1)) ?
end_of_test_d : 0; + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + pc64 = {{riscv::XLEN-riscv::VLEN{rvfi_i[i].pc_rdata[riscv::VLEN-1]}}, rvfi_i[i].pc_rdata}; + // print the instruction information if the instruction is valid or a trap is taken + if (rvfi_i[i].valid) begin + // Instruction information + $fwrite(f, "core 0: 0x%h (0x%h) DASM(%h)\n", + pc64, rvfi_i[i].insn, rvfi_i[i].insn); + // Destination register information + if (rvfi_i[i].insn[1:0] != 2'b11) begin + $fwrite(f, "%h 0x%h (0x%h)", + rvfi_i[i].mode, pc64, rvfi_i[i].insn[15:0]); + end else begin + $fwrite(f, "%h 0x%h (0x%h)", + rvfi_i[i].mode, pc64, rvfi_i[i].insn); + end + // Decode instruction to know if destination register is FP register. + // Handle both uncompressed and compressed instructions. + if ( rvfi_i[i].insn[6:0] == 7'b1001111 || + rvfi_i[i].insn[6:0] == 7'b1001011 || + rvfi_i[i].insn[6:0] == 7'b1000111 || + rvfi_i[i].insn[6:0] == 7'b1000011 || + rvfi_i[i].insn[6:0] == 7'b0000111 || + (rvfi_i[i].insn[6:0] == 7'b1010011 && rvfi_i[i].insn[31:26] != 6'b111000 + && rvfi_i[i].insn[31:26] != 6'b101000 + && rvfi_i[i].insn[31:26] != 6'b110000) || + (rvfi_i[i].insn[0] == 1'b0 && ((rvfi_i[i].insn[15:13] == 3'b001 && riscv::XLEN == 64) || + (rvfi_i[i].insn[15:13] == 3'b011 && riscv::XLEN == 32) ))) begin + $fwrite(f, " f%d 0x%h", rvfi_i[i].rd_addr, rvfi_i[i].rd_wdata); + end else if (rvfi_i[i].rd_addr != 0) begin + $fwrite(f, " x%d 0x%h", rvfi_i[i].rd_addr, rvfi_i[i].rd_wdata); + if (rvfi_i[i].mem_rmask != 0) begin + $fwrite(f, " mem 0x%h", rvfi_i[i].mem_addr); + end + end else begin + if (rvfi_i[i].mem_wmask != 0) begin + $fwrite(f, " mem 0x%h 0x%h", rvfi_i[i].mem_addr, rvfi_i[i].mem_wdata); + if (TOHOST_ADDR != '0 && + rvfi_i[i].mem_paddr == TOHOST_ADDR && + rvfi_i[i].mem_wdata[0] == 1'b1) begin + end_of_test_q = rvfi_i[i].mem_wdata[31:0]; // latch the value stored to tohost as the end-of-test code + end + end + end + $fwrite(f, "\n"); + end else begin + if (rvfi_i[i].trap) begin + case (rvfi_i[i].cause) + 32'h0: cause = "INSTR_ADDR_MISALIGNED";
+ 32'h1: cause = "INSTR_ACCESS_FAULT"; + 32'h2: cause = "ILLEGAL_INSTR"; + 32'h3: cause = "BREAKPOINT"; + 32'h4: cause = "LD_ADDR_MISALIGNED"; + 32'h5: cause = "LD_ACCESS_FAULT"; + 32'h6: cause = "ST_ADDR_MISALIGNED"; + 32'h7: cause = "ST_ACCESS_FAULT"; + endcase; + $fwrite(f, "%s exception @ 0x%h\n", cause, pc64); + end + end + end + + if (~rst_ni) + cycles <= 0; + else + cycles <= cycles+1; + if (cycles > SIM_FINISH) + end_of_test_q = 32'hffff_ffff; // cycle limit exceeded: report all-ones + + end_of_test_d <= end_of_test_q; + end + + + // Trace any custom signals + // Define signals to be traced by adding them into debug and name arrays + string name[0:10]; + logic[63:0] debug[0:10], debug_previous[0:10]; + + always_ff @(posedge clk_i) begin + if (cycles > DEBUG_START && cycles < DEBUG_STOP) + for (int index = 0; index < $size(debug); index++) // bound by the array size; the former limit of 100 indexed past the [0:10] arrays + if (debug_previous[index] != debug[index]) + $fwrite(f, "%d %s %x\n", cycles, name[index], debug[index]); + debug_previous <= debug; + end + +endmodule // rvfi_tracer diff --git a/test/type_param/sv2v.sh b/test/type_param/sv2v.sh new file mode 100755 index 0000000..4d739e0 --- /dev/null +++ b/test/type_param/sv2v.sh @@ -0,0 +1,249 @@ +sv2v -v --top=ariane_testharness --define=VERILATOR > cva6_nonsys.v \ +--incdir=vendor/pulp-platform/common_cells/include/ \ +--incdir=vendor/pulp-platform/common_cells/src/ \ +--incdir=vendor/pulp-platform/axi/include/ \ +--incdir=common/local/util/ \ +--incdir=corev_apu/axi_node \ +--incdir=core/cache_subsystem/hpdcache/rtl/include \ +--incdir=corev_apu/register_interface/include \ +vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv \ +vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv \ +vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_fma.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_top.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv \ +core/include/config_pkg.sv \ +core/include/cv64a6_imafdc_sv39_config_pkg.sv \ +core/include/riscv_pkg.sv \ +core/include/ariane_pkg.sv \ +vendor/pulp-platform/axi/src/axi_pkg.sv \ +core/include/wt_cache_pkg.sv \ +core/include/std_cache_pkg.sv \ +core/include/instr_tracer_pkg.sv \ +core/include/acc_pkg.sv \ +core/include/cvxif_pkg.sv \ +core/cvxif_example/include/cvxif_instr_pkg.sv \ +core/cvxif_fu.sv \ +core/cvxif_example/cvxif_example_coprocessor.sv \ +core/cvxif_example/instr_decoder.sv \ +vendor/pulp-platform/common_cells/src/cf_math_pkg.sv \ +vendor/pulp-platform/common_cells/src/fifo_v3.sv \ +vendor/pulp-platform/common_cells/src/lfsr.sv \ +vendor/pulp-platform/common_cells/src/lfsr_8bit.sv \ +vendor/pulp-platform/common_cells/src/stream_arbiter.sv \ +vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv \ +vendor/pulp-platform/common_cells/src/stream_mux.sv \ +vendor/pulp-platform/common_cells/src/stream_demux.sv \ +vendor/pulp-platform/common_cells/src/lzc.sv \ +vendor/pulp-platform/common_cells/src/rr_arb_tree.sv \ +vendor/pulp-platform/common_cells/src/shift_reg.sv \ 
+vendor/pulp-platform/common_cells/src/unread.sv \ +vendor/pulp-platform/common_cells/src/popcount.sv \ +vendor/pulp-platform/common_cells/src/exp_backoff.sv \ +vendor/pulp-platform/common_cells/src/counter.sv \ +vendor/pulp-platform/common_cells/src/delta_counter.sv \ +core/cva6.sv \ +core/cva6_rvfi_probes.sv \ +core/alu.sv \ +core/fpu_wrap.sv \ +core/branch_unit.sv \ +core/compressed_decoder.sv \ +core/controller.sv \ +core/csr_buffer.sv \ +core/csr_regfile.sv \ +core/decoder.sv \ +core/ex_stage.sv \ +core/instr_realign.sv \ +core/id_stage.sv \ +core/issue_read_operands.sv \ +core/issue_stage.sv \ +core/load_unit.sv \ +core/load_store_unit.sv \ +core/lsu_bypass.sv \ +core/mult.sv \ +core/multiplier.sv \ +core/serdiv.sv \ +core/perf_counters.sv \ +core/ariane_regfile_ff.sv \ +core/ariane_regfile_fpga.sv \ +core/scoreboard.sv \ +core/store_buffer.sv \ +core/amo_buffer.sv \ +core/store_unit.sv \ +core/commit_stage.sv \ +core/axi_shim.sv \ +core/cva6_accel_first_pass_decoder_stub.sv \ +core/acc_dispatcher.sv \ +core/frontend/btb.sv \ +core/frontend/bht.sv \ +core/frontend/ras.sv \ +core/frontend/instr_scan.sv \ +core/frontend/instr_queue.sv \ +core/frontend/frontend.sv \ +core/cache_subsystem/wt_dcache_ctrl.sv \ +core/cache_subsystem/wt_dcache_mem.sv \ +core/cache_subsystem/wt_dcache_missunit.sv \ +core/cache_subsystem/wt_dcache_wbuffer.sv \ +core/cache_subsystem/wt_dcache.sv \ +core/cache_subsystem/cva6_icache.sv \ +core/cache_subsystem/wt_cache_subsystem.sv \ +core/cache_subsystem/wt_axi_adapter.sv \ +core/cache_subsystem/tag_cmp.sv \ +core/cache_subsystem/axi_adapter.sv \ +core/cache_subsystem/miss_handler.sv \ +core/cache_subsystem/cache_ctrl.sv \ +core/cache_subsystem/cva6_icache_axi_wrapper.sv \ +core/cache_subsystem/std_cache_subsystem.sv \ +core/cache_subsystem/std_nbdcache.sv \ +core/include/cva6_hpdcache_default_config_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv \ 
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv \ 
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv \ +core/cache_subsystem/cva6_hpdcache_subsystem.sv \ +core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv \ +core/cache_subsystem/cva6_hpdcache_if_adapter.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv \ +core/pmp/src/pmp.sv \ +core/pmp/src/pmp_entry.sv \ +common/local/util/instr_tracer_if.sv \ +common/local/util/instr_tracer.sv \ +common/local/util/tc_sram_wrapper.sv \ +vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv \ +common/local/util/sram.sv \ +core/mmu_sv39/mmu.sv \ +core/mmu_sv39/ptw.sv \ +core/mmu_sv39/tlb.sv \ +core/mmu_sv32/cva6_mmu_sv32.sv \ +core/mmu_sv32/cva6_ptw_sv32.sv \ +core/mmu_sv32/cva6_tlb_sv32.sv \ +core/mmu_sv32/cva6_shared_tlb_sv32.sv \ + core/cva6_rvfi.sv \ + corev_apu/tb/ariane_axi_pkg.sv \ + corev_apu/tb/axi_intf.sv \ + corev_apu/register_interface/src/reg_intf.sv \ + corev_apu/tb/ariane_soc_pkg.sv \ + corev_apu/riscv-dbg/src/dm_pkg.sv \ + corev_apu/tb/ariane_axi_soc_pkg.sv \ + corev_apu/src/ariane.sv \ + corev_apu/bootrom/bootrom.sv \ + corev_apu/clint/axi_lite_interface.sv \ + corev_apu/clint/clint.sv \ + 
corev_apu/fpga/src/axi2apb/src/axi2apb.sv \ + corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv \ + corev_apu/fpga/src/apb_timer/apb_timer.sv \ + corev_apu/fpga/src/apb_timer/timer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv \ + corev_apu/fpga/src/axi_slice/src/axi_slice.sv \ + corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv \ + corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv \ + corev_apu/axi_mem_if/src/axi2mem.sv \ + corev_apu/rv_plic/rtl/rv_plic_target.sv \ + corev_apu/rv_plic/rtl/rv_plic_gateway.sv \ + corev_apu/rv_plic/rtl/plic_regmap.sv \ + corev_apu/rv_plic/rtl/plic_top.sv \ + corev_apu/riscv-dbg/src/dmi_cdc.sv \ + corev_apu/riscv-dbg/src/dmi_jtag.sv \ + corev_apu/riscv-dbg/src/dmi_jtag_tap.sv \ + corev_apu/riscv-dbg/src/dm_csrs.sv \ + corev_apu/riscv-dbg/src/dm_mem.sv \ + corev_apu/riscv-dbg/src/dm_sba.sv \ + corev_apu/riscv-dbg/src/dm_top.sv \ + corev_apu/riscv-dbg/debug_rom/debug_rom.sv \ + corev_apu/register_interface/src/apb_to_reg.sv \ + vendor/pulp-platform/axi/src/axi_multicut.sv \ + vendor/pulp-platform/common_cells/src/rstgen_bypass.sv \ + vendor/pulp-platform/common_cells/src/rstgen.sv \ + vendor/pulp-platform/common_cells/src/addr_decode.sv \ + vendor/pulp-platform/common_cells/src/stream_register.sv \ + vendor/pulp-platform/axi/src/axi_cut.sv \ + vendor/pulp-platform/axi/src/axi_join.sv \ + 
vendor/pulp-platform/axi/src/axi_delayer.sv \ + vendor/pulp-platform/axi/src/axi_to_axi_lite.sv \ + vendor/pulp-platform/axi/src/axi_id_prepend.sv \ + vendor/pulp-platform/axi/src/axi_atop_filter.sv \ + vendor/pulp-platform/axi/src/axi_err_slv.sv \ + vendor/pulp-platform/axi/src/axi_mux.sv \ + vendor/pulp-platform/axi/src/axi_demux.sv \ + vendor/pulp-platform/axi/src/axi_xbar.sv \ + vendor/pulp-platform/common_cells/src/cdc_2phase.sv \ + vendor/pulp-platform/common_cells/src/spill_register_flushable.sv \ + vendor/pulp-platform/common_cells/src/spill_register.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv \ + vendor/pulp-platform/common_cells/src/stream_delay.sv \ + vendor/pulp-platform/common_cells/src/lfsr_16bit.sv \ + vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv \ + vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv \ + vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv \ + corev_apu/tb/ariane_testharness.sv \ + corev_apu/tb/ariane_peripherals.sv \ + corev_apu/tb/rvfi_tracer.sv \ + corev_apu/tb/common/uart.sv \ + corev_apu/tb/common/mock_uart.sv \ diff --git a/test/type_param/sv2v_corrected.sh b/test/type_param/sv2v_corrected.sh new file mode 100755 index 0000000..4eb7acc --- /dev/null +++ b/test/type_param/sv2v_corrected.sh @@ -0,0 +1,249 @@ +sv2v -v --top=ariane_testharness --define=VERILATOR > cva6_nonsys.v \ +--incdir=vendor/pulp-platform/common_cells/include/ \ +--incdir=vendor/pulp-platform/common_cells/src/ \ +--incdir=vendor/pulp-platform/axi/include/ \ +--incdir=common/local/util/ \ +--incdir=corev_apu/axi_node \ +--incdir=core/cache_subsystem/hpdcache/rtl/include \ +--incdir=corev_apu/register_interface/include \ +vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv \ +vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv \ +vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv \ 
+vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_fma.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_top.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv \ +core/include/config_pkg.sv \ +core/include/cv64a6_imafdc_sv39_config_pkg.sv \ +core/include/riscv_pkg.sv \ +core/include/ariane_pkg.sv \ +vendor/pulp-platform/axi/src/axi_pkg.sv \ +core/include/wt_cache_pkg.sv \ +core/include/std_cache_pkg.sv \ +core/include/instr_tracer_pkg.sv \ +core/include/acc_pkg.sv \ +core/include/cvxif_pkg.sv \ +core/cvxif_example/include/cvxif_instr_pkg.sv \ +core/cvxif_fu.sv \ +core/cvxif_example/cvxif_example_coprocessor.sv \ +core/cvxif_example/instr_decoder.sv \ +vendor/pulp-platform/common_cells/src/cf_math_pkg.sv \ +vendor/pulp-platform/common_cells/src/fifo_v3.sv \ +vendor/pulp-platform/common_cells/src/lfsr.sv \ +vendor/pulp-platform/common_cells/src/lfsr_8bit.sv \ +vendor/pulp-platform/common_cells/src/stream_arbiter.sv \ +vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv \ 
+vendor/pulp-platform/common_cells/src/stream_mux.sv \ +vendor/pulp-platform/common_cells/src/stream_demux.sv \ +vendor/pulp-platform/common_cells/src/lzc.sv \ +vendor/pulp-platform/common_cells/src/rr_arb_tree.sv \ +vendor/pulp-platform/common_cells/src/shift_reg.sv \ +vendor/pulp-platform/common_cells/src/unread.sv \ +vendor/pulp-platform/common_cells/src/popcount.sv \ +vendor/pulp-platform/common_cells/src/exp_backoff.sv \ +vendor/pulp-platform/common_cells/src/counter.sv \ +vendor/pulp-platform/common_cells/src/delta_counter.sv \ +core/cva6.sv \ +core/cva6_rvfi_probes.sv \ +core/alu.sv \ +core/fpu_wrap.sv \ +core/branch_unit.sv \ +core/compressed_decoder.sv \ +core/controller.sv \ +core/csr_buffer.sv \ +core/csr_regfile.sv \ +core/decoder.sv \ +core/ex_stage.sv \ +core/instr_realign.sv \ +core/id_stage.sv \ +core/issue_read_operands.sv \ +core/issue_stage.sv \ +core/load_unit.sv \ +core/load_store_unit.sv \ +core/lsu_bypass.sv \ +core/mult.sv \ +core/multiplier.sv \ +core/serdiv.sv \ +core/perf_counters.sv \ +core/ariane_regfile_ff.sv \ +core/ariane_regfile_fpga.sv \ +core/scoreboard.sv \ +core/store_buffer.sv \ +core/amo_buffer.sv \ +core/store_unit.sv \ +core/commit_stage.sv \ +core/axi_shim.sv \ +core/cva6_accel_first_pass_decoder_stub.sv \ +core/acc_dispatcher_corrected.sv \ +core/frontend/btb.sv \ +core/frontend/bht.sv \ +core/frontend/ras.sv \ +core/frontend/instr_scan.sv \ +core/frontend/instr_queue.sv \ +core/frontend/frontend.sv \ +core/cache_subsystem/wt_dcache_ctrl.sv \ +core/cache_subsystem/wt_dcache_mem.sv \ +core/cache_subsystem/wt_dcache_missunit.sv \ +core/cache_subsystem/wt_dcache_wbuffer.sv \ +core/cache_subsystem/wt_dcache.sv \ +core/cache_subsystem/cva6_icache.sv \ +core/cache_subsystem/wt_cache_subsystem.sv \ +core/cache_subsystem/wt_axi_adapter.sv \ +core/cache_subsystem/tag_cmp.sv \ +core/cache_subsystem/axi_adapter.sv \ +core/cache_subsystem/miss_handler.sv \ +core/cache_subsystem/cache_ctrl.sv \ 
+core/cache_subsystem/cva6_icache_axi_wrapper.sv \ +core/cache_subsystem/std_cache_subsystem.sv \ +core/cache_subsystem/std_nbdcache.sv \ +core/include/cva6_hpdcache_default_config_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv \ 
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv \ +core/cache_subsystem/cva6_hpdcache_subsystem.sv \ +core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv \ +core/cache_subsystem/cva6_hpdcache_if_adapter.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv \ +core/pmp/src/pmp.sv \ +core/pmp/src/pmp_entry.sv \ +common/local/util/instr_tracer_if.sv \ +common/local/util/instr_tracer.sv \ +common/local/util/tc_sram_wrapper.sv \ +vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv \ +common/local/util/sram.sv \ +core/mmu_sv39/mmu.sv \ +core/mmu_sv39/ptw.sv \ +core/mmu_sv39/tlb.sv \ +core/mmu_sv32/cva6_mmu_sv32.sv \ +core/mmu_sv32/cva6_ptw_sv32.sv \ +core/mmu_sv32/cva6_tlb_sv32.sv \ +core/mmu_sv32/cva6_shared_tlb_sv32.sv \ + core/cva6_rvfi.sv \ + corev_apu/tb/ariane_axi_pkg.sv \ + corev_apu/tb/axi_intf.sv \ + 
corev_apu/register_interface/src/reg_intf.sv \ + corev_apu/tb/ariane_soc_pkg.sv \ + corev_apu/riscv-dbg/src/dm_pkg.sv \ + corev_apu/tb/ariane_axi_soc_pkg.sv \ + corev_apu/src/ariane.sv \ + corev_apu/bootrom/bootrom.sv \ + corev_apu/clint/axi_lite_interface.sv \ + corev_apu/clint/clint.sv \ + corev_apu/fpga/src/axi2apb/src/axi2apb.sv \ + corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv \ + corev_apu/fpga/src/apb_timer/apb_timer.sv \ + corev_apu/fpga/src/apb_timer/timer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv \ + corev_apu/fpga/src/axi_slice/src/axi_slice.sv \ + corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv \ + corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv \ + corev_apu/axi_mem_if/src/axi2mem.sv \ + corev_apu/rv_plic/rtl/rv_plic_target.sv \ + corev_apu/rv_plic/rtl/rv_plic_gateway.sv \ + corev_apu/rv_plic/rtl/plic_regmap.sv \ + corev_apu/rv_plic/rtl/plic_top.sv \ + corev_apu/riscv-dbg/src/dmi_cdc.sv \ + corev_apu/riscv-dbg/src/dmi_jtag.sv \ + corev_apu/riscv-dbg/src/dmi_jtag_tap.sv \ + corev_apu/riscv-dbg/src/dm_csrs.sv \ + corev_apu/riscv-dbg/src/dm_mem.sv \ + corev_apu/riscv-dbg/src/dm_sba.sv \ + corev_apu/riscv-dbg/src/dm_top.sv \ + corev_apu/riscv-dbg/debug_rom/debug_rom.sv \ + corev_apu/register_interface/src/apb_to_reg.sv \ + vendor/pulp-platform/axi/src/axi_multicut.sv \ + 
vendor/pulp-platform/common_cells/src/rstgen_bypass.sv \ + vendor/pulp-platform/common_cells/src/rstgen.sv \ + vendor/pulp-platform/common_cells/src/addr_decode.sv \ + vendor/pulp-platform/common_cells/src/stream_register.sv \ + vendor/pulp-platform/axi/src/axi_cut.sv \ + vendor/pulp-platform/axi/src/axi_join.sv \ + vendor/pulp-platform/axi/src/axi_delayer.sv \ + vendor/pulp-platform/axi/src/axi_to_axi_lite.sv \ + vendor/pulp-platform/axi/src/axi_id_prepend.sv \ + vendor/pulp-platform/axi/src/axi_atop_filter.sv \ + vendor/pulp-platform/axi/src/axi_err_slv.sv \ + vendor/pulp-platform/axi/src/axi_mux.sv \ + vendor/pulp-platform/axi/src/axi_demux.sv \ + vendor/pulp-platform/axi/src/axi_xbar.sv \ + vendor/pulp-platform/common_cells/src/cdc_2phase.sv \ + vendor/pulp-platform/common_cells/src/spill_register_flushable.sv \ + vendor/pulp-platform/common_cells/src/spill_register.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv \ + vendor/pulp-platform/common_cells/src/stream_delay.sv \ + vendor/pulp-platform/common_cells/src/lfsr_16bit.sv \ + vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv \ + vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv \ + vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv \ + corev_apu/tb/ariane_testharness.sv \ + corev_apu/tb/ariane_peripherals.sv \ + corev_apu/tb/rvfi_tracer.sv \ + corev_apu/tb/common/uart.sv \ + corev_apu/tb/common/mock_uart.sv \ diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv new file mode 100644 index 0000000..e166d0b --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv @@ -0,0 +1,794 @@ +// Copyright 2019 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_cast_multi #( + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, + // FPU configuration + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig), + fpnew_pkg::max_int_width(IntFmtConfig)), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [WIDTH-1:0] operands_i, // 1 operand + input logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output 
TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS; + localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig); + + localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); + + localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; + localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; + localparam int unsigned SUPER_BIAS = 2**(SUPER_EXP_BITS - 1) - 1; + + // The internal mantissa includes normal bit or an entire integer + localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH); + // If needed, there will be a LZC for renormalization + localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH); + // The internal exponent must be able to represent the smallest denormal input value as signed + // or the number of bits in an integer + localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH), + fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1; + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [WIDTH-1:0] operands_q; + logic [NUM_FORMATS-1:0] is_boxed_q; + logic op_mod_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + fpnew_pkg::int_format_e int_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_int_fmt_q[0] = int_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + //
Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS]; + assign op_mod_q = inp_pipe_op_mod_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS]; + + // ----------------- +
// Input processing + // ----------------- + logic src_is_int, dst_is_int; // if 0, it's a float + + assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F); + assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I); + + logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit + + logic [NUM_FORMATS-1:0] fmt_sign; + logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent; + logic [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa; + logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC + + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info; + + logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val; + logic int_sign; + logic [INT_MAN_WIDTH-1:0] int_value, int_mantissa; + + // FP Input initialization + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 1 ) + ) i_fpnew_classifier ( + .operands_i ( operands_q[FP_WIDTH-1:0] ), + .is_boxed_i ( is_boxed_q[fmt] ), + .info_o ( info[fmt] ) + ); + + assign fmt_sign[fmt] = operands_q[FP_WIDTH-1]; + assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]}); + assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad + // Compensation for the difference in mantissa widths used for leading-zero count + assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS); + end else begin : inactive_format + assign info[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format 
disabled + assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + // Sign-extend INT input + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_sign_extend_int + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format // only active formats + always_comb begin : sign_ext_input + // sign-extend value only if it's signed + ifmt_input_val[ifmt] = '{default: operands_q[INT_WIDTH-1] & ~op_mod_q}; + ifmt_input_val[ifmt][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0]; + end + end else begin : inactive_format + assign ifmt_input_val[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + // Construct input mantissa from integer + assign int_value = ifmt_input_val[int_fmt_q]; + assign int_sign = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative + assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative + + // select mantissa with source format + assign encoded_mant = src_is_int ? 
int_mantissa : fmt_mantissa[src_fmt_q]; + + // -------------- + // Normalization + // -------------- + logic signed [INT_EXP_WIDTH-1:0] src_bias; // src format bias + logic signed [INT_EXP_WIDTH-1:0] src_exp; // src format exponent (biased) + logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal + logic signed [INT_EXP_WIDTH-1:0] src_offset; // src offset within mantissa + + assign src_bias = signed'(fpnew_pkg::bias(src_fmt_q)); + assign src_exp = fmt_exponent[src_fmt_q]; + assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal}); + assign src_offset = fmt_shift_compensation[src_fmt_q]; + + logic input_sign; // input sign + logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent + logic [INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa + logic mant_is_zero; // for integer zeroes + + logic signed [INT_EXP_WIDTH-1:0] fp_input_exp; + logic signed [INT_EXP_WIDTH-1:0] int_input_exp; + + // Input mantissa needs to be normalized + logic [LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount + logic [LZC_RESULT_WIDTH:0] renorm_shamt_sgn; // signed form for calculations + + // Leading-zero counter is needed for renormalization + lzc #( + .WIDTH ( INT_MAN_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( encoded_mant ), + .cnt_o ( renorm_shamt ), + .empty_o ( mant_is_zero ) + ); + assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt}); + + // Get the sign from the proper source + assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q]; + // Realign input mantissa, append zeroes if destination is wider + assign input_mant = encoded_mant << renorm_shamt; + // Unbias exponent and compensate for shift + assign fp_input_exp = signed'(src_exp + src_subnormal - src_bias - + renorm_shamt_sgn + src_offset); // compensate for shift + assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn); + + assign input_exp = src_is_int ? 
int_input_exp : fp_input_exp; + + logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination + + // Rebias the exponent + assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q)); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic input_sign_q; + logic signed [INT_EXP_WIDTH-1:0] input_exp_q; + logic [INT_MAN_WIDTH-1:0] input_mant_q; + logic signed [INT_EXP_WIDTH-1:0] destination_exp_q; + logic src_is_int_q; + logic dst_is_int_q; + fpnew_pkg::fp_info_t info_q; + logic mant_is_zero_q; + logic op_mod_q2; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::fp_format_e src_fmt_q2; + fpnew_pkg::fp_format_e dst_fmt_q2; + fpnew_pkg::int_format_e int_fmt_q2; + // Internal pipeline signals, index i holds signal after i register stages + + + logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; + logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q; + logic [0:NUM_MID_REGS] mid_pipe_src_is_int_q; + logic [0:NUM_MID_REGS] mid_pipe_dst_is_int_q; + fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_q; + logic [0:NUM_MID_REGS] mid_pipe_mant_zero_q; + logic [0:NUM_MID_REGS] mid_pipe_op_mod_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_input_sign_q[0] = input_sign; + assign 
mid_pipe_input_exp_q[0] = input_exp; + assign mid_pipe_input_mant_q[0] = input_mant; + assign mid_pipe_dest_exp_q[0] = destination_exp; + assign mid_pipe_src_is_int_q[0] = src_is_int; + assign mid_pipe_dst_is_int_q[0] = dst_is_int; + assign mid_pipe_info_q[0] = info[src_fmt_q]; + assign mid_pipe_mant_zero_q[0] = mant_is_zero; + assign mid_pipe_op_mod_q[0] = op_mod_q; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_src_fmt_q[0] = src_fmt_q; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_int_fmt_q[0] = int_fmt_q; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0) + `FFL(mid_pipe_dest_exp_q[i+1], mid_pipe_dest_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_info_q[i+1], mid_pipe_info_q[i], reg_ena, '0) + `FFL(mid_pipe_mant_zero_q[i+1], mid_pipe_mant_zero_q[i], reg_ena, '0) + `FFL(mid_pipe_op_mod_q[i+1], mid_pipe_op_mod_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; + assign input_exp_q = mid_pipe_input_exp_q[NUM_MID_REGS]; + assign input_mant_q = mid_pipe_input_mant_q[NUM_MID_REGS]; + assign 
destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS]; + assign src_is_int_q = mid_pipe_src_is_int_q[NUM_MID_REGS]; + assign dst_is_int_q = mid_pipe_dst_is_int_q[NUM_MID_REGS]; + assign info_q = mid_pipe_info_q[NUM_MID_REGS]; + assign mant_is_zero_q = mid_pipe_mant_zero_q[NUM_MID_REGS]; + assign op_mod_q2 = mid_pipe_op_mod_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign src_fmt_q2 = mid_pipe_src_fmt_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign int_fmt_q2 = mid_pipe_int_fmt_q[NUM_MID_REGS]; + + // -------- + // Casting + // -------- + logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments + + logic [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift + logic [2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit + logic [SUPER_MAN_BITS-1:0] final_mant; // mantissa after adjustments + logic [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position + + logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization + + logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits; + logic of_before_round, uf_before_round; + + + // Perform adjustments to mantissa and exponent + always_comb begin : cast_value + // Default assignment + final_exp = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits + preshift_mant = '0; // initialize mantissa container with zeroes + denorm_shamt = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa + of_before_round = 1'b0; + uf_before_round = 1'b0; + + // Place mantissa to the left of the shifter + preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1); + + // Handle INT casts + if (dst_is_int_q) begin + // By default right shift mantissa to be an integer + denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q); + // overflow: when converting to unsigned the range is larger by one + if (input_exp_q >= 
signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin + denorm_shamt = '0; // prevent shifting + of_before_round = 1'b1; + // underflow + end else if (input_exp_q < -1) begin + denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky + uf_before_round = 1'b1; + end + // Handle FP over-/underflows + end else begin + // Overflow or infinities (for proper rounding) + if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) || + (~src_is_int_q && info_q.is_inf)) begin + final_exp = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value + preshift_mant = '1; // largest normal value and RS bits set + of_before_round = 1'b1; + // Denormalize underflowing values + end else if (destination_exp_q < 1 && + destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin + final_exp = '0; // denormal result + denorm_shamt = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting + uf_before_round = 1'b1; + // Limit the shift to retain sticky bits + end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin + final_exp = '0; // denormal result + denorm_shamt = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky + uf_before_round = 1'b1; + end + end + end + + localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // removed mantissa, 1. 
and R + localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R + + // Mantissa adjustment shift + assign destination_mant = preshift_mant >> denorm_shamt; + // Extract final mantissa and round bit, discard the normal bit (for FP) + assign {final_mant, fp_round_sticky_bits[1]} = + destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1]; + assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1]; + // Collapse sticky bits + assign fp_round_sticky_bits[0] = (| {destination_mant[NUM_FP_STICKY-1:0]}); + assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]}); + + // select RS bits for destination operation + assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits; + + // ---------------------------- + // Rounding and classification + // ---------------------------- + logic [WIDTH-1:0] pre_round_abs; // absolute value of result before rnd + logic of_after_round; // overflow + logic uf_after_round; // underflow + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format + logic [NUM_FORMATS-1:0] fmt_of_after_round; + logic [NUM_FORMATS-1:0] fmt_uf_after_round; + + logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format + logic [NUM_INT_FORMATS-1:0] ifmt_of_after_round; + + logic rounded_sign; + logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding + logic result_true_zero; + + logic [WIDTH-1:0] rounded_int_res; // after possible inversion + logic rounded_int_res_zero; // after rounding + + + // Pack exponent and mantissa into proper rounding form + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble + // Set up some constants + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : 
assemble_result + fmt_pre_round_abs[fmt] = {final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend + end + end else begin : inactive_format + assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Sign-extend integer result + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_res_sign_ext + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : assemble_result + // sign-extend reusult + ifmt_pre_round_abs[ifmt] = '{default: final_int[INT_WIDTH-1]}; + ifmt_pre_round_abs[ifmt][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0]; + end + end else begin : inactive_format + assign ifmt_pre_round_abs[ifmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Select output with destination format and operation + assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2]; + + fpnew_rounding #( + .AbsWidth ( WIDTH ) + ) i_fpnew_rounding ( + .abs_value_i ( pre_round_abs ), + .sign_i ( input_sign_q ), // source format + .round_sticky_bits_i ( round_sticky_bits ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( 1'b0 ), // no operation happened + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_true_zero ) + ); + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result; + + // Detect overflows and inject sign + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : post_process + // detect of / uf + fmt_uf_after_round[fmt] = 
rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal + fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. + + // Assemble regular result, nan box short ones. Int zeroes need to be detected` + fmt_result[fmt] = '1; + fmt_result[fmt][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q + ? '0 + : {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]}; + end + end else begin : inactive_format + assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Negative integer result needs to be brought into two's complement + assign rounded_int_res = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs; + assign rounded_int_res_zero = (rounded_int_res == '0); + + // Detect integer overflows after rounding (only positives) + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_overflow + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : detect_overflow + ifmt_of_after_round[ifmt] = 1'b0; + // Int result can overflow if we're at the max exponent + if (!rounded_sign && input_exp_q == signed'(INT_WIDTH - 2 + op_mod_q2)) begin + // Check whether the rounded MSB differs from unrounded MSB + ifmt_of_after_round[ifmt] = ~rounded_int_res[INT_WIDTH-2+op_mod_q2]; + end + end + end else begin : inactive_format + assign ifmt_of_after_round[ifmt] = fpnew_pkg::DONT_CARE; + end + end + + // Classification after rounding select by destination format + assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; + assign of_after_round = dst_is_int_q ? 
ifmt_of_after_round[int_fmt_q2] : fmt_of_after_round[dst_fmt_q2]; + + // ------------------------- + // FP Special case handling + // ------------------------- + logic [WIDTH-1:0] fp_special_result; + fpnew_pkg::status_t fp_special_status; + logic fp_result_is_special; + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result; + + // Special result construction + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; + localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : special_results + logic [FP_WIDTH-1:0] special_res; + special_res = info_q.is_zero + ? 
input_sign_q << FP_WIDTH-1 // signed zero + : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN + + // Initialize special result with ones (NaN-box) + fmt_special_result[fmt] = '1; + fmt_special_result[fmt][FP_WIDTH-1:0] = special_res; + end + end else begin : inactive_format + assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Detect special case from source format, I2F casts don't produce a special result + assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero | + info_q.is_nan | + ~info_q.is_boxed); + + // Signalling input NaNs raise invalid flag, otherwise no flags set + assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0}; + + // Assemble result according to destination format + assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format + + // -------------------------- + // INT Special case handling + // -------------------------- + logic [WIDTH-1:0] int_special_result; + fpnew_pkg::status_t int_special_status; + logic int_result_is_special; + + logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result; + + // Special result construction + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : special_results + automatic logic [INT_WIDTH-1:0] special_res; + + // Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1 + special_res[INT_WIDTH-2:0] = '1; // alone yields 2**(INT_WIDTH-1)-1 + special_res[INT_WIDTH-1] = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1 + + // Negative special case (except for nans) tie to -max or 0 + if (input_sign_q && !info_q.is_nan) + special_res = ~special_res; + + // Initialize special result with sign-extension + ifmt_special_result[ifmt] = '{default: special_res[INT_WIDTH-1]}; + 
ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res; + end + end else begin : inactive_format + assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) + assign int_result_is_special = info_q.is_nan | info_q.is_inf | + of_before_round | of_after_round | ~info_q.is_boxed | + (input_sign_q & op_mod_q2 & ~rounded_int_res_zero); + + // All integer special cases are invalid + assign int_special_status = '{NV: 1'b1, default: 1'b0}; + + // Assemble result according to destination format + assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format + + // ----------------- + // Result selection + // ----------------- + fpnew_pkg::status_t int_regular_status, fp_regular_status; + + logic [WIDTH-1:0] fp_result, int_result; + fpnew_pkg::status_t fp_status, int_status; + + assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts + assign fp_regular_status.DZ = 1'b0; // no divisions + assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF + assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX; + assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f + : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round)); + assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0}; + + assign fp_result = fp_result_is_special ? fp_special_result : fmt_result[dst_fmt_q2]; + assign fp_status = fp_result_is_special ? fp_special_status : fp_regular_status; + assign int_result = int_result_is_special ? int_special_result : rounded_int_res; + assign int_status = int_result_is_special ? 
int_special_status : int_regular_status; + + // Final results for output pipeline + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + logic extension_bit; + + // Select output depending on special case detection + assign result_d = dst_is_int_q ? int_result : fp_result; + assign status_d = dst_is_int_q ? int_status : fp_status; + + // MSB of int result decides extension, otherwise NaN box + assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_ext_bit_q[0] = extension_bit; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv new file mode 100644 index 0000000..a322946 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv @@ -0,0 +1,74 @@ +// Copyright 2019 ETH Zurich and University of Bologna. 
+// + // Copyright and related rights are licensed under the Solderpad Hardware + // License, Version 0.51 (the "License"); you may not use this file except in + // compliance with the License. You may obtain a copy of the License at + // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + // or agreed to in writing, software, hardware and materials distributed under + // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + // CONDITIONS OF ANY KIND, either express or implied. See the License for the + // specific language governing permissions and limitations under the License. + // + // SPDX-License-Identifier: SHL-0.51 + + // Author: Stefan Mach + // Combinational classifier: splits each operand into sign, exponent and mantissa fields and reports its class flags in info_o + module fpnew_classifier #( + parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), // format whose exponent and mantissa widths are used to decode the operands + parameter int unsigned NumOperands = 1, // number of operands, each classified by its own generate iteration + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) +) ( + input logic [NumOperands-1:0][WIDTH-1:0] operands_i, // raw operand bits, one WIDTH-bit entry per operand + input logic [NumOperands-1:0] is_boxed_i, // per-operand boxing flag; when 0 the operand is reported as a quiet NaN and no other class flag is set + output fpnew_pkg::fp_info_t [NumOperands-1:0] info_o // per-operand classification flags +); + + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat); + + // Type definition: packed overlay of one operand, sign in the MSB, then exponent, then mantissa + typedef struct packed { + logic sign; + logic [EXP_BITS-1:0] exponent; + logic [MAN_BITS-1:0] mantissa; + } fp_t; + + // Iterate through all operands: each iteration declares its own flags and its own always_comb + for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values + + fp_t value; + logic is_boxed; + logic is_normal; + logic is_inf; + logic is_nan; + logic is_signalling; + logic is_quiet; + logic is_zero; + logic is_subnormal; + + // --------------- + // Classify Input + // --------------- + always_comb begin : classify_input + value = operands_i[op]; // view the raw bits through the fp_t fields; NOTE(review): assumes WIDTH is the sum of the sign bit, EXP_BITS and MAN_BITS, confirm in fpnew_pkg + is_boxed = is_boxed_i[op]; + is_normal = is_boxed && (value.exponent != '0) && (value.exponent != '1); // exponent is neither all-zeros nor all-ones + is_zero = is_boxed && (value.exponent == '0) && (value.mantissa == '0); // exponent and mantissa are both all-zeros, sign is ignored + is_subnormal = is_boxed && (value.exponent == '0) && 
!is_zero; // all-zeros exponent with a non-zero mantissa + is_inf = is_boxed && ((value.exponent == '1) && (value.mantissa == '0)); // all-ones exponent with a zero mantissa + is_nan = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0)); // all-ones exponent with a non-zero mantissa, or any operand that is not boxed + is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0); // boxed NaN whose mantissa MSB is clear + is_quiet = is_nan && !is_signalling; // every remaining NaN, which includes operands that are not boxed + // Assign output for current input: copy each local flag into this operand's info_o entry + info_o[op].is_normal = is_normal; + info_o[op].is_subnormal = is_subnormal; + info_o[op].is_zero = is_zero; + info_o[op].is_inf = is_inf; + info_o[op].is_nan = is_nan; + info_o[op].is_signalling = is_signalling; + info_o[op].is_quiet = is_quiet; + info_o[op].is_boxed = is_boxed; + end + end +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv new file mode 100644 index 0000000..0f7ea5d --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv @@ -0,0 +1,366 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// + // Copyright and related rights are licensed under the Solderpad Hardware + // License, Version 0.51 (the "License"); you may not use this file except in + // compliance with the License. You may obtain a copy of the License at + // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + // or agreed to in writing, software, hardware and materials distributed under + // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + // CONDITIONS OF ANY KIND, either express or implied. See the License for the + // specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_divsqrt_multi #( + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + // FPU configuration + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [1:0][WIDTH-1:0] operands_i, // 2 operands + input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + output logic divsqrt_done_o, + input logic simd_synch_done_i, + output logic divsqrt_ready_o, + input logic simd_synch_rdy_i, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + // Pipelines + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ?
((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [1:0][WIDTH-1:0] operands_q; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::operation_e op_q; + fpnew_pkg::fp_format_e dst_fmt_q; + logic in_valid_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2.
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign op_q = inp_pipe_op_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; + + // ----------------- + // Input processing + // ----------------- + logic [1:0] divsqrt_fmt; + logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit + logic input_is_fp8; + + // Translate fpnew formats into divsqrt formats + always_comb begin : translate_fmt + unique case (dst_fmt_q) + fpnew_pkg::FP32: divsqrt_fmt = 2'b00; + fpnew_pkg::FP64: divsqrt_fmt = 2'b01; + fpnew_pkg::FP16: divsqrt_fmt = 2'b10; + fpnew_pkg::FP16ALT: divsqrt_fmt = 2'b11; + default: divsqrt_fmt = 2'b10; // maps also FP8 to FP16 + endcase + + // Only if FP8 is enabled + input_is_fp8 =
FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8); + + // If FP8 is supported, map it to an FP16 value + divsqrt_operands[0] = input_is_fp8 ? operands_q[0] << 8 : operands_q[0]; + divsqrt_operands[1] = input_is_fp8 ? operands_q[1] << 8 : operands_q[1]; + end + + // ------------ + // Control FSM + // ------------ + + logic in_ready; // input handshake with upstream + logic div_valid, sqrt_valid; // input signalling with unit + logic unit_ready, unit_done, unit_done_q; // status signals from unit instance + logic op_starting; // high in the cycle a new operation starts + logic out_valid, out_ready; // output handshake with downstream + logic unit_busy; // valid data in flight + // FSM states + typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; + fsm_state_e state_q, state_d; + + // Ready synch with other lanes + // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes + assign divsqrt_ready_o = in_ready; + // Upstream ready comes from sanitization FSM, and it is synched among all the lanes + assign inp_pipe_ready[NUM_INP_REGS] = simd_synch_rdy_i; + + // Valid synch with other lanes + // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes + // As soon as all the lanes are over, we can clear this FF and start with a new operation + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done_i, 1'b0, clk_i, rst_ni); + // Tell the other units that this unit has finished now or in the past + assign divsqrt_done_o = unit_done_q | unit_done; + + // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
+ assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; + assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; + assign op_starting = div_valid | sqrt_valid; + + // FSM to safely apply and receive data from DIVSQRT unit + always_comb begin : flag_fsm + // Default assignments + in_ready = 1'b0; + out_valid = 1'b0; + unit_busy = 1'b0; + state_d = state_q; + + unique case (state_q) + // Waiting for work + IDLE: begin + in_ready = 1'b1; // we're ready + if (in_valid_q && unit_ready) begin // New work arrives + state_d = BUSY; // go into processing state + end + end + // Operation in progress + BUSY: begin + unit_busy = 1'b1; // data in flight + // If all the lanes are done with processing + if (simd_synch_done_i) begin + out_valid = 1'b1; // try to commit result downstream + // If downstream accepts our result + if (out_ready) begin + state_d = IDLE; // we anticipate going back to idling.. + if (in_valid_q && unit_ready) begin // ..unless new work comes in + in_ready = 1'b1; // we acknowledge the instruction + state_d = BUSY; // and stay busy with it + end + // Otherwise if downstream is not ready for the result + end else begin + state_d = HOLD; // wait for the pipeline to take the data + end + end + end + // Waiting with valid result for downstream + HOLD: begin + unit_busy = 1'b1; // data in flight + out_valid = 1'b1; // try to commit result downstream + // If the result is accepted by downstream + if (out_ready) begin + state_d = IDLE; // go back to idle..
+ if (in_valid_q && unit_ready) begin // ..unless new work comes in + in_ready = 1'b1; // acknowledge the new transaction + state_d = BUSY; // will be busy with the next instruction + end + end + end + // fall into idle state otherwise + default: state_d = IDLE; + endcase + + // Flushing overrides the other actions + if (flush_i) begin + unit_busy = 1'b0; // data is invalidated + out_valid = 1'b0; // cancel any valid data + state_d = IDLE; // go to default state + end + end + + // FSM status register (asynch active low reset) + `FF(state_q, state_d, IDLE) + + // Hold additional information while the operation is in progress + logic result_is_fp8_q; + TagType result_tag_q; + logic result_mask_q; + AuxType result_aux_q; + + // Fill the registers every time a valid operation arrives (load FF, active low asynch rst) + `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) + `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) + `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) + + // ----------------- + // DIVSQRT instance + // ----------------- + logic [63:0] unit_result; + logic [WIDTH-1:0] adjusted_result, held_result_q; + fpnew_pkg::status_t unit_status, held_status_q; + logic hold_en; + + div_sqrt_top_mvp i_divsqrt_lei ( + .Clk_CI ( clk_i ), + .Rst_RBI ( rst_ni ), + .Div_start_SI ( div_valid ), + .Sqrt_start_SI ( sqrt_valid ), + .Operand_a_DI ( divsqrt_operands[0] ), + .Operand_b_DI ( divsqrt_operands[1] ), + .RM_SI ( rnd_mode_q ), + .Precision_ctl_SI ( '0 ), + .Format_sel_SI ( divsqrt_fmt ), + .Kill_SI ( flush_i ), + .Result_DO ( unit_result ), + .Fflags_SO ( unit_status ), + .Ready_SO ( unit_ready ), + .Done_SO ( unit_done ) + ); + + // Adjust result width and fix FP8 + assign adjusted_result = result_is_fp8_q ?
unit_result >> 8 : unit_result; + + // Hold the result when one lane has finished execution, except when all the lanes finish together + // and the result can be accepted downstream + assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready); + // The Hold register (load, no reset) + `FFLNR(held_result_q, adjusted_result, hold_en, clk_i) + `FFLNR(held_status_q, unit_status, hold_en, clk_i) + + // -------------- + // Output Select + // -------------- + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + // Prioritize hold register data + assign result_d = unit_done_q ? held_result_q : adjusted_result; + assign status_d = unit_done_q ? held_status_q : unit_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = result_tag_q; + assign out_pipe_mask_q[0] = result_mask_q; + assign out_pipe_aux_q[0] = result_aux_q; + assign out_pipe_valid_q[0] = out_valid; + // Input stage: Propagate pipeline ready signal to inside pipe + assign out_ready = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2.
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv new file mode 100644 index 0000000..c29e7b3 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv @@ -0,0 +1,690 @@ +// Copyright 2019 ETH Zurich and University of Bologna.
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_fma #( + parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [2:0][WIDTH-1:0] operands_i, // 3 operands + input logic [2:0] is_boxed_i, // 3 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat); + 
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat); + localparam int unsigned BIAS = fpnew_pkg::bias(FpFormat); + // Precision bits 'p' include the implicit bit + localparam int unsigned PRECISION_BITS = MAN_BITS + 1; + // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection + localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3; + localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH); + // Internal exponent width of FMA must accomodate all meaningful exponent values in order to avoid + // datapath leakage. This is either given by the exponent bits or the width of the LZC result. + // In most reasonable FP formats the internal exponent will be wider than the LZC result. + localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH)); + // Shift amount width: maximum internal mantissa size is 3p+4 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // ---------------- + // Type definition + // ---------------- + typedef struct packed { + logic sign; + logic [EXP_BITS-1:0] exponent; + logic [MAN_BITS-1:0] mantissa; + } fp_t; + + // --------------- + // Input pipeline + // --------------- + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + + // ----------------- + // Input processing + // ----------------- + fpnew_pkg::fp_info_t [2:0] info_q; + + // Classify input + fpnew_classifier #( + .FpFormat ( FpFormat ), + .NumOperands ( 3 ) + ) i_class_inputs ( + .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ), + .info_o ( info_q ) + ); + + fp_t operand_a, operand_b, operand_c; + fpnew_pkg::fp_info_t info_a, info_b, info_c; + + // Operation selection and operand adjustment + // | \c op_q | \c op_mod_q | Operation Adjustment + // |:--------:|:-----------:|--------------------- + // | FMADD | \c 0 | FMADD: none + // | FMADD | \c 1 | FMSUB: Invert sign of operand C + // | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A + // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C + // | ADD | \c 0 | ADD: Set operand A to +1.0 + // | ADD | \c 1 | SUB: Set operand A to +1.0, 
invert sign of operand C + // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode + // | *others* | \c - | *invalid* + // \note \c op_mod_q always inverts the sign of the addend. + always_comb begin : op_select + + // Default assignments - packing-order-agnostic + operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; + operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; + operand_c = inp_pipe_operands_q[NUM_INP_REGS][2]; + info_a = info_q[0]; + info_b = info_q[1]; + info_c = info_q[2]; + + // op_mod_q inverts sign of operand C + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + + unique case (inp_pipe_op_q[NUM_INP_REGS]) + fpnew_pkg::FMADD: ; // do nothing + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::ADD: begin // Set multiplicand to +1 + operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0}; + info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. + end + fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending whether the rounding mode is RDN + if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN) + operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0}; + else + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. 
+ end + default: begin // propagate don't cares + operand_a = '{default: fpnew_pkg::DONT_CARE}; + operand_b = '{default: fpnew_pkg::DONT_CARE}; + operand_c = '{default: fpnew_pkg::DONT_CARE}; + info_a = '{default: fpnew_pkg::DONT_CARE}; + info_b = '{default: fpnew_pkg::DONT_CARE}; + info_c = '{default: fpnew_pkg::DONT_CARE}; + end + endcase + end + + // --------------------- + // Input classification + // --------------------- + logic any_operand_inf; + logic any_operand_nan; + logic signalling_nan; + logic effective_subtraction; + logic tentative_sign; + + // Reduction for special case handling + assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf}); + assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan}); + assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling}); + // Effective subtraction in FMA occurs when product and addend signs differ + assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign; + // The tentative sign of the FMA shall be the sign of the product + assign tentative_sign = operand_a.sign ^ operand_b.sign; + + // ---------------------- + // Special case handling + // ---------------------- + fp_t special_result; + fpnew_pkg::status_t special_status; + logic result_is_special; + + always_comb begin : special_cases + // Default assignments + special_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN + special_status = '0; + result_is_special = 1'b0; + + // Handle potentially mixed nan & infinity input => important for the case where infinity and + // zero are multiplied and added to a qnan. 
+ // RISC-V mandates raising the NV exception in these cases: + // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs) + if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin + result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN + special_status.NV = 1'b1; // invalid operation + // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP + end else if (any_operand_nan) begin + result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN + special_status.NV = signalling_nan; // raise the invalid operation flag if signalling + // Special cases involving infinity + end else if (any_operand_inf) begin + result_is_special = 1'b1; // bypass FMA + // Effective addition of opposite infinities (±inf - ±inf) is invalid! + if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction) + special_status.NV = 1'b1; // invalid operation + // Handle cases where output will be inf because of inf product input + else if (info_a.is_inf || info_b.is_inf) begin + // Result is infinity with the sign of the product + special_result = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0}; + // Handle cases where the addend is inf + end else if (info_c.is_inf) begin + // Result is inifinity with sign of the addend (= operand_c) + special_result = '{sign: operand_c.sign, exponent: '1, mantissa: '0}; + end + end + end + + // --------------------------- + // Initial exponent data path + // --------------------------- + logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c; + logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference; + logic signed [EXP_WIDTH-1:0] tentative_exponent; + + // Zero-extend exponents into signed container - implicit width extension + assign exponent_a = signed'({1'b0, operand_a.exponent}); + assign exponent_b = signed'({1'b0, operand_b.exponent}); + assign exponent_c = signed'({1'b0, operand_c.exponent}); + + // 
Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx) + // with Ex the encoded exponent and nx the implicit bit. Internal exponents stay biased. + assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm + // Biased product exponent is the sum of encoded exponents minus the bias. + assign exponent_product = (info_a.is_zero || info_b.is_zero) + ? 2 - signed'(BIAS) // in case the product is zero, set minimum exp. + : signed'(exponent_a + info_a.is_subnormal + + exponent_b + info_b.is_subnormal + - signed'(BIAS)); + // Exponent difference is the addend exponent minus the product exponent + assign exponent_difference = exponent_addend - exponent_product; + // The tentative exponent will be the larger of the product or addend exponent + assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product; + + // Shift amount for addend based on exponents (unsigned as only right shifts) + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt; + + always_comb begin : addend_shift_amount + // Product-anchored case, saturated shift (addend is only in the sticky bit) + if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) + addend_shamt = 3 * PRECISION_BITS + 4; + // Addend and product will have mutual bits to add + else if (exponent_difference <= signed'(PRECISION_BITS + 2)) + addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference); + // Addend-anchored case, saturated shift (product is only in the sticky bit) + else + addend_shamt = 0; + end + + // ------------------ + // Product data path + // ------------------ + logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c; + logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide + logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R) + + // Add implicit bits to mantissae + assign mantissa_a = {info_a.is_normal, operand_a.mantissa}; + assign 
mantissa_b = {info_b.is_normal, operand_b.mantissa}; + assign mantissa_c = {info_c.is_normal, operand_c.mantissa}; + + // Mantissa multiplier (a*b) + assign product = mantissa_a * mantissa_b; + + // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky: + // | 000...000 | product | RS | + // <- p+2 -> <- 2p -> < 2> + assign product_shifted = product << 2; // constant shift + + // ----------------- + // Addend data path + // ----------------- + logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on + logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky + logic sticky_before_add; // they are compressed into a single sticky bit + logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R) + logic inject_carry_in; // inject carry for subtractions if needed + + // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits + // are shifted out and compressed into a sticky bit. + // BEFORE THE SHIFT: + // | mantissa_c | 000..000 | + // <- p -> <- 3p+4 -> + // AFTER THE SHIFT: + // | 000..........000 | mantissa_c | 000...............0GR | sticky bits | + // <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p -> + assign {addend_after_shift, addend_sticky_bits} = + (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt; + + assign sticky_before_add = (| addend_sticky_bits); + // assign addend_after_shift[0] = sticky_before_add; + + // In case of a subtraction, the addend is inverted + assign addend_shifted = (effective_subtraction) ? 
~addend_after_shift : addend_after_shift; + assign inject_carry_in = effective_subtraction & ~sticky_before_add; + + // ------ + // Adder + // ------ + logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry + logic sum_carry; // observe carry bit from sum for sign fixing + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; + + //Mantissa adder (ab+c). In normal addition, it cannot overflow. + assign sum_raw = product_shifted + addend_shifted + inject_carry_in; + assign sum_carry = sum_raw[3*PRECISION_BITS+4]; + + // Complement negative sum (can only happen in subtraction -> overflows for positive results) + assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + + // In case of a mispredicted subtraction result, do a sign flip + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) + ? 1'b1 + : (effective_subtraction ? 1'b0 : tentative_sign); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS] mid_pipe_sticky_q; + logic 
[0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS]; + 
assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; + + // -------------- + // Normalization + // -------------- + logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched + logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes + logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count + logic lzc_zeroes; // in case only zeroes found + + logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount + logic signed [EXP_WIDTH-1:0] normalized_exponent; + + logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift + logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit + logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization + logic sticky_after_norm; // sticky bit after normalization + + logic signed [EXP_WIDTH-1:0] final_exponent; + + assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0]; + + // Leading zero counter for cancellations + lzc #( + .WIDTH ( LOWER_SUM_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( sum_lower ), + .cnt_o ( leading_zero_count ), + .empty_o ( lzc_zeroes ) + ); + + assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); + + // Normalization shift amount based on exponents and LZC (unsigned as only left shifts) + always_comb begin : norm_shift_amount + // Product-anchored case or cancellations require LZC + if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin + // Normal result (biased exponent > 0 and not a 
zero) + if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin + // Undo initial product shift, remove the counted zeroes + norm_shamt = PRECISION_BITS + 2 + leading_zero_count; + normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift + // Subnormal result + end else begin + // Cap the shift distance to align mantissa with minimum exponent + norm_shamt = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q); + normalized_exponent = 0; // subnormals encoded as 0 + end + // Addend-anchored case + end else begin + norm_shamt = addend_shamt_q; // Undo the initial shift + normalized_exponent = tentative_exponent_q; + end + end + + // Do the large normalization shift + assign sum_shifted = sum_q << norm_shamt; + + // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left + // or right of the (non-carry) MSB of the sum. + always_comb begin : small_norm + // Default assignment, discarding carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted; + final_exponent = normalized_exponent; + + // The normalized sum has overflown, align right and fix exponent + if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted >> 1; + final_exponent = normalized_exponent + 1; + // The normalized sum is normal, nothing to do + end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB + // do nothing + // The normalized sum is still denormal, align left - unless the result is not already subnormal + end else if (normalized_exponent > 1) begin + {final_mantissa, sum_sticky_bits} = sum_shifted << 1; + final_exponent = normalized_exponent - 1; + // Otherwise we're denormal + end else begin + final_exponent = '0; + end + end + + // Update the sticky bit with the shifted-out bits + assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q; + + // ---------------------------- + // Rounding and 
classification + // ---------------------------- + logic pre_round_sign; + logic [EXP_BITS-1:0] pre_round_exponent; + logic [MAN_BITS-1:0] pre_round_mantissa; + logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding + logic [1:0] round_sticky_bits; + + logic of_before_round, of_after_round; // overflow + logic uf_before_round, uf_after_round; // underflow + logic result_zero; + + logic rounded_sign; + logic [EXP_BITS+MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding + + // Classification before round. RISC-V mandates checking underflow AFTER rounding! + assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // infinity exponent is all ones + assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0 + + // Assemble result before rounding. In case of overflow, the largest normal value is set. + assign pre_round_sign = final_sign_q; + assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]); + assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit + assign pre_round_abs = {pre_round_exponent, pre_round_mantissa}; + + // In case of overflow, the round and sticky bits are set for proper rounding + assign round_sticky_bits = (of_before_round) ? 
2'b11 : {final_mantissa[0], sticky_after_norm}; + + // Perform the rounding + fpnew_rounding #( + .AbsWidth ( EXP_BITS + MAN_BITS ) + ) i_fpnew_rounding ( + .abs_value_i ( pre_round_abs ), + .sign_i ( pre_round_sign ), + .round_sticky_bits_i ( round_sticky_bits ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( effective_subtraction_q ), + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_zero ) + ); + + // Classification after rounding + assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0 + assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones + + // ----------------- + // Result selection + // ----------------- + logic [WIDTH-1:0] regular_result; + fpnew_pkg::status_t regular_status; + + // Assemble regular result + assign regular_result = {rounded_sign, rounded_abs}; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; + + // Final results for output pipeline + fp_t result_d; + fpnew_pkg::status_t status_d; + + // Select output depending on special case detection + assign result_d = result_is_special_q ? special_result_q : regular_result; + assign status_d = result_is_special_q ? 
special_status_q : regular_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv new file mode 100644 index 0000000..cceeae3 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv @@ -0,0 +1,839 @@ +// Copyright 2019 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_fma_multi #( + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [2:0][WIDTH-1:0] operands_i, // 3 operands + input logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands + input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic 
out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + // The super-format that can hold all formats + localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); + + localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; + localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; + + // Precision bits 'p' include the implicit bit + localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1; + // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection + localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3; + localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH); + // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid + // datapath leakage. This is either given by the exponent bits or the width of the LZC result. + // In most reasonable FP formats the internal exponent will be wider than the LZC result. + localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH); + // Shift amount width: maximum internal mantissa size is 3p+4 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ?
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // ---------------- + // Type definition + // ---------------- + typedef struct packed { + logic sign; + logic [SUPER_EXP_BITS-1:0] exponent; + logic [SUPER_MAN_BITS-1:0] mantissa; + } fp_t; + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [2:0][WIDTH-1:0] operands_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+ // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + + // ----------------- + // Input processing + // ----------------- + logic [NUM_FORMATS-1:0][2:0] fmt_sign; + logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent; + logic [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa; + + fpnew_pkg::fp_info_t 
[NUM_FORMATS-1:0][2:0] info_q; + + // FP Input initialization + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + logic [2:0][FP_WIDTH-1:0] trimmed_ops; + + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 3 ) + ) i_fpnew_classifier ( + .operands_i ( trimmed_ops ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ), + .info_o ( info_q[fmt] ) + ); + for (genvar op = 0; op < 3; op++) begin : gen_operands + assign trimmed_ops[op] = operands_q[op][FP_WIDTH-1:0]; + assign fmt_sign[fmt][op] = operands_q[op][FP_WIDTH-1]; + assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]}); + assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} << + (SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa + end + end else begin : inactive_format + assign info_q[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled + assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + fp_t operand_a, operand_b, operand_c; + fpnew_pkg::fp_info_t info_a, info_b, info_c; + + // Operation selection and operand adjustment + // | \c op_q | \c op_mod_q | Operation Adjustment + // |:--------:|:-----------:|--------------------- + // | FMADD | \c 0 | FMADD: none + // | FMADD | \c 1 | FMSUB: Invert sign of operand C + // | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A + // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C + 
// | ADD | \c 0 | ADD: Set operand A to +1.0 + // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C + // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode + // | *others* | \c - | *invalid* + // \note \c op_mod_q always inverts the sign of the addend. + always_comb begin : op_select + + // Default assignments - packing-order-agnostic + operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]}; + operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]}; + operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]}; + info_a = info_q[src_fmt_q][0]; + info_b = info_q[src_fmt_q][1]; + info_c = info_q[dst_fmt_q][2]; + + // op_mod_q inverts sign of operand C + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + + unique case (inp_pipe_op_q[NUM_INP_REGS]) + fpnew_pkg::FMADD: ; // do nothing + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::ADD: begin // Set multiplicand to +1 + operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0}; + info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. + end + fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending whether the rounding mode is RDN + if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN) + operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0}; + else + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. 
+ end + default: begin // propagate don't cares + operand_a = '{default: fpnew_pkg::DONT_CARE}; + operand_b = '{default: fpnew_pkg::DONT_CARE}; + operand_c = '{default: fpnew_pkg::DONT_CARE}; + info_a = '{default: fpnew_pkg::DONT_CARE}; + info_b = '{default: fpnew_pkg::DONT_CARE}; + info_c = '{default: fpnew_pkg::DONT_CARE}; + end + endcase + end + + // --------------------- + // Input classification + // --------------------- + logic any_operand_inf; + logic any_operand_nan; + logic signalling_nan; + logic effective_subtraction; + logic tentative_sign; + + // Reduction for special case handling + assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf}); + assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan}); + assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling}); + // Effective subtraction in FMA occurs when product and addend signs differ + assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign; + // The tentative sign of the FMA shall be the sign of the product + assign tentative_sign = operand_a.sign ^ operand_b.sign; + + // ---------------------- + // Special case handling + // ---------------------- + logic [WIDTH-1:0] special_result; + fpnew_pkg::status_t special_status; + logic result_is_special; + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result; + fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status; + logic [NUM_FORMATS-1:0] fmt_result_is_special; + + + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; + localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); 
+ localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0; + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : special_results + logic [FP_WIDTH-1:0] special_res; + + // Default assignment + special_res = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN + fmt_special_status[fmt] = '0; + fmt_result_is_special[fmt] = 1'b0; + + // Handle potentially mixed nan & infinity input => important for the case where infinity and + // zero are multiplied and added to a qnan. + // RISC-V mandates raising the NV exception in these cases: + // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs) + if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin + fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN + fmt_special_status[fmt].NV = 1'b1; // invalid operation + // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP + end else if (any_operand_nan) begin + fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN + fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling + // Special cases involving infinity + end else if (any_operand_inf) begin + fmt_result_is_special[fmt] = 1'b1; // bypass FMA + // Effective addition of opposite infinities (±inf - ±inf) is invalid! 
+ if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction) + fmt_special_status[fmt].NV = 1'b1; // invalid operation + // Handle cases where output will be inf because of inf product input + else if (info_a.is_inf || info_b.is_inf) begin + // Result is infinity with the sign of the product + special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA}; + // Handle cases where the addend is inf + end else if (info_c.is_inf) begin + // Result is infinity with sign of the addend (= operand_c) + special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA}; + end + end + // Initialize special result with ones (NaN-box) + fmt_special_result[fmt] = '1; + fmt_special_result[fmt][FP_WIDTH-1:0] = special_res; + end + end else begin : inactive_format + assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_special_status[fmt] = '0; + assign fmt_result_is_special[fmt] = 1'b0; + end + end + + // Select the special-case flag computed for the destination format + assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same + // Select the special-case status flags computed for the destination format + assign special_status = fmt_special_status[dst_fmt_q]; + // Assemble result according to destination format + assign special_result = fmt_special_result[dst_fmt_q]; // destination format + + // --------------------------- + // Initial exponent data path + // --------------------------- + logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c; + logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference; + logic signed [EXP_WIDTH-1:0] tentative_exponent; + + // Zero-extend exponents into signed container - implicit width extension + assign exponent_a = signed'({1'b0, operand_a.exponent}); + assign exponent_b = signed'({1'b0, operand_b.exponent}); + assign exponent_c = signed'({1'b0, operand_c.exponent}); + + // Calculate internal exponents from encoded
values. Real exponents are (ex = Ex - bias + 1 - nx) + // with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt. + assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm + // Biased product exponent is the sum of encoded exponents minus the bias. + assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp. + ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q)) + : signed'(exponent_a + info_a.is_subnormal + + exponent_b + info_b.is_subnormal + - 2*signed'(fpnew_pkg::bias(src_fmt_q)) + + signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt + // Exponent difference is the addend exponent minus the product exponent + assign exponent_difference = exponent_addend - exponent_product; + // The tentative exponent will be the larger of the product or addend exponent + assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product; + + // Shift amount for addend based on exponents (unsigned as only right shifts) + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt; + + always_comb begin : addend_shift_amount + // Product-anchored case, saturated shift (addend is only in the sticky bit) + if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) + addend_shamt = 3 * PRECISION_BITS + 4; + // Addend and product will have mutual bits to add + else if (exponent_difference <= signed'(PRECISION_BITS + 2)) + addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference); + // Addend-anchored case, saturated shift (product is only in the sticky bit) + else + addend_shamt = 0; + end + + // ------------------ + // Product data path + // ------------------ + logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c; + logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide + logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R) + + // Add implicit bits to mantissae 
+ assign mantissa_a = {info_a.is_normal, operand_a.mantissa}; + assign mantissa_b = {info_b.is_normal, operand_b.mantissa}; + assign mantissa_c = {info_c.is_normal, operand_c.mantissa}; + + // Mantissa multiplier (a*b) + assign product = mantissa_a * mantissa_b; + + // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky: + // | 000...000 | product | RS | + // <- p+2 -> <- 2p -> < 2> + assign product_shifted = product << 2; // constant shift + + // ----------------- + // Addend data path + // ----------------- + logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on + logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky + logic sticky_before_add; // they are compressed into a single sticky bit + logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R) + logic inject_carry_in; // inject carry for subtractions if needed + + // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are + // shifted out and compressed into a sticky bit. + // BEFORE THE SHIFT: + // | mantissa_c | 000..000 | + // <- p -> <- 3p+4 -> + // AFTER THE SHIFT: + // | 000..........000 | mantissa_c | 000...............0GR | sticky bits | + // <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p -> + assign {addend_after_shift, addend_sticky_bits} = + (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt; + + assign sticky_before_add = (| addend_sticky_bits); + + // In case of a subtraction, the addend is inverted + assign addend_shifted = (effective_subtraction) ? 
~addend_after_shift : addend_after_shift; + assign inject_carry_in = effective_subtraction & ~sticky_before_add; + + // ------ + // Adder + // ------ + logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry + logic sum_carry; // observe carry bit from sum for sign fixing + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; + + //Mantissa adder (ab+c). In normal addition, it cannot overflow. + assign sum_raw = product_shifted + addend_shifted + inject_carry_in; + assign sum_carry = sum_raw[3*PRECISION_BITS+4]; + + // Complement negative sum (can only happen in subtraction -> overflows for positive results) + assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + + // In case of a mispredicted subtraction result, do a sign flip + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) + ? 1'b1 + : (effective_subtraction ? 1'b0 : tentative_sign); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::fp_format_e dst_fmt_q2; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS] 
mid_pipe_sticky_q; + logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current 
stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign 
exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS]; + assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; + + // -------------- + // Normalization + // -------------- + logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched + logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes + logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count + logic lzc_zeroes; // in case only zeroes found + + logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount + logic signed [EXP_WIDTH-1:0] normalized_exponent; + + logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift + logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit + logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization + logic sticky_after_norm; // sticky bit after normalization + + logic signed [EXP_WIDTH-1:0] final_exponent; + + assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0]; + + // Leading zero counter for cancellations + lzc #( + .WIDTH ( LOWER_SUM_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( sum_lower ), + .cnt_o ( leading_zero_count ), + .empty_o ( lzc_zeroes ) + ); + + assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); + + // Normalization shift amount based on exponents and LZC (unsigned as only left shifts) + 
always_comb begin : norm_shift_amount + // Product-anchored case or cancellations require LZC + if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin + // Normal result (biased exponent > 0 and not a zero) + if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin + // Undo initial product shift, remove the counted zeroes + norm_shamt = PRECISION_BITS + 2 + leading_zero_count; + normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift + // Subnormal result + end else begin + // Cap the shift distance to align mantissa with minimum exponent + norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q)); + normalized_exponent = 0; // subnormals encoded as 0 + end + // Addend-anchored case + end else begin + norm_shamt = addend_shamt_q; // Undo the initial shift + normalized_exponent = tentative_exponent_q; + end + end + + // Do the large normalization shift + assign sum_shifted = sum_q << norm_shamt; + + // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left + // or right of the (non-carry) MSB of the sum. 
+ always_comb begin : small_norm + // Default assignment, discarding carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted; + final_exponent = normalized_exponent; + + // The normalized sum has overflown, align right and fix exponent + if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted >> 1; + final_exponent = normalized_exponent + 1; + // The normalized sum is normal, nothing to do + end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB + // do nothing + // The normalized sum is still denormal, align left - unless the result is not already subnormal + end else if (normalized_exponent > 1) begin + {final_mantissa, sum_sticky_bits} = sum_shifted << 1; + final_exponent = normalized_exponent - 1; + // Otherwise we're denormal + end else begin + final_exponent = '0; + end + end + + // Update the sticky bit with the shifted-out bits + assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q; + + // ---------------------------- + // Rounding and classification + // ---------------------------- + logic pre_round_sign; + logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding + logic [1:0] round_sticky_bits; + + logic of_before_round, of_after_round; // overflow + logic uf_before_round, uf_after_round; // underflow + + logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; // per format + logic [NUM_FORMATS-1:0][1:0] fmt_round_sticky_bits; + + logic [NUM_FORMATS-1:0] fmt_of_after_round; + logic [NUM_FORMATS-1:0] fmt_uf_after_round; + + logic rounded_sign; + logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding + logic result_zero; + + // Classification before round. RISC-V mandates checking underflow AFTER rounding! 
+ assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones + assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0 + + // Pack exponent and mantissa into proper rounding form + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble + // Set up some constants + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + logic [EXP_BITS-1:0] pre_round_exponent; + logic [MAN_BITS-1:0] pre_round_mantissa; + + if (FpFmtConfig[fmt]) begin : active_format + + assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0]; + assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS]; + // Assemble result before rounding. In case of overflow, the largest normal value is set. + assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend + + // Round bit is after mantissa (1 in case of overflow for rounding) + assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] | + of_before_round; + + // remaining bits in mantissa to sticky (1 in case of overflow for rounding) + if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky + assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) | + sticky_after_norm | of_before_round; + end else begin : normal_sticky + assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round; + end + end else begin : inactive_format + assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Assemble result before rounding. In case of overflow, the largest normal value is set. 
+ assign pre_round_sign = final_sign_q; + assign pre_round_abs = fmt_pre_round_abs[dst_fmt_q2]; + + // In case of overflow, the round and sticky bits are set for proper rounding + assign round_sticky_bits = fmt_round_sticky_bits[dst_fmt_q2]; + + // Perform the rounding + fpnew_rounding #( + .AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS ) + ) i_fpnew_rounding ( + .abs_value_i ( pre_round_abs ), + .sign_i ( pre_round_sign ), + .round_sticky_bits_i ( round_sticky_bits ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( effective_subtraction_q ), + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_zero ) + ); + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result; + + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : post_process + // detect of / uf + fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal + fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. + + // Assemble regular result, nan box short ones. 
+ fmt_result[fmt] = '1; + fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]}; + end + end else begin : inactive_format + assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Classification after rounding select by destination format + assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; + assign of_after_round = fmt_of_after_round[dst_fmt_q2]; + + + // ----------------- + // Result selection + // ----------------- + logic [WIDTH-1:0] regular_result; + fpnew_pkg::status_t regular_status; + + // Assemble regular result + assign regular_result = fmt_result[dst_fmt_q2]; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; + + // Final results for output pipeline + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + + // Select output depending on special case detection + assign result_d = result_is_special_q ? special_result_q : regular_result; + assign status_d = result_is_special_q ? 
special_status_q : regular_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv new file mode 100644 index 0000000..8a18261 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv @@ -0,0 +1,415 @@ +// Copyright 2019 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_noncomp #( // non-computational FP operations: sign injection, min/max, comparison, classification + parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // derived from FpFormat, do not change +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [1:0][WIDTH-1:0] operands_i, // 2 operands + input logic [1:0] is_boxed_i, // is-boxed flag for each of the 2 operands + input fpnew_pkg::roundmode_e rnd_mode_i, // selects the variant within the operation (see the per-operation comments) + input fpnew_pkg::operation_e op_i, // SGNJ, MINMAX, CMP or CLASSIFY; other values yield don't-care results + input logic op_mod_i, // SGNJ: extension bit follows the result sign; CMP: inverts the boolean result + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output fpnew_pkg::classmask_e class_mask_o, // classification mask of operand 0 + output logic is_class_o, // set when the operation was CLASSIFY + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- +
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat); + // Pipelines: this module has no internal pipeline, so INSIDE registers are placed at the input like BEFORE + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise + + // ---------------- + // Type definition + // ---------------- + typedef struct packed { + logic sign; + logic [EXP_BITS-1:0] exponent; + logic [MAN_BITS-1:0] mantissa; + } fp_t; + + // --------------- + // Input pipeline + // --------------- + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o =
inp_pipe_ready[0]; + // Generate the register stages (none are generated when NUM_INP_REGS is 0) + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline is ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + + // --------------------- + // Input classification + // --------------------- + fpnew_pkg::fp_info_t [1:0] info_q; + + // Classify both operands as seen at the output of the input pipeline + fpnew_classifier #( + .FpFormat ( FpFormat ), + .NumOperands ( 2 ) + ) i_class_a ( + .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ), + .info_o ( info_q ) + ); + + fp_t operand_a, operand_b; + fpnew_pkg::fp_info_t info_a, info_b; + + // Packing-order-agnostic assignments + assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; + assign operand_b =
inp_pipe_operands_q[NUM_INP_REGS][1]; + assign info_a = info_q[0]; + assign info_b = info_q[1]; + + logic any_operand_inf; + logic any_operand_nan; + logic signalling_nan; + + // Reduction for special case handling (any_operand_inf is not referenced further in this module) + assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf}); + assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan}); + assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling}); + + logic operands_equal, operand_a_smaller; + + // Operands are equal if bit-identical, or if both are zero (regardless of sign) + assign operands_equal = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero); + // Unsigned comparison of the raw encodings, inverted if either sign bit is set + assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign); + + // --------------- + // Sign Injection + // --------------- + fp_t sgnj_result; + fpnew_pkg::status_t sgnj_status; + logic sgnj_extension_bit; + + // Sign Injection - operation is encoded in the rounding mode (inp_pipe_rnd_mode_q[NUM_INP_REGS]): + // RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = Passthrough (no NaN-box check) + always_comb begin : sign_injections + logic sign_a, sign_b; // internal signs + // Default assignment + sgnj_result = operand_a; // result based on operand a + + // NaN-boxing check will treat invalid inputs as canonical NaNs + if (!info_a.is_boxed) sgnj_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; + + // Internal signs are treated as positive in case of non-NaN-boxed values + sign_a = operand_a.sign & info_a.is_boxed; + sign_b = operand_b.sign & info_b.is_boxed; + + // Do the sign injection based on rm field + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) + fpnew_pkg::RNE: sgnj_result.sign = sign_b; // SGNJ + fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b; // SGNJN + fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX + fpnew_pkg::RUP: sgnj_result = operand_a; // passthrough + default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care + endcase + end + + assign sgnj_status = '0; // sign injections never
raise exceptions + + // op_mod (inp_pipe_op_mod_q[NUM_INP_REGS]) makes the extension bit follow the result sign, i.e. integer sign-extension (for storing to integer regfile); otherwise it is 1 + assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1; + + // ------------------ + // Minimum / Maximum + // ------------------ + fp_t minmax_result; + fpnew_pkg::status_t minmax_status; + logic minmax_extension_bit; + + // Minimum/Maximum - operation is encoded in the rounding mode (inp_pipe_rnd_mode_q[NUM_INP_REGS]): + // RNE = MIN, RTZ = MAX + always_comb begin : min_max + // Default assignment + minmax_status = '0; + + // Min/Max use quiet comparisons - only sNaN are invalid + minmax_status.NV = signalling_nan; + + // Both NaN inputs cause a NaN output + if (info_a.is_nan && info_b.is_nan) + minmax_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN + // If one operand is NaN, the non-NaN operand is returned + else if (info_a.is_nan) minmax_result = operand_b; + else if (info_b.is_nan) minmax_result = operand_a; + // Otherwise decide according to the operation + else begin + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) + fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN + fpnew_pkg::RTZ: minmax_result = operand_a_smaller ?
operand_b : operand_a; // MAX + default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care + endcase + end + end + + assign minmax_extension_bit = 1'b1; // NaN-box as result is always a float value + + // ------------ + // Comparisons + // ------------ + fp_t cmp_result; + fpnew_pkg::status_t cmp_status; + logic cmp_extension_bit; + + // Comparisons - operation is encoded in the rounding mode (inp_pipe_rnd_mode_q[NUM_INP_REGS]): + // RNE = LE, RTZ = LT, RDN = EQ + // op_mod (inp_pipe_op_mod_q[NUM_INP_REGS]) inverts boolean outputs of the non-NaN comparisons + always_comb begin : comparisons + // Default assignment + cmp_result = '0; // false + cmp_status = '0; // no flags + + // Signalling NaNs always compare as false and are illegal (result stays 0 even if op_mod is set) + if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation + // Otherwise do comparisons + else begin + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) + fpnew_pkg::RNE: begin // Less than or equal + if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid, result stays 0 + else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + end + fpnew_pkg::RTZ: begin // Less than + if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid, result stays 0 + else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + end + fpnew_pkg::RDN: begin // Equal + if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal + else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + end + default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care + endcase + end + end + + assign cmp_extension_bit = 1'b0; // Comparisons always produce booleans in integer registers + + // --------------- + // Classification + // --------------- + fpnew_pkg::status_t class_status; + logic class_extension_bit; + fpnew_pkg::classmask_e class_mask_d; // the result is actually here + + // Classification - always return the classification mask on the dedicated port + always_comb begin : classify + if
(info_a.is_normal) begin + class_mask_d = operand_a.sign ? fpnew_pkg::NEGNORM : fpnew_pkg::POSNORM; + end else if (info_a.is_subnormal) begin + class_mask_d = operand_a.sign ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM; + end else if (info_a.is_zero) begin + class_mask_d = operand_a.sign ? fpnew_pkg::NEGZERO : fpnew_pkg::POSZERO; + end else if (info_a.is_inf) begin + class_mask_d = operand_a.sign ? fpnew_pkg::NEGINF : fpnew_pkg::POSINF; + end else if (info_a.is_nan) begin + class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN : fpnew_pkg::QNAN; + end else begin + class_mask_d = fpnew_pkg::QNAN; // default value if none of the class flags is set + end + end + + assign class_status = '0; // classification does not set flags + assign class_extension_bit = 1'b0; // classification always produces results in integer registers + + // ----------------- + // Result selection + // ----------------- + fp_t result_d; + fpnew_pkg::status_t status_d; + logic extension_bit_d; + logic is_class_d; + + // Select result, status and extension bit of the requested operation + always_comb begin : select_result + unique case (inp_pipe_op_q[NUM_INP_REGS]) + fpnew_pkg::SGNJ: begin + result_d = sgnj_result; + status_d = sgnj_status; + extension_bit_d = sgnj_extension_bit; + end + fpnew_pkg::MINMAX: begin + result_d = minmax_result; + status_d = minmax_status; + extension_bit_d = minmax_extension_bit; + end + fpnew_pkg::CMP: begin + result_d = cmp_result; + status_d = cmp_status; + extension_bit_d = cmp_extension_bit; + end + fpnew_pkg::CLASSIFY: begin + result_d = '{default: fpnew_pkg::DONT_CARE}; // unused, the mask is carried by class_mask_d + status_d = class_status; + extension_bit_d = class_extension_bit; + end + default: begin + result_d = '{default: fpnew_pkg::DONT_CARE}; // don't care + status_d = '{default: fpnew_pkg::DONT_CARE}; // don't care + extension_bit_d = fpnew_pkg::DONT_CARE; // don't care + end + endcase + end + + assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY); + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i
holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q; + fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; + logic [0:NUM_OUT_REGS] out_pipe_is_class_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from the result selection and the input pipeline output + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_extension_bit_q[0] = extension_bit_d; + assign out_pipe_class_mask_q[0] = class_mask_d; + assign out_pipe_is_class_q[0] = is_class_d; + assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; + assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to the input pipeline (this module has no inside pipe) + assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0]; + // Generate the register stages (none are generated when NUM_OUT_REGS is 0) + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2.
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline is ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0) + `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) + `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS]; + assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; + assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); // any valid bit set in either pipeline +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
new file mode 100644
index 0000000..2633406
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
@@ -0,0 +1,244 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach
+// Operation-group block: one fpnew_opgroup_fmt_slice per enabled PARALLEL format, one fpnew_opgroup_multifmt_slice when any_enabled_multi() holds, outputs selected by rr_arb_tree.
+module fpnew_opgroup_block #(
+  parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL,
+  // FPU configuration
+  parameter int unsigned Width = 32,
+  parameter logic EnableVectors = 1'b1,
+  parameter fpnew_pkg::fmt_logic_t FpFmtMask = '1,
+  parameter fpnew_pkg::ifmt_logic_t IntFmtMask = '1,
+  parameter fpnew_pkg::fmt_unsigned_t FmtPipeRegs = '{default: 0},
+  parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes = '{default: fpnew_pkg::PARALLEL},
+  parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+  parameter type TagType = logic, // type of tag_i / tag_o, forwarded to every slice
+  parameter int unsigned TrueSIMDClass = 0,
+  // Do not change
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS,
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtMask, EnableVectors),
+  localparam type MaskType = logic [NUM_LANES-1:0] // one bit per lane
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input fpnew_pkg::operation_e op_i,
+  input logic op_mod_i,
+  input fpnew_pkg::fp_format_e src_fmt_i,
+  input fpnew_pkg::fp_format_e dst_fmt_i,
+  input fpnew_pkg::int_format_e int_fmt_i,
+  input logic vectorial_op_i,
+  input TagType tag_i,
+  input MaskType simd_mask_i,
+  // Input Handshake
+  input logic in_valid_i,
+  output logic in_ready_o,
+  input logic flush_i,
+  // Output signals
+  output logic [Width-1:0] result_o,
+  output fpnew_pkg::status_t status_o,
+  output logic extension_bit_o,
+  output TagType tag_o,
+  // Output handshake
+  output logic out_valid_o,
+  input logic out_ready_i,
+  // Indication of valid data in flight
+  output logic busy_o
+);
+
+  // ----------------
+  // Type Definition
+  // ----------------
+  typedef struct packed {
+    logic [Width-1:0] result;
+    fpnew_pkg::status_t status;
+    logic ext_bit;
+    TagType tag;
+  } output_t; // per-format output bundle, passed to the arbiter as its DataType
+
+  // Handshake signals for the slices
+  logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy;
+  output_t [NUM_FORMATS-1:0] fmt_outputs;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i]; // Ready is given by selected format, and only while in_valid_i is high
+
+  // -------------------------
+  // Generate Parallel Slices
+  // -------------------------
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices
+    // Some constants for this format
+    localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam logic IS_FIRST_MERGED =
+        fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask);
+
+    // Generate slice only if format enabled
+    if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format
+
+      logic in_valid;
+
+      assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format
+
+      // Forward masks related to the right SIMD lane: slice lane b reads simd_mask_i[(NUM_LANES/INTERNAL_LANES)*b]
+      localparam int unsigned INTERNAL_LANES = fpnew_pkg::num_lanes(Width, fpnew_pkg::fp_format_e'(fmt), EnableVectors);
+      logic [INTERNAL_LANES-1:0] mask_slice;
+      always_comb for (int b = 0; b < INTERNAL_LANES; b++) mask_slice[b] = simd_mask_i[(NUM_LANES/INTERNAL_LANES)*b];
+
+      fpnew_opgroup_fmt_slice #(
+        .OpGroup ( OpGroup ),
+        .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
+        .Width ( Width ),
+        .EnableVectors ( EnableVectors ),
+        .NumPipeRegs ( FmtPipeRegs[fmt] ),
+        .PipeConfig ( PipeConfig ),
+        .TagType ( TagType ),
+        .TrueSIMDClass ( TrueSIMDClass )
+      ) i_fmt_slice (
+        .clk_i,
+        .rst_ni,
+        .operands_i ( operands_i ),
+        .is_boxed_i ( is_boxed_i[fmt] ),
+        .rnd_mode_i,
+        .op_i,
+        .op_mod_i,
+        .vectorial_op_i,
+        .tag_i,
+        .simd_mask_i ( mask_slice ),
+        .in_valid_i ( in_valid ),
+        .in_ready_o ( fmt_in_ready[fmt] ),
+        .flush_i,
+        .result_o ( fmt_outputs[fmt].result ),
+        .status_o ( fmt_outputs[fmt].status ),
+        .extension_bit_o( fmt_outputs[fmt].ext_bit ),
+        .tag_o ( fmt_outputs[fmt].tag ),
+        .out_valid_o ( fmt_out_valid[fmt] ),
+        .out_ready_i ( fmt_out_ready[fmt] ),
+        .busy_o ( fmt_busy[fmt] )
+      );
+    // If the format wants to use merged ops, tie off the dangling ones not used here
+    end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused
+
+      localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+      // Ready is split up into formats: mirror the entry at index FMT, which the merged slice below drives
+      assign fmt_in_ready[fmt] = fmt_in_ready[int'(FMT)];
+
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt] = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE);
+
+    // Tie off disabled formats
+    end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt
+      assign fmt_in_ready[fmt] = 1'b0; // don't accept operations
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt] = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE);
+    end
+  end
+
+  // ----------------------
+  // Generate Merged Slice
+  // ----------------------
+  if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice
+
+    localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask); // index of the fmt_* entries driven by the merged slice
+    localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask); // passed on as NumPipeRegs
+
+    logic in_valid;
+
+    assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED); // only when the destination format is configured as MERGED
+
+    fpnew_opgroup_multifmt_slice #(
+      .OpGroup ( OpGroup ),
+      .Width ( Width ),
+      .FpFmtConfig ( FpFmtMask ),
+      .IntFmtConfig ( IntFmtMask ),
+      .EnableVectors ( EnableVectors ),
+      .NumPipeRegs ( REG ),
+      .PipeConfig ( PipeConfig ),
+      .TagType ( TagType )
+    ) i_multifmt_slice (
+      .clk_i,
+      .rst_ni,
+      .operands_i,
+      .is_boxed_i,
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .simd_mask_i ( simd_mask_i ),
+      .in_valid_i ( in_valid ),
+      .in_ready_o ( fmt_in_ready[FMT] ),
+      .flush_i,
+      .result_o ( fmt_outputs[FMT].result ),
+      .status_o ( fmt_outputs[FMT].status ),
+      .extension_bit_o ( fmt_outputs[FMT].ext_bit ),
+      .tag_o ( fmt_outputs[FMT].tag ),
+      .out_valid_o ( fmt_out_valid[FMT] ),
+      .out_ready_i ( fmt_out_ready[FMT] ),
+      .busy_o ( fmt_busy[FMT] )
+    );
+
+  end
+
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+
+  // Round-Robin arbiter to decide which result to use
+  rr_arb_tree #(
+    .NumIn ( NUM_FORMATS ),
+    .DataType ( output_t ),
+    .AxiVldRdy ( 1'b1 )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i ( '0 ),
+    .req_i ( fmt_out_valid ),
+    .gnt_o ( fmt_out_ready ),
+    .data_i ( fmt_outputs ),
+    .gnt_i ( out_ready_i ),
+    .req_o ( out_valid_o ),
+    .data_o ( arbiter_output ),
+    .idx_o ( /* unused */ )
+  );
+
+  // Unpack output
+  assign result_o = arbiter_output.result;
+  assign status_o = arbiter_output.status;
+  assign extension_bit_o = arbiter_output.ext_bit;
+  assign tag_o = arbiter_output.tag;
+
+  assign busy_o = (| fmt_busy); // busy while any format slice reports busy
+
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv
new file mode 100644
index 0000000..35fbe48
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv
@@ -0,0 +1,292 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach
+// Single-format slice of an operation group: cuts the operands into NUM_LANES lanes of FP_WIDTH bits, runs one unit per lane, and packs lane results, class masks and status flags back together.
+module fpnew_opgroup_fmt_slice #(
+  parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL,
+  parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
+  // FPU configuration
+  parameter int unsigned Width = 32,
+  parameter logic EnableVectors = 1'b1,
+  parameter int unsigned NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+  parameter type TagType = logic, // type of tag_i / tag_o, forwarded to the lane units
+  parameter int unsigned TrueSIMDClass = 0,
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors),
+  localparam type MaskType = logic [NUM_LANES-1:0] // one bit per lane
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input fpnew_pkg::operation_e op_i,
+  input logic op_mod_i,
+  input logic vectorial_op_i,
+  input TagType tag_i,
+  input MaskType simd_mask_i,
+  // Input Handshake
+  input logic in_valid_i,
+  output logic in_ready_o,
+  input logic flush_i,
+  // Output signals
+  output logic [Width-1:0] result_o,
+  output fpnew_pkg::status_t status_o,
+  output logic extension_bit_o,
+  output TagType tag_o,
+  // Output handshake
+  output logic out_valid_o,
+  input logic out_ready_i,
+  // Indication of valid data in flight
+  output logic busy_o
+);
+
+  localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat);
+  localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES);
+
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic vectorial_op;
+
+  logic [NUM_LANES*FP_WIDTH-1:0] slice_result;
+  logic [Width-1:0] slice_regular_result, slice_class_result, slice_vec_class_result;
+
+  fpnew_pkg::status_t [NUM_LANES-1:0] lane_status;
+  logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
+  fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask;
+  TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used
+  logic [NUM_LANES-1:0] lane_masks; // mask bit returned by each lane unit, gates that lane's status flags below
+  logic [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // ditto for lane_vectorial/lane_is_class; lane_busy is OR-reduced over all lanes
+
+  logic result_is_vector, result_is_class;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    logic [FP_WIDTH-1:0] local_result; // lane-local results
+    logic local_sign;
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands
+      logic [FP_WIDTH-1:0] op_result; // lane-local results
+      fpnew_pkg::status_t op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+      // Slice out the operands for this lane
+      always_comb begin : prepare_input
+        for (int i = 0; i < int'(NUM_OPERANDS); i++) begin
+          local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH];
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma #(
+          .FpFormat ( FpFormat ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig ( PipeConfig ),
+          .TagType ( TagType ),
+          .AuxType ( logic )
+        ) i_fma (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands ),
+          .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .mask_i ( simd_mask_i[lane] ),
+          .aux_i ( vectorial_op ), // Remember whether operation was vectorial
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .mask_o ( lane_masks[lane] ),
+          .aux_o ( lane_vectorial[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+        assign lane_is_class[lane] = 1'b0;
+        assign lane_class_mask[lane] = fpnew_pkg::NEGINF; // placeholder value; lane_is_class is tied to 0 in this branch
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance // NOTE(review): instance below is commented out, so nothing in this branch drives the lane's handshake/result signals
+        // fpnew_divsqrt #(
+        // .FpFormat (FpFormat),
+        // .NumPipeRegs(NumPipeRegs),
+        // .PipeConfig (PipeConfig),
+        // .TagType (TagType),
+        // .AuxType (logic)
+        // ) i_divsqrt (
+        // .clk_i,
+        // .rst_ni,
+        // .operands_i ( local_operands ),
+        // .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
+        // .rnd_mode_i,
+        // .op_i,
+        // .op_mod_i,
+        // .tag_i,
+        // .aux_i ( vectorial_op ), // Remember whether operation was vectorial
+        // .in_valid_i ( in_valid ),
+        // .in_ready_o ( lane_in_ready[lane] ),
+        // .flush_i,
+        // .result_o ( op_result ),
+        // .status_o ( op_status ),
+        // .extension_bit_o ( lane_ext_bit[lane] ),
+        // .tag_o ( lane_tags[lane] ),
+        // .aux_o ( lane_vectorial[lane] ),
+        // .out_valid_o ( out_valid ),
+        // .out_ready_i ( out_ready ),
+        // .busy_o ( lane_busy[lane] )
+        // );
+        // assign lane_is_class[lane] = 1'b0;
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        fpnew_noncomp #(
+          .FpFormat (FpFormat),
+          .NumPipeRegs(NumPipeRegs),
+          .PipeConfig (PipeConfig),
+          .TagType (TagType),
+          .AuxType (logic)
+        ) i_noncomp (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands ),
+          .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .mask_i ( simd_mask_i[lane] ),
+          .aux_i ( vectorial_op ), // Remember whether operation was vectorial
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .class_mask_o ( lane_class_mask[lane] ),
+          .is_class_o ( lane_is_class[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .mask_o ( lane_masks[lane] ),
+          .aux_o ( lane_vectorial[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane] = 1'b0; // unused lane
+      assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane] = '0;
+      assign lane_busy[lane] = 1'b0;
+      assign lane_is_class[lane] = 1'b0;
+    end
+
+    // Insert lane result into slice result
+    assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result;
+
+    // Create Classification results
+    if (TrueSIMDClass && SIMD_WIDTH >= 10) begin : vectorial_true_class // true vectorial class blocks are 10bits in size
+      assign slice_vec_class_result[lane*SIMD_WIDTH +: 10] = lane_class_mask[lane];
+      assign slice_vec_class_result[(lane+1)*SIMD_WIDTH-1 -: SIMD_WIDTH-10] = '0;
+    end else if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size
+      assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGZERO);
+      // Write the current block segment
+      assign slice_vec_class_result[(lane+1)*8-1:lane*8] = {
+        local_sign, // BIT 7
+        ~local_sign, // BIT 6
+        lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5
+        lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4
+        lane_class_mask[lane] == fpnew_pkg::POSZERO
+        || lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3
+        lane_class_mask[lane] == fpnew_pkg::POSSUBNORM
+        || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2
+        lane_class_mask[lane] == fpnew_pkg::POSNORM
+        || lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1
+        lane_class_mask[lane] == fpnew_pkg::POSINF
+        || lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0
+      };
+    end
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  assign result_is_vector = lane_vectorial[0];
+  assign result_is_class = lane_is_class[0];
+
+  assign slice_regular_result = $signed({extension_bit_o, slice_result}); // signed cast: any bits above the packed lanes take the value of extension_bit_o
+
+  localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8; // result bits covered by the 8-bit class blocks
+
+  // Pad out unused vec_class bits if each classify result is on 8 bits
+  if (!(TrueSIMDClass && SIMD_WIDTH >= 10)) begin
+    if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class
+      assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0;
+    end
+  end
+
+  // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1;
+
+  assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0];
+
+  // Select the proper result
+  assign result_o = result_is_class ? slice_class_result : slice_regular_result;
+
+  assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused
+  assign tag_o = lane_tags[0]; // upper lanes unused
+  assign busy_o = (| lane_busy);
+  assign out_valid_o = lane_out_valid[0]; // upper lanes unused
+
+
+  // Collapse the lane status
+  always_comb begin : output_processing
+    // Collapse the status: OR together the flags of every lane whose mask bit is set
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i] & {5{lane_masks[i]}}; // mask bit replicated 5x -- presumably the width of status_t; TODO confirm against fpnew_pkg
+    status_o = temp_status;
+  end
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
new file mode 100644
index 0000000..08facb8
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
@@ -0,0 +1,449 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_opgroup_multifmt_slice #( + parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, + parameter int unsigned Width = 64, + // FPU configuration + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, + parameter logic EnableVectors = 1'b1, + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + // Do not change + localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors), + localparam type MaskType = logic [NUM_SIMD_LANES-1:0] +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i, + input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i, + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input logic vectorial_op_i, + input TagType tag_i, + input MaskType simd_mask_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [Width-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + localparam int unsigned MAX_FP_WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig); + localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig); + localparam int unsigned NUM_LANES = 
fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1); + localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS; + // We will send the format information along with the data + localparam int unsigned FMT_BITS = + fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS)); + localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags + + logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes + logic vectorial_op; + logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation + logic [AUX_BITS-1:0] aux_data; + + // additional flags for CONV + logic dst_fmt_is_int, dst_is_cpk; + logic [1:0] dst_vec_op; // info for vectorial results (for packing) + logic [2:0] target_aux_d, target_aux_q; + logic is_up_cast, is_down_cast; + + logic [NUM_FORMATS-1:0][Width-1:0] fmt_slice_result; + logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result; + logic [Width-1:0] conv_slice_result; + + + logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register + + fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; + logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used + TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used + logic [NUM_LANES-1:0] lane_masks; + logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used + logic [NUM_LANES-1:0] lane_busy; // dito + + logic result_is_vector; + logic [FMT_BITS-1:0] result_fmt; + logic result_fmt_is_int, result_is_cpk; + logic [1:0] result_vec_op; // info for vectorial results (for packing) + + logic simd_synch_rdy, simd_synch_done; + + // ----------- + // Input Side + // ----------- + assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane + assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled + + // Cast-and-Pack ops are encoded in operation and modifier + assign 
dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I); + assign dst_is_cpk = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB || + op_i == fpnew_pkg::CPKCD); + assign dst_vec_op = (OpGroup == fpnew_pkg::CONV) & {(op_i == fpnew_pkg::CPKCD), op_mod_i}; + + assign is_up_cast = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i)); + assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i)); + + // The destination format is the int format for F2I casts + assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i; + + // The data sent along consists of the vectorial flag and format bits + assign aux_data = {dst_fmt_is_int, vectorial_op, dst_fmt}; + assign target_aux_d = {dst_vec_op, dst_is_cpk}; + + // CONV passes one operand for assembly after the unit: opC for cpk, opB for others + if (OpGroup == fpnew_pkg::CONV) begin : conv_target + assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1]; + end + + // For 2-operand units, prepare boxing info + logic [NUM_FORMATS-1:0] is_boxed_1op; + logic [NUM_FORMATS-1:0][1:0] is_boxed_2op; + + always_comb begin : boxed_2op + for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin + is_boxed_1op[fmt] = is_boxed_i[fmt][0]; + is_boxed_2op[fmt] = is_boxed_i[fmt][1:0]; + end + end + + // --------------- + // Generate Lanes + // --------------- + for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes + localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter + // Get a mask of active formats for this lane + localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS = + fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE); + localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS = + fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE); + localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS); + + // Cast-specific parameters + localparam fpnew_pkg::fmt_logic_t CONV_FORMATS = + 
fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE); + localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS = + fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE); + localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS); + + // Lane parameters from Opgroup + localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV) + ? CONV_FORMATS : ACTIVE_FORMATS; + localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH; + + logic [LANE_WIDTH-1:0] local_result; // lane-local results + + // Generate instances only if needed, lane 0 always generated + if ((lane == 0) || EnableVectors) begin : active_lane + logic in_valid, out_valid, out_ready; // lane-local handshake + + logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local oprands + logic [LANE_WIDTH-1:0] op_result; // lane-local results + fpnew_pkg::status_t op_status; + + assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors + + // Slice out the operands for this lane, upper bits are ignored in the unit + always_comb begin : prepare_input + for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin + local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i); + end + + // override operand 0 for some conversions + if (OpGroup == fpnew_pkg::CONV) begin + // Source is an integer + if (op_i == fpnew_pkg::I2F) begin + local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i); + // vectorial F2F up casts + end else if (op_i == fpnew_pkg::F2F) begin + if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half + local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::fp_width(src_fmt_i) + + MAX_FP_WIDTH/2; + end + // CPK + end else if (dst_is_cpk) begin + if (lane == 1) begin + local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument + end + end + end + end + + // Instantiate the operation from the selected 
opgroup + if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance + fpnew_fma_multi #( + .FpFmtConfig ( LANE_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_fma_multi ( + .clk_i, + .rst_ni, + .operands_i ( local_operands ), + .is_boxed_i, + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); + + end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance + fpnew_divsqrt_multi #( + .FpFmtConfig ( LANE_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_divsqrt_multi ( + .clk_i, + .rst_ni, + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i, + .op_i, + .dst_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .divsqrt_done_o ( divsqrt_done[lane] ), + .simd_synch_done_i( simd_synch_done ), + .divsqrt_ready_o ( divsqrt_ready[lane]), + .simd_synch_rdy_i( simd_synch_rdy ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); + end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance + + end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance + 
fpnew_cast_multi #( + .FpFmtConfig ( LANE_FORMATS ), + .IntFmtConfig ( CONV_INT_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_cast_multi ( + .clk_i, + .rst_ni, + .operands_i ( local_operands[0] ), + .is_boxed_i ( is_boxed_1op ), + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .int_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); + end // ADD OTHER OPTIONS HERE + + // Handshakes are only done if the lane is actually used + assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); + assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); + + // Properly NaN-box or sign-extend the slice result if not in use + assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = lane_out_valid[lane] ? 
op_status : '0; + + // Otherwise generate constant sign-extension + end else begin : inactive_lane + assign lane_out_valid[lane] = 1'b0; // unused lane + assign lane_in_ready[lane] = 1'b0; // unused lane + assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box + assign lane_status[lane] = '0; + assign lane_busy[lane] = 1'b0; + end + + // Generate result packing depending on float format + for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + // only for active formats within the lane + if (ACTIVE_FORMATS[fmt]) begin + assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] = + local_result[FP_WIDTH-1:0]; + end else if ((LANE+1)*FP_WIDTH <= Width) begin + assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] = + '{default: lane_ext_bit[LANE]}; + end else if (LANE*FP_WIDTH < Width) begin + assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] = + '{default: lane_ext_bit[LANE]}; + end + end + + // Generate result packing depending on integer format + if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled + for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + if (ACTIVE_INT_FORMATS[ifmt]) begin + assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = + local_result[INT_WIDTH-1:0]; + end else if ((LANE+1)*INT_WIDTH <= Width) begin + assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0; + end else if (LANE*INT_WIDTH < Width) begin + assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0; + end + end + end + end + + // Extend slice result if needed + for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result + // Set up some constants + localparam int unsigned FP_WIDTH = 
fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + if (NUM_LANES*FP_WIDTH < Width) + assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]}; + end + + // Mute int results if unused + for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled + if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result + assign ifmt_slice_result[ifmt] = '0; + end + end + + // Bypass lanes with target operand for vectorial casts + if (OpGroup == fpnew_pkg::CONV) begin : target_regs + // Bypass pipeline signals, index i holds signal after i register stages + logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q; + logic [0:NumPipeRegs][2:0] byp_pipe_aux_q; + logic [0:NumPipeRegs] byp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NumPipeRegs] byp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign byp_pipe_target_q[0] = conv_target_d; + assign byp_pipe_aux_q[0] = target_aux_d; + assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op; + // Generate the register stages + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) + `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector; + // Output stage: assign module outputs + assign conv_target_q = byp_pipe_target_q[NumPipeRegs]; + + // decode the aux data + assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs]; + end else begin : no_conv + assign {result_vec_op, result_is_cpk} = '0; + end + + // Synch lanes if there is more than one + assign simd_synch_rdy = EnableVectors ? &divsqrt_ready : divsqrt_ready[0]; + assign simd_synch_done = EnableVectors ? &divsqrt_done : divsqrt_done[0]; + + // ------------ + // Output Side + // ------------ + assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0]; + + assign result_o = result_fmt_is_int + ? 
ifmt_slice_result[result_fmt] + : fmt_slice_result[result_fmt]; + + assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones + assign tag_o = lane_tags[0]; // don't care about upper ones + assign busy_o = (| lane_busy); + + assign out_valid_o = lane_out_valid[0]; // don't care about upper ones + + // Collapse the status + always_comb begin : output_processing + // Collapse the status + automatic fpnew_pkg::status_t temp_status; + temp_status = '0; + for (int i = 0; i < int'(NUM_LANES); i++) + temp_status |= lane_status[i] & {5{lane_masks[i]}}; + status_o = temp_status; + end + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv new file mode 100644 index 0000000..7addc3e --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv @@ -0,0 +1,495 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +package fpnew_pkg; + + // --------- + // FP TYPES + // --------- + // | Enumerator | Format | Width | EXP_BITS | MAN_BITS + // |:----------:|------------------|-------:|:--------:|:--------: + // | FP32 | IEEE binary32 | 32 bit | 8 | 23 + // | FP64 | IEEE binary64 | 64 bit | 11 | 52 + // | FP16 | IEEE binary16 | 16 bit | 5 | 10 + // | FP8 | binary8 | 8 bit | 5 | 2 + // | FP16ALT | binary16alt | 16 bit | 8 | 7 + // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility! + + // Encoding for a format + typedef struct packed { + int unsigned exp_bits; + int unsigned man_bits; + } fp_encoding_t; + + localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats + localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS); + + // FP formats + typedef enum logic [FP_FORMAT_BITS-1:0] { + FP32 = 'd0, + FP64 = 'd1, + FP16 = 'd2, + FP8 = 'd3, + FP16ALT = 'd4 + // add new formats here + } fp_format_e; + + // Encodings for supported FP formats + localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS = '{ + '{8, 23}, // IEEE binary32 (single) + '{11, 52}, // IEEE binary64 (double) + '{5, 10}, // IEEE binary16 (half) + '{5, 2}, // custom binary8 + '{8, 7} // custom binary16alt + // add new formats here + }; + + typedef logic [0:NUM_FP_FORMATS-1] fmt_logic_t; // Logic indexed by FP format (for masks) + typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format + + localparam fmt_logic_t CPK_FORMATS = 5'b11000; // FP32 and FP64 can provide CPK only + + // --------- + // INT TYPES + // --------- + // | Enumerator | Width | + // |:----------:|-------:| + // | INT8 | 8 bit | + // | INT16 | 16 bit | + // | INT32 | 32 bit | + // | INT64 | 64 bit | + // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+ + localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats + localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS); + + // Int formats + typedef enum logic [INT_FORMAT_BITS-1:0] { + INT8, + INT16, + INT32, + INT64 + // add new formats here + } int_format_e; + + // Returns the width of an INT format by index + function automatic int unsigned int_width(int_format_e ifmt); + unique case (ifmt) + INT8: return 8; + INT16: return 16; + INT32: return 32; + INT64: return 64; + default: begin + // pragma translate_off + $fatal(1, "Invalid INT format supplied"); + // pragma translate_on + // just return any integer to avoid any latches + // hopefully this error is caught by simulation + return INT8; + end + endcase + endfunction + + typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks) + + // -------------- + // FP OPERATIONS + // -------------- + localparam int unsigned NUM_OPGROUPS = 4; + + // Each FP operation belongs to an operation group + typedef enum logic [1:0] { + ADDMUL, DIVSQRT, NONCOMP, CONV + } opgroup_e; + + localparam int unsigned OP_BITS = 4; + + typedef enum logic [OP_BITS-1:0] { + FMADD, FNMSUB, ADD, MUL, // ADDMUL operation group + DIV, SQRT, // DIVSQRT operation group + SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group + F2F, F2I, I2F, CPKAB, CPKCD // CONV operation group + } operation_e; + + // ------------------- + // RISC-V FP-SPECIFIC + // ------------------- + // Rounding modes + typedef enum logic [2:0] { + RNE = 3'b000, + RTZ = 3'b001, + RDN = 3'b010, + RUP = 3'b011, + RMM = 3'b100, + ROD = 3'b101, // This mode is not defined in RISC-V FP-SPEC + DYN = 3'b111 + } roundmode_e; + + // Status flags + typedef struct packed { + logic NV; // Invalid + logic DZ; // Divide by zero + logic OF; // Overflow + logic UF; // Underflow + logic NX; // Inexact + } status_t; + + // Information about a floating point value + typedef struct packed { + logic is_normal; // is the value normal + 
logic is_subnormal; // is the value subnormal + logic is_zero; // is the value zero + logic is_inf; // is the value infinity + logic is_nan; // is the value NaN + logic is_signalling; // is the value a signalling NaN + logic is_quiet; // is the value a quiet NaN + logic is_boxed; // is the value properly NaN-boxed (RISC-V specific) + } fp_info_t; + + // Classification mask + typedef enum logic [9:0] { + NEGINF = 10'b00_0000_0001, + NEGNORM = 10'b00_0000_0010, + NEGSUBNORM = 10'b00_0000_0100, + NEGZERO = 10'b00_0000_1000, + POSZERO = 10'b00_0001_0000, + POSSUBNORM = 10'b00_0010_0000, + POSNORM = 10'b00_0100_0000, + POSINF = 10'b00_1000_0000, + SNAN = 10'b01_0000_0000, + QNAN = 10'b10_0000_0000 + } classmask_e; + + // ------------------ + // FPU configuration + // ------------------ + // Pipelining registers can be inserted (at elaboration time) into operational units + typedef enum logic [1:0] { + BEFORE, // registers are inserted at the inputs of the unit + AFTER, // registers are inserted at the outputs of the unit + INSIDE, // registers are inserted at predetermined (suboptimal) locations in the unit + DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE + } pipe_config_t; + + // Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all. 
+ typedef enum logic [1:0] { + DISABLED, // arithmetic units are not generated + PARALLEL, // arithmetic units are generated in parallel slices, one for each format + MERGED // arithmetic units are contained within a merged unit holding multiple formats + } unit_type_t; + + // Array of unit types indexed by format + typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t; + + // Array of format-specific unit types by opgroup + typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t; + // same with unsigned + typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t; + + // FPU configuration: features + typedef struct packed { + int unsigned Width; + logic EnableVectors; + logic EnableNanBox; + fmt_logic_t FpFmtMask; + ifmt_logic_t IntFmtMask; + } fpu_features_t; + + localparam fpu_features_t RV64D = '{ + Width: 64, + EnableVectors: 1'b0, + EnableNanBox: 1'b1, + FpFmtMask: 5'b11000, + IntFmtMask: 4'b0011 + }; + + localparam fpu_features_t RV32D = '{ + Width: 64, + EnableVectors: 1'b1, + EnableNanBox: 1'b1, + FpFmtMask: 5'b11000, + IntFmtMask: 4'b0010 + }; + + localparam fpu_features_t RV32F = '{ + Width: 32, + EnableVectors: 1'b0, + EnableNanBox: 1'b1, + FpFmtMask: 5'b10000, + IntFmtMask: 4'b0010 + }; + + localparam fpu_features_t RV64D_Xsflt = '{ + Width: 64, + EnableVectors: 1'b1, + EnableNanBox: 1'b1, + FpFmtMask: 5'b11111, + IntFmtMask: 4'b1111 + }; + + localparam fpu_features_t RV32F_Xsflt = '{ + Width: 32, + EnableVectors: 1'b1, + EnableNanBox: 1'b1, + FpFmtMask: 5'b10111, + IntFmtMask: 4'b1110 + }; + + localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{ + Width: 32, + EnableVectors: 1'b1, + EnableNanBox: 1'b1, + FpFmtMask: 5'b10001, + IntFmtMask: 4'b0110 + }; + + + // FPU configuration: implementation + typedef struct packed { + opgrp_fmt_unsigned_t PipeRegs; + opgrp_fmt_unit_types_t UnitTypes; + pipe_config_t PipeConfig; + } fpu_implementation_t; + + localparam fpu_implementation_t DEFAULT_NOREGS = '{ + PipeRegs: '{default: 0}, + UnitTypes:
'{'{default: PARALLEL}, // ADDMUL + '{default: MERGED}, // DIVSQRT + '{default: PARALLEL}, // NONCOMP + '{default: MERGED}}, // CONV + PipeConfig: BEFORE + }; + + localparam fpu_implementation_t DEFAULT_SNITCH = '{ + PipeRegs: '{default: 1}, + UnitTypes: '{'{default: PARALLEL}, // ADDMUL + '{default: DISABLED}, // DIVSQRT + '{default: PARALLEL}, // NONCOMP + '{default: MERGED}}, // CONV + PipeConfig: BEFORE + }; + + // ----------------------- + // Synthesis optimization + // ----------------------- + localparam logic DONT_CARE = 1'b1; // the value to assign as don't care + + // ------------------------- + // General helper functions + // ------------------------- + function automatic int minimum(int a, int b); + return (a < b) ? a : b; + endfunction + + function automatic int maximum(int a, int b); + return (a > b) ? a : b; + endfunction + + // ------------------------------------------- + // Helper functions for FP formats and values + // ------------------------------------------- + // Returns the width of a FP format + function automatic int unsigned fp_width(fp_format_e fmt); + return FP_ENCODINGS[fmt].exp_bits + FP_ENCODINGS[fmt].man_bits + 1; + endfunction + + // Returns the width of the widest FP format present (0 if none is enabled) + function automatic int unsigned max_fp_width(fmt_logic_t cfg); + automatic int unsigned res = 0; + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) + if (cfg[i]) + res = unsigned'(maximum(res, fp_width(fp_format_e'(i)))); + return res; + endfunction + + // Returns the width of the narrowest FP format present + function automatic int unsigned min_fp_width(fmt_logic_t cfg); + automatic int unsigned res = max_fp_width(cfg); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) + if (cfg[i]) + res = unsigned'(minimum(res, fp_width(fp_format_e'(i)))); + return res; + endfunction + + // Returns the number of exponent bits for a format + function automatic int unsigned exp_bits(fp_format_e fmt); + return FP_ENCODINGS[fmt].exp_bits; + endfunction + + // Returns the number of mantissa
bits for a format + function automatic int unsigned man_bits(fp_format_e fmt); + return FP_ENCODINGS[fmt].man_bits; + endfunction + + // Returns the bias value for a given format (as per IEEE 754-2008) + function automatic int unsigned bias(fp_format_e fmt); + return unsigned'(2**(FP_ENCODINGS[fmt].exp_bits-1)-1); // symmetrical bias + endfunction + + function automatic fp_encoding_t super_format(fmt_logic_t cfg); + automatic fp_encoding_t res; + res = '0; + for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + if (cfg[fmt]) begin // only active format + res.exp_bits = unsigned'(maximum(res.exp_bits, exp_bits(fp_format_e'(fmt)))); + res.man_bits = unsigned'(maximum(res.man_bits, man_bits(fp_format_e'(fmt)))); + end + return res; + endfunction + + // ------------------------------------------- + // Helper functions for INT formats and values + // ------------------------------------------- + // Returns the widest INT format present + function automatic int unsigned max_int_width(ifmt_logic_t cfg); + automatic int unsigned res = 0; + for (int ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin + if (cfg[ifmt]) res = maximum(res, int_width(int_format_e'(ifmt))); + end + return res; + endfunction + + // -------------------------------------------------- + // Helper functions for operations and FPU structure + // -------------------------------------------------- + // Returns the operation group of the given operation + function automatic opgroup_e get_opgroup(operation_e op); + unique case (op) + FMADD, FNMSUB, ADD, MUL: return ADDMUL; + DIV, SQRT: return DIVSQRT; + SGNJ, MINMAX, CMP, CLASSIFY: return NONCOMP; + F2F, F2I, I2F, CPKAB, CPKCD: return CONV; + default: return NONCOMP; + endcase + endfunction + + // Returns the number of operands by operation group + function automatic int unsigned num_operands(opgroup_e grp); + unique case (grp) + ADDMUL: return 3; + DIVSQRT: return 2; + NONCOMP: return 2; + CONV: return 3; // vectorial casts use 3 operands + default: 
return 0; + endcase + endfunction + + // Returns the number of lanes according to width, format and vectors + function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec); + return vec ? width / fp_width(fmt) : 1; // if no vectors, only one lane + endfunction + + // Returns the maximum number of lanes in the FPU according to width, format config and vectors + function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec); + return vec ? width / min_fp_width(cfg) : 1; // if no vectors, only one lane + endfunction + + // Returns a mask of active FP formats that are present in lane lane_no of a multiformat slice + function automatic fmt_logic_t get_lane_formats(int unsigned width, + fmt_logic_t cfg, + int unsigned lane_no); + automatic fmt_logic_t res; + for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + // Mask active formats with the number of lanes for that format + res[fmt] = cfg[fmt] & (width / fp_width(fp_format_e'(fmt)) > lane_no); + return res; + endfunction + + // Returns a mask of active INT formats that are present in lane lane_no of a multiformat slice + function automatic ifmt_logic_t get_lane_int_formats(int unsigned width, + fmt_logic_t cfg, + ifmt_logic_t icfg, + int unsigned lane_no); + automatic ifmt_logic_t res; + automatic fmt_logic_t lanefmts; + res = '0; + lanefmts = get_lane_formats(width, cfg, lane_no); + + for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) + for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + // Mask active int formats with the width of the float formats + if ((fp_width(fp_format_e'(fmt)) == int_width(int_format_e'(ifmt)))) + res[ifmt] |= icfg[ifmt] && lanefmts[fmt]; + return res; + endfunction + + // Returns a mask of active FP formats that are present in lane lane_no of a CONV slice + function automatic fmt_logic_t get_conv_lane_formats(int unsigned width, + fmt_logic_t cfg, + int unsigned lane_no); + automatic fmt_logic_t res; + for (int 
unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + // Mask active formats with the number of lanes for that format, CPK at least twice + res[fmt] = cfg[fmt] && ((width / fp_width(fp_format_e'(fmt)) > lane_no) || + (CPK_FORMATS[fmt] && (lane_no < 2))); + return res; + endfunction + + // Returns a mask of active INT formats that are present in lane lane_no of a CONV slice + function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width, + fmt_logic_t cfg, + ifmt_logic_t icfg, + int unsigned lane_no); + automatic ifmt_logic_t res; + automatic fmt_logic_t lanefmts; + res = '0; + lanefmts = get_conv_lane_formats(width, cfg, lane_no); + + for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) + for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + // Mask active int formats with the width of the float formats + res[ifmt] |= icfg[ifmt] && lanefmts[fmt] && + (fp_width(fp_format_e'(fmt)) == int_width(int_format_e'(ifmt))); + return res; + endfunction + + // Return whether any active format is set as MERGED + function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) + if (cfg[i] && types[i] == MERGED) + return 1'b1; + return 1'b0; + endfunction + + // Return whether the given format is the first active one set as MERGED + function automatic logic is_first_enabled_multi(fp_format_e fmt, + fmt_unit_types_t types, + fmt_logic_t cfg); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (cfg[i] && types[i] == MERGED) return (fp_format_e'(i) == fmt); + end + return 1'b0; + endfunction + + // Returns the first format that is active and is set as MERGED + function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) + if (cfg[i] && types[i] == MERGED) + return fp_format_e'(i); + return fp_format_e'(0); + endfunction + + // Returns the largest number of regs that is active and is set as 
MERGED + function automatic int unsigned get_num_regs_multi(fmt_unsigned_t regs, + fmt_unit_types_t types, + fmt_logic_t cfg); + automatic int unsigned res = 0; + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (cfg[i] && types[i] == MERGED) res = maximum(res, regs[i]); + end + return res; + endfunction + +endpackage diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv new file mode 100644 index 0000000..4e67720 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv @@ -0,0 +1,76 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +module fpnew_rounding #( + parameter int unsigned AbsWidth=2 // Width of the absolute value, without sign bit +) ( + // Input value + input logic [AbsWidth-1:0] abs_value_i, // absolute value without sign + input logic sign_i, + // Rounding information + input logic [1:0] round_sticky_bits_i, // round and sticky bits {RS} + input fpnew_pkg::roundmode_e rnd_mode_i, + input logic effective_subtraction_i, // sign of inputs affects rounding of zeroes + // Output value + output logic [AbsWidth-1:0] abs_rounded_o, // absolute value without sign + output logic sign_o, + // Output classification + output logic exact_zero_o // output is an exact zero +); + + logic round_up; // Rounding decision + + // Take the rounding decision according to RISC-V spec + // RoundMode | Mnemonic | Meaning + // :--------:|:--------:|:------- + // 000 | RNE | Round to Nearest, ties to Even + // 001 | RTZ | Round towards Zero + // 010 | RDN | Round Down (towards -\infty) + // 011 | RUP | Round Up (towards \infty) + // 100 | RMM | Round to Nearest, ties to Max Magnitude + // 101 | ROD | Round towards odd (this mode is not defined in RISC-V FP-SPEC) + // others | | *invalid* + always_comb begin : rounding_decision + unique case (rnd_mode_i) + fpnew_pkg::RNE: // Decide according to round/sticky bits + unique case (round_sticky_bits_i) + 2'b00, + 2'b01: round_up = 1'b0; // < ulp/2 away, round down + 2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result + 2'b11: round_up = 1'b1; // > ulp/2 away, round up + default: round_up = fpnew_pkg::DONT_CARE; + endcase + fpnew_pkg::RTZ: round_up = 1'b0; // always round down + fpnew_pkg::RDN: round_up = (| round_sticky_bits_i) ? sign_i : 1'b0; // to 0 if +, away if - + fpnew_pkg::RUP: round_up = (| round_sticky_bits_i) ?
~sign_i : 1'b0; // to 0 if -, away if + + fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up + fpnew_pkg::ROD: round_up = ~abs_value_i[0] & (| round_sticky_bits_i); + default: round_up = fpnew_pkg::DONT_CARE; // propagate x + endcase + end + + // Perform the rounding, exponent change and overflow to inf happens automagically + assign abs_rounded_o = abs_value_i + round_up; + + // True zero result is a zero result without dirty round/sticky bits + assign exact_zero_o = (abs_value_i == '0) && (round_sticky_bits_i == '0); + + // In case of effective subtraction (thus signs of addition operands must have differed) and a + // true zero result, the result sign is '-' in case of RDN and '+' for other modes. + assign sign_o = (exact_zero_o && effective_subtraction_i) + ? (rnd_mode_i == fpnew_pkg::RDN) + : sign_i; + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv new file mode 100644 index 0000000..f6116a5 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv @@ -0,0 +1,185 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +module fpnew_top #( + // FPU configuration + parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, + parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, + parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, + parameter int unsigned EnableSIMDMask = 0, + // Do not change + localparam int unsigned NumLanes = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors), + localparam type MaskType = logic [NumLanes-1:0], + localparam int unsigned WIDTH = Features.Width, + localparam int unsigned NUM_OPERANDS = 3 +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input logic vectorial_op_i, + input TagType tag_i, + input MaskType simd_mask_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output TagType tag_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS; + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS; + + // ---------------- + // Type Definition + // ---------------- + typedef struct packed { + logic [WIDTH-1:0] result; + fpnew_pkg::status_t status; + TagType tag; + } output_t; + + // Handshake signals for the blocks + logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy; + output_t [NUM_OPGROUPS-1:0] opgrp_outputs; + + logic 
[NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed; + + // ----------- + // Input Side + // ----------- + assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)]; + + // NaN-boxing check + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + // NaN boxing is only generated if it's enabled and needed + if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check + for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands + assign is_boxed[fmt][op] = (!vectorial_op_i) + ? operands_i[op][WIDTH-1:FP_WIDTH] == '1 + : 1'b1; + end + end else begin : no_check + assign is_boxed[fmt] = '1; + end + end + + // Filter out the mask if not used + MaskType simd_mask; + assign simd_mask = simd_mask_i | ~{NumLanes{logic'(EnableSIMDMask)}}; + + // ------------------------- + // Generate Operation Blocks + // ------------------------- + for (genvar opgrp = 0; opgrp < int'(NUM_OPGROUPS); opgrp++) begin : gen_operation_groups + localparam int unsigned NUM_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp)); + + logic in_valid; + logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed; + + assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp)); + + // slice out input boxing + always_comb begin : slice_inputs + for (int unsigned fmt = 0; fmt < NUM_FORMATS; fmt++) + input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0]; + end + + fpnew_opgroup_block #( + .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), + .Width ( WIDTH ), + .EnableVectors ( Features.EnableVectors ), + .FpFmtMask ( Features.FpFmtMask ), + .IntFmtMask ( Features.IntFmtMask ), + .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), + .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), + .PipeConfig ( Implementation.PipeConfig ), + .TagType ( TagType ), + .TrueSIMDClass ( TrueSIMDClass ) + ) i_opgroup_block ( + .clk_i, + .rst_ni, + .operands_i ( 
operands_i[NUM_OPS-1:0] ), + .is_boxed_i ( input_boxed ), + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .int_fmt_i, + .vectorial_op_i, + .tag_i, + .simd_mask_i ( simd_mask ), + .in_valid_i ( in_valid ), + .in_ready_o ( opgrp_in_ready[opgrp] ), + .flush_i, + .result_o ( opgrp_outputs[opgrp].result ), + .status_o ( opgrp_outputs[opgrp].status ), + .extension_bit_o ( opgrp_ext[opgrp] ), + .tag_o ( opgrp_outputs[opgrp].tag ), + .out_valid_o ( opgrp_out_valid[opgrp] ), + .out_ready_i ( opgrp_out_ready[opgrp] ), + .busy_o ( opgrp_busy[opgrp] ) + ); + end + + // ------------------ + // Arbitrate Outputs + // ------------------ + output_t arbiter_output; + + // Round-Robin arbiter to decide which result to use + rr_arb_tree #( + .NumIn ( NUM_OPGROUPS ), + .DataType ( output_t ), + .AxiVldRdy ( 1'b1 ) + ) i_arbiter ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ( '0 ), + .req_i ( opgrp_out_valid ), + .gnt_o ( opgrp_out_ready ), + .data_i ( opgrp_outputs ), + .gnt_i ( out_ready_i ), + .req_o ( out_valid_o ), + .data_o ( arbiter_output ), + .idx_o ( /* unused */ ) + ); + + // Unpack output + assign result_o = arbiter_output.result; + assign status_o = arbiter_output.status; + assign tag_o = arbiter_output.tag; + + assign busy_o = (| opgrp_busy); + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv new file mode 100644 index 0000000..bda9c01 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv @@ -0,0 +1,3413 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 04/03/2018 // +// Design Name: FPU // +// Module Name: control_mvp.sv // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: the control logic of div and sqrt // +// // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +// Revision Date: 13/04/2018 // +// Lei Li // +// To fix some bug found in Control FSM // +// when Iteration_unit_num_S = 2'b10 // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module control_mvp + + (//Input + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI , + input logic Sqrt_start_SI, + input logic Start_SI, + input logic Kill_SI, + input logic Special_case_SBI, + input logic Special_case_dly_SBI, + input logic [C_PC-1:0] Precision_ctl_SI, + input logic [1:0] Format_sel_SI, + input logic [C_MANT_FP64:0] Numerator_DI, + input logic [C_EXP_FP64:0] Exp_num_DI, + input logic [C_MANT_FP64:0] Denominator_DI, + input logic [C_EXP_FP64:0] Exp_den_DI, + + + output logic Div_start_dly_SO , + output logic Sqrt_start_dly_SO, + output logic Div_enable_SO, + output logic Sqrt_enable_SO, + + + //To next stage + output logic Full_precision_SO, + output logic FP32_SO, + output logic FP64_SO, + output logic 
FP16_SO, + output logic FP16ALT_SO, + + output logic Ready_SO, + output logic Done_SO, + + output logic [C_MANT_FP64+4:0] Mant_result_prenorm_DO, + // output logic [3:0] Round_bit_DO, + output logic [C_EXP_FP64+1:0] Exp_result_prenorm_DO + ); + + logic [C_MANT_FP64+1+4:0] Partial_remainder_DN,Partial_remainder_DP; //58bits,r=q+2 + logic [C_MANT_FP64+4:0] Quotient_DP; //57bits + ///////////////////////////////////////////////////////////////////////////// + // Assign Inputs // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64+1:0] Numerator_se_D; //sign extension and hidden bit + logic [C_MANT_FP64+1:0] Denominator_se_D; //signa extension and hidden bit + logic [C_MANT_FP64+1:0] Denominator_se_DB; //1's complement + + assign Numerator_se_D={1'b0,Numerator_DI}; + + assign Denominator_se_D={1'b0,Denominator_DI}; + + always_comb + begin + if(FP32_SO) + begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} }; + end + else if(FP64_SO) begin + Denominator_se_DB=~Denominator_se_D; + end + else if(FP16_SO) begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} }; + end + else begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; + end + end + + + logic [C_MANT_FP64+1:0] Mant_D_sqrt_Norm; + + assign Mant_D_sqrt_Norm=Exp_num_DI[0]?{1'b0,Numerator_DI}:{Numerator_DI,1'b0}; //for sqrt + + ///////////////////////////////////////////////////////////////////////////// + // Format Selection // + ///////////////////////////////////////////////////////////////////////////// + logic [1:0] Format_sel_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Format_sel_S<='b0; + end + else if(Start_SI&&Ready_SO) + begin + Format_sel_S<=Format_sel_SI; + end + else + begin + Format_sel_S<=Format_sel_S; + end + end 
+ + assign FP32_SO = (Format_sel_S==2'b00); + assign FP64_SO = (Format_sel_S==2'b01); + assign FP16_SO = (Format_sel_S==2'b10); + assign FP16ALT_SO = (Format_sel_S==2'b11); + + + + ///////////////////////////////////////////////////////////////////////////// + // Precision Control // + ///////////////////////////////////////////////////////////////////////////// + + logic [C_PC-1:0] Precision_ctl_S; + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Precision_ctl_S<='b0; + end + else if(Start_SI&&Ready_SO) + begin + Precision_ctl_S<=Precision_ctl_SI; + end + else + begin + Precision_ctl_S<=Precision_ctl_S; + end + end + assign Full_precision_SO = (Precision_ctl_S==6'h00); + + + + logic [5:0] State_ctl_S; + logic [5:0] State_Two_iteration_unit_S; + logic [5:0] State_Four_iteration_unit_S; + + assign State_Two_iteration_unit_S = Precision_ctl_S[C_PC-1:1]; //Two iteration units + assign State_Four_iteration_unit_S = Precision_ctl_S[C_PC-1:2]; //Four iteration units + always_comb + begin + case(Iteration_unit_num_S) +//////////////////////one iteration unit, start/////////////////////////////////////// + 2'b00: //one iteration unit + begin + case(Format_sel_S) + 2'b00: //FP32 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h1b; //24+4 more iterations for rounding bits + end + else + begin + State_ctl_S = Precision_ctl_S; + end + end + 2'b01: //FP64 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h38; //53+4 more iterations for rounding bits + end + else + begin + State_ctl_S = Precision_ctl_S; + end + end + 2'b10: //FP16 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h0e; //11+4 more iterations for rounding bits + end + else + begin + State_ctl_S = Precision_ctl_S; + end + end + 2'b11: //FP16ALT + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h0b; //8+4 more iterations for rounding bits + end + else + begin + State_ctl_S = Precision_ctl_S; + end + end + endcase + end +//////////////////////one 
iteration unit, end/////////////////////////////////////// + +//////////////////////two iteration units, start/////////////////////////////////////// + 2'b01: //two iteration units + begin + case(Format_sel_S) + 2'b00: //FP32 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h0d; //24+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Two_iteration_unit_S; + end + end + 2'b01: //FP64 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h1b; //53+3 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Two_iteration_unit_S; + end + end + 2'b10: //FP16 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h06; //11+3 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Two_iteration_unit_S; + end + end + 2'b11: //FP16ALT + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h05; //8+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Two_iteration_unit_S; + end + end + endcase + end +//////////////////////two iteration units, end/////////////////////////////////////// + +//////////////////////three iteration units, start/////////////////////////////////////// + 2'b10: //three iteration units + begin + case(Format_sel_S) + 2'b00: //FP32 + begin + case(Precision_ctl_S) + 6'h00: + begin + State_ctl_S = 6'h08; //24+3 more iterations for rounding bits + end + 6'h06,6'h07,6'h08: + begin + State_ctl_S = 6'h02; + end + 6'h09,6'h0a,6'h0b: + begin + State_ctl_S = 6'h03; + end + 6'h0c,6'h0d,6'h0e: + begin + State_ctl_S = 6'h04; + end + 6'h0f,6'h10,6'h11: + begin + State_ctl_S = 6'h05; + end + 6'h12,6'h13,6'h14: + begin + State_ctl_S = 6'h06; + end + 6'h15,6'h16,6'h17: + begin + State_ctl_S = 6'h07; + end + default: + begin + State_ctl_S = 6'h08; //24+3 more iterations for rounding bits + end + endcase + end + 2'b01: //FP64 + begin + case(Precision_ctl_S) + 6'h00: + begin + State_ctl_S = 6'h12; //53+4 more iterations for rounding bits + end + 
6'h06,6'h07,6'h08: + begin + State_ctl_S = 6'h02; + end + 6'h09,6'h0a,6'h0b: + begin + State_ctl_S = 6'h03; + end + 6'h0c,6'h0d,6'h0e: + begin + State_ctl_S = 6'h04; + end + 6'h0f,6'h10,6'h11: + begin + State_ctl_S = 6'h05; + end + 6'h12,6'h13,6'h14: + begin + State_ctl_S = 6'h06; + end + 6'h15,6'h16,6'h17: + begin + State_ctl_S = 6'h07; + end + 6'h18,6'h19,6'h1a: + begin + State_ctl_S = 6'h08; + end + 6'h1b,6'h1c,6'h1d: + begin + State_ctl_S = 6'h09; + end + 6'h1e,6'h1f,6'h20: + begin + State_ctl_S = 6'h0a; + end + 6'h21,6'h22,6'h23: + begin + State_ctl_S = 6'h0b; + end + 6'h24,6'h25,6'h26: + begin + State_ctl_S = 6'h0c; + end + 6'h27,6'h28,6'h29: + begin + State_ctl_S = 6'h0d; + end + 6'h2a,6'h2b,6'h2c: + begin + State_ctl_S = 6'h0e; + end + 6'h2d,6'h2e,6'h2f: + begin + State_ctl_S = 6'h0f; + end + 6'h30,6'h31,6'h32: + begin + State_ctl_S = 6'h10; + end + 6'h33,6'h34,6'h35: + begin + State_ctl_S = 6'h11; + end + default: + begin + State_ctl_S = 6'h12; //53+4 more iterations for rounding bits + end + endcase + end + 2'b10: //FP16 + begin + case(Precision_ctl_S) + 6'h00: + begin + State_ctl_S = 6'h04; //12+3 more iterations for rounding bits + end + 6'h06,6'h07,6'h08: + begin + State_ctl_S = 6'h02; + end + 6'h09,6'h0a,6'h0b: + begin + State_ctl_S = 6'h03; + end + default: + begin + State_ctl_S = 6'h04; //12+3 more iterations for rounding bits + end + endcase + end + 2'b11: //FP16ALT + begin + case(Precision_ctl_S) + 6'h00: + begin + State_ctl_S = 6'h03; //8+4 more iterations for rounding bits + end + 6'h06,6'h07,6'h08: + begin + State_ctl_S = 6'h02; + end + default: + begin + State_ctl_S = 6'h03; //8+4 more iterations for rounding bits + end + endcase + end + endcase + end +//////////////////////three iteration units, end/////////////////////////////////////// + +//////////////////////four iteration units, start/////////////////////////////////////// + 2'b11: //four iteration units + begin + case(Format_sel_S) + 2'b00: //FP32 + begin + if(Full_precision_SO) + begin 
+ State_ctl_S = 6'h06; //24+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Four_iteration_unit_S; + end + end + 2'b01: //FP64 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h0d; //53+3 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Four_iteration_unit_S; + end + end + 2'b10: //FP16 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h03; //11+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Four_iteration_unit_S; + end + end + 2'b11: //FP16ALT + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h02; //8+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Four_iteration_unit_S; + end + end + endcase + end +//////////////////////four iteration units, end/////////////////////////////////////// + + endcase + end + + + ///////////////////////////////////////////////////////////////////////////// + // control logic // + ///////////////////////////////////////////////////////////////////////////// + + logic Div_start_dly_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // generate Div_start_dly_S signal + begin + if(~Rst_RBI) + begin + Div_start_dly_S<=1'b0; + end + else if(Div_start_SI&&Ready_SO) + begin + Div_start_dly_S<=1'b1; + end + else + begin + Div_start_dly_S<=1'b0; + end + end + + assign Div_start_dly_SO=Div_start_dly_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) begin // generate Div_enable_SO signal + if(~Rst_RBI) + Div_enable_SO<=1'b0; + // Synchronous reset with Flush + else if (Kill_SI) + Div_enable_SO <= 1'b0; + else if(Div_start_SI&&Ready_SO) + Div_enable_SO<=1'b1; + else if(Done_SO) + Div_enable_SO<=1'b0; + else + Div_enable_SO<=Div_enable_SO; + end + + logic Sqrt_start_dly_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // generate Sqrt_start_dly_SI signal + begin + if(~Rst_RBI) + begin + Sqrt_start_dly_S<=1'b0; + end + else if(Sqrt_start_SI&&Ready_SO) + begin + Sqrt_start_dly_S<=1'b1; + end + else + 
begin + Sqrt_start_dly_S<=1'b0; + end + end + assign Sqrt_start_dly_SO=Sqrt_start_dly_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) begin // generate Sqrt_enable_SO signal + if(~Rst_RBI) + Sqrt_enable_SO<=1'b0; + else if (Kill_SI) + Sqrt_enable_SO <= 1'b0; + else if(Sqrt_start_SI&&Ready_SO) + Sqrt_enable_SO<=1'b1; + else if(Done_SO) + Sqrt_enable_SO<=1'b0; + else + Sqrt_enable_SO<=Sqrt_enable_SO; + end + + logic [5:0] Crtl_cnt_S; + logic Start_dly_S; + + assign Start_dly_S=Div_start_dly_S |Sqrt_start_dly_S; + + logic Fsm_enable_S; + assign Fsm_enable_S=( (Start_dly_S | (| Crtl_cnt_S)) && (~Kill_SI) && Special_case_dly_SBI); + + logic Final_state_S; + assign Final_state_S= (Crtl_cnt_S==State_ctl_S); + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) //control_FSM + begin + if (~Rst_RBI) + begin + Crtl_cnt_S <= '0; + end + else if (Final_state_S | Kill_SI) + begin + Crtl_cnt_S <= '0; + end + else if(Fsm_enable_S) // one cycle Start_SI + begin + Crtl_cnt_S <= Crtl_cnt_S+1; + end + else + begin + Crtl_cnt_S <= '0; + end + end // always_ff + + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) //Generate Done_SO, they can share this Done_SO. 
+ begin + if(~Rst_RBI) + begin + Done_SO<=1'b0; + end + else if(Start_SI&&Ready_SO) + begin + if(~Special_case_SBI) + begin + Done_SO<=1'b1; + end + else + begin + Done_SO<=1'b0; + end + end + else if(Final_state_S) + begin + Done_SO<=1'b1; + end + else + begin + Done_SO<=1'b0; + end + end + + + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) //Generate Ready_SO + begin + if(~Rst_RBI) + begin + Ready_SO<=1'b1; + end + + else if(Start_SI&&Ready_SO) + begin + if(~Special_case_SBI) + begin + Ready_SO<=1'b1; + end + else + begin + Ready_SO<=1'b0; + end + end + else if(Final_state_S | Kill_SI) + begin + Ready_SO<=1'b1; + end + else + begin + Ready_SO<=Ready_SO; + end + end + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b00, start // + //////////////////////////////////////////////////////////////////////////// + + logic Qcnt_one_0; + logic Qcnt_one_1; + logic [1:0] Qcnt_one_2; + logic [2:0] Qcnt_one_3; + logic [3:0] Qcnt_one_4; + logic [4:0] Qcnt_one_5; + logic [5:0] Qcnt_one_6; + logic [6:0] Qcnt_one_7; + logic [7:0] Qcnt_one_8; + logic [8:0] Qcnt_one_9; + logic [9:0] Qcnt_one_10; + logic [10:0] Qcnt_one_11; + logic [11:0] Qcnt_one_12; + logic [12:0] Qcnt_one_13; + logic [13:0] Qcnt_one_14; + logic [14:0] Qcnt_one_15; + logic [15:0] Qcnt_one_16; + logic [16:0] Qcnt_one_17; + logic [17:0] Qcnt_one_18; + logic [18:0] Qcnt_one_19; + logic [19:0] Qcnt_one_20; + logic [20:0] Qcnt_one_21; + logic [21:0] Qcnt_one_22; + logic [22:0] Qcnt_one_23; + logic [23:0] Qcnt_one_24; + logic [24:0] Qcnt_one_25; + logic [25:0] Qcnt_one_26; + logic [26:0] Qcnt_one_27; + logic [27:0] Qcnt_one_28; + logic [28:0] Qcnt_one_29; + logic [29:0] Qcnt_one_30; + logic [30:0] Qcnt_one_31; + logic [31:0] Qcnt_one_32; + logic [32:0] Qcnt_one_33; + logic [33:0] Qcnt_one_34; + logic [34:0] Qcnt_one_35; + logic [35:0] Qcnt_one_36; + logic [36:0] Qcnt_one_37; + logic [37:0] Qcnt_one_38; + logic [38:0] 
Qcnt_one_39; + logic [39:0] Qcnt_one_40; + logic [40:0] Qcnt_one_41; + logic [41:0] Qcnt_one_42; + logic [42:0] Qcnt_one_43; + logic [43:0] Qcnt_one_44; + logic [44:0] Qcnt_one_45; + logic [45:0] Qcnt_one_46; + logic [46:0] Qcnt_one_47; + logic [47:0] Qcnt_one_48; + logic [48:0] Qcnt_one_49; + logic [49:0] Qcnt_one_50; + logic [50:0] Qcnt_one_51; + logic [51:0] Qcnt_one_52; + logic [52:0] Qcnt_one_53; + logic [53:0] Qcnt_one_54; + logic [54:0] Qcnt_one_55; + logic [55:0] Qcnt_one_56; + logic [56:0] Qcnt_one_57; + logic [57:0] Qcnt_one_58; + logic [58:0] Qcnt_one_59; + logic [59:0] Qcnt_one_60; + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b00, end // + //////////////////////////////////////////////////////////////////////////// + + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b01, start // + //////////////////////////////////////////////////////////////////////////// + logic [1:0] Qcnt_two_0; + logic [2:0] Qcnt_two_1; + logic [4:0] Qcnt_two_2; + logic [6:0] Qcnt_two_3; + logic [8:0] Qcnt_two_4; + logic [10:0] Qcnt_two_5; + logic [12:0] Qcnt_two_6; + logic [14:0] Qcnt_two_7; + logic [16:0] Qcnt_two_8; + logic [18:0] Qcnt_two_9; + logic [20:0] Qcnt_two_10; + logic [22:0] Qcnt_two_11; + logic [24:0] Qcnt_two_12; + logic [26:0] Qcnt_two_13; + logic [28:0] Qcnt_two_14; + logic [30:0] Qcnt_two_15; + logic [32:0] Qcnt_two_16; + logic [34:0] Qcnt_two_17; + logic [36:0] Qcnt_two_18; + logic [38:0] Qcnt_two_19; + logic [40:0] Qcnt_two_20; + logic [42:0] Qcnt_two_21; + logic [44:0] Qcnt_two_22; + logic [46:0] Qcnt_two_23; + logic [48:0] Qcnt_two_24; + logic [50:0] Qcnt_two_25; + logic [52:0] Qcnt_two_26; + logic [54:0] Qcnt_two_27; + logic [56:0] Qcnt_two_28; + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when 
Iteration_unit_num_S = 2'b01, end // + //////////////////////////////////////////////////////////////////////////// + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b10, start // + //////////////////////////////////////////////////////////////////////////// + logic [2:0] Qcnt_three_0; + logic [4:0] Qcnt_three_1; + logic [7:0] Qcnt_three_2; + logic [10:0] Qcnt_three_3; + logic [13:0] Qcnt_three_4; + logic [16:0] Qcnt_three_5; + logic [19:0] Qcnt_three_6; + logic [22:0] Qcnt_three_7; + logic [25:0] Qcnt_three_8; + logic [28:0] Qcnt_three_9; + logic [31:0] Qcnt_three_10; + logic [34:0] Qcnt_three_11; + logic [37:0] Qcnt_three_12; + logic [40:0] Qcnt_three_13; + logic [43:0] Qcnt_three_14; + logic [46:0] Qcnt_three_15; + logic [49:0] Qcnt_three_16; + logic [52:0] Qcnt_three_17; + logic [55:0] Qcnt_three_18; + logic [58:0] Qcnt_three_19; + logic [61:0] Qcnt_three_20; + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b10, end // + //////////////////////////////////////////////////////////////////////////// + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b11, start // + //////////////////////////////////////////////////////////////////////////// + logic [3:0] Qcnt_four_0; + logic [6:0] Qcnt_four_1; + logic [10:0] Qcnt_four_2; + logic [14:0] Qcnt_four_3; + logic [18:0] Qcnt_four_4; + logic [22:0] Qcnt_four_5; + logic [26:0] Qcnt_four_6; + logic [30:0] Qcnt_four_7; + logic [34:0] Qcnt_four_8; + logic [38:0] Qcnt_four_9; + logic [42:0] Qcnt_four_10; + logic [46:0] Qcnt_four_11; + logic [50:0] Qcnt_four_12; + logic [54:0] Qcnt_four_13; + logic [58:0] Qcnt_four_14; + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when 
Iteration_unit_num_S = 2'b11, end // + //////////////////////////////////////////////////////////////////////////// + + + + logic [C_MANT_FP64+1+4:0] Sqrt_R0,Sqrt_Q0,Q_sqrt0,Q_sqrt_com_0; + logic [C_MANT_FP64+1+4:0] Sqrt_R1,Sqrt_Q1,Q_sqrt1,Q_sqrt_com_1; + logic [C_MANT_FP64+1+4:0] Sqrt_R2,Sqrt_Q2,Q_sqrt2,Q_sqrt_com_2; + logic [C_MANT_FP64+1+4:0] Sqrt_R3,Sqrt_Q3,Q_sqrt3,Q_sqrt_com_3,Sqrt_R4; //Sqrt_Q4; + + + logic [1:0] Sqrt_DI [3:0]; + logic [1:0] Sqrt_DO [3:0]; + logic Sqrt_carry_DO; + + + logic [C_MANT_FP64+1+4:0] Iteration_cell_a_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_b_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_a_BMASK_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_b_BMASK_D [3:0]; + logic Iteration_cell_carry_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_sum_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_sum_AMASK_D [3:0]; + + + logic [3:0] Sqrt_quotinent_S; + + + always_comb + begin // + case (Format_sel_S) + 2'b00: + begin + Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP32+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt0[C_MANT_FP32+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt1[C_MANT_FP32+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt2[C_MANT_FP32+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt3[C_MANT_FP32+5:0] }; + end + 2'b01: + begin + Sqrt_quotinent_S = {Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]}; + Q_sqrt_com_0=~Q_sqrt0; + Q_sqrt_com_1=~Q_sqrt1; + Q_sqrt_com_2=~Q_sqrt2; + Q_sqrt_com_3=~Q_sqrt3; + end + 2'b10: + begin + Sqrt_quotinent_S = 
{(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt0[C_MANT_FP16+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt1[C_MANT_FP16+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt2[C_MANT_FP16+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt3[C_MANT_FP16+5:0] }; + end + 2'b11: + begin + Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16ALT+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt0[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt1[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt2[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt3[C_MANT_FP16ALT+5:0] }; + end + endcase + end + + + + assign Qcnt_one_0= {1'b0}; //qk for each feedback + assign Qcnt_one_1= {Quotient_DP[0]}; + assign Qcnt_one_2= {Quotient_DP[1:0]}; + assign Qcnt_one_3= {Quotient_DP[2:0]}; + assign Qcnt_one_4= {Quotient_DP[3:0]}; + assign Qcnt_one_5= {Quotient_DP[4:0]}; + assign Qcnt_one_6= {Quotient_DP[5:0]}; + assign Qcnt_one_7= {Quotient_DP[6:0]}; + assign Qcnt_one_8= {Quotient_DP[7:0]}; + assign Qcnt_one_9= {Quotient_DP[8:0]}; + assign Qcnt_one_10= {Quotient_DP[9:0]}; + assign Qcnt_one_11= {Quotient_DP[10:0]}; + assign Qcnt_one_12= {Quotient_DP[11:0]}; + assign Qcnt_one_13= {Quotient_DP[12:0]}; + assign Qcnt_one_14= {Quotient_DP[13:0]}; + assign Qcnt_one_15= {Quotient_DP[14:0]}; + assign Qcnt_one_16= {Quotient_DP[15:0]}; + assign Qcnt_one_17= {Quotient_DP[16:0]}; + assign Qcnt_one_18= {Quotient_DP[17:0]}; + assign Qcnt_one_19= {Quotient_DP[18:0]}; + 
assign Qcnt_one_20= {Quotient_DP[19:0]}; + assign Qcnt_one_21= {Quotient_DP[20:0]}; + assign Qcnt_one_22= {Quotient_DP[21:0]}; + assign Qcnt_one_23= {Quotient_DP[22:0]}; + assign Qcnt_one_24= {Quotient_DP[23:0]}; + assign Qcnt_one_25= {Quotient_DP[24:0]}; + assign Qcnt_one_26= {Quotient_DP[25:0]}; + assign Qcnt_one_27= {Quotient_DP[26:0]}; + assign Qcnt_one_28= {Quotient_DP[27:0]}; + assign Qcnt_one_29= {Quotient_DP[28:0]}; + assign Qcnt_one_30= {Quotient_DP[29:0]}; + assign Qcnt_one_31= {Quotient_DP[30:0]}; + assign Qcnt_one_32= {Quotient_DP[31:0]}; + assign Qcnt_one_33= {Quotient_DP[32:0]}; + assign Qcnt_one_34= {Quotient_DP[33:0]}; + assign Qcnt_one_35= {Quotient_DP[34:0]}; + assign Qcnt_one_36= {Quotient_DP[35:0]}; + assign Qcnt_one_37= {Quotient_DP[36:0]}; + assign Qcnt_one_38= {Quotient_DP[37:0]}; + assign Qcnt_one_39= {Quotient_DP[38:0]}; + assign Qcnt_one_40= {Quotient_DP[39:0]}; + assign Qcnt_one_41= {Quotient_DP[40:0]}; + assign Qcnt_one_42= {Quotient_DP[41:0]}; + assign Qcnt_one_43= {Quotient_DP[42:0]}; + assign Qcnt_one_44= {Quotient_DP[43:0]}; + assign Qcnt_one_45= {Quotient_DP[44:0]}; + assign Qcnt_one_46= {Quotient_DP[45:0]}; + assign Qcnt_one_47= {Quotient_DP[46:0]}; + assign Qcnt_one_48= {Quotient_DP[47:0]}; + assign Qcnt_one_49= {Quotient_DP[48:0]}; + assign Qcnt_one_50= {Quotient_DP[49:0]}; + assign Qcnt_one_51= {Quotient_DP[50:0]}; + assign Qcnt_one_52= {Quotient_DP[51:0]}; + assign Qcnt_one_53= {Quotient_DP[52:0]}; + assign Qcnt_one_54= {Quotient_DP[53:0]}; + assign Qcnt_one_55= {Quotient_DP[54:0]}; + assign Qcnt_one_56= {Quotient_DP[55:0]}; + assign Qcnt_one_57= {Quotient_DP[56:0]}; + + + assign Qcnt_two_0 = {1'b0, Sqrt_quotinent_S[3]}; //qk for each feedback + assign Qcnt_two_1 = {Quotient_DP[1:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_2 = {Quotient_DP[3:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_3 = {Quotient_DP[5:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_4 = {Quotient_DP[7:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_5 = 
{Quotient_DP[9:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_6 = {Quotient_DP[11:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_7 = {Quotient_DP[13:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_8 = {Quotient_DP[15:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_9 = {Quotient_DP[17:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_10 = {Quotient_DP[19:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_11 = {Quotient_DP[21:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_12 = {Quotient_DP[23:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_13 = {Quotient_DP[25:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_14 = {Quotient_DP[27:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_15 = {Quotient_DP[29:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_16 = {Quotient_DP[31:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_17 = {Quotient_DP[33:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_18 = {Quotient_DP[35:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_19 = {Quotient_DP[37:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_20 = {Quotient_DP[39:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_21 = {Quotient_DP[41:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_22 = {Quotient_DP[43:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_23 = {Quotient_DP[45:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_24 = {Quotient_DP[47:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_25 = {Quotient_DP[49:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_26 = {Quotient_DP[51:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_27 = {Quotient_DP[53:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_28 = {Quotient_DP[55:0],Sqrt_quotinent_S[3]}; + + + assign Qcnt_three_0 = {1'b0, Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; //qk for each feedback + assign Qcnt_three_1 = {Quotient_DP[2:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_2 = {Quotient_DP[5:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_3 = {Quotient_DP[8:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_4 = {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_5 = 
{Quotient_DP[14:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_6 = {Quotient_DP[17:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_7 = {Quotient_DP[20:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_8 = {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_9 = {Quotient_DP[26:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_10 = {Quotient_DP[29:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_11 = {Quotient_DP[32:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_12 = {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_13 = {Quotient_DP[38:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_14 = {Quotient_DP[41:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_15 = {Quotient_DP[44:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_16 = {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_17 = {Quotient_DP[50:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_18 = {Quotient_DP[53:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_19 = {Quotient_DP[56:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + + + assign Qcnt_four_0 = {1'b0, Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_1 = {Quotient_DP[3:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_2 = {Quotient_DP[7:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_3 = {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_4 = {Quotient_DP[15:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_5 = {Quotient_DP[19:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_6 = {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_7 
= {Quotient_DP[27:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_8 = {Quotient_DP[31:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_9 = {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_10 = {Quotient_DP[39:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_11 = {Quotient_DP[43:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_12 = {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_13 = {Quotient_DP[51:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_14 = {Quotient_DP[55:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + + + + + always_comb begin // the intermediate operands for sqrt + + case(Iteration_unit_num_S) + 2'b00: + begin + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b00, start // + ///////////////////////////////////////////////////////////////////////////// + + + + + case(Crtl_cnt_S) + + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_0}; + Sqrt_Q0=Q_sqrt_com_0; + end + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_1}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_one_2}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt0={{(C_MANT_FP64+3){1'b0}},Qcnt_one_3}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + 
Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_one_4}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_one_5}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_one_6}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_one_7}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_one_8}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_one_9}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_one_10}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt0={{(C_MANT_FP64-5){1'b0}},Qcnt_one_11}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_one_12}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_one_13}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_one_14}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001111: + begin + 
Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt0={{(C_MANT_FP64-9){1'b0}},Qcnt_one_15}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_one_16}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_one_17}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_one_18}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_one_19}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_one_20}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_one_21}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_one_22}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt0={{(C_MANT_FP64-17){1'b0}},Qcnt_one_23}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_one_24}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_one_25}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_one_26}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-21){1'b0}},Qcnt_one_27}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_one_28}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_one_29}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_one_30}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_one_31}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_one_32}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_one_33}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_one_34}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-29){1'b0}},Qcnt_one_35}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_one_36}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_one_37}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_one_38}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100111: + begin + 
Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-33){1'b0}},Qcnt_one_39}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_one_40}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_one_41}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_one_42}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_one_43}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_one_44}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_one_45}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_one_46}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-41){1'b0}},Qcnt_one_47}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_one_48}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_one_49}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_one_50}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-45){1'b0}},Qcnt_one_51}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_one_52}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 
6'b110101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_one_53}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_one_54}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_one_55}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b111000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_one_56}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + + default: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0='0; + Sqrt_Q0='0; + end + endcase + end + + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b00, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b01: + begin + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b01, start // + ///////////////////////////////////////////////////////////////////////////// + case(Crtl_cnt_S) + + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_two_1[2:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt1={{(C_MANT_FP64+3){1'b0}},Qcnt_two_1[2:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_two_2[4:1]}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_two_2[4:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_two_3[6:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt1={{(C_MANT_FP64-1){1'b0}},Qcnt_two_3[6:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_two_4[8:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt1={{(C_MANT_FP64-3){1'b0}},Qcnt_two_4[8:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_two_5[10:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_two_5[10:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_two_6[12:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-7){1'b0}},Qcnt_two_6[12:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_two_7[14:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt1={{(C_MANT_FP64-9){1'b0}},Qcnt_two_7[14:0]}; + 
Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_two_8[16:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_two_8[16:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_two_9[18:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt1={{(C_MANT_FP64-13){1'b0}},Qcnt_two_9[18:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_two_10[20:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt1={{(C_MANT_FP64-15){1'b0}},Qcnt_two_10[20:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_two_11[22:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_two_11[22:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_two_12[24:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-19){1'b0}},Qcnt_two_12[24:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + 
Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_two_13[26:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-21){1'b0}},Qcnt_two_13[26:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_two_14[28:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_two_14[28:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_two_15[30:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-25){1'b0}},Qcnt_two_15[30:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_two_16[32:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-27){1'b0}},Qcnt_two_16[32:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_two_17[34:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_two_17[34:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_two_18[36:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-31){1'b0}},Qcnt_two_18[36:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_two_19[38:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-33){1'b0}},Qcnt_two_19[38:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010100: + begin + Sqrt_DI[0]=2'b00; + 
Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_two_20[40:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_two_20[40:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_two_21[42:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-37){1'b0}},Qcnt_two_21[42:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_two_22[44:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-39){1'b0}},Qcnt_two_22[44:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_two_23[46:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_two_23[46:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_two_24[48:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-43){1'b0}},Qcnt_two_24[48:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_two_25[50:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-45){1'b0}},Qcnt_two_25[50:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_two_26[52:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_two_26[52:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011011: + begin + Sqrt_DI[0]=2'b00; + 
Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_two_27[54:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-49){1'b0}},Qcnt_two_27[54:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_two_28[56:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-51){1'b0}},Qcnt_two_28[56:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + default: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + endcase + end + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b01, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b10: + begin + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b10, start // + ///////////////////////////////////////////////////////////////////////////// + + case(Crtl_cnt_S) + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + 
Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_three_1[4:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_three_1[4:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt2={{(C_MANT_FP64){1'b0}},Qcnt_three_1[4:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_three_2[7:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt1={{(C_MANT_FP64-2){1'b0}},Qcnt_three_2[7:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt2={{(C_MANT_FP64-3){1'b0}},Qcnt_three_2[7:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_three_3[10:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_three_3[10:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt2={{(C_MANT_FP64-6){1'b0}},Qcnt_three_3[10:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_three_4[13:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_three_4[13:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_three_4[13:0]}; + 
Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_three_5[16:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_three_5[16:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt2={{(C_MANT_FP64-12){1'b0}},Qcnt_three_5[16:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_three_6[19:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt1={{(C_MANT_FP64-14){1'b0}},Qcnt_three_6[19:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt2={{(C_MANT_FP64-15){1'b0}},Qcnt_three_6[19:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_three_7[22:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_three_7[22:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt2={{(C_MANT_FP64-18){1'b0}},Qcnt_three_7[22:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_three_8[25:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_three_8[25:1]}; + 
Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_three_8[25:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_three_9[28:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_three_9[28:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-24){1'b0}},Qcnt_three_9[28:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_three_10[31:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-26){1'b0}},Qcnt_three_10[31:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-27){1'b0}},Qcnt_three_10[31:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_three_11[34:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_three_11[34:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-30){1'b0}},Qcnt_three_11[34:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_three_12[37:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_three_12[37:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_three_12[37:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_three_13[40:2]}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_three_13[40:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-36){1'b0}},Qcnt_three_13[40:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_three_14[43:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-38){1'b0}},Qcnt_three_14[43:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-39){1'b0}},Qcnt_three_14[43:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_three_15[46:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_three_15[46:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-42){1'b0}},Qcnt_three_15[46:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_three_16[49:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_three_16[49:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_three_16[49:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_three_17[52:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_three_17[52:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-48){1'b0}},Qcnt_three_17[52:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010010: + begin + 
Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_three_18[55:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-50){1'b0}},Qcnt_three_18[55:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-51){1'b0}},Qcnt_three_18[55:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + default : + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + endcase + + end + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b10, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b11: + begin + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b11, start // + ///////////////////////////////////////////////////////////////////////////// + + case(Crtl_cnt_S) + + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + 
Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_four_1[6:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt1={{(C_MANT_FP64){1'b0}},Qcnt_four_1[6:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt2={{(C_MANT_FP64-1){1'b0}},Qcnt_four_1[6:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt3={{(C_MANT_FP64-2){1'b0}},Qcnt_four_1[6:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_four_2[10:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt1={{(C_MANT_FP64-4){1'b0}},Qcnt_four_2[10:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt2={{(C_MANT_FP64-5){1'b0}},Qcnt_four_2[10:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt3={{(C_MANT_FP64-6){1'b0}},Qcnt_four_2[10:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_four_3[14:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_four_3[14:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_four_3[14:1]}; + 
Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt3={{(C_MANT_FP64-10){1'b0}},Qcnt_four_3[14:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_four_4[18:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt1={{(C_MANT_FP64-12){1'b0}},Qcnt_four_4[18:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt2={{(C_MANT_FP64-13){1'b0}},Qcnt_four_4[18:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt3={{(C_MANT_FP64-14){1'b0}},Qcnt_four_4[18:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_four_5[22:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt1={{(C_MANT_FP64-16){1'b0}},Qcnt_four_5[22:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt2={{(C_MANT_FP64-17){1'b0}},Qcnt_four_5[22:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt3={{(C_MANT_FP64-18){1'b0}},Qcnt_four_5[22:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_four_6[26:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_four_6[26:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + 
Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_four_6[26:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-22){1'b0}},Qcnt_four_6[26:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_four_7[30:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-24){1'b0}},Qcnt_four_7[30:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-25){1'b0}},Qcnt_four_7[30:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-26){1'b0}},Qcnt_four_7[30:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_four_8[34:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-28){1'b0}},Qcnt_four_8[34:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-29){1'b0}},Qcnt_four_8[34:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-30){1'b0}},Qcnt_four_8[34:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_four_9[38:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_four_9[38:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_four_9[38:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-34){1'b0}},Qcnt_four_9[38:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001010: + begin + Sqrt_DI[0]=2'b00; + 
Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_four_10[42:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-36){1'b0}},Qcnt_four_10[42:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-37){1'b0}},Qcnt_four_10[42:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-38){1'b0}},Qcnt_four_10[42:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_four_11[46:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-40){1'b0}},Qcnt_four_11[46:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-41){1'b0}},Qcnt_four_11[46:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-42){1'b0}},Qcnt_four_11[46:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_four_12[50:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_four_12[50:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_four_12[50:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-46){1'b0}},Qcnt_four_12[50:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_four_13[54:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-48){1'b0}},Qcnt_four_13[54:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-49){1'b0}},Qcnt_four_13[54:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; 
+ Q_sqrt3={{(C_MANT_FP64-50){1'b0}},Qcnt_four_13[54:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + default: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + endcase + end + endcase + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b11, end // + ///////////////////////////////////////////////////////////////////////////// + end + + + + assign Sqrt_R0= ((Sqrt_start_dly_S)?'0:{Partial_remainder_DP[C_MANT_FP64+5:0]}); + assign Sqrt_R1= {Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+2:0],Sqrt_DO[0]} ; + assign Sqrt_R2= {Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+2:0],Sqrt_DO[1]}; + assign Sqrt_R3= {Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+2:0],Sqrt_DO[2]}; + assign Sqrt_R4= {Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+2:0],Sqrt_DO[3]}; + + logic [C_MANT_FP64+5:0] Denominator_se_format_DB; // + + assign Denominator_se_format_DB={Denominator_se_DB[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-1]}, + Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-1]}, + 
Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-1]}, + Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} ; + // for iteration cell_U0 + logic [C_MANT_FP64+5:0] First_iteration_cell_div_a_D,First_iteration_cell_div_b_D; + logic Sel_b_for_first_S; + + + assign First_iteration_cell_div_a_D=(Div_start_dly_S)?{Numerator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP32-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} + :{Partial_remainder_DP[C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+2]}, + Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+2]}, + Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+2]}, + Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Quotient_DP[0],3'b0}; + assign Sel_b_for_first_S=(Div_start_dly_S)?1:Quotient_DP[0]; + assign First_iteration_cell_div_b_D=Sel_b_for_first_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[0]=Sqrt_enable_SO?Sqrt_R0:{First_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[0]=Sqrt_enable_SO?Sqrt_Q0:{First_iteration_cell_div_b_D}; + + + + // for iteration cell_U1 + logic [C_MANT_FP64+5:0] Sec_iteration_cell_div_a_D,Sec_iteration_cell_div_b_D; + logic Sel_b_for_sec_S; + generate + if(|Iteration_unit_num_S) + begin + assign 
Sel_b_for_sec_S=~Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5]; + assign Sec_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_sec_S,3'b0}; + assign Sec_iteration_cell_div_b_D=Sel_b_for_sec_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[1]=Sqrt_enable_SO?Sqrt_R1:{Sec_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[1]=Sqrt_enable_SO?Sqrt_Q1:{Sec_iteration_cell_div_b_D}; + end + endgenerate + + // for iteration cell_U2 + logic [C_MANT_FP64+5:0] Thi_iteration_cell_div_a_D,Thi_iteration_cell_div_b_D; + logic Sel_b_for_thi_S; + generate + if((Iteration_unit_num_S==2'b10) | (Iteration_unit_num_S==2'b11)) + begin + assign Sel_b_for_thi_S=~Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5]; + assign Thi_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_thi_S,3'b0}; + assign 
Thi_iteration_cell_div_b_D=Sel_b_for_thi_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[2]=Sqrt_enable_SO?Sqrt_R2:{Thi_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[2]=Sqrt_enable_SO?Sqrt_Q2:{Thi_iteration_cell_div_b_D}; + end + endgenerate + + // for iteration cell_U3 + logic [C_MANT_FP64+5:0] Fou_iteration_cell_div_a_D,Fou_iteration_cell_div_b_D; + logic Sel_b_for_fou_S; + + generate + if(Iteration_unit_num_S==2'b11) + begin + assign Sel_b_for_fou_S=~Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5]; + assign Fou_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+2]}, + Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_fou_S,3'b0}; + assign Fou_iteration_cell_div_b_D=Sel_b_for_fou_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[3]=Sqrt_enable_SO?Sqrt_R3:{Fou_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[3]=Sqrt_enable_SO?Sqrt_Q3:{Fou_iteration_cell_div_b_D}; + end + endgenerate + + ///////////////////////////////////////////////////////////////////////////// + // Masking Control // + ///////////////////////////////////////////////////////////////////////////// + + + logic [C_MANT_FP64+1+4:0] Mask_bits_ctl_S; //For extension + + assign Mask_bits_ctl_S =58'h3ff_ffff_ffff_ffff; //It is not needed.
The corresponding process is handled by the code above + ///////////////////////////////////////////////////////////////////////////// + // Iteration Instances with masking control // + ///////////////////////////////////////////////////////////////////////////// + + + logic Div_enable_SI [3:0]; + logic Div_start_dly_SI [3:0]; + logic Sqrt_enable_SI [3:0]; + generate + genvar i,j; + for (i=0; i <= Iteration_unit_num_S ; i++) + begin + for (j = 0; j <= C_MANT_FP64+5; j++) begin + assign Iteration_cell_a_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_a_BMASK_D[i][j]; + assign Iteration_cell_b_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_b_BMASK_D[i][j]; + assign Iteration_cell_sum_AMASK_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_sum_D[i][j]; + end + + assign Div_enable_SI[i] = Div_enable_SO; + assign Div_start_dly_SI[i] = Div_start_dly_S; + assign Sqrt_enable_SI[i] = Sqrt_enable_SO; + iteration_div_sqrt_mvp #(C_MANT_FP64+6) iteration_div_sqrt + ( + .A_DI (Iteration_cell_a_D[i] ), + .B_DI (Iteration_cell_b_D[i] ), + .Div_enable_SI (Div_enable_SI[i] ), + .Div_start_dly_SI (Div_start_dly_SI[i] ), + .Sqrt_enable_SI (Sqrt_enable_SI[i] ), + .D_DI (Sqrt_DI[i] ), + .D_DO (Sqrt_DO[i] ), + .Sum_DO (Iteration_cell_sum_D[i] ), + .Carry_out_DO (Iteration_cell_carry_D[i] ) + ); + + end + + endgenerate + + + + always_comb + begin + case (Iteration_unit_num_S) + 2'b00: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R1:Iteration_cell_sum_AMASK_D[0]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b01: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R2:Iteration_cell_sum_AMASK_D[1]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b10: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R3:Iteration_cell_sum_AMASK_D[2]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b11: + begin + if(Fsm_enable_S) + Partial_remainder_DN =
Sqrt_enable_SO?Sqrt_R4:Iteration_cell_sum_AMASK_D[3]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + endcase + end + + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // partial_remainder + begin + if(~Rst_RBI) + begin + Partial_remainder_DP <= '0; + end + else + begin + Partial_remainder_DP <= Partial_remainder_DN; + end + end + + logic [C_MANT_FP64+4:0] Quotient_DN; + + always_comb // Can choose different carry-outs based on different operations + begin + case (Iteration_unit_num_S) + 2'b00: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+3:0],Sqrt_quotinent_S[3]} :{Quotient_DP[C_MANT_FP64+3:0],Iteration_cell_carry_D[0]}; + else + Quotient_DN= Quotient_DP; + end + 2'b01: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+2:0],Sqrt_quotinent_S[3:2]} :{Quotient_DP[C_MANT_FP64+2:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1]}; + else + Quotient_DN= Quotient_DP; + end + 2'b10: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+1:0],Sqrt_quotinent_S[3:1]} : {Quotient_DP[C_MANT_FP64+1:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2]}; + else + Quotient_DN= Quotient_DP; + end + 2'b11: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ?
{Quotient_DP[C_MANT_FP64:0],Sqrt_quotinent_S } : {Quotient_DP[C_MANT_FP64:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]}; + else + Quotient_DN= Quotient_DP; + end + endcase + end + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // Quotient + begin + if(~Rst_RBI) + begin + Quotient_DP <= '0; + end + else + Quotient_DP <= Quotient_DN; + end + + + ///////////////////////////////////////////////////////////////////////////// + // Precision Control for outputs // + ///////////////////////////////////////////////////////////////////////////// + + +//////////////////////one iteration unit, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b00) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-1:0],{(C_MANT_FP64-C_MANT_FP32+4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-5:0],{(C_MANT_FP64-C_MANT_FP32+4+5){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h10: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP32-7:0],{(C_MANT_FP64-C_MANT_FP32+4+7){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-11:0],{(C_MANT_FP64-C_MANT_FP32+4+11){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-13:0],{(C_MANT_FP64-C_MANT_FP32+4+13){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + 6'h34: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64:0],{(4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h33: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h32: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h31: + begin 
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-4:0],{(4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-6:0],{(4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-10:0],{(4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h29: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}}}; //Precision_ctl_S+1 + end + 6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-12:0],{(4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h27: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}}}; //Precision_ctl_S+1 + end + 6'h26: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h25: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}}}; //Precision_ctl_S+1 + end + 6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-16:0],{(4+16){1'b0}}}; //Precision_ctl_S+1 + end + 6'h23: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}}}; //Precision_ctl_S+1 + end + 6'h22: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-18:0],{(4+18){1'b0}}}; //Precision_ctl_S+1 + end + 6'h21: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}}}; //Precision_ctl_S+1 + end + 6'h20: + begin + 
Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-22:0],{(4+22){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-24:0],{(4+24){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}}}; //Precision_ctl_S+1 + end + 6'h19: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}}}; //Precision_ctl_S+1 + end + 6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-28:0],{(4+28){1'b0}}}; //Precision_ctl_S+1 + end + 6'h17: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}}}; //Precision_ctl_S+1 + end + 6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-30:0],{(4+30){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}}}; //Precision_ctl_S+1 + end + 6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-34:0],{(4+34){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}}}; //Precision_ctl_S+1 + end + 6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-36:0],{(4+36){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f: + 
begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-40:0],{(4+40){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-42:0],{(4+42){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16:0],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4 + end 
+ endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////one iteration unit, end////////////////////////////////////////// + +//////////////////////two iteration units, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b01) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17,6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f,6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + 
Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3 + end + 6'h34: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h33,6'h32: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h31,6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2d,6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2b,6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}} }; //Precision_ctl_S+1 + end + 6'h29,6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1 + end + 6'h27,6'h26: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}} }; //Precision_ctl_S+1 + end + 6'h25,6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}} }; //Precision_ctl_S+1 + end + 6'h23,6'h22: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1 + end + 6'h21,6'h20: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1f,6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}} }; 
//Precision_ctl_S+1 + end + 6'h1d,6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1b,6'h1a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}} }; //Precision_ctl_S+1 + end + 6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}} }; //Precision_ctl_S+1 + end + 6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}} }; //Precision_ctl_S+1 + end + 6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0f,6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} }; //+3 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////two iteration units, end////////////////////////////////////////// + +//////////////////////three iteration units, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b10) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3 + end + 6'h17,6'h16,6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14,6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11,6'h10,6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + 6'h34,6'h33: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h32,6'h31,6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e,6'h2d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2c,6'h2b,6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}} }; //Precision_ctl_S+1 + end + 6'h29,6'h28,6'h27: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1 + end + 6'h26,6'h25,6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}} }; //Precision_ctl_S+1 + end + 6'h23,6'h22,6'h21: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1 + end + 6'h20,6'h1f,6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1d,6'h1c,6'h1b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1a,6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16,6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h14,6'h13,6'h12: + begin + 
Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}} }; //Precision_ctl_S+1 + end + 6'h11,6'h10,6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + 6'h0a,6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+1:1],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////three iteration units, end////////////////////////////////////////// + +//////////////////////four iteration units, 
start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b11) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17,6'h16,6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13,6'h12,6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f,6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3 + end + 6'h34: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h33,6'h32,6'h31,6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e,6'h2d,6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(9){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2b,6'h2a,6'h29,6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(13){1'b0}} }; //Precision_ctl_S+1 + end + 6'h27,6'h26,6'h25,6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(17){1'b0}} }; 
//Precision_ctl_S+1 + end + 6'h23,6'h22,6'h21,6'h20: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(21){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1f,6'h1e,6'h1d,6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(25){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1b,6'h1a,6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16,6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(33){1'b0}} }; //Precision_ctl_S+1 + end + 6'h13,6'h12,6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(37){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0f,6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(45){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(49){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5 + end + 6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1-4:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 
6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////four iteration units, end/////////////////////////////////////// + + + + + +// resultant exponent + logic [C_EXP_FP64+1:0] Exp_result_prenorm_DN,Exp_result_prenorm_DP; + + logic [C_EXP_FP64+1:0] Exp_add_a_D; + logic [C_EXP_FP64+1:0] Exp_add_b_D; + logic [C_EXP_FP64+1:0] Exp_add_c_D; + + integer C_BIAS_AONE, C_HALF_BIAS; + always_comb + begin // + case (Format_sel_S) + 2'b00: + begin + C_BIAS_AONE =C_BIAS_AONE_FP32; + C_HALF_BIAS =C_HALF_BIAS_FP32; + end + 2'b01: + begin + C_BIAS_AONE =C_BIAS_AONE_FP64; + C_HALF_BIAS =C_HALF_BIAS_FP64; + end + 2'b10: + begin + C_BIAS_AONE =C_BIAS_AONE_FP16; + C_HALF_BIAS =C_HALF_BIAS_FP16; + end + 2'b11: + begin + C_BIAS_AONE =C_BIAS_AONE_FP16ALT; + C_HALF_BIAS =C_HALF_BIAS_FP16ALT; + end + endcase + end + +//For division, exponent=(Exp_a_D-LZ1)-(Exp_b_D-LZ2)+BIAS +//For square root, exponent=(Exp_a_D-LZ1)/2+(Exp_a_D-LZ1)%2+C_HALF_BIAS +//For exponent, in preprorces module, (Exp_a_D-LZ1) and (Exp_b_D-LZ2) have been processed with the corresponding process for denormal numbers. 
+ + assign Exp_add_a_D = {Sqrt_start_dly_S?{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64:1]}:{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI}}; + assign Exp_add_b_D = {Sqrt_start_dly_S?{1'b0,{C_EXP_ZERO_FP64},Exp_num_DI[0]}:{~Exp_den_DI[C_EXP_FP64],~Exp_den_DI[C_EXP_FP64],~Exp_den_DI}}; + assign Exp_add_c_D = {Div_start_dly_S?{{C_BIAS_AONE}}:{{C_HALF_BIAS}}}; + assign Exp_result_prenorm_DN = (Start_dly_S)?{Exp_add_a_D + Exp_add_b_D + Exp_add_c_D}:Exp_result_prenorm_DP; + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Exp_result_prenorm_DP <= '0; + end + else + begin + Exp_result_prenorm_DP<= Exp_result_prenorm_DN; + end + end + + assign Exp_result_prenorm_DO = Exp_result_prenorm_DP; + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv new file mode 100644 index 0000000..b3f41fe --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv @@ -0,0 +1,83 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// This file contains all div_sqrt_top_mvp parameters +// Authors : Lei Li (lile@iis.ee.ethz.ch) +// Shared field widths, rounding-mode codes and per-format constants; pulled in with a wildcard import by the div/sqrt modules. +package defs_div_sqrt_mvp; + + // op command + localparam C_RM = 3; // bit width of the rounding-mode field (RM_SI is declared [C_RM-1:0]) + localparam C_RM_NEAREST = 3'h0; // round to nearest; norm_div_sqrt_mvp resolves ties on the kept LSB + localparam C_RM_TRUNC = 3'h1; // truncate: norm_div_sqrt_mvp never rounds up + localparam C_RM_PLUSINF = 3'h2; // rounds up only when the result is inexact and the sign bit is 0 + localparam C_RM_MINUSINF = 3'h3; // rounds up only when the result is inexact and the sign bit is 1 + localparam C_PC = 6; // Precision Control: bit width of Precision_ctl_SI + localparam C_FS = 2; // Format Selection: bit width of Format_sel_SI (the exponent logic decodes 2'b00 FP32, 2'b01 FP64, 2'b10 FP16, 2'b11 FP16ALT) + localparam C_IUNC = 2; // Iteration Unit Number Control + localparam Iteration_unit_num_S = 2'b10; // 2'b10 enables the generate branch marked "three iteration units", 2'b11 the "four iteration units" one + + // FP64 + localparam C_OP_FP64 = 64; // operand width in bits; also the width of the top-level operand and result ports + localparam C_MANT_FP64 = 52; // mantissa field width (hidden bit not counted) + localparam C_EXP_FP64 = 11; // exponent field width + localparam C_BIAS_FP64 = 1023; // exponent bias + localparam C_BIAS_AONE_FP64 = 11'h400; // bias plus one (1024); third addend of the exponent sum for division + localparam C_HALF_BIAS_FP64 = 511; // third addend of the exponent sum for square root + localparam C_EXP_ZERO_FP64 = 11'h000; // all-zero exponent field + localparam C_EXP_ONE_FP64 = 13'h001; // Bit width is in agreement with in norm + localparam C_EXP_INF_FP64 = 11'h7FF; // all-ones exponent field + localparam C_MANT_ZERO_FP64 = 52'h0; + localparam C_MANT_NAN_FP64 = 52'h8_0000_0000_0000; // only the mantissa MSB set; norm_div_sqrt_mvp uses it for every NaN result + localparam C_PZERO_FP64 = 64'h0000_0000_0000_0000; // sign bit clear, all other bits zero + localparam C_MZERO_FP64 = 64'h8000_0000_0000_0000; // sign bit set, all other bits zero + localparam C_QNAN_FP64 = 64'h7FF8_0000_0000_0000; // all-ones exponent with the mantissa MSB set + + // FP32 (constants play the same roles as the FP64 ones above) + localparam C_OP_FP32 = 32; + localparam C_MANT_FP32 = 23; + localparam C_EXP_FP32 = 8; + localparam C_BIAS_FP32 = 127; + localparam C_BIAS_AONE_FP32 = 8'h80; + localparam C_HALF_BIAS_FP32 = 63; + localparam C_EXP_ZERO_FP32 = 8'h00; + localparam C_EXP_INF_FP32 = 8'hFF; + localparam C_MANT_ZERO_FP32 = 23'h0; + localparam C_PZERO_FP32 = 32'h0000_0000; + localparam C_MZERO_FP32 = 32'h8000_0000; + localparam C_QNAN_FP32 = 32'h7FC0_0000; + + // FP16 (constants play the same roles as the FP64 ones above) + localparam C_OP_FP16 = 16; + localparam C_MANT_FP16 = 10; + localparam C_EXP_FP16 = 5; + localparam C_BIAS_FP16 = 15; + localparam C_BIAS_AONE_FP16 = 5'h10; + localparam C_HALF_BIAS_FP16 = 7; + localparam C_EXP_ZERO_FP16 = 5'h00; + localparam C_EXP_INF_FP16 = 5'h1F; + localparam C_MANT_ZERO_FP16 = 10'h0; + localparam C_PZERO_FP16 = 16'h0000; + localparam C_MZERO_FP16 = 16'h8000; + localparam C_QNAN_FP16 =
16'h7E00; + + // FP16alt (16-bit operand with an 8-bit exponent and a 7-bit mantissa; no PZERO/MZERO constants are defined for it) + localparam C_OP_FP16ALT = 16; + localparam C_MANT_FP16ALT = 7; + localparam C_EXP_FP16ALT = 8; + localparam C_BIAS_FP16ALT = 127; + localparam C_BIAS_AONE_FP16ALT = 8'h80; + localparam C_HALF_BIAS_FP16ALT = 63; + localparam C_EXP_ZERO_FP16ALT = 8'h00; + localparam C_EXP_INF_FP16ALT = 8'hFF; + localparam C_MANT_ZERO_FP16ALT = 7'h0; + localparam C_QNAN_FP16ALT = 16'h7FC0; + +endpackage : defs_div_sqrt_mvp diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv new file mode 100644 index 0000000..3af6081 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv @@ -0,0 +1,180 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+ +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li -- lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 03/03/2018 // +// Design Name: div_sqrt_top_mvp // +// Module Name: div_sqrt_top_mvp.sv // +// Project Name: The shared divisor and square root // +// Language: SystemVerilog // +// // +// Description: The top of div and sqrt // +// // +// // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; +// Top level of the shared divide / square-root unit. Purely structural: it chains preprocess_mvp, nrbd_nrsc_mvp and norm_div_sqrt_mvp and contains no logic of its own. +module div_sqrt_top_mvp + + (//Input + input logic Clk_CI, + input logic Rst_RBI, // forwarded unchanged to preprocess_U0 and nrbd_nrsc_U0 + input logic Div_start_SI, + input logic Sqrt_start_SI, + + //Input Operands + input logic [C_OP_FP64-1:0] Operand_a_DI, // always C_OP_FP64 bits wide, whatever Format_sel_SI selects + input logic [C_OP_FP64-1:0] Operand_b_DI, // both operands are consumed only by preprocess_U0 + + // Input Control + input logic [C_RM-1:0] RM_SI, //Rounding Mode + input logic [C_PC-1:0] Precision_ctl_SI, // Precision Control + input logic [C_FS-1:0] Format_sel_SI, // Format Selection, + input logic Kill_SI, // goes only to nrbd_nrsc_U0 + + //Output Result + output logic [C_OP_FP64-1:0] Result_DO, // driven by fpu_norm_U0 + + //Output-Flags + output logic [4:0] Fflags_SO, // {NV,DZ,OF,UF,NX}, driven by fpu_norm_U0 + output logic Ready_SO, // from nrbd_nrsc_U0; also fed back into preprocess_U0 as Ready_SI + output logic Done_SO // from nrbd_nrsc_U0 + ); + + + + + // NOTE(review): which side drives each net below is inferred from the _SO/_DO vs _SI/_DI port suffixes; preprocess_mvp and nrbd_nrsc_mvp are defined outside this file. + + //Operand components + logic [C_EXP_FP64:0] Exp_a_D; // preprocess_U0.Exp_a_DO_norm to nrbd_nrsc_U0.Exp_a_DI + logic [C_EXP_FP64:0] Exp_b_D; // preprocess_U0.Exp_b_DO_norm to nrbd_nrsc_U0.Exp_b_DI + logic [C_MANT_FP64:0] Mant_a_D; // preprocess_U0.Mant_a_DO_norm to nrbd_nrsc_U0.Mant_a_DI + logic [C_MANT_FP64:0] Mant_b_D; // preprocess_U0.Mant_b_DO_norm to nrbd_nrsc_U0.Mant_b_DI + + logic [C_EXP_FP64+1:0] Exp_z_D; // result exponent: nrbd_nrsc_U0.Exp_z_DO to fpu_norm_U0.Exp_in_DI + logic [C_MANT_FP64+4:0] Mant_z_D; // result mantissa: nrbd_nrsc_U0.Mant_z_DO to fpu_norm_U0.Mant_in_DI + logic Sign_z_D; // result sign from preprocess_U0, used by fpu_norm_U0 + logic Start_S; // preprocess_U0.Start_SO to nrbd_nrsc_U0.Start_SI + logic [C_RM-1:0] RM_dly_S; // rounding mode as re-emitted by preprocess_U0; this is what fpu_norm_U0 sees + logic Div_enable_S; // nrbd_nrsc_U0 to fpu_norm_U0 + logic Sqrt_enable_S; // nrbd_nrsc_U0 to fpu_norm_U0 + logic Inf_a_S; // operand classification flags, preprocess_U0 to fpu_norm_U0 + logic Inf_b_S; + logic Zero_a_S; + logic Zero_b_S; + logic NaN_a_S; + logic NaN_b_S; + logic SNaN_S; + logic Special_case_SB,Special_case_dly_SB; // preprocess_U0 to nrbd_nrsc_U0 + + logic Full_precision_S; // format and precision flags, nrbd_nrsc_U0 to fpu_norm_U0 + logic FP32_S; + logic FP64_S; + logic
FP16_S; + logic FP16ALT_S; + + // Stage 1: takes the raw operands and produces the exponent/mantissa nets, result sign and special-case flags. + preprocess_mvp preprocess_U0 + ( + .Clk_CI (Clk_CI ), + .Rst_RBI (Rst_RBI ), + .Div_start_SI (Div_start_SI ), + .Sqrt_start_SI (Sqrt_start_SI ), + .Ready_SI (Ready_SO ), + .Operand_a_DI (Operand_a_DI ), + .Operand_b_DI (Operand_b_DI ), + .RM_SI (RM_SI ), + .Format_sel_SI (Format_sel_SI ), + .Start_SO (Start_S ), + .Exp_a_DO_norm (Exp_a_D ), + .Exp_b_DO_norm (Exp_b_D ), + .Mant_a_DO_norm (Mant_a_D ), + .Mant_b_DO_norm (Mant_b_D ), + .RM_dly_SO (RM_dly_S ), + .Sign_z_DO (Sign_z_D ), + .Inf_a_SO (Inf_a_S ), + .Inf_b_SO (Inf_b_S ), + .Zero_a_SO (Zero_a_S ), + .Zero_b_SO (Zero_b_S ), + .NaN_a_SO (NaN_a_S ), + .NaN_b_SO (NaN_b_S ), + .SNaN_SO (SNaN_S ), + .Special_case_SBO (Special_case_SB ), + .Special_case_dly_SBO (Special_case_dly_SB) + ); + + nrbd_nrsc_mvp nrbd_nrsc_U0 // Stage 2: produces the unnormalised result (Exp_z_D, Mant_z_D) plus Ready/Done and the format flags + ( + .Clk_CI (Clk_CI ), + .Rst_RBI (Rst_RBI ), + .Div_start_SI (Div_start_SI ) , + .Sqrt_start_SI (Sqrt_start_SI ), + .Start_SI (Start_S ), + .Kill_SI (Kill_SI ), + .Special_case_SBI (Special_case_SB ), + .Special_case_dly_SBI (Special_case_dly_SB), + .Div_enable_SO (Div_enable_S ), + .Sqrt_enable_SO (Sqrt_enable_S ), + .Precision_ctl_SI (Precision_ctl_SI ), + .Format_sel_SI (Format_sel_SI ), + .Exp_a_DI (Exp_a_D ), + .Exp_b_DI (Exp_b_D ), + .Mant_a_DI (Mant_a_D ), + .Mant_b_DI (Mant_b_D ), + .Full_precision_SO (Full_precision_S ), + .FP32_SO (FP32_S ), + .FP64_SO (FP64_S ), + .FP16_SO (FP16_S ), + .FP16ALT_SO (FP16ALT_S ), + .Ready_SO (Ready_SO ), + .Done_SO (Done_SO ), + .Exp_z_DO (Exp_z_D ), + .Mant_z_DO (Mant_z_D ) + ); + + // Stage 3: normaliser / rounding unit; has no clock or reset connection and drives Result_DO and Fflags_SO. + norm_div_sqrt_mvp fpu_norm_U0 + ( + .Mant_in_DI (Mant_z_D ), + .Exp_in_DI (Exp_z_D ), + .Sign_in_DI (Sign_z_D ), + .Div_enable_SI (Div_enable_S ), + .Sqrt_enable_SI (Sqrt_enable_S ), + .Inf_a_SI (Inf_a_S ), + .Inf_b_SI (Inf_b_S ), + .Zero_a_SI (Zero_a_S ), + .Zero_b_SI (Zero_b_S ), + .NaN_a_SI (NaN_a_S ), + .NaN_b_SI (NaN_b_S ), + .SNaN_SI (SNaN_S ), + .RM_SI (RM_dly_S ), + .Full_precision_SI (Full_precision_S ), + .FP32_SI (FP32_S ), +
.FP64_SI (FP64_S ), + .FP16_SI (FP16_S ), + .FP16ALT_SI (FP16ALT_S ), + .Result_DO (Result_DO ), + .Fflags_SO (Fflags_SO ) //{NV,DZ,OF,UF,NX} + ); + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv new file mode 100644 index 0000000..0c645e6 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv @@ -0,0 +1,61 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 12/01/2017 // +// Design Name: FPU // +// Module Name: iteration_div_sqrt_mvp // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: iteration unit for div and sqrt // +// // +// // +// Revision: 03/14/2018 // +// For div_sqrt_mvp // +//////////////////////////////////////////////////////////////////////////////// + +module iteration_div_sqrt_mvp +#( + parameter WIDTH=25 +) + ( // One WIDTH-bit adder stage used for both divide and square-root iterations + // Inputs: adder operands, mode enables and the incoming two-bit D value + input logic [WIDTH-1:0] A_DI, + input logic [WIDTH-1:0] B_DI, + input logic Div_enable_SI, + input logic Div_start_dly_SI, // not read inside this module + input logic Sqrt_enable_SI, + input logic [1:0] D_DI, + // Outputs: outgoing D value, sum and carry out + output logic [1:0] D_DO, + output logic [WIDTH-1:0] Sum_DO, + output logic Carry_out_DO + ); + + logic d_any_set; // high when either bit of D_DI is set + logic adder_cin; // carry-in presented to the adder + // D_DO[1] is the XNOR of the two D_DI bits, D_DO[0] is the inverse of D_DI[0] + assign D_DO={D_DI[1] ~^ D_DI[0], ~D_DI[0]}; + assign d_any_set=|D_DI; + + // Carry-in is forced low in divide mode; otherwise it needs square-root mode and a non-zero D_DI + assign adder_cin=~Div_enable_SI & Sqrt_enable_SI & d_any_set; + // Sum and carry out come from a single addition that is one bit wider than WIDTH + assign {Carry_out_DO,Sum_DO}=adder_cin+A_DI+B_DI; + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv new file mode 100644 index 0000000..590abe9 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv @@ -0,0 +1,470 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 09/03/2018 // +// Design Name: FPU // +// Module Name: norm_div_sqrt_mvp.sv // +// Project Name: // +// Language: SystemVerilog // +// // +// Description: Floating point Normalizer/Rounding unit // +// Since this module is designed as combinational logic, // +// arbitrary register stages can be added in the wrapper // +// module for different frequencies. // +// // +// // +// // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan // +// // +// // +// // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module norm_div_sqrt_mvp + (//Inputs + input logic [C_MANT_FP64+4:0] Mant_in_DI, // Includes the 4 extra bits needed for rounding and the hidden bit + input logic signed [C_EXP_FP64+1:0] Exp_in_DI, + input logic Sign_in_DI, + input logic Div_enable_SI, + input logic Sqrt_enable_SI, + input logic Inf_a_SI, + input logic Inf_b_SI, + input logic Zero_a_SI, + input logic Zero_b_SI, + input logic NaN_a_SI, + input logic NaN_b_SI, + input logic SNaN_SI, + input logic [C_RM-1:0] RM_SI, + input logic Full_precision_SI, + input logic FP32_SI, + input logic FP64_SI, + input logic FP16_SI, + input logic FP16ALT_SI, + //Outputs + output logic [C_EXP_FP64+C_MANT_FP64:0] Result_DO, + output logic [4:0] Fflags_SO //{NV,DZ,OF,UF,NX} + ); + + + logic
Sign_res_D; + + logic NV_OP_S; + logic Exp_OF_S; + logic Exp_UF_S; + logic Div_Zero_S; + logic In_Exact_S; + + ///////////////////////////////////////////////////////////////////////////// + // Normalization // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64:0] Mant_res_norm_D; + logic [C_EXP_FP64-1:0] Exp_res_norm_D; + + ///////////////////////////////////////////////////////////////////////////// + // Right shift operations for negtive exponents // + ///////////////////////////////////////////////////////////////////////////// + + logic [C_EXP_FP64+1:0] Exp_Max_RS_FP64_D; + logic [C_EXP_FP32+1:0] Exp_Max_RS_FP32_D; + logic [C_EXP_FP16+1:0] Exp_Max_RS_FP16_D; + logic [C_EXP_FP16ALT+1:0] Exp_Max_RS_FP16ALT_D; + // + assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1; // to check exponent after (C_MANT_FP64+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1; // to check exponent after (C_MANT_FP32+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1; // to check exponent after (C_MANT_FP16+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1; // to check exponent after (C_MANT_FP16ALT+1)-bit >> when Exp_in_DI is negative + logic [C_EXP_FP64+1:0] Num_RS_D; + assign Num_RS_D=~Exp_in_DI+1+1; // How many right shifts(RS) are needed to generate a denormal number? 
>> is need only when Exp_in_DI is negative + logic [C_MANT_FP64:0] Mant_RS_D; + logic [C_MANT_FP64+4:0] Mant_forsticky_D; + assign {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D); // +// + logic [C_EXP_FP64+1:0] Exp_subOne_D; + assign Exp_subOne_D = Exp_in_DI -1; + + //normalization + logic [1:0] Mant_lower_D; + logic Mant_sticky_bit_D; + logic [C_MANT_FP64+4:0] Mant_forround_D; + + always_comb + begin + + if(NaN_a_SI) // if a is NaN, return NaN + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = SNaN_SI; + end + + else if(NaN_b_SI) //if b is NaN, return NaN + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = SNaN_SI; + end + + else if(Inf_a_SI) + begin + if(Div_enable_SI&&Inf_b_SI) //Inf/Inf, retrurn NaN + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + else if (Sqrt_enable_SI && Sign_in_DI) begin // catch sqrt(-inf) + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end else begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Div_enable_SI&&Inf_b_SI) + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Zero_a_SI) + begin + if(Div_enable_SI&&Zero_b_SI) + begin + Div_Zero_S=1'b1; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + 
Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + else + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Div_enable_SI&&(Zero_b_SI)) //div Zero + begin + Div_Zero_S=1'b1; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Sign_in_DI&&Sqrt_enable_SI) //sqrt(-a) + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + + else if((Exp_in_DI[C_EXP_FP64:0]=='0)) + begin + if(Mant_in_DI!='0) //Exp=0, Mant!=0, it is denormal + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D={1'b0,Mant_in_DI[C_MANT_FP64+4:5]}; + Exp_res_norm_D='0; + Mant_forround_D={Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} }; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else // Zero + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4])) //denormal + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+4:4]; + Exp_res_norm_D='0; + Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Exp_in_DI[C_EXP_FP64+1]) //minus //consider format + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D={Mant_RS_D[C_MANT_FP64:0]}; + Exp_res_norm_D='0; + Mant_forround_D={Mant_forsticky_D[C_MANT_FP64+4:0]}; //?? 
+ Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) ) //OF + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) | ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )//255 + begin + if(~Mant_in_DI[C_MANT_FP64+4]) // MSB=0 + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3]; + Exp_res_norm_D=Exp_subOne_D; + Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else if(Mant_in_DI!='0) //NaN + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else //infinity + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Mant_in_DI[C_MANT_FP64+4]) //normal numbers with 1.XXX + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D= Mant_in_DI[C_MANT_FP64+4:4]; + Exp_res_norm_D=Exp_in_DI[C_EXP_FP64-1:0]; + Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else //normal numbers with 0.1XX + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3]; + Exp_res_norm_D=Exp_subOne_D; + Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + end + + 
///////////////////////////////////////////////////////////////////////////// + // Rounding enable only for full precision (Full_precision_SI==1'b1) // + ///////////////////////////////////////////////////////////////////////////// + + logic [C_MANT_FP64:0] Mant_upper_D; + logic [C_MANT_FP64+1:0] Mant_upperRounded_D; + logic Mant_roundUp_S; + logic Mant_rounded_S; + + always_comb //determine which bits for Mant_lower_D and Mant_sticky_bit_D + begin + if(FP32_SI) + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0]; + end + else if(FP64_SI) + begin + Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0]; + Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3]; + Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0]; + end + else if(FP16_SI) + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:30]; + end + else //FP16ALT + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:30]; + end + end + + assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D; + + + + + always_comb //determine whether to round up or not + begin + Mant_roundUp_S = 1'b0; + case (RM_SI) + C_RM_NEAREST : + Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | 
(FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) ); + C_RM_TRUNC : + Mant_roundUp_S = 0; + C_RM_PLUSINF : + Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI; + C_RM_MINUSINF: + Mant_roundUp_S = Mant_rounded_S & Sign_in_DI; + default : + Mant_roundUp_S = 0; + endcase // case (RM_DI) + end // always_comb begin + + logic Mant_renorm_S; + logic [C_MANT_FP64:0] Mant_roundUp_Vector_S; // for all the formats + + assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)}; + + + assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S; + assign Mant_renorm_S = Mant_upperRounded_D[C_MANT_FP64+1]; + + ///////////////////////////////////////////////////////////////////////////// + // Renormalization for Rounding // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64-1:0] Mant_res_round_D; + logic [C_EXP_FP64-1:0] Exp_res_round_D; + + + assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit + assign Exp_res_round_D = Exp_res_norm_D+Mant_renorm_S; + + ///////////////////////////////////////////////////////////////////////////// + // Output Assignments // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64-1:0] Mant_before_format_ctl_D; + logic [C_EXP_FP64-1:0] Exp_before_format_ctl_D; + assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D; + assign Exp_before_format_ctl_D = Full_precision_SI ? 
Exp_res_round_D : Exp_res_norm_D; + + always_comb //NaN Boxing + begin // + if(FP32_SI) + begin + Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]}; + end + else if(FP64_SI) + begin + Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]}; + end + else if(FP16_SI) + begin + Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]}; + end + else + begin + Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]}; + end + end + +assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S; +assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX} + +endmodule // norm_div_sqrt_mvp diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv new file mode 100644 index 0000000..62bd147 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv @@ -0,0 +1,104 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 10/04/2018 // +// Design Name: FPU // +// Module Name: nrbd_nrsc_mvp.sv // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: non-restoring binary divider / square root // +// This module only instantiates control_mvp and forwards its ports. // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module nrbd_nrsc_mvp + + (//Input + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI, + input logic Sqrt_start_SI, + input logic Start_SI, + input logic Kill_SI, + input logic Special_case_SBI, + input logic Special_case_dly_SBI, + input logic [C_PC-1:0] Precision_ctl_SI, + input logic [1:0] Format_sel_SI, + input logic [C_MANT_FP64:0] Mant_a_DI, + input logic [C_MANT_FP64:0] Mant_b_DI, + input logic [C_EXP_FP64:0] Exp_a_DI, + input logic [C_EXP_FP64:0] Exp_b_DI, + //output + output logic Div_enable_SO, + output logic Sqrt_enable_SO, + + output logic Full_precision_SO, + output logic FP32_SO, + output logic FP64_SO, + output logic FP16_SO, + output logic FP16ALT_SO, + output logic Ready_SO, + output logic Done_SO, + output logic [C_MANT_FP64+4:0] Mant_z_DO, + output logic [C_EXP_FP64+1:0] Exp_z_DO + ); + + + logic Div_start_dly_S,Sqrt_start_dly_S; // driven by control_U0; not read elsewhere in this module + + +control_mvp control_U0 +( .Clk_CI (Clk_CI ), + .Rst_RBI (Rst_RBI ), + .Div_start_SI (Div_start_SI ), + .Sqrt_start_SI (Sqrt_start_SI ), + .Start_SI (Start_SI ), + .Kill_SI (Kill_SI ), + .Special_case_SBI (Special_case_SBI ), + .Special_case_dly_SBI (Special_case_dly_SBI ), + .Precision_ctl_SI (Precision_ctl_SI ), + .Format_sel_SI (Format_sel_SI ), +
.Numerator_DI (Mant_a_DI ), + .Exp_num_DI (Exp_a_DI ), + .Denominator_DI (Mant_b_DI ), + .Exp_den_DI (Exp_b_DI ), + .Div_start_dly_SO (Div_start_dly_S ), + .Sqrt_start_dly_SO (Sqrt_start_dly_S ), + .Div_enable_SO (Div_enable_SO ), + .Sqrt_enable_SO (Sqrt_enable_SO ), + .Full_precision_SO (Full_precision_SO ), + .FP32_SO (FP32_SO ), + .FP64_SO (FP64_SO ), + .FP16_SO (FP16_SO ), + .FP16ALT_SO (FP16ALT_SO ), + .Ready_SO (Ready_SO ), + .Done_SO (Done_SO ), + .Mant_result_prenorm_DO (Mant_z_DO ), + .Exp_result_prenorm_DO (Exp_z_DO ) +); + + + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv new file mode 100644 index 0000000..9e0d25f --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv @@ -0,0 +1,425 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li //lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 01/03/2018 // +// Design Name: FPU // +// Module Name: preprocess_mvp.sv // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: decode and data preparation // +// (operand unpacking per format, zero/Inf/NaN detection, normalization) // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module preprocess_mvp + ( + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI, + input logic Sqrt_start_SI, + input logic Ready_SI, + //Input Operands + input logic [C_OP_FP64-1:0] Operand_a_DI, + input logic [C_OP_FP64-1:0] Operand_b_DI, + input logic [C_RM-1:0] RM_SI, //Rounding Mode + input logic [C_FS-1:0] Format_sel_SI, // Format Selection: 2'b00 FP32, 2'b01 FP64, 2'b10 FP16, 2'b11 FP16ALT + + // to control + output logic Start_SO, + output logic [C_EXP_FP64:0] Exp_a_DO_norm, + output logic [C_EXP_FP64:0] Exp_b_DO_norm, + output logic [C_MANT_FP64:0] Mant_a_DO_norm, + output logic [C_MANT_FP64:0] Mant_b_DO_norm, + + output logic [C_RM-1:0] RM_dly_SO, // RM_SI registered when a start is accepted (Start_S && Ready_SI) + + output logic Sign_z_DO, + output logic Inf_a_SO, + output logic Inf_b_SO, + output logic Zero_a_SO, + output logic Zero_b_SO, + output logic NaN_a_SO, + output logic NaN_b_SO, + output logic SNaN_SO, + output logic Special_case_SBO, + output logic Special_case_dly_SBO + ); + + //Hidden Bits + logic Hb_a_D; + logic Hb_b_D; + + logic [C_EXP_FP64-1:0] Exp_a_D; + logic [C_EXP_FP64-1:0] Exp_b_D; + logic [C_MANT_FP64-1:0] Mant_a_NonH_D; + logic [C_MANT_FP64-1:0] Mant_b_NonH_D; + logic [C_MANT_FP64:0] Mant_a_D; + logic [C_MANT_FP64:0] Mant_b_D; + +
///////////////////////////////////////////////////////////////////////////// + // Disassemble operands + ///////////////////////////////////////////////////////////////////////////// + logic Sign_a_D,Sign_b_D; + logic Start_S; + + always_comb + begin + case(Format_sel_SI) + 2'b00: + begin + Sign_a_D = Operand_a_DI[C_OP_FP32-1]; + Sign_b_D = Operand_b_DI[C_OP_FP32-1]; + Exp_a_D = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]}; + Exp_b_D = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]}; + Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP32-1:0],29'h0}; + Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP32-1:0],29'h0}; + end + 2'b01: + begin + Sign_a_D = Operand_a_DI[C_OP_FP64-1]; + Sign_b_D = Operand_b_DI[C_OP_FP64-1]; + Exp_a_D = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64]; + Exp_b_D = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64]; + Mant_a_NonH_D = Operand_a_DI[C_MANT_FP64-1:0]; + Mant_b_NonH_D = Operand_b_DI[C_MANT_FP64-1:0]; + end + 2'b10: + begin + Sign_a_D = Operand_a_DI[C_OP_FP16-1]; + Sign_b_D = Operand_b_DI[C_OP_FP16-1]; + Exp_a_D = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]}; + Exp_b_D = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]}; + Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16-1:0],42'h0}; + Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16-1:0],42'h0}; + end + 2'b11: + begin + Sign_a_D = Operand_a_DI[C_OP_FP16ALT-1]; + Sign_b_D = Operand_b_DI[C_OP_FP16ALT-1]; + Exp_a_D = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]}; + Exp_b_D = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]}; + Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16ALT-1:0],45'h0}; + Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16ALT-1:0],45'h0}; + end + endcase + end + + + assign Mant_a_D = {Hb_a_D,Mant_a_NonH_D}; + assign Mant_b_D = {Hb_b_D,Mant_b_NonH_D}; + + assign Hb_a_D = | Exp_a_D; // hidden bit: set when any exponent bit of operand a is non-zero + assign Hb_b_D = | Exp_b_D; // hidden bit: set when any exponent bit of operand b is non-zero + + assign Start_S= Div_start_SI | Sqrt_start_SI; + + + + ///////////////////////////////////////////////////////////////////////////// + // preliminary checks for infinite/zero/NaN operands // +
///////////////////////////////////////////////////////////////////////////// + + logic Mant_a_prenorm_zero_S; + logic Mant_b_prenorm_zero_S; + + logic Exp_a_prenorm_zero_S; + logic Exp_b_prenorm_zero_S; + assign Exp_a_prenorm_zero_S = ~Hb_a_D; // exponent field of operand a is all zeros + assign Exp_b_prenorm_zero_S = ~Hb_b_D; // exponent field of operand b is all zeros + + logic Exp_a_prenorm_Inf_NaN_S; + logic Exp_b_prenorm_Inf_NaN_S; + + logic Mant_a_prenorm_QNaN_S; + logic Mant_a_prenorm_SNaN_S; + logic Mant_b_prenorm_QNaN_S; + logic Mant_b_prenorm_SNaN_S; + + assign Mant_a_prenorm_QNaN_S=Mant_a_NonH_D[C_MANT_FP64-1]&&(~(|Mant_a_NonH_D[C_MANT_FP64-2:0])); + assign Mant_a_prenorm_SNaN_S=(~Mant_a_NonH_D[C_MANT_FP64-1])&&((|Mant_a_NonH_D[C_MANT_FP64-2:0])); + assign Mant_b_prenorm_QNaN_S=Mant_b_NonH_D[C_MANT_FP64-1]&&(~(|Mant_b_NonH_D[C_MANT_FP64-2:0])); + assign Mant_b_prenorm_SNaN_S=(~Mant_b_NonH_D[C_MANT_FP64-1])&&((|Mant_b_NonH_D[C_MANT_FP64-2:0])); + + always_comb + begin + case(Format_sel_SI) + 2'b00: + begin + Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32); + Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32); + Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32); + Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32); + end + 2'b01: + begin + Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64); + Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64); + Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64); + Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64); + end + 2'b10: + begin + Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16); + Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16); + Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16); + Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16); + end + 2'b11: + begin +
Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT); + Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT); + Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT); + Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT); + end + endcase + end + + + + + logic Zero_a_SN,Zero_a_SP; + logic Zero_b_SN,Zero_b_SP; + logic Inf_a_SN,Inf_a_SP; + logic Inf_b_SN,Inf_b_SP; + logic NaN_a_SN,NaN_a_SP; + logic NaN_b_SN,NaN_b_SP; + logic SNaN_SN,SNaN_SP; + + assign Zero_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_zero_S&&Mant_a_prenorm_zero_S):Zero_a_SP; + assign Zero_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_zero_S&&Mant_b_prenorm_zero_S):Zero_b_SP; + assign Inf_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&Mant_a_prenorm_zero_S):Inf_a_SP; + assign Inf_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&Mant_b_prenorm_zero_S):Inf_b_SP; + assign NaN_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&(~Mant_a_prenorm_zero_S)):NaN_a_SP; + assign NaN_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&(~Mant_b_prenorm_zero_S)):NaN_b_SP; + assign SNaN_SN = (Start_S&&Ready_SI) ?
((Mant_a_prenorm_SNaN_S&&NaN_a_SN) | (Mant_b_prenorm_SNaN_S&&NaN_b_SN)) : SNaN_SP; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Zero_a_SP <='0; + Zero_b_SP <='0; + Inf_a_SP <='0; + Inf_b_SP <='0; + NaN_a_SP <='0; + NaN_b_SP <='0; + SNaN_SP <= '0; + end + else + begin + Inf_a_SP <=Inf_a_SN; + Inf_b_SP <=Inf_b_SN; + Zero_a_SP <=Zero_a_SN; + Zero_b_SP <=Zero_b_SN; + NaN_a_SP <=NaN_a_SN; + NaN_b_SP <=NaN_b_SN; + SNaN_SP <= SNaN_SN; + end + end + + ///////////////////////////////////////////////////////////////////////////// + // Low power control + ///////////////////////////////////////////////////////////////////////////// + + assign Special_case_SBO=(~{(Div_start_SI)?(Zero_a_SN | Zero_b_SN | Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN): (Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D) })&&(Start_S&&Ready_SI); // 1 only when a start is accepted and no special case is flagged (zero/Inf/NaN operand; for sqrt also a set sign bit) + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Special_case_dly_SBO <= '0; + end + else if((Start_S&&Ready_SI)) + begin + Special_case_dly_SBO <= Special_case_SBO; + end + else if(Special_case_dly_SBO) + begin + Special_case_dly_SBO <= 1'b1; + end + else + begin + Special_case_dly_SBO <= '0; + end + end + + ///////////////////////////////////////////////////////////////////////////// + // Delay sign for normalization and round // + ///////////////////////////////////////////////////////////////////////////// + + logic Sign_z_DN; + logic Sign_z_DP; + + always_comb + begin + if(Div_start_SI&&Ready_SI) + Sign_z_DN = Sign_a_D ^ Sign_b_D; + else if(Sqrt_start_SI&&Ready_SI) + Sign_z_DN = Sign_a_D; + else + Sign_z_DN = Sign_z_DP; + end + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Sign_z_DP <= '0; + end + else + begin + Sign_z_DP <= Sign_z_DN; + end + end + + logic [C_RM-1:0] RM_DN; + logic [C_RM-1:0] RM_DP; + + always_comb + begin + if(Start_S&&Ready_SI) + RM_DN = RM_SI; + else + RM_DN = RM_DP; + end + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin +
if(~Rst_RBI) + begin + RM_DP <= '0; + end + else + begin + RM_DP <= RM_DN; + end + end + assign RM_dly_SO = RM_DP; + + logic [5:0] Mant_leadingOne_a, Mant_leadingOne_b; + logic Mant_zero_S_a,Mant_zero_S_b; + + lzc #( + .WIDTH ( C_MANT_FP64+1 ), + .MODE ( 1 ) + ) LOD_Ua ( + .in_i ( Mant_a_D ), + .cnt_o ( Mant_leadingOne_a ), + .empty_o ( Mant_zero_S_a ) + ); + + logic [C_MANT_FP64:0] Mant_a_norm_DN,Mant_a_norm_DP; + + assign Mant_a_norm_DN = ((Start_S&&Ready_SI))?(Mant_a_D<<(Mant_leadingOne_a)):Mant_a_norm_DP; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Mant_a_norm_DP <= '0; + end + else + begin + Mant_a_norm_DP<=Mant_a_norm_DN; + end + end + + logic [C_EXP_FP64:0] Exp_a_norm_DN,Exp_a_norm_DP; + assign Exp_a_norm_DN = ((Start_S&&Ready_SI))?(Exp_a_D-Mant_leadingOne_a+(|Mant_leadingOne_a)):Exp_a_norm_DP; //Covering the process of denormal numbers + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Exp_a_norm_DP <= '0; + end + else + begin + Exp_a_norm_DP<=Exp_a_norm_DN; + end + end + + lzc #( + .WIDTH ( C_MANT_FP64+1 ), + .MODE ( 1 ) + ) LOD_Ub ( + .in_i ( Mant_b_D ), + .cnt_o ( Mant_leadingOne_b ), + .empty_o ( Mant_zero_S_b ) + ); + + + logic [C_MANT_FP64:0] Mant_b_norm_DN,Mant_b_norm_DP; + + assign Mant_b_norm_DN = ((Start_S&&Ready_SI))?(Mant_b_D<<(Mant_leadingOne_b)):Mant_b_norm_DP; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Mant_b_norm_DP <= '0; + end + else + begin + Mant_b_norm_DP<=Mant_b_norm_DN; + end + end + + logic [C_EXP_FP64:0] Exp_b_norm_DN,Exp_b_norm_DP; + assign Exp_b_norm_DN = ((Start_S&&Ready_SI))?(Exp_b_D-Mant_leadingOne_b+(|Mant_leadingOne_b)):Exp_b_norm_DP; //Covering the process of denormal numbers + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Exp_b_norm_DP <= '0; + end + else + begin + Exp_b_norm_DP<=Exp_b_norm_DN; + end + end + +
///////////////////////////////////////////////////////////////////////////// + // Output assignments // + ///////////////////////////////////////////////////////////////////////////// + + assign Start_SO=Start_S; + assign Exp_a_DO_norm=Exp_a_norm_DP; + assign Exp_b_DO_norm=Exp_b_norm_DP; + assign Mant_a_DO_norm=Mant_a_norm_DP; + assign Mant_b_DO_norm=Mant_b_norm_DP; + assign Sign_z_DO=Sign_z_DP; + assign Inf_a_SO=Inf_a_SP; + assign Inf_b_SO=Inf_b_SP; + assign Zero_a_SO=Zero_a_SP; + assign Zero_b_SO=Zero_b_SP; + assign NaN_a_SO=NaN_a_SP; + assign NaN_b_SO=NaN_b_SP; + assign SNaN_SO=SNaN_SP; + +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh b/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh new file mode 100644 index 0000000..14bb194 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh @@ -0,0 +1,541 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Wolfgang Roenninger + +// Macros to assign AXI Interfaces and Structs + +`ifndef AXI_ASSIGN_SVH_ +`define AXI_ASSIGN_SVH_ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Internal implementation for assigning one AXI struct or interface to another struct or interface.
+// The path to the signals on each side is defined by the `__sep*` arguments. The `__opt_as` +// argument allows to use this standalone (with `__opt_as = assign`) or in assignments inside +// processes (with `__opt_as` void). +`define __AXI_TO_AW(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as __lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``atop = __rhs``__rhs_sep``atop; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_W(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``strb = __rhs``__rhs_sep``strb; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_B(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_AR(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as 
__lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_REQ(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + `__AXI_TO_AW(__opt_as, __lhs.aw, __lhs_sep, __rhs.aw, __rhs_sep) \ + __opt_as __lhs.aw_valid = __rhs.aw_valid; \ + `__AXI_TO_W(__opt_as, __lhs.w, __lhs_sep, __rhs.w, __rhs_sep) \ + __opt_as __lhs.w_valid = __rhs.w_valid; \ + __opt_as __lhs.b_ready = __rhs.b_ready; \ + `__AXI_TO_AR(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ + __opt_as __lhs.ar_valid = __rhs.ar_valid; \ + __opt_as __lhs.r_ready = __rhs.r_ready; +`define __AXI_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs.aw_ready = __rhs.aw_ready; \ + __opt_as __lhs.ar_ready = __rhs.ar_ready; \ + __opt_as __lhs.w_ready = __rhs.w_ready; \ + __opt_as __lhs.b_valid = __rhs.b_valid; \ + `__AXI_TO_B(__opt_as, __lhs.b, __lhs_sep, __rhs.b, __rhs_sep) \ + __opt_as __lhs.r_valid = __rhs.r_valid; \ + `__AXI_TO_R(__opt_as, __lhs.r, __lhs_sep, __rhs.r, __rhs_sep) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning one AXI4+ATOP interface to another, as if you would do `assign slv = mst;` +// +// The channel assignments 
`AXI_ASSIGN_XX(dst, src)` assign all payload and the valid signal of the +// `XX` channel from the `src` to the `dst` interface and they assign the ready signal from the +// `src` to the `dst` interface. +// The interface assignment `AXI_ASSIGN(dst, src)` assigns all channels including handshakes as if +// `src` was the master of `dst`. +// +// Usage Example: +// `AXI_ASSIGN(slv, mst) +// `AXI_ASSIGN_AW(dst, src) +// `AXI_ASSIGN_R(dst, src) +`define AXI_ASSIGN_AW(dst, src) \ + `__AXI_TO_AW(assign, dst.aw, _, src.aw, _) \ + assign dst.aw_valid = src.aw_valid; \ + assign src.aw_ready = dst.aw_ready; +`define AXI_ASSIGN_W(dst, src) \ + `__AXI_TO_W(assign, dst.w, _, src.w, _) \ + assign dst.w_valid = src.w_valid; \ + assign src.w_ready = dst.w_ready; +`define AXI_ASSIGN_B(dst, src) \ + `__AXI_TO_B(assign, dst.b, _, src.b, _) \ + assign dst.b_valid = src.b_valid; \ + assign src.b_ready = dst.b_ready; +`define AXI_ASSIGN_AR(dst, src) \ + `__AXI_TO_AR(assign, dst.ar, _, src.ar, _) \ + assign dst.ar_valid = src.ar_valid; \ + assign src.ar_ready = dst.ar_ready; +`define AXI_ASSIGN_R(dst, src) \ + `__AXI_TO_R(assign, dst.r, _, src.r, _) \ + assign dst.r_valid = src.r_valid; \ + assign src.r_ready = dst.r_ready; +`define AXI_ASSIGN(slv, mst) \ + `AXI_ASSIGN_AW(slv, mst) \ + `AXI_ASSIGN_W(slv, mst) \ + `AXI_ASSIGN_B(mst, slv) \ + `AXI_ASSIGN_AR(slv, mst) \ + `AXI_ASSIGN_R(mst, slv) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning a AXI4+ATOP interface to a monitor modport, as if you would do `assign mon = axi_if;` +// +// The channel assignment `AXI_ASSIGN_MONITOR(mon_dv, axi_if)` assigns all signals from `axi_if` +// to the `mon_dv` interface. 
+// +// Usage Example: +// `AXI_ASSIGN_MONITOR(mon_dv, axi_if) +`define AXI_ASSIGN_MONITOR(mon_dv, axi_if) \ + `__AXI_TO_AW(assign, mon_dv.aw, _, axi_if.aw, _) \ + assign mon_dv.aw_valid = axi_if.aw_valid; \ + assign mon_dv.aw_ready = axi_if.aw_ready; \ + `__AXI_TO_W(assign, mon_dv.w, _, axi_if.w, _) \ + assign mon_dv.w_valid = axi_if.w_valid; \ + assign mon_dv.w_ready = axi_if.w_ready; \ + `__AXI_TO_B(assign, mon_dv.b, _, axi_if.b, _) \ + assign mon_dv.b_valid = axi_if.b_valid; \ + assign mon_dv.b_ready = axi_if.b_ready; \ + `__AXI_TO_AR(assign, mon_dv.ar, _, axi_if.ar, _) \ + assign mon_dv.ar_valid = axi_if.ar_valid; \ + assign mon_dv.ar_ready = axi_if.ar_ready; \ + `__AXI_TO_R(assign, mon_dv.r, _, axi_if.r, _) \ + assign mon_dv.r_valid = axi_if.r_valid; \ + assign mon_dv.r_ready = axi_if.r_ready; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting an interface from channel or request/response structs inside a process. +// +// The channel macros `AXI_SET_FROM_XX(axi_if, xx_struct)` set the payload signals of the `axi_if` +// interface from the signals in `xx_struct`. They do not set the handshake signals. +// The request macro `AXI_SET_FROM_REQ(axi_if, req_struct)` sets all request channels (AW, W, AR) +// and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the `axi_if` +// interface from the signals in `req_struct`. +// The response macro `AXI_SET_FROM_RESP(axi_if, resp_struct)` sets both response channels (B and R) +// and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the `axi_if` +// interface from the signals in `resp_struct`. +// +// Usage Example: +// always_comb begin +// `AXI_SET_FROM_REQ(my_if, my_req_struct) +// end +`define AXI_SET_FROM_AW(axi_if, aw_struct) `__AXI_TO_AW(, axi_if.aw, _, aw_struct, .) 
+`define AXI_SET_FROM_W(axi_if, w_struct) `__AXI_TO_W(, axi_if.w, _, w_struct, .) +`define AXI_SET_FROM_B(axi_if, b_struct) `__AXI_TO_B(, axi_if.b, _, b_struct, .) +`define AXI_SET_FROM_AR(axi_if, ar_struct) `__AXI_TO_AR(, axi_if.ar, _, ar_struct, .) +`define AXI_SET_FROM_R(axi_if, r_struct) `__AXI_TO_R(, axi_if.r, _, r_struct, .) +`define AXI_SET_FROM_REQ(axi_if, req_struct) `__AXI_TO_REQ(, axi_if, _, req_struct, .) +`define AXI_SET_FROM_RESP(axi_if, resp_struct) `__AXI_TO_RESP(, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning an interface from channel or request/response structs outside a process. +// +// The channel macros `AXI_ASSIGN_FROM_XX(axi_if, xx_struct)` assign the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not assign the handshake signals. +// The request macro `AXI_ASSIGN_FROM_REQ(axi_if, req_struct)` assigns all request channels (AW, W, +// AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. +// The response macro `AXI_ASSIGN_FROM_RESP(axi_if, resp_struct)` assigns both response channels (B +// and R) and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the +// `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// `AXI_ASSIGN_FROM_REQ(my_if, my_req_struct) +`define AXI_ASSIGN_FROM_AW(axi_if, aw_struct) `__AXI_TO_AW(assign, axi_if.aw, _, aw_struct, .) +`define AXI_ASSIGN_FROM_W(axi_if, w_struct) `__AXI_TO_W(assign, axi_if.w, _, w_struct, .) +`define AXI_ASSIGN_FROM_B(axi_if, b_struct) `__AXI_TO_B(assign, axi_if.b, _, b_struct, .) +`define AXI_ASSIGN_FROM_AR(axi_if, ar_struct) `__AXI_TO_AR(assign, axi_if.ar, _, ar_struct, .) 
+`define AXI_ASSIGN_FROM_R(axi_if, r_struct) `__AXI_TO_R(assign, axi_if.r, _, r_struct, .) +`define AXI_ASSIGN_FROM_REQ(axi_if, req_struct) `__AXI_TO_REQ(assign, axi_if, _, req_struct, .) +`define AXI_ASSIGN_FROM_RESP(axi_if, resp_struct) `__AXI_TO_RESP(assign, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from an interface inside a process. +// +// The channel macros `AXI_SET_TO_XX(xx_struct, axi_if)` set the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not set the handshake +// signals. +// The request macro `AXI_SET_TO_REQ(req_struct, axi_if)` sets all signals of `req_struct` (i.e., +// request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR valid and +// B and R ready)) to the signals in the `axi_if` interface. +// The response macro `AXI_SET_TO_RESP(resp_struct, axi_if)` sets all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface.
+// +// Usage Example: +// always_comb begin +// `AXI_SET_TO_REQ(my_req_struct, my_if) +// end +`define AXI_SET_TO_AW(aw_struct, axi_if) `__AXI_TO_AW(, aw_struct, ., axi_if.aw, _) +`define AXI_SET_TO_W(w_struct, axi_if) `__AXI_TO_W(, w_struct, ., axi_if.w, _) +`define AXI_SET_TO_B(b_struct, axi_if) `__AXI_TO_B(, b_struct, ., axi_if.b, _) +`define AXI_SET_TO_AR(ar_struct, axi_if) `__AXI_TO_AR(, ar_struct, ., axi_if.ar, _) +`define AXI_SET_TO_R(r_struct, axi_if) `__AXI_TO_R(, r_struct, ., axi_if.r, _) +`define AXI_SET_TO_REQ(req_struct, axi_if) `__AXI_TO_REQ(, req_struct, ., axi_if, _) +`define AXI_SET_TO_RESP(resp_struct, axi_if) `__AXI_TO_RESP(, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from an interface outside a process. +// +// The channel macros `AXI_ASSIGN_TO_XX(xx_struct, axi_if)` assign the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not assign the handshake +// signals. +// The request macro `AXI_ASSIGN_TO_REQ(req_struct, axi_if)` assigns all signals of `req_struct` +// (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR +// valid and B and R ready)) to the signals in the `axi_if` interface. +// The response macro `AXI_ASSIGN_TO_RESP(resp_struct, axi_if)` assigns all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface.
+// +// Usage Example: +// `AXI_ASSIGN_TO_REQ(my_req_struct, my_if) +`define AXI_ASSIGN_TO_AW(aw_struct, axi_if) `__AXI_TO_AW(assign, aw_struct, ., axi_if.aw, _) +`define AXI_ASSIGN_TO_W(w_struct, axi_if) `__AXI_TO_W(assign, w_struct, ., axi_if.w, _) +`define AXI_ASSIGN_TO_B(b_struct, axi_if) `__AXI_TO_B(assign, b_struct, ., axi_if.b, _) +`define AXI_ASSIGN_TO_AR(ar_struct, axi_if) `__AXI_TO_AR(assign, ar_struct, ., axi_if.ar, _) +`define AXI_ASSIGN_TO_R(r_struct, axi_if) `__AXI_TO_R(assign, r_struct, ., axi_if.r, _) +`define AXI_ASSIGN_TO_REQ(req_struct, axi_if) `__AXI_TO_REQ(assign, req_struct, ., axi_if, _) +`define AXI_ASSIGN_TO_RESP(resp_struct, axi_if) `__AXI_TO_RESP(assign, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from another struct inside a process. +// +// The channel macros `AXI_SET_XX_STRUCT(lhs, rhs)` set the fields of the `lhs` channel struct to +// the fields of the `rhs` channel struct. They do not set the handshake signals, which are not +// part of channel structs. +// The request macro `AXI_SET_REQ_STRUCT(lhs, rhs)` sets all fields of the `lhs` request struct to +// the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) payload +// and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_SET_RESP_STRUCT(lhs, rhs)` sets all fields of the `lhs` response struct +// to the fields of the `rhs` response struct. This includes all response channel (B and R) payload +// and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// always_comb begin +// `AXI_SET_REQ_STRUCT(my_req_struct, another_req_struct) +// end +`define AXI_SET_AW_STRUCT(lhs, rhs) `__AXI_TO_AW(, lhs, ., rhs, .)
+`define AXI_SET_W_STRUCT(lhs, rhs) `__AXI_TO_W(, lhs, ., rhs, .) +`define AXI_SET_B_STRUCT(lhs, rhs) `__AXI_TO_B(, lhs, ., rhs, .) +`define AXI_SET_AR_STRUCT(lhs, rhs) `__AXI_TO_AR(, lhs, ., rhs, .) +`define AXI_SET_R_STRUCT(lhs, rhs) `__AXI_TO_R(, lhs, ., rhs, .) +`define AXI_SET_REQ_STRUCT(lhs, rhs) `__AXI_TO_REQ(, lhs, ., rhs, .) +`define AXI_SET_RESP_STRUCT(lhs, rhs) `__AXI_TO_RESP(, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from another struct outside a process. +// +// The channel macros `AXI_ASSIGN_XX_STRUCT(lhs, rhs)` assign the fields of the `lhs` channel struct +// to the fields of the `rhs` channel struct. They do not assign the handshake signals, which are +// not part of the channel structs. +// The request macro `AXI_ASSIGN_REQ_STRUCT(lhs, rhs)` assigns all fields of the `lhs` request +// struct to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) +// payload and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_ASSIGN_RESP_STRUCT(lhs, rhs)` assigns all fields of the `lhs` response +// struct to the fields of the `rhs` response struct. This includes all response channel (B and R) +// payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// `AXI_ASSIGN_REQ_STRUCT(my_req_struct, another_req_struct) +`define AXI_ASSIGN_AW_STRUCT(lhs, rhs) `__AXI_TO_AW(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_W_STRUCT(lhs, rhs) `__AXI_TO_W(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_B_STRUCT(lhs, rhs) `__AXI_TO_B(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_AR_STRUCT(lhs, rhs) `__AXI_TO_AR(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_R_STRUCT(lhs, rhs) `__AXI_TO_R(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_REQ_STRUCT(lhs, rhs) `__AXI_TO_REQ(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_RESP_STRUCT(lhs, rhs) `__AXI_TO_RESP(assign, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Internal implementation for assigning one Lite struct or interface to another struct or +// interface. The path to the signals on each side is defined by the `__*_sep` arguments. The +// `__opt_as` argument allows to use this standalone (with `__opt_as = assign`) or in assignments +// inside processes (with `__opt_as` void). +`define __AXI_LITE_TO_AX(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; +`define __AXI_LITE_TO_W(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``strb = __rhs``__rhs_sep``strb; +`define __AXI_LITE_TO_B(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; +`define __AXI_LITE_TO_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; +`define __AXI_LITE_TO_REQ(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + `__AXI_LITE_TO_AX(__opt_as, __lhs.aw, __lhs_sep, __rhs.aw, __rhs_sep) \ + __opt_as __lhs.aw_valid = __rhs.aw_valid; \ + `__AXI_LITE_TO_W(__opt_as, __lhs.w, __lhs_sep, __rhs.w, __rhs_sep) \ + __opt_as __lhs.w_valid = __rhs.w_valid; \ + __opt_as __lhs.b_ready = __rhs.b_ready; \ + `__AXI_LITE_TO_AX(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ + __opt_as __lhs.ar_valid = __rhs.ar_valid; \ + __opt_as __lhs.r_ready = __rhs.r_ready; +`define __AXI_LITE_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs,
__rhs_sep) \ + __opt_as __lhs.aw_ready = __rhs.aw_ready; \ + __opt_as __lhs.ar_ready = __rhs.ar_ready; \ + __opt_as __lhs.w_ready = __rhs.w_ready; \ + __opt_as __lhs.b_valid = __rhs.b_valid; \ + `__AXI_LITE_TO_B(__opt_as, __lhs.b, __lhs_sep, __rhs.b, __rhs_sep) \ + __opt_as __lhs.r_valid = __rhs.r_valid; \ + `__AXI_LITE_TO_R(__opt_as, __lhs.r, __lhs_sep, __rhs.r, __rhs_sep) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning one AXI-Lite interface to another, as if you would do `assign slv = mst;` +// +// The channel assignments `AXI_LITE_ASSIGN_XX(dst, src)` assign all payload and the valid signal of +// the `XX` channel from the `src` to the `dst` interface and they assign the ready signal from the +// `dst` to the `src` interface. +// The interface assignment `AXI_LITE_ASSIGN(dst, src)` assigns all channels including handshakes as +// if `src` was the master of `dst`.
+// +// Usage Example: +// `AXI_LITE_ASSIGN(slv, mst) +// `AXI_LITE_ASSIGN_AW(dst, src) +// `AXI_LITE_ASSIGN_R(dst, src) +`define AXI_LITE_ASSIGN_AW(dst, src) \ + `__AXI_LITE_TO_AX(assign, dst.aw, _, src.aw, _) \ + assign dst.aw_valid = src.aw_valid; \ + assign src.aw_ready = dst.aw_ready; +`define AXI_LITE_ASSIGN_W(dst, src) \ + `__AXI_LITE_TO_W(assign, dst.w, _, src.w, _) \ + assign dst.w_valid = src.w_valid; \ + assign src.w_ready = dst.w_ready; +`define AXI_LITE_ASSIGN_B(dst, src) \ + `__AXI_LITE_TO_B(assign, dst.b, _, src.b, _) \ + assign dst.b_valid = src.b_valid; \ + assign src.b_ready = dst.b_ready; +`define AXI_LITE_ASSIGN_AR(dst, src) \ + `__AXI_LITE_TO_AX(assign, dst.ar, _, src.ar, _) \ + assign dst.ar_valid = src.ar_valid; \ + assign src.ar_ready = dst.ar_ready; +`define AXI_LITE_ASSIGN_R(dst, src) \ + `__AXI_LITE_TO_R(assign, dst.r, _, src.r, _) \ + assign dst.r_valid = src.r_valid; \ + assign src.r_ready = dst.r_ready; +`define AXI_LITE_ASSIGN(slv, mst) \ + `AXI_LITE_ASSIGN_AW(slv, mst) \ + `AXI_LITE_ASSIGN_W(slv, mst) \ + `AXI_LITE_ASSIGN_B(mst, slv) \ + `AXI_LITE_ASSIGN_AR(slv, mst) \ + `AXI_LITE_ASSIGN_R(mst, slv) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting a Lite interface from channel or request/response structs inside a process. +// +// The channel macros `AXI_LITE_SET_FROM_XX(axi_if, xx_struct)` set the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not set the handshake signals. +// The request macro `AXI_LITE_SET_FROM_REQ(axi_if, req_struct)` sets all request channels (AW, W, +// AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. 
+// The response macro `AXI_LITE_SET_FROM_RESP(axi_if, resp_struct)` sets both response channels (B +// and R) and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the +// `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_FROM_REQ(my_if, my_req_struct) +// end +`define AXI_LITE_SET_FROM_AW(axi_if, aw_struct) `__AXI_LITE_TO_AX(, axi_if.aw, _, aw_struct, .) +`define AXI_LITE_SET_FROM_W(axi_if, w_struct) `__AXI_LITE_TO_W(, axi_if.w, _, w_struct, .) +`define AXI_LITE_SET_FROM_B(axi_if, b_struct) `__AXI_LITE_TO_B(, axi_if.b, _, b_struct, .) +`define AXI_LITE_SET_FROM_AR(axi_if, ar_struct) `__AXI_LITE_TO_AX(, axi_if.ar, _, ar_struct, .) +`define AXI_LITE_SET_FROM_R(axi_if, r_struct) `__AXI_LITE_TO_R(, axi_if.r, _, r_struct, .) +`define AXI_LITE_SET_FROM_REQ(axi_if, req_struct) `__AXI_LITE_TO_REQ(, axi_if, _, req_struct, .) +`define AXI_LITE_SET_FROM_RESP(axi_if, resp_struct) `__AXI_LITE_TO_RESP(, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning a Lite interface from channel or request/response structs outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_FROM_XX(axi_if, xx_struct)` assign the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not assign the handshake signals. +// The request macro `AXI_LITE_ASSIGN_FROM_REQ(axi_if, req_struct)` assigns all request channels +// (AW, W, AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. 
+// The response macro `AXI_LITE_ASSIGN_FROM_RESP(axi_if, resp_struct)` assigns both response +// channels (B and R) and the response-side handshake signals (B and R valid and AW, W, and AR +// ready) of the `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// `AXI_LITE_ASSIGN_FROM_REQ(my_if, my_req_struct) +`define AXI_LITE_ASSIGN_FROM_AW(axi_if, aw_struct) `__AXI_LITE_TO_AX(assign, axi_if.aw, _, aw_struct, .) +`define AXI_LITE_ASSIGN_FROM_W(axi_if, w_struct) `__AXI_LITE_TO_W(assign, axi_if.w, _, w_struct, .) +`define AXI_LITE_ASSIGN_FROM_B(axi_if, b_struct) `__AXI_LITE_TO_B(assign, axi_if.b, _, b_struct, .) +`define AXI_LITE_ASSIGN_FROM_AR(axi_if, ar_struct) `__AXI_LITE_TO_AX(assign, axi_if.ar, _, ar_struct, .) +`define AXI_LITE_ASSIGN_FROM_R(axi_if, r_struct) `__AXI_LITE_TO_R(assign, axi_if.r, _, r_struct, .) +`define AXI_LITE_ASSIGN_FROM_REQ(axi_if, req_struct) `__AXI_LITE_TO_REQ(assign, axi_if, _, req_struct, .) +`define AXI_LITE_ASSIGN_FROM_RESP(axi_if, resp_struct) `__AXI_LITE_TO_RESP(assign, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from an interface inside a process. +// +// The channel macros `AXI_LITE_SET_TO_XX(xx_struct, axi_if)` set the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not set the handshake +// signals. +// The request macro `AXI_LITE_SET_TO_REQ(req_struct, axi_if)` sets all signals of `req_struct` +// (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR +// valid and B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_LITE_SET_TO_RESP(resp_struct, axi_if)` sets all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface. +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_TO_REQ(my_req_struct, my_if) +// end +`define AXI_LITE_SET_TO_AW(aw_struct, axi_if) `__AXI_LITE_TO_AX(, aw_struct, ., axi_if.aw, _) +`define AXI_LITE_SET_TO_W(w_struct, axi_if) `__AXI_LITE_TO_W(, w_struct, ., axi_if.w, _) +`define AXI_LITE_SET_TO_B(b_struct, axi_if) `__AXI_LITE_TO_B(, b_struct, ., axi_if.b, _) +`define AXI_LITE_SET_TO_AR(ar_struct, axi_if) `__AXI_LITE_TO_AX(, ar_struct, ., axi_if.ar, _) +`define AXI_LITE_SET_TO_R(r_struct, axi_if) `__AXI_LITE_TO_R(, r_struct, ., axi_if.r, _) +`define AXI_LITE_SET_TO_REQ(req_struct, axi_if) `__AXI_LITE_TO_REQ(, req_struct, ., axi_if, _) +`define AXI_LITE_SET_TO_RESP(resp_struct, axi_if) `__AXI_LITE_TO_RESP(, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from an interface outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_TO_XX(xx_struct, axi_if)` assign the signals of `xx_struct` +// to the payload signals of that channel in the `axi_if` interface. They do not assign the +// handshake signals. +// The request macro `AXI_LITE_ASSIGN_TO_REQ(req_struct, axi_if)` assigns all signals of +// `req_struct` (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, +// W, and AR valid and B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_LITE_ASSIGN_TO_RESP(resp_struct, axi_if)` assigns all signals of +// `resp_struct` (i.e., response channel (B and R) payload and response-side handshake signals (B +// and R valid and AW, W, and AR ready)) to the signals in the `axi_if` interface. +// +// Usage Example: +// `AXI_LITE_ASSIGN_TO_REQ(my_req_struct, my_if) +`define AXI_LITE_ASSIGN_TO_AW(aw_struct, axi_if) `__AXI_LITE_TO_AX(assign, aw_struct, ., axi_if.aw, _) +`define AXI_LITE_ASSIGN_TO_W(w_struct, axi_if) `__AXI_LITE_TO_W(assign, w_struct, ., axi_if.w, _) +`define AXI_LITE_ASSIGN_TO_B(b_struct, axi_if) `__AXI_LITE_TO_B(assign, b_struct, ., axi_if.b, _) +`define AXI_LITE_ASSIGN_TO_AR(ar_struct, axi_if) `__AXI_LITE_TO_AX(assign, ar_struct, ., axi_if.ar, _) +`define AXI_LITE_ASSIGN_TO_R(r_struct, axi_if) `__AXI_LITE_TO_R(assign, r_struct, ., axi_if.r, _) +`define AXI_LITE_ASSIGN_TO_REQ(req_struct, axi_if) `__AXI_LITE_TO_REQ(assign, req_struct, ., axi_if, _) +`define AXI_LITE_ASSIGN_TO_RESP(resp_struct, axi_if) `__AXI_LITE_TO_RESP(assign, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from another struct inside a process. +// +// The channel macros `AXI_LITE_SET_XX_STRUCT(lhs, rhs)` set the fields of the `lhs` channel struct +// to the fields of the `rhs` channel struct. They do not set the handshake signals, which are not +// part of channel structs. +// The request macro `AXI_LITE_SET_REQ_STRUCT(lhs, rhs)` sets all fields of the `lhs` request struct +// to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) payload +// and request-side handshake signals (AW, W, and AR valid and B and R ready).
+// The response macro `AXI_LITE_SET_RESP_STRUCT(lhs, rhs)` sets all fields of the `lhs` response +// struct to the fields of the `rhs` response struct. This includes all response channel (B and R) +// payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_REQ_STRUCT(my_req_struct, another_req_struct) +// end +`define AXI_LITE_SET_AW_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(, lhs, ., rhs, .) +`define AXI_LITE_SET_W_STRUCT(lhs, rhs) `__AXI_LITE_TO_W(, lhs, ., rhs, .) +`define AXI_LITE_SET_B_STRUCT(lhs, rhs) `__AXI_LITE_TO_B(, lhs, ., rhs, .) +`define AXI_LITE_SET_AR_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(, lhs, ., rhs, .) +`define AXI_LITE_SET_R_STRUCT(lhs, rhs) `__AXI_LITE_TO_R(, lhs, ., rhs, .) +`define AXI_LITE_SET_REQ_STRUCT(lhs, rhs) `__AXI_LITE_TO_REQ(, lhs, ., rhs, .) +`define AXI_LITE_SET_RESP_STRUCT(lhs, rhs) `__AXI_LITE_TO_RESP(, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from another struct outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_XX_STRUCT(lhs, rhs)` assign the fields of the `lhs` channel +// struct to the fields of the `rhs` channel struct. They do not assign the handshake signals, +// which are not part of the channel structs. +// The request macro `AXI_LITE_ASSIGN_REQ_STRUCT(lhs, rhs)` assigns all fields of the `lhs` request +// struct to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) +// payload and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_LITE_ASSIGN_RESP_STRUCT(lhs, rhs)` assigns all fields of the `lhs` +// response struct to the fields of the `rhs` response struct.
This includes all response channel +// (B and R) payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// `AXI_LITE_ASSIGN_REQ_STRUCT(my_req_struct, another_req_struct) +`define AXI_LITE_ASSIGN_AW_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_W_STRUCT(lhs, rhs) `__AXI_LITE_TO_W(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_B_STRUCT(lhs, rhs) `__AXI_LITE_TO_B(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_AR_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_R_STRUCT(lhs, rhs) `__AXI_LITE_TO_R(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_REQ_STRUCT(lhs, rhs) `__AXI_LITE_TO_REQ(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_RESP_STRUCT(lhs, rhs) `__AXI_LITE_TO_RESP(assign, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +`endif diff --git a/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh b/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh new file mode 100644 index 0000000..a2a860e --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh @@ -0,0 +1,211 @@ +// Copyright (c) 2019 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Authors: +// - Andreas Kurth +// - Florian Zaruba +// - Wolfgang Roenninger + +// Macros to define AXI and AXI-Lite Channel and Request/Response Structs + +`ifndef AXI_TYPEDEF_SVH_ +`define AXI_TYPEDEF_SVH_ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// AXI4+ATOP Channel and Request/Response Structs +// +// Usage Example: +// `AXI_TYPEDEF_AW_CHAN_T(axi_aw_t, axi_addr_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_W_CHAN_T(axi_w_t, axi_data_t, axi_strb_t, axi_user_t) +// `AXI_TYPEDEF_B_CHAN_T(axi_b_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_AR_CHAN_T(axi_ar_t, axi_addr_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_R_CHAN_T(axi_r_t, axi_data_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_REQ_T(axi_req_t, axi_aw_t, axi_w_t, axi_ar_t) +// `AXI_TYPEDEF_RESP_T(axi_resp_t, axi_b_t, axi_r_t) +`define AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + addr_t addr; \ + axi_pkg::len_t len; \ + axi_pkg::size_t size; \ + axi_pkg::burst_t burst; \ + logic lock; \ + axi_pkg::cache_t cache; \ + axi_pkg::prot_t prot; \ + axi_pkg::qos_t qos; \ + axi_pkg::region_t region; \ + axi_pkg::atop_t atop; \ + user_t user; \ + } aw_chan_t; +`define AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) \ + typedef struct packed { \ + data_t data; \ + strb_t strb; \ + logic last; \ + user_t user; \ + } w_chan_t; +`define AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + axi_pkg::resp_t resp; \ + user_t user; \ + } b_chan_t; +`define AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + addr_t addr; \ + axi_pkg::len_t len; \ + axi_pkg::size_t size; \ + axi_pkg::burst_t burst; \ + logic lock; \ + axi_pkg::cache_t cache; \ + axi_pkg::prot_t prot; \ + axi_pkg::qos_t qos; \ + axi_pkg::region_t region; \ + user_t user; \ + } ar_chan_t; +`define AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) \ + typedef 
struct packed { \ + id_t id; \ + data_t data; \ + axi_pkg::resp_t resp; \ + logic last; \ + user_t user; \ + } r_chan_t; +`define AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) \ + typedef struct packed { \ + aw_chan_t aw; \ + logic aw_valid; \ + w_chan_t w; \ + logic w_valid; \ + logic b_ready; \ + ar_chan_t ar; \ + logic ar_valid; \ + logic r_ready; \ + } req_t; +`define AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) \ + typedef struct packed { \ + logic aw_ready; \ + logic ar_ready; \ + logic w_ready; \ + logic b_valid; \ + b_chan_t b; \ + logic r_valid; \ + r_chan_t r; \ + } resp_t; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// All AXI4+ATOP Channels and Request/Response Structs in One Macro +// +// This can be used whenever the user is not interested in "precise" control of the naming of the +// individual channels. +// +// Usage Example: +// `AXI_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t) +// +// This defines `axi_req_t` and `axi_resp_t` request/response structs as well as `axi_aw_chan_t`, +// `axi_w_chan_t`, `axi_b_chan_t`, `axi_ar_chan_t`, and `axi_r_chan_t` channel structs. 
+`define AXI_TYPEDEF_ALL(__name, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ + `AXI_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t, __id_t, __user_t) \ + `AXI_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t, __user_t) \ + `AXI_TYPEDEF_B_CHAN_T(__name``_b_chan_t, __id_t, __user_t) \ + `AXI_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t, __id_t, __user_t) \ + `AXI_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t, __id_t, __user_t) \ + `AXI_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ + `AXI_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// AXI4-Lite Channel and Request/Response Structs +// +// Usage Example: +// `AXI_LITE_TYPEDEF_AW_CHAN_T(axi_lite_aw_t, axi_lite_addr_t) +// `AXI_LITE_TYPEDEF_W_CHAN_T(axi_lite_w_t, axi_lite_data_t, axi_lite_strb_t) +// `AXI_LITE_TYPEDEF_B_CHAN_T(axi_lite_b_t) +// `AXI_LITE_TYPEDEF_AR_CHAN_T(axi_lite_ar_t, axi_lite_addr_t) +// `AXI_LITE_TYPEDEF_R_CHAN_T(axi_lite_r_t, axi_lite_data_t) +// `AXI_LITE_TYPEDEF_REQ_T(axi_lite_req_t, axi_lite_aw_t, axi_lite_w_t, axi_lite_ar_t) +// `AXI_LITE_TYPEDEF_RESP_T(axi_lite_resp_t, axi_lite_b_t, axi_lite_r_t) +`define AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_lite_t, addr_t) \ + typedef struct packed { \ + addr_t addr; \ + axi_pkg::prot_t prot; \ + } aw_chan_lite_t; +`define AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_lite_t, data_t, strb_t) \ + typedef struct packed { \ + data_t data; \ + strb_t strb; \ + } w_chan_lite_t; +`define AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_lite_t) \ + typedef struct packed { \ + axi_pkg::resp_t resp; \ + } b_chan_lite_t; +`define AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_lite_t, addr_t) \ + typedef struct packed { \ + addr_t addr; \ + axi_pkg::prot_t prot; \ + } ar_chan_lite_t; +`define 
AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_lite_t, data_t) \ + typedef struct packed { \ + data_t data; \ + axi_pkg::resp_t resp; \ + } r_chan_lite_t; +`define AXI_LITE_TYPEDEF_REQ_T(req_lite_t, aw_chan_lite_t, w_chan_lite_t, ar_chan_lite_t) \ + typedef struct packed { \ + aw_chan_lite_t aw; \ + logic aw_valid; \ + w_chan_lite_t w; \ + logic w_valid; \ + logic b_ready; \ + ar_chan_lite_t ar; \ + logic ar_valid; \ + logic r_ready; \ + } req_lite_t; +`define AXI_LITE_TYPEDEF_RESP_T(resp_lite_t, b_chan_lite_t, r_chan_lite_t) \ + typedef struct packed { \ + logic aw_ready; \ + logic w_ready; \ + b_chan_lite_t b; \ + logic b_valid; \ + logic ar_ready; \ + r_chan_lite_t r; \ + logic r_valid; \ + } resp_lite_t; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// All AXI4-Lite Channels and Request/Response Structs in One Macro +// +// This can be used whenever the user is not interested in "precise" control of the naming of the +// individual channels. +// +// Usage Example: +// `AXI_LITE_TYPEDEF_ALL(axi_lite, addr_t, data_t, strb_t) +// +// This defines `axi_lite_req_t` and `axi_lite_resp_t` request/response structs as well as +// `axi_lite_aw_chan_t`, `axi_lite_w_chan_t`, `axi_lite_b_chan_t`, `axi_lite_ar_chan_t`, and +// `axi_lite_r_chan_t` channel structs. 
+`define AXI_LITE_TYPEDEF_ALL(__name, __addr_t, __data_t, __strb_t) \ + `AXI_LITE_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t) \ + `AXI_LITE_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t) \ + `AXI_LITE_TYPEDEF_B_CHAN_T(__name``_b_chan_t) \ + `AXI_LITE_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t) \ + `AXI_LITE_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t) \ + `AXI_LITE_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ + `AXI_LITE_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +`endif diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv new file mode 100644 index 0000000..4a2ecff --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv @@ -0,0 +1,444 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Wolfgang Roenninger + +/// Filter atomic operations (ATOPs) in a protocol-compliant manner. +/// +/// This module filters atomic operations (ATOPs), i.e., write transactions that have a non-zero +/// `aw_atop` value, from its `slv` to its `mst` port.
This module guarantees that: +/// +/// 1) `aw_atop` is always zero on the `mst` port; +/// +/// 2) write transactions with non-zero `aw_atop` on the `slv` port are handled in conformance with +/// the AXI standard by replying to such write transactions with the proper B and R responses. +/// The response code on atomic operations that reach this module is always SLVERR +/// (implementation-specific, not defined in the AXI standard). +/// +/// ## Intended usage +/// This module is intended to be placed between masters that may issue ATOPs and slaves that do not +/// support ATOPs. That way, this module ensures that the AXI protocol remains in a defined state on +/// systems with mixed ATOP capabilities. +/// +/// ## Specification reminder +/// The AXI standard specifies that there may be no ordering requirements between different atomic +/// bursts (i.e., a burst started by an AW with ATOP other than 0) and none between atomic bursts +/// and non-atomic bursts [E2.1.4]. That is, **an atomic burst may never have the same ID as any +/// other write or read burst that is in-flight at the same time**. +module axi_atop_filter #( + /// AXI ID width + parameter int unsigned AxiIdWidth = 0, + /// Maximum number of in-flight AXI write transactions + parameter int unsigned AxiMaxWriteTxns = 0, + /// AXI request type + parameter type req_t = logic, + /// AXI response type + parameter type resp_t = logic +) ( + /// Rising-edge clock of both ports + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + /// Slave port request + input req_t slv_req_i, + /// Slave port response + output resp_t slv_resp_o, + /// Master port request + output req_t mst_req_o, + /// Master port response + input resp_t mst_resp_i +); + + // Minimum counter width is 2 to detect underflows. + localparam int unsigned COUNTER_WIDTH = (AxiMaxWriteTxns == 1) ?
2 : $clog2(AxiMaxWriteTxns+1); + typedef struct packed { + logic underflow; + logic [COUNTER_WIDTH-1:0] cnt; + } cnt_t; + cnt_t w_cnt_d, w_cnt_q; + + typedef enum logic [2:0] { + W_FEEDTHROUGH, BLOCK_AW, ABSORB_W, HOLD_B, INJECT_B, WAIT_R + } w_state_e; + w_state_e w_state_d, w_state_q; + + typedef enum logic [1:0] { R_FEEDTHROUGH, INJECT_R, R_HOLD } r_state_e; + r_state_e r_state_d, r_state_q; + + typedef logic [AxiIdWidth-1:0] id_t; + id_t id_d, id_q; + + typedef logic [7:0] len_t; + len_t r_beats_d, r_beats_q; + + typedef struct packed { + len_t len; + } r_resp_cmd_t; + r_resp_cmd_t r_resp_cmd_push, r_resp_cmd_pop; + + logic aw_without_complete_w_downstream, + complete_w_without_aw_downstream, + r_resp_cmd_push_valid, r_resp_cmd_push_ready, + r_resp_cmd_pop_valid, r_resp_cmd_pop_ready; + + // An AW without a complete W burst is in-flight downstream if the W counter is > 0 and not + // underflowed. + assign aw_without_complete_w_downstream = !w_cnt_q.underflow && (w_cnt_q.cnt > 0); + // A complete W burst without AW is in-flight downstream if the W counter is -1. + assign complete_w_without_aw_downstream = w_cnt_q.underflow && &(w_cnt_q.cnt); + + // Manage AW, W, and B channels: forward non-atomic writes, absorb atomic ones, and answer the latter with an injected SLVERR B response. + always_comb begin + // Defaults: + // Disable AW and W handshakes. + mst_req_o.aw_valid = 1'b0; + slv_resp_o.aw_ready = 1'b0; + mst_req_o.w_valid = 1'b0; + slv_resp_o.w_ready = 1'b0; + // Feed write responses through. + mst_req_o.b_ready = slv_req_i.b_ready; + slv_resp_o.b_valid = mst_resp_i.b_valid; + slv_resp_o.b = mst_resp_i.b; + // Keep ID stored for B and R response. + id_d = id_q; + // Do not push R response commands. + r_resp_cmd_push_valid = 1'b0; + // Keep the current state. + w_state_d = w_state_q; + + unique case (w_state_q) + W_FEEDTHROUGH: begin + // Feed AW channel through if the maximum number of outstanding bursts is not reached.
+ if (complete_w_without_aw_downstream || (w_cnt_q.cnt < AxiMaxWriteTxns)) begin + mst_req_o.aw_valid = slv_req_i.aw_valid; + slv_resp_o.aw_ready = mst_resp_i.aw_ready; + end + // Feed W channel through if .. + if (aw_without_complete_w_downstream // .. downstream is missing W bursts .. + // .. or a new non-ATOP AW is being applied and there is not already a complete W burst + // downstream (to prevent underflows of w_cnt). + || ((slv_req_i.aw_valid && slv_req_i.aw.atop[5:4] == axi_pkg::ATOP_NONE) + && !complete_w_without_aw_downstream) + ) begin + mst_req_o.w_valid = slv_req_i.w_valid; + slv_resp_o.w_ready = mst_resp_i.w_ready; + end + // Filter out AWs that are atomic operations. + if (slv_req_i.aw_valid && slv_req_i.aw.atop[5:4] != axi_pkg::ATOP_NONE) begin + mst_req_o.aw_valid = 1'b0; // Do not let AW pass to master port. + slv_resp_o.aw_ready = 1'b1; // Absorb AW on slave port. + id_d = slv_req_i.aw.id; // Store ID for B response. + // All atomic operations except atomic stores require a response on the R channel. + if (slv_req_i.aw.atop[5:4] != axi_pkg::ATOP_ATOMICSTORE) begin + // Push R response command. We do not have to wait for the ready of the register + // because we know it is ready: we are its only master and will wait for the register to + // be emptied before going back to the `W_FEEDTHROUGH` state. + r_resp_cmd_push_valid = 1'b1; + end + // If downstream is missing W beats, block the AW channel and let the W bursts complete. + if (aw_without_complete_w_downstream) begin + w_state_d = BLOCK_AW; + // If downstream is not missing W beats, absorb the W beats for this atomic AW. + end else begin + mst_req_o.w_valid = 1'b0; // Do not let W beats pass to master port. + slv_resp_o.w_ready = 1'b1; // Absorb W beats on slave port. + if (slv_req_i.w_valid && slv_req_i.w.last) begin + // If the W beat is valid and the last, proceed by injecting the B response.
+ // However, if there is a non-handshaked B on our response port, we must let that + // complete first. + if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin + w_state_d = HOLD_B; + end else begin + w_state_d = INJECT_B; + end + end else begin + // Otherwise continue with absorbing W beats. + w_state_d = ABSORB_W; + end + end + end + end + + BLOCK_AW: begin + // Feed W channel through to let outstanding bursts complete. + if (aw_without_complete_w_downstream) begin + mst_req_o.w_valid = slv_req_i.w_valid; + slv_resp_o.w_ready = mst_resp_i.w_ready; + end else begin + // If there are no more outstanding W bursts, start absorbing the next W burst. + slv_resp_o.w_ready = 1'b1; + if (slv_req_i.w_valid && slv_req_i.w.last) begin + // If the W beat is valid and the last, proceed by injecting the B response. + if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin + w_state_d = HOLD_B; + end else begin + w_state_d = INJECT_B; + end + end else begin + // Otherwise continue with absorbing W beats. + w_state_d = ABSORB_W; + end + end + end + + ABSORB_W: begin + // Absorb all W beats of the current burst. + slv_resp_o.w_ready = 1'b1; + if (slv_req_i.w_valid && slv_req_i.w.last) begin + if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin + w_state_d = HOLD_B; + end else begin + w_state_d = INJECT_B; + end + end + end + + HOLD_B: begin + // Proceed with injection of B response upon handshake. + if (slv_resp_o.b_valid && slv_req_i.b_ready) begin + w_state_d = INJECT_B; + end + end + + INJECT_B: begin + // Pause forwarding of B response. + mst_req_o.b_ready = 1'b0; + // Inject error response instead. Since the B channel has an ID and the atomic burst we are + // replying to is guaranteed to be the only burst with this ID in flight, we do not have to + // observe any ordering and can immediately inject on the B channel.
+ slv_resp_o.b = '0; + slv_resp_o.b.id = id_q; + slv_resp_o.b.resp = axi_pkg::RESP_SLVERR; + slv_resp_o.b_valid = 1'b1; + if (slv_req_i.b_ready) begin + // If not all beats of the R response have been injected, wait for them. Otherwise, return + // to `W_FEEDTHROUGH`. + if (r_resp_cmd_pop_valid && !r_resp_cmd_pop_ready) begin + w_state_d = WAIT_R; + end else begin + w_state_d = W_FEEDTHROUGH; + end + end + end + + WAIT_R: begin + // Hold off returning to `W_FEEDTHROUGH` until all beats of the R response have been + // injected. + if (!r_resp_cmd_pop_valid) begin + w_state_d = W_FEEDTHROUGH; + end + end + + default: w_state_d = W_FEEDTHROUGH; + endcase + end + // Connect signals on AW and W channel that are not managed by the control FSM from slave port to + // master port. + // Feed-through of the AW and W vectors, make sure that downstream aw.atop is always zero + always_comb begin + // overwrite the atop signal + mst_req_o.aw = slv_req_i.aw; + mst_req_o.aw.atop = '0; + end + assign mst_req_o.w = slv_req_i.w; + + // Manage R channel: feed read responses through, or inject SLVERR R beats for an absorbed atomic operation. + always_comb begin + // Defaults: + // Feed read responses through. + slv_resp_o.r = mst_resp_i.r; + slv_resp_o.r_valid = mst_resp_i.r_valid; + mst_req_o.r_ready = slv_req_i.r_ready; + // Do not pop R response command. + r_resp_cmd_pop_ready = 1'b0; + // Keep the current value of the beats counter. + r_beats_d = r_beats_q; + // Keep the current state. + r_state_d = r_state_q; + + unique case (r_state_q) + R_FEEDTHROUGH: begin + if (mst_resp_i.r_valid && !slv_req_i.r_ready) begin + r_state_d = R_HOLD; + end else if (r_resp_cmd_pop_valid) begin + // Upon a command to inject an R response, immediately proceed with doing so because there + // are no ordering requirements with other bursts that may be ongoing on the R channel at + // this moment.
+ r_beats_d = r_resp_cmd_pop.len; + r_state_d = INJECT_R; + end + end + + INJECT_R: begin + mst_req_o.r_ready = 1'b0; + slv_resp_o.r = '0; + slv_resp_o.r.id = id_q; + slv_resp_o.r.resp = axi_pkg::RESP_SLVERR; + slv_resp_o.r.last = (r_beats_q == '0); + slv_resp_o.r_valid = 1'b1; + if (slv_req_i.r_ready) begin + if (slv_resp_o.r.last) begin + r_resp_cmd_pop_ready = 1'b1; + r_state_d = R_FEEDTHROUGH; + end else begin + r_beats_d -= 1; + end + end + end + + R_HOLD: begin + if (mst_resp_i.r_valid && slv_req_i.r_ready) begin + r_state_d = R_FEEDTHROUGH; + end + end + + default: r_state_d = R_FEEDTHROUGH; + endcase + end + // Feed all signals on AR through. + assign mst_req_o.ar = slv_req_i.ar; + assign mst_req_o.ar_valid = slv_req_i.ar_valid; + assign slv_resp_o.ar_ready = mst_resp_i.ar_ready; + + // Keep track of outstanding downstream write bursts: count up on each AW handshake and down on each last W beat; `underflow` marks a count that wrapped below zero. + always_comb begin + w_cnt_d = w_cnt_q; + if (mst_req_o.aw_valid && mst_resp_i.aw_ready) begin + w_cnt_d.cnt += 1; + end + if (mst_req_o.w_valid && mst_resp_i.w_ready && mst_req_o.w.last) begin + w_cnt_d.cnt -= 1; + end + if (w_cnt_q.underflow && (w_cnt_d.cnt == '0)) begin + w_cnt_d.underflow = 1'b0; + end else if (w_cnt_q.cnt == '0 && &(w_cnt_d.cnt)) begin + w_cnt_d.underflow = 1'b1; + end + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + id_q <= '0; + r_beats_q <= '0; + r_state_q <= R_FEEDTHROUGH; + w_cnt_q <= '{default: '0}; + w_state_q <= W_FEEDTHROUGH; + end else begin + id_q <= id_d; + r_beats_q <= r_beats_d; + r_state_q <= r_state_d; + w_cnt_q <= w_cnt_d; + w_state_q <= w_state_d; + end + end + + stream_register #( + .T(r_resp_cmd_t) + ) r_resp_cmd ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clr_i (1'b0), + .testmode_i (1'b0), + .valid_i (r_resp_cmd_push_valid), + .ready_o (r_resp_cmd_push_ready), + .data_i (r_resp_cmd_push), + .valid_o (r_resp_cmd_pop_valid), + .ready_i 
(r_resp_cmd_pop_ready), + .data_o (r_resp_cmd_pop) + ); + assign r_resp_cmd_push.len =
slv_req_i.aw.len; + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (AxiIdWidth >= 1) else $fatal(1, "AXI ID width must be at least 1!"); + assert (AxiMaxWriteTxns >= 1) + else $fatal(1, "Maximum number of outstanding write transactions must be at least 1!"); + end +`endif +// pragma translate_on +endmodule + +`include "axi/assign.svh" +`include "axi/typedef.svh" + +/// Interface variant of [`axi_atop_filter`](module.axi_atop_filter). +module axi_atop_filter_intf #( + /// AXI ID width + parameter int unsigned AXI_ID_WIDTH = 0, + /// AXI address width + parameter int unsigned AXI_ADDR_WIDTH = 0, + /// AXI data width + parameter int unsigned AXI_DATA_WIDTH = 0, + /// AXI user signal width + parameter int unsigned AXI_USER_WIDTH = 0, + /// Maximum number of in-flight AXI write transactions + parameter int unsigned AXI_MAX_WRITE_TXNS = 0 +) ( + /// Rising-edge clock of both ports + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + /// Slave interface port + AXI_BUS.Slave slv, + /// Master interface port + AXI_BUS.Master mst +); + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, slv) + `AXI_ASSIGN_FROM_RESP(slv, slv_resp) + + `AXI_ASSIGN_FROM_REQ(mst, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, mst) + + axi_atop_filter #( + .AxiIdWidth ( AXI_ID_WIDTH ), + // Maximum
number of AXI write bursts outstanding at the same time + .AxiMaxWriteTxns ( AXI_MAX_WRITE_TXNS ), + // AXI request & response type + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_atop_filter ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (AXI_ADDR_WIDTH >= 1) else $fatal(1, "AXI ADDR width must be at least 1!"); + assert (AXI_DATA_WIDTH >= 1) else $fatal(1, "AXI DATA width must be at least 1!"); + assert (AXI_USER_WIDTH >= 1) else $fatal(1, "AXI USER width must be at least 1!"); + end +`endif +// pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv new file mode 100644 index 0000000..6c31321 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv @@ -0,0 +1,265 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Fabian Schuiki +// - Andreas Kurth + +/// An AXI4 cut. +/// +/// Breaks all combinatorial paths between its input and output.
+module axi_cut #( + // bypass enable (passed to the Bypass parameter of every spill_register below) + parameter bit Bypass = 1'b0, + // AXI channel structs + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + // AXI request & response structs + parameter type req_t = logic, + parameter type resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + // slave port + input req_t slv_req_i, + output resp_t slv_resp_o, + // master port + output req_t mst_req_o, + input resp_t mst_resp_i +); + + // a spill register for each channel + spill_register #( + .T ( aw_chan_t ), + .Bypass ( Bypass ) + ) i_reg_aw ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.aw_valid ), + .ready_o ( slv_resp_o.aw_ready ), + .data_i ( slv_req_i.aw ), + .valid_o ( mst_req_o.aw_valid ), + .ready_i ( mst_resp_i.aw_ready ), + .data_o ( mst_req_o.aw ) + ); + + spill_register #( + .T ( w_chan_t ), + .Bypass ( Bypass ) + ) i_reg_w ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.w_valid ), + .ready_o ( slv_resp_o.w_ready ), + .data_i ( slv_req_i.w ), + .valid_o ( mst_req_o.w_valid ), + .ready_i ( mst_resp_i.w_ready ), + .data_o ( mst_req_o.w ) + ); + + spill_register #( + .T ( b_chan_t ), + .Bypass ( Bypass ) + ) i_reg_b ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_resp_i.b_valid ), + .ready_o ( mst_req_o.b_ready ), + .data_i ( mst_resp_i.b ), + .valid_o ( slv_resp_o.b_valid ), + .ready_i ( slv_req_i.b_ready ), + .data_o ( slv_resp_o.b ) + ); + + spill_register #( + .T ( ar_chan_t ), + .Bypass ( Bypass ) + ) i_reg_ar ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.ar_valid ), + .ready_o ( slv_resp_o.ar_ready ), + .data_i ( slv_req_i.ar ), + .valid_o ( mst_req_o.ar_valid ), + .ready_i ( mst_resp_i.ar_ready ), + .data_o ( mst_req_o.ar ) + ); + + spill_register #( + .T ( r_chan_t ), + .Bypass ( Bypass ) + 
) i_reg_r ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), +
.valid_i ( mst_resp_i.r_valid ), + .ready_o ( mst_req_o.r_ready ), + .data_i ( mst_resp_i.r ), + .valid_o ( slv_resp_o.r_valid ), + .ready_i ( slv_req_i.r_ready ), + .data_o ( slv_resp_o.r ) + ); +endmodule + +`include "axi/assign.svh" +`include "axi/typedef.svh" + +// interface wrapper +module axi_cut_intf #( + // Bypass enable + parameter bit BYPASS = 1'b0, + // The address width. + parameter int unsigned ADDR_WIDTH = 0, + // The data width. + parameter int unsigned DATA_WIDTH = 0, + // The ID width. + parameter int unsigned ID_WIDTH = 0, + // The user data width. + parameter int unsigned USER_WIDTH = 0 +) ( + input logic clk_i , + input logic rst_ni , + AXI_BUS.Slave in , + AXI_BUS.Master out +); + + typedef logic [ID_WIDTH-1:0] id_t; + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + typedef logic [USER_WIDTH-1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, in) + `AXI_ASSIGN_FROM_RESP(in, slv_resp) + + `AXI_ASSIGN_FROM_REQ(out, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, out) + + axi_cut #( + .Bypass ( BYPASS ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_cut ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + + // Check the invariants.
+ // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter"); + assert (ID_WIDTH > 0) else $fatal(1, "Wrong id width parameter"); + assert (USER_WIDTH > 0) else $fatal(1, "Wrong user width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_ID_WIDTH == ID_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ID_WIDTH == ID_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition"); + end + `endif + // pragma translate_on +endmodule + +module axi_lite_cut_intf #( + // bypass enable + parameter bit BYPASS = 1'b0, + /// The address width. + parameter int unsigned ADDR_WIDTH = 0, + /// The data width.
+ parameter int unsigned DATA_WIDTH = 0 +) ( + input logic clk_i , + input logic rst_ni , + AXI_LITE.Slave in , + AXI_LITE.Master out +); + + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + + `AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t) + `AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t) + `AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_t) + `AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t) + `AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_t, data_t) + `AXI_LITE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_LITE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_LITE_ASSIGN_TO_REQ(slv_req, in) + `AXI_LITE_ASSIGN_FROM_RESP(in, slv_resp) + + `AXI_LITE_ASSIGN_FROM_REQ(out, mst_req) + `AXI_LITE_ASSIGN_TO_RESP(mst_resp, out) + + axi_cut #( + .Bypass ( BYPASS ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_cut ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + + // Check the invariants.
+ // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + end + `endif + // pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv new file mode 100644 index 0000000..cab18eb --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv @@ -0,0 +1,198 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Florian Zaruba +// - Andreas Kurth + +/// Synthesizable module that (randomly) delays AXI channels.
+module axi_delayer #( + // AXI channel types + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + // AXI request & response types + parameter type req_t = logic, + parameter type resp_t = logic, + // delay parameters: the *Input pair drives the AW, AR and W stream_delay instances, the *Output pair drives the B and R instances + parameter bit StallRandomInput = 0, + parameter bit StallRandomOutput = 0, + parameter int unsigned FixedDelayInput = 1, + parameter int unsigned FixedDelayOutput = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // slave port + input req_t slv_req_i, + output resp_t slv_resp_o, + // master port + output req_t mst_req_o, + input resp_t mst_resp_i +); + // AW + stream_delay #( + .StallRandom ( StallRandomInput ), + .FixedDelay ( FixedDelayInput ), + .payload_t ( aw_chan_t ) + ) i_stream_delay_aw ( + .clk_i, + .rst_ni, + .payload_i ( slv_req_i.aw ), + .ready_o ( slv_resp_o.aw_ready ), + .valid_i ( slv_req_i.aw_valid ), + .payload_o ( mst_req_o.aw ), + .ready_i ( mst_resp_i.aw_ready ), + .valid_o ( mst_req_o.aw_valid ) + ); + + // AR + stream_delay #( + .StallRandom ( StallRandomInput ), + .FixedDelay ( FixedDelayInput ), + .payload_t ( ar_chan_t ) + ) i_stream_delay_ar ( + .clk_i, + .rst_ni, + .payload_i ( slv_req_i.ar ), + .ready_o ( slv_resp_o.ar_ready ), + .valid_i ( slv_req_i.ar_valid ), + .payload_o ( mst_req_o.ar ), + .ready_i ( mst_resp_i.ar_ready ), + .valid_o ( mst_req_o.ar_valid ) + ); + + // W + stream_delay #( + .StallRandom ( StallRandomInput ), + .FixedDelay ( FixedDelayInput ), + .payload_t ( w_chan_t ) + ) i_stream_delay_w ( + .clk_i, + .rst_ni, + .payload_i ( slv_req_i.w ), + .ready_o ( slv_resp_o.w_ready ), + .valid_i ( slv_req_i.w_valid ), + .payload_o ( mst_req_o.w ), + .ready_i ( mst_resp_i.w_ready ), + .valid_o ( mst_req_o.w_valid ) + ); + + // B + 
stream_delay #( + .StallRandom ( StallRandomOutput ), + .FixedDelay ( FixedDelayOutput ), + .payload_t ( b_chan_t
) + ) i_stream_delay_b ( + .clk_i, + .rst_ni, + .payload_i ( mst_resp_i.b ), + .ready_o ( mst_req_o.b_ready ), + .valid_i ( mst_resp_i.b_valid ), + .payload_o ( slv_resp_o.b ), + .ready_i ( slv_req_i.b_ready ), + .valid_o ( slv_resp_o.b_valid ) + ); + + // R + stream_delay #( + .StallRandom ( StallRandomOutput ), + .FixedDelay ( FixedDelayOutput ), + .payload_t ( r_chan_t ) + ) i_stream_delay_r ( + .clk_i, + .rst_ni, + .payload_i ( mst_resp_i.r ), + .ready_o ( mst_req_o.r_ready ), + .valid_i ( mst_resp_i.r_valid ), + .payload_o ( slv_resp_o.r ), + .ready_i ( slv_req_i.r_ready ), + .valid_o ( slv_resp_o.r_valid ) + ); +endmodule + +`include "axi/typedef.svh" +`include "axi/assign.svh" + +// interface wrapper +module axi_delayer_intf #( + // Synopsys DC requires a default value for parameters. + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + parameter bit STALL_RANDOM_INPUT = 0, + parameter bit STALL_RANDOM_OUTPUT = 0, + parameter int unsigned FIXED_DELAY_INPUT = 1, + parameter int unsigned FIXED_DELAY_OUTPUT = 1 +) ( + input logic clk_i, + input logic rst_ni, + AXI_BUS.Slave slv, + AXI_BUS.Master mst +); + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, slv) + `AXI_ASSIGN_FROM_RESP(slv, slv_resp)
+ + `AXI_ASSIGN_FROM_REQ(mst, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, mst) + + axi_delayer #( + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ), + .StallRandomInput ( STALL_RANDOM_INPUT ), + .StallRandomOutput ( STALL_RANDOM_OUTPUT ), + .FixedDelayInput ( FIXED_DELAY_INPUT ), + .FixedDelayOutput ( FIXED_DELAY_OUTPUT ) + ) i_axi_delayer ( + .clk_i, // Clock + .rst_ni, // Asynchronous reset active low + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (AXI_ID_WIDTH >= 1) else $fatal(1, "AXI ID width must be at least 1!"); + assert (AXI_ADDR_WIDTH >= 1) else $fatal(1, "AXI ADDR width must be at least 1!"); + assert (AXI_DATA_WIDTH >= 1) else $fatal(1, "AXI DATA width must be at least 1!"); + assert (AXI_USER_WIDTH >= 1) else $fatal(1, "AXI USER width must be at least 1!"); + end +`endif +// pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv new file mode 100644 index 0000000..99a18c8 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv @@ -0,0 +1,786 @@ +// Copyright (c) 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth + +`include "common_cells/registers.svh" + +// axi_demux: Demultiplex an AXI bus from one slave port to multiple master ports. +// See `doc/axi_demux.md` for the documentation, including the definition of parameters and ports. +module axi_demux #( + parameter int unsigned AxiIdWidth = 32'd0, + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + parameter type req_t = logic, + parameter type resp_t = logic, + parameter int unsigned NoMstPorts = 32'd0, + parameter int unsigned MaxTrans = 32'd8, + parameter int unsigned AxiLookBits = 32'd3, + parameter bit UniqueIds = 1'b0, + parameter bit FallThrough = 1'b0, + parameter bit SpillAw = 1'b1, + parameter bit SpillW = 1'b0, + parameter bit SpillB = 1'b0, + parameter bit SpillAr = 1'b1, + parameter bit SpillR = 1'b0, + // Dependent parameters, DO NOT OVERRIDE! + parameter int unsigned SelectWidth = (NoMstPorts > 32'd1) ? $clog2(NoMstPorts) : 32'd1, + parameter type select_t = logic [SelectWidth-1:0] +) ( + input logic clk_i, + input logic rst_ni, + input logic test_i, + // Slave Port + input req_t slv_req_i, + input select_t slv_aw_select_i, + input select_t slv_ar_select_i, + output resp_t slv_resp_o, + // Master Ports + output req_t [NoMstPorts-1:0] mst_reqs_o, + input resp_t [NoMstPorts-1:0] mst_resps_i +); + + localparam int unsigned IdCounterWidth = MaxTrans > 1 ? 
$clog2(MaxTrans) : 1; + + //-------------------------------------- + // Typedefs for the FIFOs / Queues + //-------------------------------------- + typedef struct packed { + aw_chan_t aw_chan; + select_t aw_select; + } aw_chan_select_t; + typedef struct packed { + ar_chan_t ar_chan; + select_t ar_select; + } ar_chan_select_t; + + // pass through if only one master port + if (NoMstPorts == 32'h1) begin : gen_no_demux + assign mst_reqs_o[0] = slv_req_i; + assign slv_resp_o = mst_resps_i; + // other non degenerate cases + end else begin : gen_demux + + //-------------------------------------- + //-------------------------------------- + // Signal Declarations + //-------------------------------------- + //-------------------------------------- + + //-------------------------------------- + // Write Transaction + //-------------------------------------- + // comes from spill register at input + aw_chan_select_t slv_aw_chan_select; + logic slv_aw_valid, slv_aw_ready; + + // AW ID counter + select_t lookup_aw_select; + logic aw_select_occupied, aw_id_cnt_full; + logic aw_push; + // Upon an ATOP load, inject IDs from the AW into the AR channel + logic atop_inject; + + // W FIFO: stores the decision to which master W beats should go + logic w_fifo_pop; + logic w_fifo_full, w_fifo_empty; + select_t w_select; + + // Register which locks the AW valid signal + logic lock_aw_valid_d, lock_aw_valid_q, load_aw_lock; + logic aw_valid, aw_ready; + + // W channel from spill reg + w_chan_t slv_w_chan; + logic slv_w_valid, slv_w_ready; + + // B channles input into the arbitration + b_chan_t [NoMstPorts-1:0] mst_b_chans; + logic [NoMstPorts-1:0] mst_b_valids, mst_b_readies; + + // B channel to spill register + b_chan_t slv_b_chan; + logic slv_b_valid, slv_b_ready; + + //-------------------------------------- + // Read Transaction + //-------------------------------------- + // comes from spill register at input + ar_chan_select_t slv_ar_chan_select; + logic slv_ar_valid, slv_ar_ready; 
+ + // AR ID counter + select_t lookup_ar_select; + logic ar_select_occupied, ar_id_cnt_full; + logic ar_push; + + // Register which locks the AR valid signel + logic lock_ar_valid_d, lock_ar_valid_q, load_ar_lock; + logic ar_valid, ar_ready; + + // R channles input into the arbitration + r_chan_t [NoMstPorts-1:0] mst_r_chans; + logic [NoMstPorts-1:0] mst_r_valids, mst_r_readies; + + // R channel to spill register + r_chan_t slv_r_chan; + logic slv_r_valid, slv_r_ready; + + //-------------------------------------- + //-------------------------------------- + // Channel Control + //-------------------------------------- + //-------------------------------------- + + //-------------------------------------- + // AW Channel + //-------------------------------------- + // spill register at the channel input + `ifdef TARGET_VSIM + // Workaround for bug in Questa 2020.2 and 2021.1: Flatten the struct into a logic vector before + // instantiating `spill_register`. + typedef logic [$bits(aw_chan_select_t)-1:0] aw_chan_select_flat_t; + `else + typedef aw_chan_select_t aw_chan_select_flat_t; + `endif + aw_chan_select_flat_t slv_aw_chan_select_in_flat, + slv_aw_chan_select_out_flat; + assign slv_aw_chan_select_in_flat = {slv_req_i.aw, slv_aw_select_i}; + spill_register #( + .T ( aw_chan_select_flat_t ), + .Bypass ( ~SpillAw ) // because module param indicates if we want a spill reg + ) i_aw_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.aw_valid ), + .ready_o ( slv_resp_o.aw_ready ), + .data_i ( slv_aw_chan_select_in_flat ), + .valid_o ( slv_aw_valid ), + .ready_i ( slv_aw_ready ), + .data_o ( slv_aw_chan_select_out_flat ) + ); + assign slv_aw_chan_select = slv_aw_chan_select_out_flat; + + // Control of the AW handshake + always_comb begin + // AXI Handshakes + slv_aw_ready = 1'b0; + aw_valid = 1'b0; + // `lock_aw_valid`, used to be protocol conform as it is not allowed to deassert + // a valid if there was no corresponding ready. 
As this process has to be able to inject + // an AXI ID into the counter of the AR channel on an ATOP, there could be a case where + // this process waits on `aw_ready` but in the mean time on the AR channel the counter gets + // full. + lock_aw_valid_d = lock_aw_valid_q; + load_aw_lock = 1'b0; + // AW ID counter and W FIFO + aw_push = 1'b0; + // ATOP injection into ar counter + atop_inject = 1'b0; + // we had an arbitration decision, the valid is locked, wait for the transaction + if (lock_aw_valid_q) begin + aw_valid = 1'b1; + // transaction + if (aw_ready) begin + slv_aw_ready = 1'b1; + lock_aw_valid_d = 1'b0; + load_aw_lock = 1'b1; + atop_inject = slv_aw_chan_select.aw_chan.atop[5]; // inject the ATOP if necessary + end + end else begin + // Process can start handling a transaction if its `i_aw_id_counter` and `w_fifo` have + // space in them. Further check if we could inject something on the AR channel. + if (!aw_id_cnt_full && !w_fifo_full && !ar_id_cnt_full) begin + // there is a valid AW vector make the id lookup and go further, if it passes + if (slv_aw_valid && (!aw_select_occupied || + (slv_aw_chan_select.aw_select == lookup_aw_select))) begin + // connect the handshake + aw_valid = 1'b1; + // push arbitration to the W FIFO regardless, do not wait for the AW transaction + aw_push = 1'b1; + // on AW transaction + if (aw_ready) begin + slv_aw_ready = 1'b1; + atop_inject = slv_aw_chan_select.aw_chan.atop[5]; + // no AW transaction this cycle, lock the decision + end else begin + lock_aw_valid_d = 1'b1; + load_aw_lock = 1'b1; + end + end + end + end + end + + // lock the valid signal, as the selection gets pushed into the W FIFO on first assertion, + // prevent further pushing + `FFLARN(lock_aw_valid_q, lock_aw_valid_d, load_aw_lock, '0, clk_i, rst_ni) + + if (UniqueIds) begin : gen_unique_ids_aw + // If the `UniqueIds` parameter is set, each write transaction has an ID that is unique among + // all in-flight write transactions, or all write transactions 
with a given ID target the same + // master port as all write transactions with the same ID, or both. This means that the + // signals that are driven by the ID counters if this parameter is not set can instead be + // derived from existing signals. The ID counters can therefore be omitted. + assign lookup_aw_select = slv_aw_chan_select.aw_select; + assign aw_select_occupied = 1'b0; + assign aw_id_cnt_full = 1'b0; + end else begin : gen_aw_id_counter + axi_demux_id_counters #( + .AxiIdBits ( AxiLookBits ), + .CounterWidth ( IdCounterWidth ), + .mst_port_select_t ( select_t ) + ) i_aw_id_counter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .lookup_axi_id_i ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ), + .lookup_mst_select_o ( lookup_aw_select ), + .lookup_mst_select_occupied_o ( aw_select_occupied ), + .full_o ( aw_id_cnt_full ), + .inject_axi_id_i ( '0 ), + .inject_i ( 1'b0 ), + .push_axi_id_i ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ), + .push_mst_select_i ( slv_aw_chan_select.aw_select ), + .push_i ( aw_push ), + .pop_axi_id_i ( slv_b_chan.id[0+:AxiLookBits] ), + .pop_i ( slv_b_valid & slv_b_ready ) + ); + // pop from ID counter on outward transaction + end + + // FIFO to save W selection + fifo_v3 #( + .FALL_THROUGH ( FallThrough ), + .DEPTH ( MaxTrans ), + .dtype ( select_t ) + ) i_w_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i( test_i ), + .full_o ( w_fifo_full ), + .empty_o ( w_fifo_empty ), + .usage_o ( ), + .data_i ( slv_aw_chan_select.aw_select ), + .push_i ( aw_push ), // controlled from proc_aw_chan + .data_o ( w_select ), // where the w beat should go + .pop_i ( w_fifo_pop ) // controlled from proc_w_chan + ); + + //-------------------------------------- + // W Channel + //-------------------------------------- + spill_register #( + .T ( w_chan_t ), + .Bypass ( ~SpillW ) + ) i_w_spill_reg( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.w_valid ), + .ready_o ( slv_resp_o.w_ready ), + 
.data_i ( slv_req_i.w ), + .valid_o ( slv_w_valid ), + .ready_i ( slv_w_ready ), + .data_o ( slv_w_chan ) + ); + + //-------------------------------------- + // B Channel + //-------------------------------------- + // optional spill register + spill_register #( + .T ( b_chan_t ), + .Bypass ( ~SpillB ) + ) i_b_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_b_valid ), + .ready_o ( slv_b_ready ), + .data_i ( slv_b_chan ), + .valid_o ( slv_resp_o.b_valid ), + .ready_i ( slv_req_i.b_ready ), + .data_o ( slv_resp_o.b ) + ); + + // Arbitration of the different B responses + rr_arb_tree #( + .NumIn ( NoMstPorts ), + .DataType ( b_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) i_b_mux ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( mst_b_valids ), + .gnt_o ( mst_b_readies ), + .data_i ( mst_b_chans ), + .gnt_i ( slv_b_ready ), + .req_o ( slv_b_valid ), + .data_o ( slv_b_chan ), + .idx_o ( ) + ); + + //-------------------------------------- + // AR Channel + //-------------------------------------- + `ifdef TARGET_VSIM + // Workaround for bug in Questa 2020.2 and 2021.1: Flatten the struct into a logic vector before + // instantiating `spill_register`. 
+ typedef logic [$bits(ar_chan_select_t)-1:0] ar_chan_select_flat_t; + `else + typedef ar_chan_select_t ar_chan_select_flat_t; + `endif + ar_chan_select_flat_t slv_ar_chan_select_in_flat, + slv_ar_chan_select_out_flat; + assign slv_ar_chan_select_in_flat = {slv_req_i.ar, slv_ar_select_i}; + spill_register #( + .T ( ar_chan_select_flat_t ), + .Bypass ( ~SpillAr ) + ) i_ar_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.ar_valid ), + .ready_o ( slv_resp_o.ar_ready ), + .data_i ( slv_ar_chan_select_in_flat ), + .valid_o ( slv_ar_valid ), + .ready_i ( slv_ar_ready ), + .data_o ( slv_ar_chan_select_out_flat ) + ); + assign slv_ar_chan_select = slv_ar_chan_select_out_flat; + + // control of the AR handshake + always_comb begin + // AXI Handshakes + slv_ar_ready = 1'b0; + ar_valid = 1'b0; + // `lock_ar_valid`: Used to be protocol conform as it is not allowed to deassert `ar_valid` + // if there was no corresponding `ar_ready`. There is the possibility that an injection + // of a R response from an `atop` from the AW channel can change the occupied flag of the + // `i_ar_id_counter`, even if it was previously empty. This FF prevents the deassertion. + lock_ar_valid_d = lock_ar_valid_q; + load_ar_lock = 1'b0; + // AR id counter + ar_push = 1'b0; + // The process had an arbitration decision in a previous cycle, the valid is locked, + // wait for the AR transaction. + if (lock_ar_valid_q) begin + ar_valid = 1'b1; + // transaction + if (ar_ready) begin + slv_ar_ready = 1'b1; + ar_push = 1'b1; + lock_ar_valid_d = 1'b0; + load_ar_lock = 1'b1; + end + end else begin + // The process can start handling AR transaction if `i_ar_id_counter` has space. + if (!ar_id_cnt_full) begin + // There is a valid AR, so look the ID up. 
+ if (slv_ar_valid && (!ar_select_occupied || + (slv_ar_chan_select.ar_select == lookup_ar_select))) begin + // connect the AR handshake + ar_valid = 1'b1; + // on transaction + if (ar_ready) begin + slv_ar_ready = 1'b1; + ar_push = 1'b1; + // no transaction this cycle, lock the valid decision! + end else begin + lock_ar_valid_d = 1'b1; + load_ar_lock = 1'b1; + end + end + end + end + end + + // this ff is needed so that ar does not get de-asserted if an atop gets injected + `FFLARN(lock_ar_valid_q, lock_ar_valid_d, load_ar_lock, '0, clk_i, rst_ni) + + if (UniqueIds) begin : gen_unique_ids_ar + // If the `UniqueIds` parameter is set, each read transaction has an ID that is unique among + // all in-flight read transactions, or all read transactions with a given ID target the same + // master port as all read transactions with the same ID, or both. This means that the + // signals that are driven by the ID counters if this parameter is not set can instead be + // derived from existing signals. The ID counters can therefore be omitted. 
+ assign lookup_ar_select = slv_ar_chan_select.ar_select; + assign ar_select_occupied = 1'b0; + assign ar_id_cnt_full = 1'b0; + end else begin : gen_ar_id_counter + axi_demux_id_counters #( + .AxiIdBits ( AxiLookBits ), + .CounterWidth ( IdCounterWidth ), + .mst_port_select_t ( select_t ) + ) i_ar_id_counter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .lookup_axi_id_i ( slv_ar_chan_select.ar_chan.id[0+:AxiLookBits] ), + .lookup_mst_select_o ( lookup_ar_select ), + .lookup_mst_select_occupied_o ( ar_select_occupied ), + .full_o ( ar_id_cnt_full ), + .inject_axi_id_i ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ), + .inject_i ( atop_inject ), + .push_axi_id_i ( slv_ar_chan_select.ar_chan.id[0+:AxiLookBits] ), + .push_mst_select_i ( slv_ar_chan_select.ar_select ), + .push_i ( ar_push ), + .pop_axi_id_i ( slv_r_chan.id[0+:AxiLookBits] ), + .pop_i ( slv_r_valid & slv_r_ready & slv_r_chan.last ) + ); + end + + //-------------------------------------- + // R Channel + //-------------------------------------- + // optional spill register + spill_register #( + .T ( r_chan_t ), + .Bypass ( ~SpillR ) + ) i_r_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_r_valid ), + .ready_o ( slv_r_ready ), + .data_i ( slv_r_chan ), + .valid_o ( slv_resp_o.r_valid ), + .ready_i ( slv_req_i.r_ready ), + .data_o ( slv_resp_o.r ) + ); + + // Arbitration of the different r responses + rr_arb_tree #( + .NumIn ( NoMstPorts ), + .DataType ( r_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) i_r_mux ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( mst_r_valids ), + .gnt_o ( mst_r_readies ), + .data_i ( mst_r_chans ), + .gnt_i ( slv_r_ready ), + .req_o ( slv_r_valid ), + .data_o ( slv_r_chan ), + .idx_o ( ) + ); + + assign ar_ready = ar_valid & mst_resps_i[slv_ar_chan_select.ar_select].ar_ready; + assign aw_ready = aw_valid & mst_resps_i[slv_aw_chan_select.aw_select].aw_ready; + + // process that defines the individual 
demuxes and assignments for the arbitration + // as mst_reqs_o has to be drivem from the same always comb block! + always_comb begin + // default assignments + mst_reqs_o = '0; + slv_w_ready = 1'b0; + w_fifo_pop = 1'b0; + + for (int unsigned i = 0; i < NoMstPorts; i++) begin + // AW channel + mst_reqs_o[i].aw = slv_aw_chan_select.aw_chan; + mst_reqs_o[i].aw_valid = 1'b0; + if (aw_valid && (slv_aw_chan_select.aw_select == i)) begin + mst_reqs_o[i].aw_valid = 1'b1; + end + + // W channel + mst_reqs_o[i].w = slv_w_chan; + mst_reqs_o[i].w_valid = 1'b0; + if (!w_fifo_empty && (w_select == i)) begin + mst_reqs_o[i].w_valid = slv_w_valid; + slv_w_ready = mst_resps_i[i].w_ready; + w_fifo_pop = slv_w_valid & mst_resps_i[i].w_ready & slv_w_chan.last; + end + + // B channel + mst_reqs_o[i].b_ready = mst_b_readies[i]; + + // AR channel + mst_reqs_o[i].ar = slv_ar_chan_select.ar_chan; + mst_reqs_o[i].ar_valid = 1'b0; + if (ar_valid && (slv_ar_chan_select.ar_select == i)) begin + mst_reqs_o[i].ar_valid = 1'b1; + end + + // R channel + mst_reqs_o[i].r_ready = mst_r_readies[i]; + end + end + // unpack the response B and R channels for the arbitration + for (genvar i = 0; i < NoMstPorts; i++) begin : gen_b_channels + assign mst_b_chans[i] = mst_resps_i[i].b; + assign mst_b_valids[i] = mst_resps_i[i].b_valid; + assign mst_r_chans[i] = mst_resps_i[i].r; + assign mst_r_valids[i] = mst_resps_i[i].r_valid; + end + + +// Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR +`ifndef XSIM + initial begin: validate_params + no_mst_ports: assume (NoMstPorts > 0) else + $fatal(1, "The Number of slaves (NoMstPorts) has to be at least 1"); + AXI_ID_BITS: assume (AxiIdWidth >= AxiLookBits) else + $fatal(1, "AxiIdBits has to be equal or smaller than AxiIdWidth."); + end + default disable iff (!rst_ni); + aw_select: assume property( @(posedge clk_i) (slv_req_i.aw_valid |-> + (slv_aw_select_i < NoMstPorts))) else + $fatal(1, "slv_aw_select_i is %d: AW has selected a slave that is not defined.\ + NoMstPorts: %d", slv_aw_select_i, NoMstPorts); + ar_select: assume property( @(posedge clk_i) (slv_req_i.ar_valid |-> + (slv_ar_select_i < NoMstPorts))) else + $fatal(1, "slv_ar_select_i is %d: AR has selected a slave that is not defined.\ + NoMstPorts: %d", slv_ar_select_i, NoMstPorts); + aw_valid_stable: assert property( @(posedge clk_i) (aw_valid && !aw_ready) |=> aw_valid) else + $fatal(1, "aw_valid was deasserted, when aw_ready = 0 in last cycle."); + ar_valid_stable: assert property( @(posedge clk_i) + (ar_valid && !ar_ready) |=> ar_valid) else + $fatal(1, "ar_valid was deasserted, when ar_ready = 0 in last cycle."); + aw_stable: assert property( @(posedge clk_i) (aw_valid && !aw_ready) + |=> $stable(slv_aw_chan_select)) else + $fatal(1, "slv_aw_chan_select unstable with valid set."); + ar_stable: assert property( @(posedge clk_i) (ar_valid && !ar_ready) + |=> $stable(slv_ar_chan_select)) else + $fatal(1, "slv_aw_chan_select unstable with valid set."); + internal_ar_select: assert property( @(posedge clk_i) + (ar_valid |-> slv_ar_chan_select.ar_select < NoMstPorts)) + else $fatal(1, "slv_ar_chan_select.ar_select illegal while ar_valid."); + internal_aw_select: assert property( @(posedge clk_i) + (aw_valid |-> slv_aw_chan_select.aw_select < NoMstPorts)) + else $fatal(1, "slv_aw_chan_select.aw_select illegal while aw_valid."); +`endif +`endif +// pragma translate_on + end +endmodule + +module 
axi_demux_id_counters #( + // the lower bits of the AXI ID that should be considered, results in 2**AxiIdBits counters + parameter int unsigned AxiIdBits = 2, + parameter int unsigned CounterWidth = 4, // width of each in-flight counter + parameter type mst_port_select_t = logic // type of the stored master port selection +) ( + input clk_i, // Clock + input rst_ni, // Asynchronous reset active low + // lookup: selection stored for an ID and whether that ID currently has a non-zero count + input logic [AxiIdBits-1:0] lookup_axi_id_i, + output mst_port_select_t lookup_mst_select_o, + output logic lookup_mst_select_occupied_o, + // push: store the selection for an ID and count it up + output logic full_o, + input logic [AxiIdBits-1:0] push_axi_id_i, + input mst_port_select_t push_mst_select_i, + input logic push_i, + // inject ATOPs in AR channel: counts the ID up without touching the stored selection + input logic [AxiIdBits-1:0] inject_axi_id_i, + input logic inject_i, + // pop: count the ID down + input logic [AxiIdBits-1:0] pop_axi_id_i, + input logic pop_i +); + localparam int unsigned NoCounters = 2**AxiIdBits; + typedef logic [CounterWidth-1:0] cnt_t; + + // selection registers, one per counter; entry i is loaded when push_en[i] is set + mst_port_select_t [NoCounters-1:0] mst_select_q; + + // per-counter enables (decoded from the IDs) and status flags + logic [NoCounters-1:0] push_en, inject_en, pop_en, occupied, cnt_full; + + //----------------------------------- + // Lookup + //----------------------------------- + assign lookup_mst_select_o = mst_select_q[lookup_axi_id_i]; + assign lookup_mst_select_occupied_o = occupied[lookup_axi_id_i]; + //----------------------------------- + // Push and Pop + //----------------------------------- + assign push_en = (push_i) ? (1 << push_axi_id_i) : '0; + assign inject_en = (inject_i) ? (1 << inject_axi_id_i) : '0; + assign pop_en = (pop_i) ?
(1 << pop_axi_id_i) : '0; + assign full_o = |cnt_full; // full as soon as any one counter is full + // counters: one in-flight counter per tracked ID value + for (genvar i = 0; i < NoCounters; i++) begin : gen_counters + logic cnt_en, cnt_down, overflow; + cnt_t cnt_delta, in_flight; + always_comb begin + unique case ({push_en[i], inject_en[i], pop_en[i]}) + 3'b001 : begin // pop_i = -1 + cnt_en = 1'b1; + cnt_down = 1'b1; + cnt_delta = cnt_t'(1); + end + 3'b010 : begin // inject_i = +1 + cnt_en = 1'b1; + cnt_down = 1'b0; + cnt_delta = cnt_t'(1); + end + // 3'b011, inject_i & pop_i = 0 --> use default + 3'b100 : begin // push_i = +1 + cnt_en = 1'b1; + cnt_down = 1'b0; + cnt_delta = cnt_t'(1); + end + // 3'b101, push_i & pop_i = 0 --> use default + 3'b110 : begin // push_i & inject_i = +2 + cnt_en = 1'b1; + cnt_down = 1'b0; + cnt_delta = cnt_t'(2); + end + 3'b111 : begin // push_i & inject_i & pop_i = +1 + cnt_en = 1'b1; + cnt_down = 1'b0; + cnt_delta = cnt_t'(1); + end + default : begin // do nothing to the counters + cnt_en = 1'b0; + cnt_down = 1'b0; + cnt_delta = cnt_t'(0); + end + endcase + end + + delta_counter #( + .WIDTH ( CounterWidth ), + .STICKY_OVERFLOW ( 1'b0 ) + ) i_in_flight_cnt ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( 1'b0 ), + .en_i ( cnt_en ), + .load_i ( 1'b0 ), + .down_i ( cnt_down ), + .delta_i ( cnt_delta ), + .d_i ( '0 ), + .q_o ( in_flight ), + .overflow_o ( overflow ) + ); + assign occupied[i] = |in_flight; // ID is occupied while its count is non-zero + assign cnt_full[i] = overflow | (&in_flight); // full on overflow or when the count is all ones + + // holds the selection signal for this id + `FFLARN(mst_select_q[i], push_mst_select_i, push_en[i], '0, clk_i, rst_ni) + +// pragma translate_off +`ifndef VERILATOR +`ifndef XSIM + // Check for counter underflow: in the cycle after a pop, `overflow` must not be set.
+ cnt_underflow: assert property( + @(posedge clk_i) disable iff (~rst_ni) (pop_en[i] |=> !overflow)) else + $fatal(1, "axi_demux_id_counters > Counter: %0d underflowed.\ + The reason is probably a faulty AXI response.", i); +`endif +`endif +// pragma translate_on + end +endmodule + +// interface wrapper: `axi_demux` with `AXI_BUS` interface ports instead of req/resp struct ports +`include "axi/assign.svh" +`include "axi/typedef.svh" +module axi_demux_intf #( + parameter int unsigned AXI_ID_WIDTH = 32'd0, // Synopsys DC requires default value for params + parameter int unsigned AXI_ADDR_WIDTH = 32'd0, + parameter int unsigned AXI_DATA_WIDTH = 32'd0, + parameter int unsigned AXI_USER_WIDTH = 32'd0, + parameter int unsigned NO_MST_PORTS = 32'd3, + parameter int unsigned MAX_TRANS = 32'd8, + parameter int unsigned AXI_LOOK_BITS = 32'd3, + parameter bit UNIQUE_IDS = 1'b0, + parameter bit FALL_THROUGH = 1'b0, + parameter bit SPILL_AW = 1'b1, + parameter bit SPILL_W = 1'b0, + parameter bit SPILL_B = 1'b0, + parameter bit SPILL_AR = 1'b1, + parameter bit SPILL_R = 1'b0, + // Dependent parameters, DO NOT OVERRIDE! + parameter int unsigned SELECT_WIDTH = (NO_MST_PORTS > 32'd1) ?
$clog2(NO_MST_PORTS) : 32'd1, + parameter type select_t = logic [SELECT_WIDTH-1:0] // MST port select type +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + input select_t slv_aw_select_i, // master port select for AW, has to be stable, when aw_valid + input select_t slv_ar_select_i, // master port select for AR, has to be stable, when ar_valid + AXI_BUS.Slave slv, // slave port + AXI_BUS.Master mst [NO_MST_PORTS-1:0] // master ports +); + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; // one strobe bit per data byte + typedef logic [AXI_USER_WIDTH-1:0] user_t; + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req; + resp_t slv_resp; + req_t [NO_MST_PORTS-1:0] mst_req; + resp_t [NO_MST_PORTS-1:0] mst_resp; // struct-typed signals connected to the `axi_demux` instance below + + `AXI_ASSIGN_TO_REQ(slv_req, slv) + `AXI_ASSIGN_FROM_RESP(slv, slv_resp) + + for (genvar i = 0; i < NO_MST_PORTS; i++) begin : gen_assign_mst_ports + `AXI_ASSIGN_FROM_REQ(mst[i], mst_req[i]) + `AXI_ASSIGN_TO_RESP(mst_resp[i], mst[i]) + end + + axi_demux #( + .AxiIdWidth ( AXI_ID_WIDTH ), // ID Width + .aw_chan_t ( aw_chan_t ), // AW Channel Type + .w_chan_t ( w_chan_t ), // W Channel Type + .b_chan_t ( b_chan_t ), // B Channel Type + .ar_chan_t ( ar_chan_t ), // AR Channel Type + .r_chan_t ( r_chan_t ), // R Channel Type + .req_t ( req_t ), + .resp_t ( resp_t ), + .NoMstPorts ( NO_MST_PORTS ), + .MaxTrans ( MAX_TRANS ), + .AxiLookBits ( AXI_LOOK_BITS ), + .UniqueIds ( UNIQUE_IDS ), + .FallThrough ( FALL_THROUGH ), + .SpillAw ( SPILL_AW ), + .SpillW ( SPILL_W ), + .SpillB ( SPILL_B ), +
.SpillAr ( SPILL_AR ), + .SpillR ( SPILL_R ) + ) i_axi_demux ( + .clk_i, // Clock + .rst_ni, // Asynchronous reset active low + .test_i, // Testmode enable + // slave port + .slv_req_i ( slv_req ), + .slv_aw_select_i ( slv_aw_select_i ), + .slv_ar_select_i ( slv_ar_select_i ), + .slv_resp_o ( slv_resp ), + // master port + .mst_reqs_o ( mst_req ), + .mst_resps_i ( mst_resp ) + ); +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv new file mode 100644 index 0000000..f3c807d --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv @@ -0,0 +1,261 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth +// - Matheus Cavalcante + +// AXI Error Slave: This module always responds with an AXI error for transactions that are sent to +// it. This module optionally supports ATOPs if the `ATOPs` parameter is set. + +module axi_err_slv #( + parameter int unsigned AxiIdWidth = 0, // AXI ID Width + parameter type req_t = logic, // AXI 4 request struct, with atop field + parameter type resp_t = logic, // AXI 4 response struct + parameter axi_pkg::resp_t Resp = axi_pkg::RESP_DECERR, // Error response generated by this slave (RESP_DECERR or RESP_SLVERR).
+ parameter int unsigned RespWidth = 32'd64, // Data response width, gets zero extended or truncated to r.data. + parameter logic [RespWidth-1:0] RespData = 64'hCA11AB1EBADCAB1E, // Hex value returned as read data + parameter bit ATOPs = 1'b1, // Activate support for ATOPs. Set to 1 if this slave could ever get an atomic AXI transaction. + parameter int unsigned MaxTrans = 1 // Maximum # of accepted transactions before stalling +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + // slave port + input req_t slv_req_i, + output resp_t slv_resp_o +); + typedef logic [AxiIdWidth-1:0] id_t; + typedef struct packed { + id_t id; + axi_pkg::len_t len; + } r_data_t; // ID and burst length of an accepted read, as stored in the r fifo + + req_t err_req; + resp_t err_resp; // request/response as seen by the error generator, after the optional ATOP filter + + if (ATOPs) begin + axi_atop_filter #( + .AxiIdWidth ( AxiIdWidth ), + .AxiMaxWriteTxns ( MaxTrans ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_atop_filter ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req_i ), + .slv_resp_o ( slv_resp_o ), + .mst_req_o ( err_req ), + .mst_resp_i ( err_resp ) + ); + end else begin // no ATOP filter: connect the slave port directly + assign err_req = slv_req_i; + assign slv_resp_o = err_resp; + end + + // w fifo: IDs of accepted AWs whose W beats still have to be consumed + logic w_fifo_full, w_fifo_empty; + logic w_fifo_push, w_fifo_pop; + id_t w_fifo_data; + // b fifo: IDs of writes waiting for their B response + logic b_fifo_full, b_fifo_empty; + logic b_fifo_push, b_fifo_pop; + id_t b_fifo_data; + // r fifo: ID and length of accepted ARs + r_data_t r_fifo_inp; + logic r_fifo_full, r_fifo_empty; + logic r_fifo_push, r_fifo_pop; + r_data_t r_fifo_data; + // r counter: loaded with the burst length and counted down once per accepted R beat + logic r_cnt_clear, r_cnt_en, r_cnt_load; + axi_pkg::len_t r_current_beat; + // r status: set while a read burst is being answered + logic r_busy_d, r_busy_q, r_busy_load; + + //-------------------------------------- + // Write Transactions + //-------------------------------------- + // push, when there is room in the fifo + assign w_fifo_push = err_req.aw_valid & ~w_fifo_full; + assign err_resp.aw_ready = ~w_fifo_full; + + fifo_v3 #( + .FALL_THROUGH ( 1'b1 ), + .DEPTH ( MaxTrans ), + .dtype ( id_t ) + ) i_w_fifo ( +
.clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i ( test_i ), + .full_o ( w_fifo_full ), + .empty_o ( w_fifo_empty ), + .usage_o ( ), + .data_i ( err_req.aw.id ), + .push_i ( w_fifo_push ), + .data_o ( w_fifo_data ), + .pop_i ( w_fifo_pop ) + ); + + always_comb begin : proc_w_channel + err_resp.w_ready = 1'b0; + w_fifo_pop = 1'b0; + b_fifo_push = 1'b0; + if (!w_fifo_empty && !b_fifo_full) begin + // eat the beats + err_resp.w_ready = 1'b1; + // on the last w beat: the write is complete, move its ID from the w fifo to the b fifo + if (err_req.w_valid && err_req.w.last) begin + w_fifo_pop = 1'b1; + b_fifo_push = 1'b1; + end + end + end + + fifo_v3 #( + .FALL_THROUGH ( 1'b0 ), + .DEPTH ( unsigned'(2) ), // two places so that w can eat beats if b is not sent + .dtype ( id_t ) + ) i_b_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i ( test_i ), + .full_o ( b_fifo_full ), + .empty_o ( b_fifo_empty ), + .usage_o ( ), + .data_i ( w_fifo_data ), + .push_i ( b_fifo_push ), + .data_o ( b_fifo_data ), + .pop_i ( b_fifo_pop ) + ); + + always_comb begin : proc_b_channel + b_fifo_pop = 1'b0; + err_resp.b = '0; + err_resp.b.id = b_fifo_data; + err_resp.b.resp = Resp; + err_resp.b_valid = 1'b0; + if (!b_fifo_empty) begin + err_resp.b_valid = 1'b1; + // b transaction: pop the ID once the response is accepted + b_fifo_pop = err_req.b_ready; + end + end + + //-------------------------------------- + // Read Transactions + //-------------------------------------- + // push if there is room in the fifo + assign r_fifo_push = err_req.ar_valid & ~r_fifo_full; + assign err_resp.ar_ready = ~r_fifo_full; + + // fifo data assignment + assign r_fifo_inp.id = err_req.ar.id; + assign r_fifo_inp.len = err_req.ar.len; + + fifo_v3 #( + .FALL_THROUGH ( 1'b0 ), + .DEPTH ( MaxTrans ), + .dtype ( r_data_t ) + ) i_r_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i( test_i ), + .full_o ( r_fifo_full ), + .empty_o ( r_fifo_empty ), + .usage_o ( ), + .data_i ( r_fifo_inp ), + .push_i ( r_fifo_push ), + .data_o (
r_fifo_data ), + .pop_i ( r_fifo_pop ) + ); + + always_comb begin : proc_r_channel + // default assignments + r_busy_d = r_busy_q; + r_busy_load = 1'b0; + // r fifo signals + r_fifo_pop = 1'b0; + // r counter signals + r_cnt_clear = 1'b0; + r_cnt_en = 1'b0; + r_cnt_load = 1'b0; + // r_channel + err_resp.r = '0; + err_resp.r.id = r_fifo_data.id; + err_resp.r.data = RespData; + err_resp.r.resp = Resp; + err_resp.r_valid = 1'b0; + // control + if (r_busy_q) begin + err_resp.r_valid = 1'b1; + err_resp.r.last = (r_current_beat == '0); + // r transaction: count the beat; on the last beat end the burst and pop the r fifo + if (err_req.r_ready) begin + r_cnt_en = 1'b1; + if (r_current_beat == '0) begin + r_busy_d = 1'b0; + r_busy_load = 1'b1; + r_cnt_clear = 1'b1; + r_fifo_pop = 1'b1; + end + end + end else begin + // when not busy and fifo not empty, load the counter with the burst length and go busy + if (!r_fifo_empty) begin + r_busy_d = 1'b1; + r_busy_load = 1'b1; + r_cnt_load = 1'b1; + end + end + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + r_busy_q <= '0; + end else if (r_busy_load) begin + r_busy_q <= r_busy_d; + end + end + + counter #( + .WIDTH ($bits(axi_pkg::len_t)) + ) i_r_counter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( r_cnt_clear ), + .en_i ( r_cnt_en ), + .load_i ( r_cnt_load ), + .down_i ( 1'b1 ), + .d_i ( r_fifo_data.len ), + .q_o ( r_current_beat ), + .overflow_o( ) + ); + + // pragma translate_off + `ifndef VERILATOR + `ifndef XSIM + initial begin + assert (Resp == axi_pkg::RESP_DECERR || Resp == axi_pkg::RESP_SLVERR) else + $fatal(1, "This module may only generate RESP_DECERR or RESP_SLVERR responses!"); + end + default disable iff (!rst_ni); + if (!ATOPs) begin : gen_assert_atops_unsupported + assume property( @(posedge clk_i) (slv_req_i.aw_valid |-> slv_req_i.aw.atop == '0)) else + $fatal(1, "Got ATOP but not configured to support ATOPs!"); + end + `endif + `endif + // pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv
b/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv new file mode 100644 index 0000000..e9359b9 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv @@ -0,0 +1,161 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth + +// AXI ID Prepend: This module prepends/strips the MSB from the AXI IDs. 
+// Constraints enforced through assertions: ID width of slave and master port (the master port ID must be wider by exactly PreIdWidth bits) + +module axi_id_prepend #( + parameter int unsigned NoBus = 1, // Can take multiple axi busses + parameter int unsigned AxiIdWidthSlvPort = 4, // AXI ID Width of the Slave Ports + parameter int unsigned AxiIdWidthMstPort = 6, // AXI ID Width of the Master Ports + parameter type slv_aw_chan_t = logic, // AW Channel Type for slv port + parameter type slv_w_chan_t = logic, // W Channel Type for slv port + parameter type slv_b_chan_t = logic, // B Channel Type for slv port + parameter type slv_ar_chan_t = logic, // AR Channel Type for slv port + parameter type slv_r_chan_t = logic, // R Channel Type for slv port + parameter type mst_aw_chan_t = logic, // AW Channel Type for mst port + parameter type mst_w_chan_t = logic, // W Channel Type for mst port + parameter type mst_b_chan_t = logic, // B Channel Type for mst port + parameter type mst_ar_chan_t = logic, // AR Channel Type for mst port + parameter type mst_r_chan_t = logic, // R Channel Type for mst port + // DEPENDENT PARAMETER DO NOT OVERWRITE!
+ parameter int unsigned PreIdWidth = AxiIdWidthMstPort - AxiIdWidthSlvPort +) ( + input logic [PreIdWidth-1:0] pre_id_i, // ID to be prepended + // slave port (input), connect master modules here + // AW channel + input slv_aw_chan_t [NoBus-1:0] slv_aw_chans_i, + input logic [NoBus-1:0] slv_aw_valids_i, + output logic [NoBus-1:0] slv_aw_readies_o, + // W channel + input slv_w_chan_t [NoBus-1:0] slv_w_chans_i, + input logic [NoBus-1:0] slv_w_valids_i, + output logic [NoBus-1:0] slv_w_readies_o, + // B channel + output slv_b_chan_t [NoBus-1:0] slv_b_chans_o, + output logic [NoBus-1:0] slv_b_valids_o, + input logic [NoBus-1:0] slv_b_readies_i, + // AR channel + input slv_ar_chan_t [NoBus-1:0] slv_ar_chans_i, + input logic [NoBus-1:0] slv_ar_valids_i, + output logic [NoBus-1:0] slv_ar_readies_o, + // R channel + output slv_r_chan_t [NoBus-1:0] slv_r_chans_o, + output logic [NoBus-1:0] slv_r_valids_o, + input logic [NoBus-1:0] slv_r_readies_i, + // master ports (output), connect slave modules here + // AW channel + output mst_aw_chan_t [NoBus-1:0] mst_aw_chans_o, + output logic [NoBus-1:0] mst_aw_valids_o, + input logic [NoBus-1:0] mst_aw_readies_i, + // W channel + output mst_w_chan_t [NoBus-1:0] mst_w_chans_o, + output logic [NoBus-1:0] mst_w_valids_o, + input logic [NoBus-1:0] mst_w_readies_i, + // B channel + input mst_b_chan_t [NoBus-1:0] mst_b_chans_i, + input logic [NoBus-1:0] mst_b_valids_i, + output logic [NoBus-1:0] mst_b_readies_o, + // AR channel + output mst_ar_chan_t [NoBus-1:0] mst_ar_chans_o, + output logic [NoBus-1:0] mst_ar_valids_o, + input logic [NoBus-1:0] mst_ar_readies_i, + // R channel + input mst_r_chan_t [NoBus-1:0] mst_r_chans_i, + input logic [NoBus-1:0] mst_r_valids_i, + output logic [NoBus-1:0] mst_r_readies_o +); + + // prepend `pre_id_i` to the AW and AR IDs of every bus + for (genvar i = 0; i < NoBus; i++) begin : gen_id_prepend + if (PreIdWidth == 0) begin : gen_no_prepend // NOTE(review): the `p_assert` check below requires a strictly wider master ID, so this branch fails that assertion in simulation - confirm intent + assign mst_aw_chans_o[i] = slv_aw_chans_i[i]; + assign mst_ar_chans_o[i] = slv_ar_chans_i[i]; + end
else begin : gen_prepend + always_comb begin + mst_aw_chans_o[i] = slv_aw_chans_i[i]; // copy all fields first, the ID is overwritten below + mst_ar_chans_o[i] = slv_ar_chans_i[i]; + mst_aw_chans_o[i].id = {pre_id_i, slv_aw_chans_i[i].id[AxiIdWidthSlvPort-1:0]}; + mst_ar_chans_o[i].id = {pre_id_i, slv_ar_chans_i[i].id[AxiIdWidthSlvPort-1:0]}; + end + end + // The ID is in the highest bits of the struct, so an assignment from a channel with a wide ID + // to a channel with a shorter ID correctly cuts the prepended ID. + assign slv_b_chans_o[i] = mst_b_chans_i[i]; + assign slv_r_chans_o[i] = mst_r_chans_i[i]; + end + + // pass the handshakes and the w channel through unchanged + assign mst_w_chans_o = slv_w_chans_i; + assign mst_aw_valids_o = slv_aw_valids_i; + assign slv_aw_readies_o = mst_aw_readies_i; + assign mst_w_valids_o = slv_w_valids_i; + assign slv_w_readies_o = mst_w_readies_i; + assign slv_b_valids_o = mst_b_valids_i; + assign mst_b_readies_o = slv_b_readies_i; + assign mst_ar_valids_o = slv_ar_valids_i; + assign slv_ar_readies_o = mst_ar_readies_i; + assign slv_r_valids_o = mst_r_valids_i; + assign mst_r_readies_o = slv_r_readies_i; + +// pragma translate_off +`ifndef VERILATOR + initial begin : p_assert + assert(NoBus > 0) + else $fatal(1, "Input must be at least one element wide."); + assert(PreIdWidth == ($bits(mst_aw_chans_o[0].id) - $bits(slv_aw_chans_i[0].id))) + else $fatal(1, "Prepend ID Width must be: $bits(mst_aw_chans_o.id)-$bits(slv_aw_chans_i.id)"); + assert ($bits(mst_aw_chans_o[0].id) > $bits(slv_aw_chans_i[0].id)) + else $fatal(1, "The master AXI port has to have a wider ID than the slave port."); + end + + aw_id : assert final( + mst_aw_chans_o[0].id[$bits(slv_aw_chans_i[0].id)-1:0] === slv_aw_chans_i[0].id) + else $fatal (1, "Something with the AW channel ID prepending went wrong."); + aw_addr : assert final(mst_aw_chans_o[0].addr === slv_aw_chans_i[0].addr) + else $fatal (1, "Something with the AW channel ID prepending went wrong."); + aw_len : assert final(mst_aw_chans_o[0].len === slv_aw_chans_i[0].len)
+ else $fatal (1, "Something with the AW channel ID prepending went wrong."); + aw_size : assert final(mst_aw_chans_o[0].size === slv_aw_chans_i[0].size) + else $fatal (1, "Something with the AW channel ID prepending went wrong."); + aw_qos : assert final(mst_aw_chans_o[0].qos === slv_aw_chans_i[0].qos) + else $fatal (1, "Something with the AW channel ID prepending went wrong."); + + b_id : assert final( + mst_b_chans_i[0].id[$bits(slv_b_chans_o[0].id)-1:0] === slv_b_chans_o[0].id) + else $fatal (1, "Something with the B channel ID stripping went wrong."); + b_resp : assert final(mst_b_chans_i[0].resp === slv_b_chans_o[0].resp) + else $fatal (1, "Something with the B channel ID stripping went wrong."); + + ar_id : assert final( + mst_ar_chans_o[0].id[$bits(slv_ar_chans_i[0].id)-1:0] === slv_ar_chans_i[0].id) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + ar_addr : assert final(mst_ar_chans_o[0].addr === slv_ar_chans_i[0].addr) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + ar_len : assert final(mst_ar_chans_o[0].len === slv_ar_chans_i[0].len) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + ar_size : assert final(mst_ar_chans_o[0].size === slv_ar_chans_i[0].size) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + ar_qos : assert final(mst_ar_chans_o[0].qos === slv_ar_chans_i[0].qos) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + + r_id : assert final(mst_r_chans_i[0].id[$bits(slv_r_chans_o[0].id)-1:0] === slv_r_chans_o[0].id) + else $fatal (1, "Something with the R channel ID stripping went wrong."); + r_data : assert final(mst_r_chans_i[0].data === slv_r_chans_o[0].data) + else $fatal (1, "Something with the R channel ID stripping went wrong."); + r_resp : assert final(mst_r_chans_i[0].resp === slv_r_chans_o[0].resp) + else $fatal (1, "Something with the R channel ID stripping went wrong."); +`endif +//
pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv new file mode 100644 index 0000000..f15648e --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv @@ -0,0 +1,37 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Fabian Schuiki +// - Andreas Kurth + +`include "axi/assign.svh" + +/// A connector that joins two AXI interfaces.
+module axi_join_intf ( + AXI_BUS.Slave in, + AXI_BUS.Master out +); + + `AXI_ASSIGN(out, in) + +// pragma translate_off +`ifndef VERILATOR + initial begin + assert(in.AXI_ADDR_WIDTH == out.AXI_ADDR_WIDTH); + assert(in.AXI_DATA_WIDTH == out.AXI_DATA_WIDTH); + assert(in.AXI_ID_WIDTH <= out.AXI_ID_WIDTH ); + assert(in.AXI_USER_WIDTH == out.AXI_USER_WIDTH); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv new file mode 100644 index 0000000..8e5dc2f --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv @@ -0,0 +1,237 @@ +// Copyright (c) 2014-2019 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Fabian Schuiki +// - Andreas Kurth +// - Stefan Mach + +// Multiple AXI4 cuts. +// +// These can be used to relax timing pressure on very long AXI busses. +module axi_multicut #( + parameter int unsigned NoCuts = 32'd1, // Number of cuts. 
+ // AXI channel structs + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + // AXI request & response structs + parameter type req_t = logic, + parameter type resp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // slave port + input req_t slv_req_i, + output resp_t slv_resp_o, + // master port + output req_t mst_req_o, + input resp_t mst_resp_i +); + + if (NoCuts == '0) begin : gen_no_cut + // degenerate case, connect input to output + assign mst_req_o = slv_req_i; + assign slv_resp_o = mst_resp_i; + end else begin : gen_axi_cut + // instantiate all needed cuts + req_t [NoCuts:0] cut_req; + resp_t [NoCuts:0] cut_resp; + + // connect slave to the lowest index + assign cut_req[0] = slv_req_i; + assign slv_resp_o = cut_resp[0]; + + // AXI cuts + for (genvar i = 0; i < NoCuts; i++) begin : gen_axi_cuts + axi_cut #( + .Bypass ( 1'b0 ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_cut ( + .clk_i, + .rst_ni, + .slv_req_i ( cut_req[i] ), + .slv_resp_o ( cut_resp[i] ), + .mst_req_o ( cut_req[i+1] ), + .mst_resp_i ( cut_resp[i+1] ) + ); + end + + // connect master to the highest index + assign mst_req_o = cut_req[NoCuts]; + assign cut_resp[NoCuts] = mst_resp_i; + end + + // Check the invariants + // pragma translate_off + `ifndef VERILATOR + initial begin + assert(NoCuts >= 0); + end + `endif + // pragma translate_on +endmodule + +`include "axi/assign.svh" +`include "axi/typedef.svh" + +// interface wrapper +module axi_multicut_intf #( + parameter int unsigned ADDR_WIDTH = 0, // The address width. + parameter int unsigned DATA_WIDTH = 0, // The data width. + parameter int unsigned ID_WIDTH = 0, // The ID width. 
+ parameter int unsigned USER_WIDTH = 0, // The user data width. + parameter int unsigned NUM_CUTS = 0 // The number of cuts. +) ( + input logic clk_i, + input logic rst_ni, + AXI_BUS.Slave in, + AXI_BUS.Master out +); + + typedef logic [ID_WIDTH-1:0] id_t; + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + typedef logic [USER_WIDTH-1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, in) + `AXI_ASSIGN_FROM_RESP(in, slv_resp) + + `AXI_ASSIGN_FROM_REQ(out, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, out) + + axi_multicut #( + .NoCuts ( NUM_CUTS ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_multicut ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + + // Check the invariants. 
+ // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter"); + assert (ID_WIDTH > 0) else $fatal(1, "Wrong id width parameter"); + assert (USER_WIDTH > 0) else $fatal(1, "Wrong user width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_ID_WIDTH == ID_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ID_WIDTH == ID_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition"); + end + `endif + // pragma translate_on +endmodule + +module axi_lite_multicut_intf #( + // The address width. + parameter int unsigned ADDR_WIDTH = 0, + // The data width. + parameter int unsigned DATA_WIDTH = 0, + // The number of cuts. 
+ parameter int unsigned NUM_CUTS = 0 +) ( + input logic clk_i , + input logic rst_ni , + AXI_LITE.Slave in , + AXI_LITE.Master out +); + + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + + `AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t) + `AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t) + `AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_t) + `AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t) + `AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_t, data_t) + `AXI_LITE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_LITE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_LITE_ASSIGN_TO_REQ(slv_req, in) + `AXI_LITE_ASSIGN_FROM_RESP(in, slv_resp) + + `AXI_LITE_ASSIGN_FROM_REQ(out, mst_req) + `AXI_LITE_ASSIGN_TO_RESP(mst_resp, out) + + axi_multicut #( + .NoCuts ( NUM_CUTS ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_multicut ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + + // Check the invariants. 
+ // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + end + `endif + // pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv new file mode 100644 index 0000000..59ee3ec --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv @@ -0,0 +1,522 @@ +// Copyright (c) 2019 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth + +// AXI Multiplexer: This module multiplexes the AXI4 slave ports down to one master port. +// The AXI IDs from the slave ports get extended with the respective slave port index. +// The extension width can be calculated with `$clog2(NoSlvPorts)`. This means the AXI +// ID for the master port has to be this `$clog2(NoSlvPorts)` wider than the ID for the +// slave ports. 
+// Responses are switched based on these bits. For example, with 4 slave ports +// a response with ID `6'b100110` will be forwarded to slave port 2 (`2'b10`). + +// register macros +`include "common_cells/registers.svh" + +module axi_mux #( + // AXI parameter and channel types + parameter int unsigned SlvAxiIDWidth = 32'd0, // AXI ID width, slave ports + parameter type slv_aw_chan_t = logic, // AW Channel Type, slave ports + parameter type mst_aw_chan_t = logic, // AW Channel Type, master port + parameter type w_chan_t = logic, // W Channel Type, all ports + parameter type slv_b_chan_t = logic, // B Channel Type, slave ports + parameter type mst_b_chan_t = logic, // B Channel Type, master port + parameter type slv_ar_chan_t = logic, // AR Channel Type, slave ports + parameter type mst_ar_chan_t = logic, // AR Channel Type, master port + parameter type slv_r_chan_t = logic, // R Channel Type, slave ports + parameter type mst_r_chan_t = logic, // R Channel Type, master port + parameter type slv_req_t = logic, // Slave port request type + parameter type slv_resp_t = logic, // Slave port response type + parameter type mst_req_t = logic, // Master ports request type + parameter type mst_resp_t = logic, // Master ports response type + parameter int unsigned NoSlvPorts = 32'd0, // Number of slave ports + // Maximum number of outstanding transactions per write + parameter int unsigned MaxWTrans = 32'd8, + // If enabled, this multiplexer is purely combinatorial + parameter bit FallThrough = 1'b0, + // add spill register on write master ports, adds a cycle latency on write channels + parameter bit SpillAw = 1'b1, + parameter bit SpillW = 1'b0, + parameter bit SpillB = 1'b0, + // add spill register on read master ports, adds a cycle latency on read channels + parameter bit SpillAr = 1'b1, + parameter bit SpillR = 1'b0 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Test Mode enable + // slave ports (AXI 
inputs), connect master modules here + input slv_req_t [NoSlvPorts-1:0] slv_reqs_i, + output slv_resp_t [NoSlvPorts-1:0] slv_resps_o, + // master port (AXI outputs), connect slave modules here + output mst_req_t mst_req_o, + input mst_resp_t mst_resp_i +); + + localparam int unsigned MstIdxBits = $clog2(NoSlvPorts); + localparam int unsigned MstAxiIDWidth = SlvAxiIDWidth + MstIdxBits; + + // pass through if only one slave port + if (NoSlvPorts == 32'h1) begin : gen_no_mux + assign mst_req_o = slv_reqs_i[0]; + assign slv_resps_o[0] = mst_resp_i; + // other non degenerate cases + end else begin : gen_mux + + typedef logic [MstIdxBits-1:0] switch_id_t; + + // AXI channels between the ID prepend unit and the rest of the multiplexer + mst_aw_chan_t [NoSlvPorts-1:0] slv_aw_chans; + logic [NoSlvPorts-1:0] slv_aw_valids, slv_aw_readies; + w_chan_t [NoSlvPorts-1:0] slv_w_chans; + logic [NoSlvPorts-1:0] slv_w_valids, slv_w_readies; + mst_b_chan_t [NoSlvPorts-1:0] slv_b_chans; + logic [NoSlvPorts-1:0] slv_b_valids, slv_b_readies; + mst_ar_chan_t [NoSlvPorts-1:0] slv_ar_chans; + logic [NoSlvPorts-1:0] slv_ar_valids, slv_ar_readies; + mst_r_chan_t [NoSlvPorts-1:0] slv_r_chans; + logic [NoSlvPorts-1:0] slv_r_valids, slv_r_readies; + + // These signals are all ID prepended + // AW channel + mst_aw_chan_t mst_aw_chan; + logic mst_aw_valid, mst_aw_ready; + + // AW master handshake internal, so that we are able to stall, if w_fifo is full + logic aw_valid, aw_ready; + + // FF to lock the AW valid signal, when a new arbitration decision is made the decision + // gets pushed into the W FIFO, when it now stalls prevent subsequent pushing + // This FF removes AW to W dependency + logic lock_aw_valid_d, lock_aw_valid_q; + logic load_aw_lock; + + // signals for the FIFO that holds the last switching decision of the AW channel + logic w_fifo_full, w_fifo_empty; + logic w_fifo_push, w_fifo_pop; + switch_id_t w_fifo_data; + + // W channel spill reg + w_chan_t mst_w_chan; + logic mst_w_valid, 
mst_w_ready; + + // master ID in the b_id + switch_id_t switch_b_id; + + // B channel spill reg + mst_b_chan_t mst_b_chan; + logic mst_b_valid; + + // AR channel for when spill is enabled + mst_ar_chan_t mst_ar_chan; + logic ar_valid, ar_ready; + + // master ID in the r_id + switch_id_t switch_r_id; + + // R channel spill reg + mst_r_chan_t mst_r_chan; + logic mst_r_valid; + + //-------------------------------------- + // ID prepend for all slave ports + //-------------------------------------- + for (genvar i = 0; i < NoSlvPorts; i++) begin : gen_id_prepend + axi_id_prepend #( + .NoBus ( 32'd1 ), // one AXI bus per slave port + .AxiIdWidthSlvPort( SlvAxiIDWidth ), + .AxiIdWidthMstPort( MstAxiIDWidth ), + .slv_aw_chan_t ( slv_aw_chan_t ), + .slv_w_chan_t ( w_chan_t ), + .slv_b_chan_t ( slv_b_chan_t ), + .slv_ar_chan_t ( slv_ar_chan_t ), + .slv_r_chan_t ( slv_r_chan_t ), + .mst_aw_chan_t ( mst_aw_chan_t ), + .mst_w_chan_t ( w_chan_t ), + .mst_b_chan_t ( mst_b_chan_t ), + .mst_ar_chan_t ( mst_ar_chan_t ), + .mst_r_chan_t ( mst_r_chan_t ) + ) i_id_prepend ( + .pre_id_i ( switch_id_t'(i) ), + .slv_aw_chans_i ( slv_reqs_i[i].aw ), + .slv_aw_valids_i ( slv_reqs_i[i].aw_valid ), + .slv_aw_readies_o ( slv_resps_o[i].aw_ready ), + .slv_w_chans_i ( slv_reqs_i[i].w ), + .slv_w_valids_i ( slv_reqs_i[i].w_valid ), + .slv_w_readies_o ( slv_resps_o[i].w_ready ), + .slv_b_chans_o ( slv_resps_o[i].b ), + .slv_b_valids_o ( slv_resps_o[i].b_valid ), + .slv_b_readies_i ( slv_reqs_i[i].b_ready ), + .slv_ar_chans_i ( slv_reqs_i[i].ar ), + .slv_ar_valids_i ( slv_reqs_i[i].ar_valid ), + .slv_ar_readies_o ( slv_resps_o[i].ar_ready ), + .slv_r_chans_o ( slv_resps_o[i].r ), + .slv_r_valids_o ( slv_resps_o[i].r_valid ), + .slv_r_readies_i ( slv_reqs_i[i].r_ready ), + .mst_aw_chans_o ( slv_aw_chans[i] ), + .mst_aw_valids_o ( slv_aw_valids[i] ), + .mst_aw_readies_i ( slv_aw_readies[i] ), + .mst_w_chans_o ( slv_w_chans[i] ), + .mst_w_valids_o ( slv_w_valids[i] ), + .mst_w_readies_i ( 
slv_w_readies[i] ), + .mst_b_chans_i ( slv_b_chans[i] ), + .mst_b_valids_i ( slv_b_valids[i] ), + .mst_b_readies_o ( slv_b_readies[i] ), + .mst_ar_chans_o ( slv_ar_chans[i] ), + .mst_ar_valids_o ( slv_ar_valids[i] ), + .mst_ar_readies_i ( slv_ar_readies[i] ), + .mst_r_chans_i ( slv_r_chans[i] ), + .mst_r_valids_i ( slv_r_valids[i] ), + .mst_r_readies_o ( slv_r_readies[i] ) + ); + end + + //-------------------------------------- + // AW Channel + //-------------------------------------- + rr_arb_tree #( + .NumIn ( NoSlvPorts ), + .DataType ( mst_aw_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) i_aw_arbiter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( slv_aw_valids ), + .gnt_o ( slv_aw_readies ), + .data_i ( slv_aw_chans ), + .gnt_i ( aw_ready ), + .req_o ( aw_valid ), + .data_o ( mst_aw_chan ), + .idx_o ( ) + ); + + // control of the AW channel + always_comb begin + // default assignments + lock_aw_valid_d = lock_aw_valid_q; + load_aw_lock = 1'b0; + w_fifo_push = 1'b0; + mst_aw_valid = 1'b0; + aw_ready = 1'b0; + // had a downstream stall, be valid and send the AW along + if (lock_aw_valid_q) begin + mst_aw_valid = 1'b1; + // transaction + if (mst_aw_ready) begin + aw_ready = 1'b1; + lock_aw_valid_d = 1'b0; + load_aw_lock = 1'b1; + end + end else begin + if (!w_fifo_full && aw_valid) begin + mst_aw_valid = 1'b1; + w_fifo_push = 1'b1; + if (mst_aw_ready) begin + aw_ready = 1'b1; + end else begin + // go to lock if transaction not in this cycle + lock_aw_valid_d = 1'b1; + load_aw_lock = 1'b1; + end + end + end + end + + `FFLARN(lock_aw_valid_q, lock_aw_valid_d, load_aw_lock, '0, clk_i, rst_ni) + + fifo_v3 #( + .FALL_THROUGH ( FallThrough ), + .DEPTH ( MaxWTrans ), + .dtype ( switch_id_t ) + ) i_w_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i( test_i ), + .full_o ( w_fifo_full ), + .empty_o ( w_fifo_empty ), + .usage_o ( ), + .data_i ( mst_aw_chan.id[SlvAxiIDWidth+:MstIdxBits] 
), + .push_i ( w_fifo_push ), + .data_o ( w_fifo_data ), + .pop_i ( w_fifo_pop ) + ); + + spill_register #( + .T ( mst_aw_chan_t ), + .Bypass ( ~SpillAw ) // Param indicated that we want a spill reg + ) i_aw_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_aw_valid ), + .ready_o ( mst_aw_ready ), + .data_i ( mst_aw_chan ), + .valid_o ( mst_req_o.aw_valid ), + .ready_i ( mst_resp_i.aw_ready ), + .data_o ( mst_req_o.aw ) + ); + + //-------------------------------------- + // W Channel + //-------------------------------------- + // multiplexer + assign mst_w_chan = slv_w_chans[w_fifo_data]; + always_comb begin + // default assignments + mst_w_valid = 1'b0; + slv_w_readies = '0; + w_fifo_pop = 1'b0; + // control + if (!w_fifo_empty) begin + // connect the handshake + mst_w_valid = slv_w_valids[w_fifo_data]; + slv_w_readies[w_fifo_data] = mst_w_ready; + // pop FIFO on a last transaction + w_fifo_pop = slv_w_valids[w_fifo_data] & mst_w_ready & mst_w_chan.last; + end + end + + spill_register #( + .T ( w_chan_t ), + .Bypass ( ~SpillW ) + ) i_w_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_w_valid ), + .ready_o ( mst_w_ready ), + .data_i ( mst_w_chan ), + .valid_o ( mst_req_o.w_valid ), + .ready_i ( mst_resp_i.w_ready ), + .data_o ( mst_req_o.w ) + ); + + //-------------------------------------- + // B Channel + //-------------------------------------- + // replicate B channels + assign slv_b_chans = {NoSlvPorts{mst_b_chan}}; + // control B channel handshake + assign switch_b_id = mst_b_chan.id[SlvAxiIDWidth+:MstIdxBits]; + assign slv_b_valids = (mst_b_valid) ? 
(1 << switch_b_id) : '0; + + spill_register #( + .T ( mst_b_chan_t ), + .Bypass ( ~SpillB ) + ) i_b_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_resp_i.b_valid ), + .ready_o ( mst_req_o.b_ready ), + .data_i ( mst_resp_i.b ), + .valid_o ( mst_b_valid ), + .ready_i ( slv_b_readies[switch_b_id] ), + .data_o ( mst_b_chan ) + ); + + //-------------------------------------- + // AR Channel + //-------------------------------------- + rr_arb_tree #( + .NumIn ( NoSlvPorts ), + .DataType ( mst_ar_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) i_ar_arbiter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( slv_ar_valids ), + .gnt_o ( slv_ar_readies ), + .data_i ( slv_ar_chans ), + .gnt_i ( ar_ready ), + .req_o ( ar_valid ), + .data_o ( mst_ar_chan ), + .idx_o ( ) + ); + + spill_register #( + .T ( mst_ar_chan_t ), + .Bypass ( ~SpillAr ) + ) i_ar_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( ar_valid ), + .ready_o ( ar_ready ), + .data_i ( mst_ar_chan ), + .valid_o ( mst_req_o.ar_valid ), + .ready_i ( mst_resp_i.ar_ready ), + .data_o ( mst_req_o.ar ) + ); + + //-------------------------------------- + // R Channel + //-------------------------------------- + // replicate R channels + assign slv_r_chans = {NoSlvPorts{mst_r_chan}}; + // R channel handshake control + assign switch_r_id = mst_r_chan.id[SlvAxiIDWidth+:MstIdxBits]; + assign slv_r_valids = (mst_r_valid) ? 
(1 << switch_r_id) : '0; + + spill_register #( + .T ( mst_r_chan_t ), + .Bypass ( ~SpillR ) + ) i_r_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_resp_i.r_valid ), + .ready_o ( mst_req_o.r_ready ), + .data_i ( mst_resp_i.r ), + .valid_o ( mst_r_valid ), + .ready_i ( slv_r_readies[switch_r_id] ), + .data_o ( mst_r_chan ) + ); + end + +// pragma translate_off +`ifndef VERILATOR + initial begin + assert (SlvAxiIDWidth > 0) else $fatal(1, "AXI ID width of slave ports must be non-zero!"); + assert (NoSlvPorts > 0) else $fatal(1, "Number of slave ports must be non-zero!"); + assert (MaxWTrans > 0) + else $fatal(1, "Maximum number of outstanding writes must be non-zero!"); + assert (MstAxiIDWidth >= SlvAxiIDWidth + $clog2(NoSlvPorts)) + else $fatal(1, "AXI ID width of master ports must be wide enough to identify slave ports!"); + // Assert ID widths (one slave is sufficient since they all have the same type). + assert ($unsigned($bits(slv_reqs_i[0].aw.id)) == SlvAxiIDWidth) + else $fatal(1, "ID width of AW channel of slave ports does not match parameter!"); + assert ($unsigned($bits(slv_reqs_i[0].ar.id)) == SlvAxiIDWidth) + else $fatal(1, "ID width of AR channel of slave ports does not match parameter!"); + assert ($unsigned($bits(slv_resps_o[0].b.id)) == SlvAxiIDWidth) + else $fatal(1, "ID width of B channel of slave ports does not match parameter!"); + assert ($unsigned($bits(slv_resps_o[0].r.id)) == SlvAxiIDWidth) + else $fatal(1, "ID width of R channel of slave ports does not match parameter!"); + assert ($unsigned($bits(mst_req_o.aw.id)) == MstAxiIDWidth) + else $fatal(1, "ID width of AW channel of master port is wrong!"); + assert ($unsigned($bits(mst_req_o.ar.id)) == MstAxiIDWidth) + else $fatal(1, "ID width of AR channel of master port is wrong!"); + assert ($unsigned($bits(mst_resp_i.b.id)) == MstAxiIDWidth) + else $fatal(1, "ID width of B channel of master port is wrong!"); + assert ($unsigned($bits(mst_resp_i.r.id)) == MstAxiIDWidth) + 
else $fatal(1, "ID width of R channel of master port is wrong!"); + end +`endif +// pragma translate_on +endmodule + +// interface wrap +`include "axi/assign.svh" +`include "axi/typedef.svh" +module axi_mux_intf #( + parameter int unsigned SLV_AXI_ID_WIDTH = 32'd0, // Synopsys DC requires default value for params + parameter int unsigned MST_AXI_ID_WIDTH = 32'd0, + parameter int unsigned AXI_ADDR_WIDTH = 32'd0, + parameter int unsigned AXI_DATA_WIDTH = 32'd0, + parameter int unsigned AXI_USER_WIDTH = 32'd0, + parameter int unsigned NO_SLV_PORTS = 32'd0, // Number of slave ports + // Maximum number of outstanding transactions per write + parameter int unsigned MAX_W_TRANS = 32'd8, + // if enabled, this multiplexer is purely combinatorial + parameter bit FALL_THROUGH = 1'b0, + // add spill register on write master ports, adds a cycle latency on write channels + parameter bit SPILL_AW = 1'b1, + parameter bit SPILL_W = 1'b0, + parameter bit SPILL_B = 1'b0, + // add spill register on read master ports, adds a cycle latency on read channels + parameter bit SPILL_AR = 1'b1, + parameter bit SPILL_R = 1'b0 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + AXI_BUS.Slave slv [NO_SLV_PORTS-1:0], // slave ports + AXI_BUS.Master mst // master port +); + + typedef logic [SLV_AXI_ID_WIDTH-1:0] slv_id_t; + typedef logic [MST_AXI_ID_WIDTH-1:0] mst_id_t; + typedef logic [AXI_ADDR_WIDTH -1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + // channels typedef + `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t) + + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + + `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t) + + `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, 
addr_t, slv_id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t) + + `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t) + + `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) + `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t) + + `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) + `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t) + + slv_req_t [NO_SLV_PORTS-1:0] slv_reqs; + slv_resp_t [NO_SLV_PORTS-1:0] slv_resps; + mst_req_t mst_req; + mst_resp_t mst_resp; + + for (genvar i = 0; i < NO_SLV_PORTS; i++) begin : gen_assign_slv_ports + `AXI_ASSIGN_TO_REQ(slv_reqs[i], slv[i]) + `AXI_ASSIGN_FROM_RESP(slv[i], slv_resps[i]) + end + + `AXI_ASSIGN_FROM_REQ(mst, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, mst) + + axi_mux #( + .SlvAxiIDWidth ( SLV_AXI_ID_WIDTH ), + .slv_aw_chan_t ( slv_aw_chan_t ), // AW Channel Type, slave ports + .mst_aw_chan_t ( mst_aw_chan_t ), // AW Channel Type, master port + .w_chan_t ( w_chan_t ), // W Channel Type, all ports + .slv_b_chan_t ( slv_b_chan_t ), // B Channel Type, slave ports + .mst_b_chan_t ( mst_b_chan_t ), // B Channel Type, master port + .slv_ar_chan_t ( slv_ar_chan_t ), // AR Channel Type, slave ports + .mst_ar_chan_t ( mst_ar_chan_t ), // AR Channel Type, master port + .slv_r_chan_t ( slv_r_chan_t ), // R Channel Type, slave ports + .mst_r_chan_t ( mst_r_chan_t ), // R Channel Type, master port + .slv_req_t ( slv_req_t ), + .slv_resp_t ( slv_resp_t ), + .mst_req_t ( mst_req_t ), + .mst_resp_t ( mst_resp_t ), + .NoSlvPorts ( NO_SLV_PORTS ), // Number of slave ports + .MaxWTrans ( MAX_W_TRANS ), + .FallThrough ( FALL_THROUGH ), + .SpillAw ( SPILL_AW ), + .SpillW ( SPILL_W ), + .SpillB ( SPILL_B ), + .SpillAr ( SPILL_AR ), + .SpillR ( SPILL_R ) + ) i_axi_mux ( + .clk_i ( clk_i ), // Clock + .rst_ni ( rst_ni ), // Asynchronous reset active low + .test_i ( test_i ), // Test Mode 
enable + .slv_reqs_i ( slv_reqs ), + .slv_resps_o ( slv_resps ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv new file mode 100644 index 0000000..92ede55 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv @@ -0,0 +1,423 @@ +// Copyright (c) 2014-2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Florian Zaruba +// - Wolfgang Roenninger +// - Fabian Schuiki +// - Matheus Cavalcante + +//! AXI Package +/// Contains all necessary type definitions, constants, and generally useful functions. +package axi_pkg; + /// AXI Transaction Burst Type. + typedef logic [1:0] burst_t; + /// AXI Transaction Response Type. + typedef logic [1:0] resp_t; + /// AXI Transaction Cacheability Type. + typedef logic [3:0] cache_t; + /// AXI Transaction Protection Type. + typedef logic [2:0] prot_t; + /// AXI Transaction Quality of Service Type. + typedef logic [3:0] qos_t; + /// AXI Transaction Region Type. + typedef logic [3:0] region_t; + /// AXI Transaction Length Type. + typedef logic [7:0] len_t; + /// AXI Transaction Size Type. + typedef logic [2:0] size_t; + /// AXI5 Atomic Operation Type. 
+ typedef logic [5:0] atop_t; // atomic operations + /// AXI5 Non-Secure Address Identifier. + typedef logic [3:0] nsaid_t; + + /// In a fixed burst: + /// - The address is the same for every transfer in the burst. + /// - The byte lanes that are valid are constant for all beats in the burst. However, within + /// those byte lanes, the actual bytes that have `wstrb` asserted can differ for each beat in + /// the burst. + /// This burst type is used for repeated accesses to the same location such as when loading or + /// emptying a FIFO. + localparam BURST_FIXED = 2'b00; + /// In an incrementing burst, the address for each transfer in the burst is an increment of the + /// address for the previous transfer. The increment value depends on the size of the transfer. + /// For example, the address for each transfer in a burst with a size of 4 bytes is the previous + /// address plus four. + /// This burst type is used for accesses to normal sequential memory. + localparam BURST_INCR = 2'b01; + /// A wrapping burst is similar to an incrementing burst, except that the address wraps around to + /// a lower address if an upper address limit is reached. + /// The following restrictions apply to wrapping bursts: + /// - The start address must be aligned to the size of each transfer. + /// - The length of the burst must be 2, 4, 8, or 16 transfers. + localparam BURST_WRAP = 2'b10; + + /// Normal access success. Indicates that a normal access has been successful. Can also indicate + /// that an exclusive access has failed. + localparam RESP_OKAY = 2'b00; + /// Exclusive access okay. Indicates that either the read or write portion of an exclusive access + /// has been successful. + localparam RESP_EXOKAY = 2'b01; + /// Slave error. Used when the access has reached the slave successfully, but the slave wishes to + /// return an error condition to the originating master. + localparam RESP_SLVERR = 2'b10; + /// Decode error. 
Generated, typically by an interconnect component, to indicate that there is no + /// slave at the transaction address. + localparam RESP_DECERR = 2'b11; + + /// When this bit is asserted, the interconnect, or any component, can delay the transaction + /// reaching its final destination for any number of cycles. + localparam CACHE_BUFFERABLE = 4'b0001; + /// When HIGH, Modifiable indicates that the characteristics of the transaction can be modified. + /// When Modifiable is LOW, the transaction is Non-modifiable. + localparam CACHE_MODIFIABLE = 4'b0010; + /// When this bit is asserted, read allocation of the transaction is recommended but is not + /// mandatory. + localparam CACHE_RD_ALLOC = 4'b0100; + /// When this bit is asserted, write allocation of the transaction is recommended but is not + /// mandatory. + localparam CACHE_WR_ALLOC = 4'b1000; + + /// Maximum number of bytes per beat (transfer), as specified by `size` (see Table A3-2). + function automatic shortint unsigned num_bytes(size_t size); + return 1 << size; + endfunction + + /// An overly long address type. + /// It lets us define functions that work generically for shorter addresses. We rely on the + /// synthesizer to optimize the unused bits away. + typedef logic [127:0] largest_addr_t; + + /// Aligned address of burst (see A3-51). + function automatic largest_addr_t aligned_addr(largest_addr_t addr, size_t size); + return (addr >> size) << size; + endfunction + + /// Wrap boundary of a `BURST_WRAP` transfer (see A3-51). + /// This is the lowest address accessed within a wrapping burst. + /// This address is aligned to the size and length of the burst. + /// The length of a `BURST_WRAP` has to be 2, 4, 8, or 16 transfers.
+ function automatic largest_addr_t wrap_boundary (largest_addr_t addr, size_t size, len_t len); + largest_addr_t wrap_addr; + + // pragma translate_off + `ifndef VERILATOR + assume (len == len_t'(4'b1) || len == len_t'(4'b11) || len == len_t'(4'b111) || + len == len_t'(4'b1111)) else + $error("AXI BURST_WRAP with not allowed len of: %0h", len); + `endif + // pragma translate_on + + // In A3-51 the wrap boundary is defined as: + // `Wrap_Boundary = (INT(Start_Address / (Number_Bytes × Burst_Length))) × + // (Number_Bytes × Burst_Length)` + // Whereas the aligned address is defined as: + // `Aligned_Address = (INT(Start_Address / Number_Bytes)) × Number_Bytes` + // This leads to the wrap boundary using the same calculation as the aligned address, difference + // being the additional dependency on the burst length. The addition in the case statement + // is equal to the multiplication with `Burst_Length` as a shift (used by `aligned_addr`) is + // equivalent with multiplication and division by a power of two, which conveniently are the + // only allowed values for `len` of a `BURST_WRAP`. + unique case (len) + 4'b1 : wrap_addr = (addr >> (unsigned'(size) + 1)) << (unsigned'(size) + 1); // multiply `Number_Bytes` by `2` + 4'b11 : wrap_addr = (addr >> (unsigned'(size) + 2)) << (unsigned'(size) + 2); // multiply `Number_Bytes` by `4` + 4'b111 : wrap_addr = (addr >> (unsigned'(size) + 3)) << (unsigned'(size) + 3); // multiply `Number_Bytes` by `8` + 4'b1111 : wrap_addr = (addr >> (unsigned'(size) + 4)) << (unsigned'(size) + 4); // multiply `Number_Bytes` by `16` + default : wrap_addr = '0; + endcase + return wrap_addr; + endfunction + + /// Address of beat (see A3-51). 
+ function automatic largest_addr_t + beat_addr(largest_addr_t addr, size_t size, len_t len, burst_t burst, shortint unsigned i_beat); + largest_addr_t ret_addr = addr; + largest_addr_t wrp_bond = '0; + if (burst == BURST_WRAP) begin + // do not trigger the function if there is no wrapping burst, to prevent assumptions firing + wrp_bond = wrap_boundary(addr, size, len); + end + if (i_beat != 0 && burst != BURST_FIXED) begin + // From A3-51: + // For an INCR burst, and for a WRAP burst for which the address has not wrapped, this + // equation determines the address of any transfer after the first transfer in a burst: + // `Address_N = Aligned_Address + (N – 1) × Number_Bytes` (N counts from 1 to len!) + ret_addr = aligned_addr(addr, size) + i_beat * num_bytes(size); + // From A3-51: + // For a WRAP burst, if Address_N = Wrap_Boundary + (Number_Bytes × Burst_Length), then: + // * Use this equation for the current transfer: + // `Address_N = Wrap_Boundary` + // * Use this equation for any subsequent transfers: + // `Address_N = Start_Address + ((N – 1) × Number_Bytes) – (Number_Bytes × Burst_Length)` + // This means that the address calculation of a `BURST_WRAP` fundamentally works the same + // as for a `BURST_INCR`, the difference is when the calculated address increments + // over the wrap threshold, the address wraps around by subtracting the accessed address + // space from the normal `BURST_INCR` address. The lower wrap boundary is equivalent to + // the wrap trigger condition minus the container size (`num_bytes(size) * (len + 1)`). + if (burst == BURST_WRAP && ret_addr >= wrp_bond + (num_bytes(size) * (len + 1))) begin + ret_addr = ret_addr - (num_bytes(size) * (len + 1)); + end + end + return ret_addr; + endfunction + + /// Index of lowest byte in beat (see A3-51).
+ function automatic shortint unsigned + beat_lower_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst, + shortint unsigned strobe_width, shortint unsigned i_beat); + largest_addr_t _addr = beat_addr(addr, size, len, burst, i_beat); + return _addr - (_addr / strobe_width) * strobe_width; + endfunction + + /// Index of highest byte in beat (see A3-51). + function automatic shortint unsigned + beat_upper_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst, + shortint unsigned strobe_width, shortint unsigned i_beat); + if (i_beat == 0) begin + return aligned_addr(addr, size) + (num_bytes(size) - 1) - (addr / strobe_width) * strobe_width; + end else begin + return beat_lower_byte(addr, size, len, burst, strobe_width, i_beat) + num_bytes(size) - 1; + end + endfunction + + /// Is the bufferable bit set? + function automatic logic bufferable(cache_t cache); + return |(cache & CACHE_BUFFERABLE); + endfunction + + /// Is the modifiable bit set? + function automatic logic modifiable(cache_t cache); + return |(cache & CACHE_MODIFIABLE); + endfunction + + /// Memory Type. + typedef enum logic [3:0] { + DEVICE_NONBUFFERABLE, + DEVICE_BUFFERABLE, + NORMAL_NONCACHEABLE_NONBUFFERABLE, + NORMAL_NONCACHEABLE_BUFFERABLE, + WTHRU_NOALLOCATE, + WTHRU_RALLOCATE, + WTHRU_WALLOCATE, + WTHRU_RWALLOCATE, + WBACK_NOALLOCATE, + WBACK_RALLOCATE, + WBACK_WALLOCATE, + WBACK_RWALLOCATE + } mem_type_t; + + /// Create an `AR_CACHE` field from a `mem_type_t` type. 
+ function automatic logic [3:0] get_arcache(mem_type_t mtype); + unique case (mtype) + DEVICE_NONBUFFERABLE : return 4'b0000; + DEVICE_BUFFERABLE : return 4'b0001; + NORMAL_NONCACHEABLE_NONBUFFERABLE : return 4'b0010; + NORMAL_NONCACHEABLE_BUFFERABLE : return 4'b0011; + WTHRU_NOALLOCATE : return 4'b1010; + WTHRU_RALLOCATE : return 4'b1110; + WTHRU_WALLOCATE : return 4'b1010; + WTHRU_RWALLOCATE : return 4'b1110; + WBACK_NOALLOCATE : return 4'b1011; + WBACK_RALLOCATE : return 4'b1111; + WBACK_WALLOCATE : return 4'b1011; + WBACK_RWALLOCATE : return 4'b1111; + endcase // mtype + endfunction + + /// Create an `AW_CACHE` field from a `mem_type_t` type. + function automatic logic [3:0] get_awcache(mem_type_t mtype); + unique case (mtype) + DEVICE_NONBUFFERABLE : return 4'b0000; + DEVICE_BUFFERABLE : return 4'b0001; + NORMAL_NONCACHEABLE_NONBUFFERABLE : return 4'b0010; + NORMAL_NONCACHEABLE_BUFFERABLE : return 4'b0011; + WTHRU_NOALLOCATE : return 4'b0110; + WTHRU_RALLOCATE : return 4'b0110; + WTHRU_WALLOCATE : return 4'b1110; + WTHRU_RWALLOCATE : return 4'b1110; + WBACK_NOALLOCATE : return 4'b0111; + WBACK_RALLOCATE : return 4'b0111; + WBACK_WALLOCATE : return 4'b1111; + WBACK_RWALLOCATE : return 4'b1111; + endcase // mtype + endfunction + + /// RESP precedence: DECERR > SLVERR > OKAY > EXOKAY. This is not defined in the AXI standard but + /// depends on the implementation. We consistently use the precedence above. Rationale: + /// - EXOKAY means an exclusive access was successful, whereas OKAY means it was not. Thus, if + /// OKAY and EXOKAY are to be merged, OKAY precedes because the exclusive access was not fully + /// successful. + /// - Both DECERR and SLVERR mean (part of) a transaction were unsuccessful, whereas OKAY means an + /// entire transaction was successful. Thus both DECERR and SLVERR precede OKAY. 
+ /// - DECERR means (part of) a transaction could not be routed to a slave component, whereas + /// SLVERR means the transaction reached a slave component but led to an error condition there. + /// Thus DECERR precedes SLVERR because DECERR happens earlier in the handling of a transaction. + function automatic resp_t resp_precedence(resp_t resp_a, resp_t resp_b); + unique case (resp_a) + RESP_OKAY: begin + // Any response except EXOKAY precedes OKAY. + if (resp_b == RESP_EXOKAY) begin + return resp_a; + end else begin + return resp_b; + end + end + RESP_EXOKAY: begin + // Any response precedes EXOKAY. + return resp_b; + end + RESP_SLVERR: begin + // Only DECERR precedes SLVERR. + if (resp_b == RESP_DECERR) begin + return resp_b; + end else begin + return resp_a; + end + end + RESP_DECERR: begin + // No response precedes DECERR. + return resp_a; + end + endcase + endfunction + + // ATOP[5:0] + /// - Sends a single data value with an address. + /// - The target swaps the value at the addressed location with the data value that is supplied in + /// the transaction. + /// - The original data value at the addressed location is returned. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + /// - Inbound data size is the same as the outbound data size. + localparam ATOP_ATOMICSWAP = 6'b110000; + /// - Sends two data values, the compare value and the swap value, to the addressed location. + /// The compare and swap values are of equal size. + /// - The data value at the addressed location is checked against the compare value: + /// - If the values match, the swap value is written to the addressed location. + /// - If the values do not match, the swap value is not written to the addressed location. + /// - The original data value at the addressed location is returned. + /// - Outbound data size is 2, 4, 8, 16, or 32 bytes.
+ /// - Inbound data size is half of the outbound data size because the outbound data contains both + /// compare and swap values, whereas the inbound data has only the original data value. + localparam ATOP_ATOMICCMP = 6'b110001; + // ATOP[5:4] + /// Perform no atomic operation. + localparam ATOP_NONE = 2'b00; + /// - Sends a single data value with an address and the atomic operation to be performed. + /// - The target performs the operation using the sent data and value at the addressed location as + /// operands. + /// - The result is stored in the address location. + /// - A single response is given without data. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + localparam ATOP_ATOMICSTORE = 2'b01; + /// Sends a single data value with an address and the atomic operation to be performed. + /// - The original data value at the addressed location is returned. + /// - The target performs the operation using the sent data and value at the addressed location as + /// operands. + /// - The result is stored in the address location. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + /// - Inbound data size is the same as the outbound data size. + localparam ATOP_ATOMICLOAD = 2'b10; + // ATOP[3] + /// For AtomicStore and AtomicLoad transactions `AWATOP[3]` indicates the endianness that is + /// required for the atomic operation. The value of `AWATOP[3]` applies to arithmetic operations + /// only and is ignored for bitwise logical operations. + /// When deasserted, this bit indicates that the operation is little-endian. + localparam ATOP_LITTLE_END = 1'b0; + /// When asserted, this bit indicates that the operation is big-endian. + localparam ATOP_BIG_END = 1'b1; + // ATOP[2:0] + /// The value in memory is added to the sent data and the result stored in memory. + localparam ATOP_ADD = 3'b000; + /// Every set bit in the sent data clears the corresponding bit of the data in memory. 
+ localparam ATOP_CLR = 3'b001; + /// Bitwise exclusive OR of the sent data and value in memory. + localparam ATOP_EOR = 3'b010; + /// Every set bit in the sent data sets the corresponding bit of the data in memory. + localparam ATOP_SET = 3'b011; + /// The value stored in memory is the maximum of the existing value and sent data. This operation + /// assumes signed data. + localparam ATOP_SMAX = 3'b100; + /// The value stored in memory is the minimum of the existing value and sent data. This operation + /// assumes signed data. + localparam ATOP_SMIN = 3'b101; + /// The value stored in memory is the maximum of the existing value and sent data. This operation + /// assumes unsigned data. + localparam ATOP_UMAX = 3'b110; + /// The value stored in memory is the minimum of the existing value and sent data. This operation + /// assumes unsigned data. + localparam ATOP_UMIN = 3'b111; + // ATOP[5] == 1'b1 indicates that an atomic transaction has a read response + // Usage, e.g.: if (req_i.aw.atop[axi_pkg::ATOP_R_RESP]) begin + localparam ATOP_R_RESP = 32'd5; + + // `xbar_latency_e` and `xbar_cfg_t` are documented in `doc/axi_xbar.md`. + /// Slice on Demux AW channel. + localparam logic [9:0] DemuxAw = (1 << 9); + /// Slice on Demux W channel. + localparam logic [9:0] DemuxW = (1 << 8); + /// Slice on Demux B channel. + localparam logic [9:0] DemuxB = (1 << 7); + /// Slice on Demux AR channel. + localparam logic [9:0] DemuxAr = (1 << 6); + /// Slice on Demux R channel. + localparam logic [9:0] DemuxR = (1 << 5); + /// Slice on Mux AW channel. + localparam logic [9:0] MuxAw = (1 << 4); + /// Slice on Mux W channel. + localparam logic [9:0] MuxW = (1 << 3); + /// Slice on Mux B channel. + localparam logic [9:0] MuxB = (1 << 2); + /// Slice on Mux AR channel. + localparam logic [9:0] MuxAr = (1 << 1); + /// Slice on Mux R channel. + localparam logic [9:0] MuxR = (1 << 0); + /// Latency configuration for `axi_xbar`.
+ typedef enum logic [9:0] { + NO_LATENCY = 10'b000_00_000_00, + CUT_SLV_AX = DemuxAw | DemuxAr, + CUT_MST_AX = MuxAw | MuxAr, + CUT_ALL_AX = DemuxAw | DemuxAr | MuxAw | MuxAr, + CUT_SLV_PORTS = DemuxAw | DemuxW | DemuxB | DemuxAr | DemuxR, + CUT_MST_PORTS = MuxAw | MuxW | MuxB | MuxAr | MuxR, + CUT_ALL_PORTS = 10'b111_11_111_11 + } xbar_latency_e; + + /// Configuration for `axi_xbar`. + typedef struct packed { + int unsigned NoSlvPorts; + int unsigned NoMstPorts; + int unsigned MaxMstTrans; + int unsigned MaxSlvTrans; + bit FallThrough; + xbar_latency_e LatencyMode; + int unsigned AxiIdWidthSlvPorts; + int unsigned AxiIdUsedSlvPorts; + bit UniqueIds; + int unsigned AxiAddrWidth; + int unsigned AxiDataWidth; + int unsigned NoAddrRules; + } xbar_cfg_t; + + /// Commonly used rule types for `axi_xbar` (64-bit addresses). + typedef struct packed { + int unsigned idx; + logic [63:0] start_addr; + logic [63:0] end_addr; + } xbar_rule_64_t; + + /// Commonly used rule types for `axi_xbar` (32-bit addresses). + typedef struct packed { + int unsigned idx; + logic [31:0] start_addr; + logic [31:0] end_addr; + } xbar_rule_32_t; +endpackage diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv new file mode 100644 index 0000000..c75887a --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv @@ -0,0 +1,323 @@ +// Copyright (c) 2014-2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth +// - Fabian Schuiki +// - Florian Zaruba + +/// An AXI4+ATOP to AXI4-Lite converter with atomic transaction and burst support. +module axi_to_axi_lite #( + parameter int unsigned AxiAddrWidth = 32'd0, + parameter int unsigned AxiDataWidth = 32'd0, + parameter int unsigned AxiIdWidth = 32'd0, + parameter int unsigned AxiUserWidth = 32'd0, + parameter int unsigned AxiMaxWriteTxns = 32'd0, + parameter int unsigned AxiMaxReadTxns = 32'd0, + parameter bit FallThrough = 1'b1, // FIFOs in Fall through mode in ID reflect + parameter type full_req_t = logic, + parameter type full_resp_t = logic, + parameter type lite_req_t = logic, + parameter type lite_resp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + // slave port full AXI4+ATOP + input full_req_t slv_req_i, + output full_resp_t slv_resp_o, + // master port AXI4-Lite + output lite_req_t mst_req_o, + input lite_resp_t mst_resp_i +); + // full bus declarations + full_req_t filtered_req, splitted_req; + full_resp_t filtered_resp, splitted_resp; + + // atomics adapter so that atomics can be resolved + axi_atop_filter #( + .AxiIdWidth ( AxiIdWidth ), + .AxiMaxWriteTxns ( AxiMaxWriteTxns ), + .req_t ( full_req_t ), + .resp_t ( full_resp_t ) + ) i_axi_atop_filter( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_req_i ( slv_req_i ), + .slv_resp_o ( slv_resp_o ), + .mst_req_o ( filtered_req ), + .mst_resp_i ( filtered_resp ) + ); + + // burst splitter so that the id reflect module has no burst accessing it + axi_burst_splitter #( 
+ .MaxReadTxns ( AxiMaxReadTxns ), + .MaxWriteTxns ( AxiMaxWriteTxns ), + .AddrWidth ( AxiAddrWidth ), + .DataWidth ( AxiDataWidth ), + .IdWidth ( AxiIdWidth ), + .UserWidth ( AxiUserWidth ), + .req_t ( full_req_t ), + .resp_t ( full_resp_t ) + ) i_axi_burst_splitter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_req_i ( filtered_req ), + .slv_resp_o ( filtered_resp ), + .mst_req_o ( splitted_req ), + .mst_resp_i ( splitted_resp ) + ); + + // ID reflect module handles the conversion from the full AXI to AXI lite on the wiring + axi_to_axi_lite_id_reflect #( + .AxiIdWidth ( AxiIdWidth ), + .AxiMaxWriteTxns ( AxiMaxWriteTxns ), + .AxiMaxReadTxns ( AxiMaxReadTxns ), + .FallThrough ( FallThrough ), + .full_req_t ( full_req_t ), + .full_resp_t ( full_resp_t ), + .lite_req_t ( lite_req_t ), + .lite_resp_t ( lite_resp_t ) + ) i_axi_to_axi_lite_id_reflect ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_i ( test_i ), + .slv_req_i ( splitted_req ), + .slv_resp_o ( splitted_resp ), + .mst_req_o ( mst_req_o ), + .mst_resp_i ( mst_resp_i ) + ); + + // Assertions, check params + // pragma translate_off + `ifndef VERILATOR + initial begin + assume (AxiIdWidth > 0) else $fatal(1, "AXI ID width has to be > 0"); + assume (AxiAddrWidth > 0) else $fatal(1, "AXI address width has to be > 0"); + assume (AxiDataWidth > 0) else $fatal(1, "AXI data width has to be > 0"); + end + `endif + // pragma translate_on +endmodule + +// Description: This module does the translation of the full AXI4+ATOP to AXI4-Lite signals. +// It reflects the ID of the incoming transaction and crops all signals not used +// in AXI4-Lite. It requires that incoming AXI4+ATOP transactions have a +// `axi_pkg::len_t` of `'0` and an `axi_pkg::atop_t` of `'0`.
+ +module axi_to_axi_lite_id_reflect #( + parameter int unsigned AxiIdWidth = 32'd0, + parameter int unsigned AxiMaxWriteTxns = 32'd0, + parameter int unsigned AxiMaxReadTxns = 32'd0, + parameter bit FallThrough = 1'b1, // FIFOs in fall through mode + parameter type full_req_t = logic, + parameter type full_resp_t = logic, + parameter type lite_req_t = logic, + parameter type lite_resp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + // slave port full AXI + input full_req_t slv_req_i, + output full_resp_t slv_resp_o, + // master port AXI LITE + output lite_req_t mst_req_o, + input lite_resp_t mst_resp_i +); + typedef logic [AxiIdWidth-1:0] id_t; + + // FIFO status and control signals + logic aw_full, aw_empty, aw_push, aw_pop, ar_full, ar_empty, ar_push, ar_pop; + id_t aw_reflect_id, ar_reflect_id; + + assign slv_resp_o = '{ + aw_ready: mst_resp_i.aw_ready & ~aw_full, + w_ready: mst_resp_i.w_ready, + b: '{ + id: aw_reflect_id, + resp: mst_resp_i.b.resp, + default: '0 + }, + b_valid: mst_resp_i.b_valid & ~aw_empty, + ar_ready: mst_resp_i.ar_ready & ~ar_full, + r: '{ + id: ar_reflect_id, + data: mst_resp_i.r.data, + resp: mst_resp_i.r.resp, + last: 1'b1, + default: '0 + }, + r_valid: mst_resp_i.r_valid & ~ar_empty, + default: '0 + }; + + // Write ID reflection + assign aw_push = mst_req_o.aw_valid & slv_resp_o.aw_ready; + assign aw_pop = slv_resp_o.b_valid & mst_req_o.b_ready; + fifo_v3 #( + .FALL_THROUGH ( FallThrough ), + .DEPTH ( AxiMaxWriteTxns ), + .dtype ( id_t ) + ) i_aw_id_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i( test_i ), + .full_o ( aw_full ), + .empty_o ( aw_empty ), + .usage_o ( /*not used*/ ), + .data_i ( slv_req_i.aw.id ), + .push_i ( aw_push ), + .data_o ( aw_reflect_id ), + .pop_i ( aw_pop ) + ); + + // Read ID reflection + assign ar_push = mst_req_o.ar_valid & slv_resp_o.ar_ready; + assign ar_pop = 
slv_resp_o.r_valid & mst_req_o.r_ready; + fifo_v3 #( + .FALL_THROUGH ( FallThrough ), + .DEPTH ( AxiMaxReadTxns ), + .dtype ( id_t ) + ) i_ar_id_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i( test_i ), + .full_o ( ar_full ), + .empty_o ( ar_empty ), + .usage_o ( /*not used*/ ), + .data_i ( slv_req_i.ar.id ), + .push_i ( ar_push ), + .data_o ( ar_reflect_id ), + .pop_i ( ar_pop ) + ); + + assign mst_req_o = '{ + aw: '{ + addr: slv_req_i.aw.addr, + prot: slv_req_i.aw.prot + }, + aw_valid: slv_req_i.aw_valid & ~aw_full, + w: '{ + data: slv_req_i.w.data, + strb: slv_req_i.w.strb + }, + w_valid: slv_req_i.w_valid, + b_ready: slv_req_i.b_ready & ~aw_empty, + ar: '{ + addr: slv_req_i.ar.addr, + prot: slv_req_i.ar.prot + }, + ar_valid: slv_req_i.ar_valid & ~ar_full, + r_ready: slv_req_i.r_ready & ~ar_empty, + default: '0 + }; + + // Assertions + // pragma translate_off + `ifndef VERILATOR + aw_atop: assume property( @(posedge clk_i) disable iff (~rst_ni) + slv_req_i.aw_valid |-> (slv_req_i.aw.atop == '0)) else + $fatal(1, "Module does not support atomics. Value observed: %0b", slv_req_i.aw.atop); + aw_axi_len: assume property( @(posedge clk_i) disable iff (~rst_ni) + slv_req_i.aw_valid |-> (slv_req_i.aw.len == '0)) else + $fatal(1, "AW request length has to be zero. Value observed: %0b", slv_req_i.aw.len); + w_axi_last: assume property( @(posedge clk_i) disable iff (~rst_ni) + slv_req_i.w_valid |-> (slv_req_i.w.last == 1'b1)) else + $fatal(1, "W last signal has to be one. Value observed: %0b", slv_req_i.w.last); + ar_axi_len: assume property( @(posedge clk_i) disable iff (~rst_ni) + slv_req_i.ar_valid |-> (slv_req_i.ar.len == '0)) else + $fatal(1, "AR request length has to be zero. 
Value observed: %0b", slv_req_i.ar.len); + `endif + // pragma translate_on +endmodule + +// interface wrapper +`include "axi/assign.svh" +`include "axi/typedef.svh" +module axi_to_axi_lite_intf #( + /// AXI bus parameters + parameter int unsigned AXI_ADDR_WIDTH = 32'd0, + parameter int unsigned AXI_DATA_WIDTH = 32'd0, + parameter int unsigned AXI_ID_WIDTH = 32'd0, + parameter int unsigned AXI_USER_WIDTH = 32'd0, + /// Maximum number of outstanding writes. + parameter int unsigned AXI_MAX_WRITE_TXNS = 32'd1, + /// Maximum number of outstanding reads. + parameter int unsigned AXI_MAX_READ_TXNS = 32'd1, + parameter bit FALL_THROUGH = 1'b1 +) ( + input logic clk_i, + input logic rst_ni, + input logic testmode_i, + AXI_BUS.Slave slv, + AXI_LITE.Master mst +); + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + // full channels typedefs + `AXI_TYPEDEF_AW_CHAN_T(full_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(full_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(full_b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(full_ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(full_r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(full_req_t, full_aw_chan_t, full_w_chan_t, full_ar_chan_t) + `AXI_TYPEDEF_RESP_T(full_resp_t, full_b_chan_t, full_r_chan_t) + // LITE channels typedef + `AXI_LITE_TYPEDEF_AW_CHAN_T(lite_aw_chan_t, addr_t) + `AXI_LITE_TYPEDEF_W_CHAN_T(lite_w_chan_t, data_t, strb_t) + `AXI_LITE_TYPEDEF_B_CHAN_T(lite_b_chan_t) + `AXI_LITE_TYPEDEF_AR_CHAN_T(lite_ar_chan_t, addr_t) + `AXI_LITE_TYPEDEF_R_CHAN_T (lite_r_chan_t, data_t) + `AXI_LITE_TYPEDEF_REQ_T(lite_req_t, lite_aw_chan_t, lite_w_chan_t, lite_ar_chan_t) + `AXI_LITE_TYPEDEF_RESP_T(lite_resp_t, lite_b_chan_t, lite_r_chan_t) + + full_req_t full_req; + full_resp_t full_resp; + lite_req_t lite_req; + lite_resp_t 
lite_resp; + + `AXI_ASSIGN_TO_REQ(full_req, slv) + `AXI_ASSIGN_FROM_RESP(slv, full_resp) + + `AXI_LITE_ASSIGN_FROM_REQ(mst, lite_req) + `AXI_LITE_ASSIGN_TO_RESP(lite_resp, mst) + + axi_to_axi_lite #( + .AxiAddrWidth ( AXI_ADDR_WIDTH ), + .AxiDataWidth ( AXI_DATA_WIDTH ), + .AxiIdWidth ( AXI_ID_WIDTH ), + .AxiUserWidth ( AXI_USER_WIDTH ), + .AxiMaxWriteTxns ( AXI_MAX_WRITE_TXNS ), + .AxiMaxReadTxns ( AXI_MAX_READ_TXNS ), + .FallThrough ( FALL_THROUGH ), // FIFOs in Fall through mode in ID reflect + .full_req_t ( full_req_t ), + .full_resp_t ( full_resp_t ), + .lite_req_t ( lite_req_t ), + .lite_resp_t ( lite_resp_t ) + ) i_axi_to_axi_lite ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_i ( testmode_i ), + // slave port full AXI4+ATOP + .slv_req_i ( full_req ), + .slv_resp_o ( full_resp ), + // master port AXI4-Lite + .mst_req_o ( lite_req ), + .mst_resp_i ( lite_resp ) + ); +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv new file mode 100644 index 0000000..d66cd97 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv @@ -0,0 +1,324 @@ +// Copyright (c) 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth +// - Florian Zaruba + +// axi_xbar: Fully-connected AXI4+ATOP crossbar with an arbitrary number of slave and master ports. +// See `doc/axi_xbar.md` for the documentation, including the definition of parameters and ports. +module axi_xbar #( + parameter axi_pkg::xbar_cfg_t Cfg = '0, + parameter bit ATOPs = 1'b1, + parameter type slv_aw_chan_t = logic, + parameter type mst_aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type slv_b_chan_t = logic, + parameter type mst_b_chan_t = logic, + parameter type slv_ar_chan_t = logic, + parameter type mst_ar_chan_t = logic, + parameter type slv_r_chan_t = logic, + parameter type mst_r_chan_t = logic, + parameter type slv_req_t = logic, + parameter type slv_resp_t = logic, + parameter type mst_req_t = logic, + parameter type mst_resp_t = logic, + parameter type rule_t = axi_pkg::xbar_rule_64_t +) ( + input logic clk_i, + input logic rst_ni, + input logic test_i, + input slv_req_t [Cfg.NoSlvPorts-1:0] slv_ports_req_i, + output slv_resp_t [Cfg.NoSlvPorts-1:0] slv_ports_resp_o, + output mst_req_t [Cfg.NoMstPorts-1:0] mst_ports_req_o, + input mst_resp_t [Cfg.NoMstPorts-1:0] mst_ports_resp_i, + input rule_t [Cfg.NoAddrRules-1:0] addr_map_i, + input logic [Cfg.NoSlvPorts-1:0] en_default_mst_port_i, + input logic [Cfg.NoSlvPorts-1:0][$clog2(Cfg.NoMstPorts)-1:0] default_mst_port_i +); + + typedef logic [Cfg.AxiAddrWidth-1:0] addr_t; + // to account for the decoding error slave + typedef logic [$clog2(Cfg.NoMstPorts + 1)-1:0] mst_port_idx_t; + + // signals from the axi_demuxes, one index more for decode error + slv_req_t [Cfg.NoSlvPorts-1:0][Cfg.NoMstPorts:0] slv_reqs; + slv_resp_t [Cfg.NoSlvPorts-1:0][Cfg.NoMstPorts:0] slv_resps; + + // workaround for issue #133 (problem with vsim 10.6c) + localparam int unsigned cfg_NoMstPorts = Cfg.NoMstPorts; + + // signals into the axi_muxes, are of type slave as the multiplexer extends the ID + slv_req_t 
[Cfg.NoMstPorts-1:0][Cfg.NoSlvPorts-1:0] mst_reqs; + slv_resp_t [Cfg.NoMstPorts-1:0][Cfg.NoSlvPorts-1:0] mst_resps; + + for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_slv_port_demux + logic [$clog2(Cfg.NoMstPorts)-1:0] dec_aw, dec_ar; + mst_port_idx_t slv_aw_select, slv_ar_select; + logic dec_aw_valid, dec_aw_error; + logic dec_ar_valid, dec_ar_error; + + addr_decode #( + .NoIndices ( Cfg.NoMstPorts ), + .NoRules ( Cfg.NoAddrRules ), + .addr_t ( addr_t ), + .rule_t ( rule_t ) + ) i_axi_aw_decode ( + .addr_i ( slv_ports_req_i[i].aw.addr ), + .addr_map_i ( addr_map_i ), + .idx_o ( dec_aw ), + .dec_valid_o ( dec_aw_valid ), + .dec_error_o ( dec_aw_error ), + .en_default_idx_i ( en_default_mst_port_i[i] ), + .default_idx_i ( default_mst_port_i[i] ) + ); + + addr_decode #( + .NoIndices ( Cfg.NoMstPorts ), + .addr_t ( addr_t ), + .NoRules ( Cfg.NoAddrRules ), + .rule_t ( rule_t ) + ) i_axi_ar_decode ( + .addr_i ( slv_ports_req_i[i].ar.addr ), + .addr_map_i ( addr_map_i ), + .idx_o ( dec_ar ), + .dec_valid_o ( dec_ar_valid ), + .dec_error_o ( dec_ar_error ), + .en_default_idx_i ( en_default_mst_port_i[i] ), + .default_idx_i ( default_mst_port_i[i] ) + ); + + assign slv_aw_select = (dec_aw_error) ? + mst_port_idx_t'(Cfg.NoMstPorts) : mst_port_idx_t'(dec_aw); + assign slv_ar_select = (dec_ar_error) ? + mst_port_idx_t'(Cfg.NoMstPorts) : mst_port_idx_t'(dec_ar); + + // make sure that the default slave does not get changed, if there is an unserved Ax + // pragma translate_off + `ifndef VERILATOR + `ifndef XSIM + default disable iff (~rst_ni); + default_aw_mst_port_en: assert property( + @(posedge clk_i) (slv_ports_req_i[i].aw_valid && !slv_ports_resp_o[i].aw_ready) + |=> $stable(en_default_mst_port_i[i])) + else $fatal (1, $sformatf("It is not allowed to change the default mst port\ + enable, when there is an unserved Aw beat. 
Slave Port: %0d", i)); + default_aw_mst_port: assert property( + @(posedge clk_i) (slv_ports_req_i[i].aw_valid && !slv_ports_resp_o[i].aw_ready) + |=> $stable(default_mst_port_i[i])) + else $fatal (1, $sformatf("It is not allowed to change the default mst port\ + when there is an unserved Aw beat. Slave Port: %0d", i)); + default_ar_mst_port_en: assert property( + @(posedge clk_i) (slv_ports_req_i[i].ar_valid && !slv_ports_resp_o[i].ar_ready) + |=> $stable(en_default_mst_port_i[i])) + else $fatal (1, $sformatf("It is not allowed to change the enable, when\ + there is an unserved Ar beat. Slave Port: %0d", i)); + default_ar_mst_port: assert property( + @(posedge clk_i) (slv_ports_req_i[i].ar_valid && !slv_ports_resp_o[i].ar_ready) + |=> $stable(default_mst_port_i[i])) + else $fatal (1, $sformatf("It is not allowed to change the default mst port\ + when there is an unserved Ar beat. Slave Port: %0d", i)); + `endif + `endif + // pragma translate_on + axi_demux #( + .AxiIdWidth ( Cfg.AxiIdWidthSlvPorts ), // ID Width + .aw_chan_t ( slv_aw_chan_t ), // AW Channel Type + .w_chan_t ( w_chan_t ), // W Channel Type + .b_chan_t ( slv_b_chan_t ), // B Channel Type + .ar_chan_t ( slv_ar_chan_t ), // AR Channel Type + .r_chan_t ( slv_r_chan_t ), // R Channel Type + .req_t ( slv_req_t ), + .resp_t ( slv_resp_t ), + .NoMstPorts ( Cfg.NoMstPorts + 1 ), + .MaxTrans ( Cfg.MaxMstTrans ), + .AxiLookBits ( Cfg.AxiIdUsedSlvPorts ), + .UniqueIds ( Cfg.UniqueIds ), + .FallThrough ( Cfg.FallThrough ), + .SpillAw ( Cfg.LatencyMode[9] ), + .SpillW ( Cfg.LatencyMode[8] ), + .SpillB ( Cfg.LatencyMode[7] ), + .SpillAr ( Cfg.LatencyMode[6] ), + .SpillR ( Cfg.LatencyMode[5] ) + ) i_axi_demux ( + .clk_i, // Clock + .rst_ni, // Asynchronous reset active low + .test_i, // Testmode enable + .slv_req_i ( slv_ports_req_i[i] ), + .slv_aw_select_i ( slv_aw_select ), + .slv_ar_select_i ( slv_ar_select ), + .slv_resp_o ( slv_ports_resp_o[i] ), + .mst_reqs_o ( slv_reqs[i] ), + .mst_resps_i ( slv_resps[i] ) 
+ ); + + axi_err_slv #( + .AxiIdWidth ( Cfg.AxiIdWidthSlvPorts ), + .req_t ( slv_req_t ), + .resp_t ( slv_resp_t ), + .Resp ( axi_pkg::RESP_DECERR ), + .ATOPs ( ATOPs ), + .MaxTrans ( 4 ) // Transactions terminate at this slave, so minimize + // resource consumption by accepting only a few + // transactions at a time. + ) i_axi_err_slv ( + .clk_i, // Clock + .rst_ni, // Asynchronous reset active low + .test_i, // Testmode enable + // slave port + .slv_req_i ( slv_reqs[i][Cfg.NoMstPorts] ), + .slv_resp_o ( slv_resps[i][cfg_NoMstPorts] ) + ); + end + + // cross all channels + for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_xbar_slv_cross + for (genvar j = 0; j < Cfg.NoMstPorts; j++) begin : gen_xbar_mst_cross + assign mst_reqs[j][i] = slv_reqs[i][j]; + assign slv_resps[i][j] = mst_resps[j][i]; + end + end + + for (genvar i = 0; i < Cfg.NoMstPorts; i++) begin : gen_mst_port_mux + axi_mux #( + .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports + .slv_aw_chan_t ( slv_aw_chan_t ), // AW Channel Type, slave ports + .mst_aw_chan_t ( mst_aw_chan_t ), // AW Channel Type, master port + .w_chan_t ( w_chan_t ), // W Channel Type, all ports + .slv_b_chan_t ( slv_b_chan_t ), // B Channel Type, slave ports + .mst_b_chan_t ( mst_b_chan_t ), // B Channel Type, master port + .slv_ar_chan_t ( slv_ar_chan_t ), // AR Channel Type, slave ports + .mst_ar_chan_t ( mst_ar_chan_t ), // AR Channel Type, master port + .slv_r_chan_t ( slv_r_chan_t ), // R Channel Type, slave ports + .mst_r_chan_t ( mst_r_chan_t ), // R Channel Type, master port + .slv_req_t ( slv_req_t ), + .slv_resp_t ( slv_resp_t ), + .mst_req_t ( mst_req_t ), + .mst_resp_t ( mst_resp_t ), + .NoSlvPorts ( Cfg.NoSlvPorts ), // Number of Masters for the module + .MaxWTrans ( Cfg.MaxSlvTrans ), + .FallThrough ( Cfg.FallThrough ), + .SpillAw ( Cfg.LatencyMode[4] ), + .SpillW ( Cfg.LatencyMode[3] ), + .SpillB ( Cfg.LatencyMode[2] ), + .SpillAr ( Cfg.LatencyMode[1] ), + .SpillR ( Cfg.LatencyMode[0] ) 
+ ) i_axi_mux ( + .clk_i, // Clock + .rst_ni, // Asynchronous reset active low + .test_i, // Test Mode enable + .slv_reqs_i ( mst_reqs[i] ), + .slv_resps_o ( mst_resps[i] ), + .mst_req_o ( mst_ports_req_o[i] ), + .mst_resp_i ( mst_ports_resp_i[i] ) + ); + end + + // pragma translate_off + `ifndef VERILATOR + `ifndef XSIM + initial begin : check_params + id_slv_req_ports: assert ($bits(slv_ports_req_i[0].aw.id ) == Cfg.AxiIdWidthSlvPorts) else + $fatal(1, $sformatf("Slv_req and aw_chan id width not equal.")); + id_slv_resp_ports: assert ($bits(slv_ports_resp_o[0].r.id) == Cfg.AxiIdWidthSlvPorts) else + $fatal(1, $sformatf("Slv_req and aw_chan id width not equal.")); + end + `endif + `endif + // pragma translate_on +endmodule + +`include "axi/assign.svh" +`include "axi/typedef.svh" + +module axi_xbar_intf #( // wraps axi_xbar behind AXI_BUS interface ports + parameter int unsigned AXI_USER_WIDTH = 0, // width of user_t (see typedef below) + parameter axi_pkg::xbar_cfg_t Cfg = '0, + parameter type rule_t = axi_pkg::xbar_rule_64_t +) ( + input logic clk_i, + input logic rst_ni, + input logic test_i, + AXI_BUS.Slave slv_ports [Cfg.NoSlvPorts-1:0], + AXI_BUS.Master mst_ports [Cfg.NoMstPorts-1:0], + input rule_t [Cfg.NoAddrRules-1:0] addr_map_i, + input logic [Cfg.NoSlvPorts-1:0] en_default_mst_port_i, + input logic [Cfg.NoSlvPorts-1:0][$clog2(Cfg.NoMstPorts)-1:0] default_mst_port_i +); + + localparam int unsigned AxiIdWidthMstPorts = Cfg.AxiIdWidthSlvPorts + $clog2(Cfg.NoSlvPorts); // master-side IDs are $clog2(NoSlvPorts) bits wider than slave-side IDs + + typedef logic [AxiIdWidthMstPorts -1:0] id_mst_t; + typedef logic [Cfg.AxiIdWidthSlvPorts -1:0] id_slv_t; + typedef logic [Cfg.AxiAddrWidth -1:0] addr_t; + typedef logic [Cfg.AxiDataWidth -1:0] data_t; + typedef logic [Cfg.AxiDataWidth/8 -1:0] strb_t; + typedef logic [AXI_USER_WIDTH -1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, id_mst_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, id_slv_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, id_mst_t, user_t) + 
`AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, id_slv_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, id_mst_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, id_slv_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, id_mst_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, id_slv_t, user_t) + `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) + `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) + `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t) + `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t) + + mst_req_t [Cfg.NoMstPorts-1:0] mst_reqs; + mst_resp_t [Cfg.NoMstPorts-1:0] mst_resps; + slv_req_t [Cfg.NoSlvPorts-1:0] slv_reqs; + slv_resp_t [Cfg.NoSlvPorts-1:0] slv_resps; + + for (genvar i = 0; i < Cfg.NoMstPorts; i++) begin : gen_assign_mst + `AXI_ASSIGN_FROM_REQ(mst_ports[i], mst_reqs[i]) + `AXI_ASSIGN_TO_RESP(mst_resps[i], mst_ports[i]) + end + + for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_assign_slv + `AXI_ASSIGN_TO_REQ(slv_reqs[i], slv_ports[i]) + `AXI_ASSIGN_FROM_RESP(slv_ports[i], slv_resps[i]) + end + + axi_xbar #( + .Cfg (Cfg), + .slv_aw_chan_t ( slv_aw_chan_t ), + .mst_aw_chan_t ( mst_aw_chan_t ), + .w_chan_t ( w_chan_t ), + .slv_b_chan_t ( slv_b_chan_t ), + .mst_b_chan_t ( mst_b_chan_t ), + .slv_ar_chan_t ( slv_ar_chan_t ), + .mst_ar_chan_t ( mst_ar_chan_t ), + .slv_r_chan_t ( slv_r_chan_t ), + .mst_r_chan_t ( mst_r_chan_t ), + .slv_req_t ( slv_req_t ), + .slv_resp_t ( slv_resp_t ), + .mst_req_t ( mst_req_t ), + .mst_resp_t ( mst_resp_t ), + .rule_t ( rule_t ) + ) i_xbar ( + .clk_i, + .rst_ni, + .test_i, + .slv_ports_req_i (slv_reqs ), + .slv_ports_resp_o (slv_resps), + .mst_ports_req_o (mst_reqs ), + .mst_ports_resp_i (mst_resps), + .addr_map_i, + .en_default_mst_port_i, + .default_mst_port_i + ); + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh 
b/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh new file mode 100644 index 0000000..b64f31a --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh @@ -0,0 +1,221 @@ +// Copyright 2018, 2021 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Stefan Mach +// Description: Common register defines for RTL designs + +`ifndef COMMON_CELLS_REGISTERS_SVH_ +`define COMMON_CELLS_REGISTERS_SVH_ + +// Abridged Summary of available FF macros: +// `FF: asynchronous active-low reset +// `FFAR: asynchronous active-high reset +// `FFARN: [deprecated] asynchronous active-low reset +// `FFSR: synchronous active-high reset +// `FFSRN: synchronous active-low reset +// `FFNR: without reset +// `FFL: load-enable and asynchronous active-low reset +// `FFLAR: load-enable and asynchronous active-high reset +// `FFLARN: [deprecated] load-enable and asynchronous active-low reset +// `FFLARNC: load-enable and asynchronous active-low reset and synchronous active-high clear +// `FFLSR: load-enable and synchronous active-high reset +// `FFLSRN: load-enable and synchronous active-low reset +// `FFLNR: load-enable without reset + +`ifdef VERILATOR +`define NO_SYNOPSYS_FF 1 +`endif + +`define REG_DFLT_CLK clk_i +`define REG_DFLT_RST rst_ni + +// Flip-Flop with 
asynchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// (__clk: clock input) +// (__arst_n: asynchronous reset, active-low; parenthesized on every use, so an expression is safe) +`define FF(__q, __d, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \ + always_ff @(posedge (__clk) or negedge (__arst_n)) begin \ + if (!(__arst_n)) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__d); \ + end \ + end + +// Flip-Flop with asynchronous active-high reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst: asynchronous reset, active-high +`define FFAR(__q, __d, __reset_value, __clk, __arst) \ + always_ff @(posedge (__clk) or posedge (__arst)) begin \ + if (__arst) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__d); \ + end \ + end + +// DEPRECATED - use `FF instead +// Flip-Flop with asynchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst_n: asynchronous reset, active-low +`define FFARN(__q, __d, __reset_value, __clk, __arst_n) \ + `FF(__q, __d, __reset_value, __clk, __arst_n) + +// Flip-Flop with synchronous active-high reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __reset_clk: reset input, active-high +`define FFSR(__q, __d, __reset_value, __clk, __reset_clk) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__reset_clk`" *``/ \ + `endif \ + always_ff @(posedge (__clk)) begin \ + __q <= (__reset_clk) ? 
(__reset_value) : (__d); \ + end + +// Flip-Flop with synchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __reset_n_clk: reset input, active-low (parenthesized before negation, so an expression is safe) +`define FFSRN(__q, __d, __reset_value, __clk, __reset_n_clk) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__reset_n_clk`" *``/ \ + `endif \ + always_ff @(posedge (__clk)) begin \ + __q <= (!(__reset_n_clk)) ? (__reset_value) : (__d); \ + end + +// Always-enable Flip-Flop without reset +// __q: Q output of FF +// __d: D input of FF +// __clk: clock input +`define FFNR(__q, __d, __clk) \ + always_ff @(posedge (__clk)) begin \ + __q <= (__d); \ + end + +// Flip-Flop with load-enable and asynchronous active-low reset (implicit clock and reset) +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// (__clk: clock input) +// (__arst_n: asynchronous reset, active-low; parenthesized on every use, so an expression is safe) +`define FFL(__q, __d, __load, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \ + always_ff @(posedge (__clk) or negedge (__arst_n)) begin \ + if (!(__arst_n)) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__load) ? (__d) : (__q); \ + end \ + end + +// Flip-Flop with load-enable and asynchronous active-high reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst: asynchronous reset, active-high +`define FFLAR(__q, __d, __load, __reset_value, __clk, __arst) \ + always_ff @(posedge (__clk) or posedge (__arst)) begin \ + if (__arst) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__load) ? 
(__d) : (__q); \ + end \ + end + +// DEPRECATED - use `FFL instead +// Flip-Flop with load-enable and asynchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst_n: asynchronous reset, active-low +`define FFLARN(__q, __d, __load, __reset_value, __clk, __arst_n) \ + `FFL(__q, __d, __load, __reset_value, __clk, __arst_n) + +// Flip-Flop with load-enable and synchronous active-high reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __reset_clk: reset input, active-high +`define FFLSR(__q, __d, __load, __reset_value, __clk, __reset_clk) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__reset_clk`" *``/ \ + `endif \ + always_ff @(posedge (__clk)) begin \ + __q <= (__reset_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \ + end + +// Flip-Flop with load-enable and synchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __reset_n_clk: reset input, active-low (parenthesized before negation, so an expression is safe) +`define FFLSRN(__q, __d, __load, __reset_value, __clk, __reset_n_clk) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__reset_n_clk`" *``/ \ + `endif \ + always_ff @(posedge (__clk)) begin \ + __q <= (!(__reset_n_clk)) ? (__reset_value) : ((__load) ? 
(__d) : (__q)); \ + end + +// Flip-Flop with load-enable and asynchronous active-low reset and synchronous clear +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __clear: assign reset value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst_n: asynchronous reset, active-low (parenthesized on every use, so an expression is safe) +`define FFLARNC(__q, __d, __load, __clear, __reset_value, __clk, __arst_n) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__clear`" *``/ \ + `endif \ + always_ff @(posedge (__clk) or negedge (__arst_n)) begin \ + if (!(__arst_n)) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__clear) ? (__reset_value) : (__load) ? (__d) : (__q); \ + end \ + end + +// Load-enable Flip-Flop without reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __clk: clock input +`define FFLNR(__q, __d, __load, __clk) \ + always_ff @(posedge (__clk)) begin \ + __q <= (__load) ? (__d) : (__q); \ + end + +`endif diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv b/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv new file mode 100644 index 0000000..90a43a0 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv @@ -0,0 +1,161 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Wolfgang Roenninger + +/// Address Decoder: Maps the input address combinatorially to an index. +/// The address map `addr_map_i` is a packed array of rule_t structs. +/// The ranges of any two rules may overlap. If so, the rule at the higher (more significant) +/// position in `addr_map_i` prevails. +/// +/// There can be an arbitrary number of address rules. There can be multiple +/// ranges defined for the same index. The start address has to be less than the end address. +/// +/// There is the possibility to add a default mapping: +/// `en_default_idx_i`: Driving this port to `1'b1` maps all input addresses +/// for which no rule in `addr_map_i` exists to the default index specified by +/// `default_idx_i`. In this case, `dec_error_o` is always `1'b0`. +/// +/// Assertions: The module checks every time there is a change in the address mapping +/// if the resulting map is valid. It fatals if `start_addr` is not lower than `end_addr` +/// or if a mapping targets an index that is outside the number of allowed indices. +/// It issues warnings if the address regions of any two mappings overlap. +module addr_decode #( + /// Number of indices; the `idx` of every rule has to be lower than this value. + parameter int unsigned NoIndices = 32'd0, + /// Total number of rules. + parameter int unsigned NoRules = 32'd0, + /// Address type inside the rules and to decode. + parameter type addr_t = logic, + /// Rule packed struct type. + /// The address decoder expects three fields in `rule_t`: + /// + /// typedef struct packed { + /// int unsigned idx; + /// addr_t start_addr; + /// addr_t end_addr; + /// } rule_t; + /// + /// - `idx`: index of the rule, has to be < `NoIndices` + /// - `start_addr`: start address of the range the rule describes, value is included in range + /// - `end_addr`: end address of the range the rule describes, value is NOT included in range + parameter type rule_t = logic, + /// Dependent parameter, do **not** overwrite! + /// + /// Width of the `idx_o` output port. 
+ parameter int unsigned IdxWidth = cf_math_pkg::idx_width(NoIndices), + /// Dependent parameter, do **not** overwrite! + /// + /// Type of the `idx_o` output port. + parameter type idx_t = logic [IdxWidth-1:0] +) ( + /// Address to decode. + input addr_t addr_i, + /// Address map: rule with the highest array position wins on collision + input rule_t [NoRules-1:0] addr_map_i, + /// Decoded index. + output idx_t idx_o, + /// Decode is valid. + output logic dec_valid_o, + /// Decode is not valid, no matching rule found. + output logic dec_error_o, + /// Enable default port mapping. + /// + /// When not used, tie to `0`. + input logic en_default_idx_i, + /// Default port index. + /// + /// When `en_default_idx_i` is `1`, this will be the index when no rule matches. + /// + /// When not used, tie to `0`. + input idx_t default_idx_i +); + + logic [NoRules-1:0] matched_rules; // purely for address map debugging + + always_comb begin + // default assignments + matched_rules = '0; + dec_valid_o = 1'b0; + dec_error_o = (en_default_idx_i) ? 1'b0 : 1'b1; + idx_o = (en_default_idx_i) ? 
default_idx_i : '0; + + // match the rules; a later (higher-position) match overwrites an earlier one + for (int unsigned i = 0; i < NoRules; i++) begin + if ((addr_i >= addr_map_i[i].start_addr) && (addr_i < addr_map_i[i].end_addr)) begin + matched_rules[i] = 1'b1; + dec_valid_o = 1'b1; + dec_error_o = 1'b0; + idx_o = idx_t'(addr_map_i[i].idx); + end + end + end + + // Assumptions and assertions + `ifndef VERILATOR + `ifndef XSIM + // pragma translate_off + initial begin : proc_check_parameters + assume ($bits(addr_i) == $bits(addr_map_i[0].start_addr)) else + $warning($sformatf("Input address has %d bits and address map has %d bits.", + $bits(addr_i), $bits(addr_map_i[0].start_addr))); + assume (NoRules > 0) else + $fatal(1, $sformatf("At least one rule needed")); + assume (NoIndices > 0) else + $fatal(1, $sformatf("At least one index needed")); + end + + assert final ($onehot0(matched_rules)) else + $warning("More than one bit set in the one-hot signal, matched_rules"); + + // These following assumptions check the validity of the address map. + // The assumptions get generated for each distinct pair of rules. + // Each assumption is present two times, as they rely on one rule being + // effectively ordered. Only one of the rules with the same function is + // active at a time for a given pair. + // check_start: Enforces a smaller start than end address. + // check_idx: Enforces a valid index in the rule. + // check_overlap: Warns if there are overlapping address regions. 
+ always @(addr_map_i) #0 begin : proc_check_addr_map + if (!$isunknown(addr_map_i)) begin + for (int unsigned i = 0; i < NoRules; i++) begin + check_start : assume (addr_map_i[i].start_addr < addr_map_i[i].end_addr) else + $fatal(1, $sformatf("This rule has a higher start than end address!!!\n\ + Violating rule %d.\n\ + Rule> IDX: %h START: %h END: %h\n\ + #####################################################", + i ,addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr)); + // check the SLV ids + check_idx : assume (addr_map_i[i].idx < NoIndices) else + $fatal(1, $sformatf("This rule has a IDX that is not allowed!!!\n\ + Violating rule %d.\n\ + Rule> IDX: %h START: %h END: %h\n\ + Rule> MAX_IDX: %h\n\ + #####################################################", + i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr, + (NoIndices-1))); + for (int unsigned j = i + 1; j < NoRules; j++) begin + // overlap check + check_overlap : assume (!((addr_map_i[j].start_addr < addr_map_i[i].end_addr) && + (addr_map_i[j].end_addr > addr_map_i[i].start_addr))) else + $warning($sformatf("Overlapping address region found!!!\n\ + Rule %d: IDX: %h START: %h END: %h\n\ + Rule %d: IDX: %h START: %h END: %h\n\ + #####################################################", + i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr, + j, addr_map_i[j].idx, addr_map_i[j].start_addr, addr_map_i[j].end_addr)); + end + end + end + end + // pragma translate_on + `endif + `endif +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv b/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv new file mode 100644 index 0000000..8e770ab --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv @@ -0,0 +1,175 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki + +/// A two-phase clock domain crossing: the source half toggles `async_req` for every accepted item, the destination half toggles `async_ack` once the item has been taken. +/// +/// CONSTRAINT: Requires max_delay of min_period(src_clk_i, dst_clk_i) through +/// the paths async_req, async_ack, async_data. +/* verilator lint_off DECLFILENAME */ +module cdc_2phase #( + parameter type T = logic +)( + input logic src_rst_ni, + input logic src_clk_i, + input T src_data_i, + input logic src_valid_i, + output logic src_ready_o, + + input logic dst_rst_ni, + input logic dst_clk_i, + output T dst_data_o, + output logic dst_valid_o, + input logic dst_ready_i +); + + // Asynchronous handshake signals. + (* dont_touch = "true" *) logic async_req; + (* dont_touch = "true" *) logic async_ack; + (* dont_touch = "true" *) T async_data; + + // The sender in the source domain. + cdc_2phase_src #(.T(T)) i_src ( + .rst_ni ( src_rst_ni ), + .clk_i ( src_clk_i ), + .data_i ( src_data_i ), + .valid_i ( src_valid_i ), + .ready_o ( src_ready_o ), + .async_req_o ( async_req ), + .async_ack_i ( async_ack ), + .async_data_o ( async_data ) + ); + + // The receiver in the destination domain. 
+ cdc_2phase_dst #(.T(T)) i_dst ( + .rst_ni ( dst_rst_ni ), + .clk_i ( dst_clk_i ), + .data_o ( dst_data_o ), + .valid_o ( dst_valid_o ), + .ready_i ( dst_ready_i ), + .async_req_i ( async_req ), + .async_ack_o ( async_ack ), + .async_data_i ( async_data ) + ); + +endmodule + + +/// Half of the two-phase clock domain crossing located in the source domain. +module cdc_2phase_src #( + parameter type T = logic +)( + input logic rst_ni, + input logic clk_i, + input T data_i, + input logic valid_i, + output logic ready_o, + output logic async_req_o, + input logic async_ack_i, + output T async_data_o +); + + (* dont_touch = "true" *) + logic req_src_q, ack_src_q, ack_q; + (* dont_touch = "true" *) + T data_src_q; + + // The req_src and data_src registers change when a new data item is accepted. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + req_src_q <= 0; + data_src_q <= '0; + end else if (valid_i && ready_o) begin + req_src_q <= ~req_src_q; + data_src_q <= data_i; + end + end + + // The ack_src and ack registers act as synchronization stages. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + ack_src_q <= 0; + ack_q <= 0; + end else begin + ack_src_q <= async_ack_i; + ack_q <= ack_src_q; + end + end + + // Output assignments. + assign ready_o = (req_src_q == ack_q); // ready while the request level equals the synchronized ack level (nothing outstanding) + assign async_req_o = req_src_q; + assign async_data_o = data_src_q; + +endmodule + + +/// Half of the two-phase clock domain crossing located in the destination +/// domain. 
+module cdc_2phase_dst #( + parameter type T = logic +)( + input logic rst_ni, + input logic clk_i, + output T data_o, + output logic valid_o, + input logic ready_i, + input logic async_req_i, + output logic async_ack_o, + input T async_data_i +); + + (* dont_touch = "true" *) + (* async_reg = "true" *) + logic req_dst_q, req_q0, req_q1, ack_dst_q; + (* dont_touch = "true" *) + T data_dst_q; + + // The ack_dst register changes when a new data item is accepted. 
+ always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + ack_dst_q <= 0; + end else if (valid_o && ready_i) begin + ack_dst_q <= ~ack_dst_q; + end + end + + // The data_dst register changes when a new data item is presented. This is + // indicated by the async_req line changing levels. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + data_dst_q <= '0; + end else if (req_q0 != req_q1 && !valid_o) begin + data_dst_q <= async_data_i; + end + end + + // The req_dst and req registers act as synchronization stages. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + req_dst_q <= 0; + req_q0 <= 0; + req_q1 <= 0; + end else begin + req_dst_q <= async_req_i; + req_q0 <= req_dst_q; + req_q1 <= req_q0; + end + end + + // Output assignments. + assign valid_o = (ack_dst_q != req_q1); // an item is pending while the synchronized request level differs from our ack level + assign data_o = data_dst_q; + assign async_ack_o = ack_dst_q; + +endmodule +/* verilator lint_on DECLFILENAME */ diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv b/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv new file mode 100644 index 0000000..9f35a44 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv @@ -0,0 +1,61 @@ +// Copyright 2016 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +/// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration +/// +/// This package contains a collection of mathematical functions that are commonly used when defining +/// the value of constants in HDL code. These functions are implemented as Verilog constant +/// functions. Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a +/// function whose value can be evaluated at compile time or during elaboration. A constant function +/// must be called with arguments that are constants. +package cf_math_pkg; + + /// Ceiled Division of Two Natural Numbers + /// + /// Returns the quotient of two natural numbers, rounded towards plus infinity. + function automatic integer ceil_div (input longint dividend, input longint divisor); + automatic longint remainder; + + // pragma translate_off + `ifndef VERILATOR + if (dividend < 0) begin + $fatal(1, "Dividend %0d is not a natural number!", dividend); + end + + if (divisor < 0) begin + $fatal(1, "Divisor %0d is not a natural number!", divisor); + end + + if (divisor == 0) begin + $fatal(1, "Division by zero!"); + end + `endif + // pragma translate_on + + remainder = dividend; // quotient is found by repeated subtraction; NOTE(review): with dividend > 0 and divisor <= 0 the loop below never terminates when the checks above are compiled out (VERILATOR / translate_off) - confirm callers only pass divisor > 0 + for (ceil_div = 0; remainder > 0; ceil_div++) begin + remainder = remainder - divisor; + end + endfunction + + /// Index width required to be able to represent up to `num_idx` indices as a binary + /// encoded signal. + /// Ensures that the minimum width of an index signal is `1`, regardless of parametrization. + /// + /// Sample usage in type definition: + /// As parameter: + /// `parameter type idx_t = logic[cf_math_pkg::idx_width(NumIdx)-1:0]` + /// As typedef: + /// `typedef logic [cf_math_pkg::idx_width(NumIdx)-1:0] idx_t` + function automatic integer unsigned idx_width (input integer unsigned num_idx); + return (num_idx > 32'd1) ? 
unsigned'($clog2(num_idx)) : 32'd1; + endfunction + +endpackage diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv new file mode 100644 index 0000000..43392e4 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv @@ -0,0 +1,43 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Florian Zaruba +// Description: Generic up/down counter (a delta_counter instance with a fixed step of 1) + +module counter #( + parameter int unsigned WIDTH = 4, + parameter bit STICKY_OVERFLOW = 1'b0 +)( + input logic clk_i, + input logic rst_ni, + input logic clear_i, // synchronous clear + input logic en_i, // enable the counter + input logic load_i, // load a new value + input logic down_i, // downcount, default is up + input logic [WIDTH-1:0] d_i, // forwarded to delta_counter.d_i + output logic [WIDTH-1:0] q_o, // forwarded from delta_counter.q_o + output logic overflow_o +); + delta_counter #( + .WIDTH (WIDTH), + .STICKY_OVERFLOW (STICKY_OVERFLOW) + ) i_counter ( + .clk_i, + .rst_ni, + .clear_i, + .en_i, + .load_i, + .down_i, + .delta_i({{WIDTH-1{1'b0}}, 1'b1}), // WIDTH-bit constant 1: count in steps of one + .d_i, + .q_o, + .overflow_o + ); +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv new file mode 100644 index 0000000..90b5cff --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv @@ -0,0 +1,74 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Up/down counter with variable delta (priority of the controls: clear_i, then load_i, then en_i) + +module delta_counter #( + parameter int unsigned WIDTH = 4, + parameter bit STICKY_OVERFLOW = 1'b0 +)( + input logic clk_i, + input logic rst_ni, + input logic clear_i, // synchronous clear + input logic en_i, // enable the counter + input logic load_i, // load a new value + input logic down_i, // downcount, default is up + input logic [WIDTH-1:0] delta_i, // step added to (or, with down_i, subtracted from) the count while en_i is set + input logic [WIDTH-1:0] d_i, // value taken over when load_i is set + output logic [WIDTH-1:0] q_o, // lower WIDTH bits of the internal count + output logic overflow_o +); + logic [WIDTH:0] counter_q, counter_d; // one bit wider than q_o; bit WIDTH is the overflow flag in the non-sticky variant + if (STICKY_OVERFLOW) begin : gen_sticky_overflow + logic overflow_d, overflow_q; + always_ff @(posedge clk_i or negedge rst_ni) overflow_q <= ~rst_ni ? 
1'b0 : overflow_d; + always_comb begin + overflow_d = overflow_q; // sticky: holds its value until clear_i or load_i + if (clear_i || load_i) begin + overflow_d = 1'b0; + end else if (!overflow_q && en_i) begin + if (down_i) begin + overflow_d = delta_i > counter_q[WIDTH-1:0]; // subtracting more than the current count + end else begin + overflow_d = counter_q[WIDTH-1:0] > ({WIDTH{1'b1}} - delta_i); // count + delta would exceed the all-ones value + end + end + end + assign overflow_o = overflow_q; + end else begin : gen_transient_overflow + // counter overflowed if the MSB is set + assign overflow_o = counter_q[WIDTH]; + end + assign q_o = counter_q[WIDTH-1:0]; + + always_comb begin + counter_d = counter_q; + + if (clear_i) begin + counter_d = '0; + end else if (load_i) begin + counter_d = {1'b0, d_i}; + end else if (en_i) begin + if (down_i) begin + counter_d = counter_q - delta_i; + end else begin + counter_d = counter_q + delta_i; + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + counter_q <= '0; + end else begin + counter_q <= counter_d; + end + end +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv new file mode 100644 index 0000000..31295e8 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv @@ -0,0 +1,57 @@ 
+// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba + +/* verilator lint_off DECLFILENAME */ +module fifo #( + parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode + parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic + parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 + parameter int unsigned THRESHOLD = 1, // fill count until when to assert threshold_o + parameter type dtype = logic [DATA_WIDTH-1:0] +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue + input logic testmode_i, // test_mode to bypass clock gating + // status flags + output logic full_o, // queue is full + output logic empty_o, // queue is empty + output logic threshold_o, // the FIFO is above the specified threshold + // as long as the queue is not full we can push new data + input dtype data_i, // data to push into the queue + input logic push_i, // data is valid and can be pushed to the queue + // as long as the queue is not empty we can pop new elements + output dtype data_o, // output data + input logic pop_i // pop head from queue +); + fifo_v2 #( + .FALL_THROUGH ( FALL_THROUGH ), + .DATA_WIDTH ( DATA_WIDTH ), + .DEPTH ( DEPTH ), + .ALM_FULL_TH ( THRESHOLD ), + .dtype ( dtype ) + ) impl ( + .clk_i ( 
clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_i ), + .testmode_i ( testmode_i ), + .full_o ( full_o ), + .empty_o ( empty_o ), + .alm_full_o ( threshold_o ), + .alm_empty_o ( ), + .data_i ( data_i ), + .push_i ( push_i ), + .data_o ( data_o ), + .pop_i ( pop_i ) + ); +endmodule +/* verilator lint_on DECLFILENAME */ diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv new file mode 100644 index 0000000..9c87ed9 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv @@ -0,0 +1,79 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba + +module fifo_v2 #( + parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode + parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic + parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 + parameter int unsigned ALM_EMPTY_TH = 1, // almost empty threshold (when to assert alm_empty_o) + parameter int unsigned ALM_FULL_TH = 1, // almost full threshold (when to assert alm_full_o) + parameter type dtype = logic [DATA_WIDTH-1:0], + // DO NOT OVERWRITE THIS PARAMETER + parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? 
$clog2(DEPTH) : 1 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue + input logic testmode_i, // test_mode to bypass clock gating + // status flags + output logic full_o, // queue is full + output logic empty_o, // queue is empty + output logic alm_full_o, // FIFO fillstate >= the specified threshold + output logic alm_empty_o, // FIFO fillstate <= the specified threshold + // as long as the queue is not full we can push new data + input dtype data_i, // data to push into the queue + input logic push_i, // data is valid and can be pushed to the queue + // as long as the queue is not empty we can pop new elements + output dtype data_o, // output data + input logic pop_i // pop head from queue +); + + logic [ADDR_DEPTH-1:0] usage; + + // generate threshold parameters + if (DEPTH == 0) begin + assign alm_full_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0 + assign alm_empty_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0 + end else begin + assign alm_full_o = (usage >= ALM_FULL_TH[ADDR_DEPTH-1:0]); + assign alm_empty_o = (usage <= ALM_EMPTY_TH[ADDR_DEPTH-1:0]); + end + + fifo_v3 #( + .FALL_THROUGH ( FALL_THROUGH ), + .DATA_WIDTH ( DATA_WIDTH ), + .DEPTH ( DEPTH ), + .dtype ( dtype ) + ) i_fifo_v3 ( + .clk_i, + .rst_ni, + .flush_i, + .testmode_i, + .full_o, + .empty_o, + .usage_o (usage), + .data_i, + .push_i, + .data_o, + .pop_i + ); + + // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ALM_FULL_TH <= DEPTH) else $error("ALM_FULL_TH can't be larger than the DEPTH."); + assert (ALM_EMPTY_TH <= DEPTH) else $error("ALM_EMPTY_TH can't be larger than the DEPTH."); + end + `endif + // pragma translate_on + +endmodule // fifo_v2 diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv b/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv new file mode 100644 index 0000000..91dccb0 --- 
/dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv @@ -0,0 +1,98 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 10.04.2019 +// Description: exponential backoff counter with randomization. +// +// For each failed trial (set_i pulsed), this unit exponentially increases the +// (average) backoff time by masking an LFSR with a shifted mask in order to +// create the backoff counter initial value. +// +// The shift register mask and the counter value are both reset to '0 in case of +// a successful trial (clr_i). 
+// + +module exp_backoff #( + /// Seed for 16bit LFSR + parameter int unsigned Seed = 'hffff, + /// 2**MaxExp-1 determines the maximum range from which random wait counts are drawn + parameter int unsigned MaxExp = 16 +) ( + input logic clk_i, + input logic rst_ni, + /// Sets the backoff counter (pulse) -> use when trial did not succeed + input logic set_i, + /// Clears the backoff counter (pulse) -> use when trial succeeded + input logic clr_i, + /// Indicates whether the backoff counter is equal to zero and a new trial can be launched + output logic is_zero_o +); + + // leave this constant + localparam int unsigned WIDTH = 16; + + logic [WIDTH-1:0] lfsr_d, lfsr_q, cnt_d, cnt_q, mask_d, mask_q; + logic lfsr; + + // generate random wait counts + // note: we use a flipped lfsr here to + // avoid strange correlation effects between + // the (left-shifted) mask and the lfsr + assign lfsr = lfsr_q[15-15] ^ + lfsr_q[15-13] ^ + lfsr_q[15-12] ^ + lfsr_q[15-10]; + + assign lfsr_d = (set_i) ? {lfsr, lfsr_q[$high(lfsr_q):1]} : + lfsr_q; + + // mask the wait counts with exponentially increasing mask (shift reg) + assign mask_d = (clr_i) ? '0 : + (set_i) ? {{(WIDTH-MaxExp){1'b0}},mask_q[MaxExp-2:0], 1'b1} : + mask_q; + + assign cnt_d = (clr_i) ? '0 : + (set_i) ? (mask_q & lfsr_q) : + (!is_zero_o) ? 
cnt_q - 1'b1 : '0; + + assign is_zero_o = (cnt_q=='0); + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + lfsr_q <= WIDTH'(Seed); + mask_q <= '0; + cnt_q <= '0; + end else begin + lfsr_q <= lfsr_d; + mask_q <= mask_d; + cnt_q <= cnt_d; + end + end + +/////////////////////////////////////////////////////// +// assertions +/////////////////////////////////////////////////////// + +//pragma translate_off +`ifndef VERILATOR + initial begin + // assert wrong parameterizations + assert (MaxExp>0) + else $fatal(1,"MaxExp must be greater than 0"); + assert (MaxExp<=16) + else $fatal(1,"MaxExp cannot be greater than 16"); + assert (Seed>0) + else $fatal(1,"Zero seed is not allowed for LFSR"); + end +`endif +//pragma translate_on + +endmodule // exp_backoff diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv b/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv new file mode 100644 index 0000000..11b77e0 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv @@ -0,0 +1,191 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Florian Zaruba + +module fifo_v3 #( + parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode + parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic + parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 + parameter type dtype = logic [DATA_WIDTH-1:0], + // DO NOT OVERWRITE THIS PARAMETER + parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? $clog2(DEPTH) : 1 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue + input logic testmode_i, // test_mode to bypass clock gating + // status flags + output logic full_o, // queue is full + output logic empty_o, // queue is empty + output logic [ADDR_DEPTH-1:0] usage_o, // fill pointer + // as long as the queue is not full we can push new data + input dtype data_i, // data to push into the queue + input logic push_i, // data is valid and can be pushed to the queue + // as long as the queue is not empty we can pop new elements + output dtype data_o, // output data + input logic pop_i // pop head from queue +); + // local parameter + // FIFO depth - handle the case of pass-through, synthesizer will do constant propagation + localparam int unsigned FifoDepth = (DEPTH > 0) ? 
DEPTH : 1; + // clock gating control + logic gate_clock; + // pointer to the read and write section of the queue + logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q; + // keep a counter to keep track of the current queue status + // this integer will be truncated by the synthesis tool + logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q; + // actual memory + dtype [FifoDepth - 1:0] mem_n, mem_q; + + // fifo ram signals for fpga target + logic fifo_ram_we; + logic [ADDR_DEPTH-1:0] fifo_ram_read_address; + logic [ADDR_DEPTH-1:0] fifo_ram_write_address; + logic [$bits(dtype)-1:0] fifo_ram_wdata; + logic [$bits(dtype)-1:0] fifo_ram_rdata; + + assign usage_o = status_cnt_q[ADDR_DEPTH-1:0]; + + if (DEPTH == 0) begin : gen_pass_through + assign empty_o = ~push_i; + assign full_o = ~pop_i; + end else begin : gen_fifo + assign full_o = (status_cnt_q == FifoDepth[ADDR_DEPTH:0]); + assign empty_o = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i); + end + // status flags + + // read and write queue logic + always_comb begin : read_write_comb + // default assignment + read_pointer_n = read_pointer_q; + write_pointer_n = write_pointer_q; + status_cnt_n = status_cnt_q; + if (ariane_pkg::FPGA_EN) begin + fifo_ram_we = '0; + fifo_ram_read_address = read_pointer_q; + fifo_ram_write_address = '0; + fifo_ram_wdata = '0; + data_o = (DEPTH == 0) ? data_i : fifo_ram_rdata; + end else begin + data_o = (DEPTH == 0) ? 
data_i : mem_q[read_pointer_q]; + mem_n = mem_q; + gate_clock = 1'b1; + end + + // push a new element to the queue + if (push_i && ~full_o) begin + if (ariane_pkg::FPGA_EN) begin + fifo_ram_we = 1'b1; + fifo_ram_write_address = write_pointer_q; + fifo_ram_wdata = data_i; + end else begin + // push the data onto the queue + mem_n[write_pointer_q] = data_i; + // un-gate the clock, we want to write something + gate_clock = 1'b0; + end + + // increment the write counter + if (write_pointer_q == FifoDepth[ADDR_DEPTH-1:0] - 1) + write_pointer_n = '0; + else + write_pointer_n = write_pointer_q + 1; + // increment the overall counter + status_cnt_n = status_cnt_q + 1; + end + + if (pop_i && ~empty_o) begin + // read from the queue is a default assignment + // but increment the read pointer... + if (read_pointer_n == FifoDepth[ADDR_DEPTH-1:0] - 1) + read_pointer_n = '0; + else + read_pointer_n = read_pointer_q + 1; + // ... and decrement the overall count + status_cnt_n = status_cnt_q - 1; + end + + // keep the count pointer stable if we push and pop at the same time + if (push_i && pop_i && ~full_o && ~empty_o) + status_cnt_n = status_cnt_q; + + // FIFO is in pass through mode -> do not change the pointers + if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin + data_o = data_i; + if (pop_i) begin + status_cnt_n = status_cnt_q; + read_pointer_n = read_pointer_q; + write_pointer_n = write_pointer_q; + end + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + read_pointer_q <= '0; + write_pointer_q <= '0; + status_cnt_q <= '0; + end else begin + if (flush_i) begin + read_pointer_q <= '0; + write_pointer_q <= '0; + status_cnt_q <= '0; + end else begin + read_pointer_q <= read_pointer_n; + write_pointer_q <= write_pointer_n; + status_cnt_q <= status_cnt_n; + end + end + end + + if (ariane_pkg::FPGA_EN) begin : gen_fpga_queue + AsyncDpRam #( + .ADDR_WIDTH (ADDR_DEPTH), + .DATA_DEPTH (DEPTH), + .DATA_WIDTH 
($bits(dtype)) + ) fifo_ram ( + .Clk_CI ( clk_i ), + .WrEn_SI ( fifo_ram_we ), + .RdAddr_DI ( fifo_ram_read_address ), + .WrAddr_DI ( fifo_ram_write_address ), + .WrData_DI ( fifo_ram_wdata ), + .RdData_DO ( fifo_ram_rdata ) + ); + end else begin : gen_asic_queue + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + mem_q <= '0; + end else if (!gate_clock) begin + mem_q <= mem_n; + end + end + end + +// pragma translate_off +`ifndef VERILATOR + initial begin + assert (DEPTH > 0) else $error("DEPTH must be greater than 0."); + end + + full_write : assert property( + @(posedge clk_i) disable iff (~rst_ni) (full_o |-> ~push_i)) + else $fatal (1, "Trying to push new data although the FIFO is full."); + + empty_read : assert property( + @(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i)) + else $fatal (1, "Trying to pop data although the FIFO is empty."); +`endif +// pragma translate_on + +endmodule // fifo_v3 diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv new file mode 100644 index 0000000..aae2e2d --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv @@ -0,0 +1,315 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Michael Schaffner , ETH Zurich +// Date: 26.04.2019 +// +// Description: This is a parametric LFSR with precomputed coefficients for +// LFSR lengths from 4 to 64bit. + +// Additional block cipher layers can be instantiated to non-linearly transform +// the pseudo-random LFSR sequence at the output, and hence break the shifting +// patterns. The additional cipher layers can only be used for an LFSR width +// of 64bit, since the block cipher has been designed for that block length. + +module lfsr #( + parameter int unsigned LfsrWidth = 64, // [4,64] + parameter int unsigned OutWidth = 8, // [1,LfsrWidth] + parameter logic [LfsrWidth-1:0] RstVal = '1, // [1,2^LfsrWidth-1] + // 0: disabled, the present cipher uses 31, but just a few layers (1-3) are enough + // to break linear shifting patterns + parameter int unsigned CipherLayers = 0, + parameter bit CipherReg = 1'b1 // additional output reg after cipher +) ( + input logic clk_i, + input logic rst_ni, + input logic en_i, + output logic [OutWidth-1:0] out_o +); + +// Galois LFSR feedback masks +// Automatically generated with get_lfsr_masks.py +// Masks are from https://users.ece.cmu.edu/~koopman/lfsr/ +localparam logic [63:0] Masks [4:64] = '{64'hC, + 64'h1E, + 64'h39, + 64'h7E, + 64'hFA, + 64'h1FD, + 64'h3FC, + 64'h64B, + 64'hD8F, + 64'h1296, + 64'h2496, + 64'h4357, + 64'h8679, + 64'h1030E, + 64'h206CD, + 64'h403FE, + 64'h807B8, + 64'h1004B2, + 64'h2006A8, + 64'h4004B2, + 64'h800B87, + 64'h10004F3, + 64'h200072D, + 64'h40006AE, + 64'h80009E3, + 64'h10000583, + 64'h20000C92, + 64'h400005B6, + 64'h80000EA6, + 64'h1000007A3, + 64'h200000ABF, + 64'h400000842, + 64'h80000123E, + 64'h100000074E, + 64'h2000000AE9, + 64'h400000086A, + 64'h8000001213, + 64'h1000000077E, + 64'h2000000123B, + 64'h40000000877, + 64'h8000000108D, + 64'h100000000AE9, + 64'h200000000E9F, + 64'h4000000008A6, + 64'h80000000191E, + 64'h100000000090E, + 64'h2000000000FB3, + 64'h4000000000D7D, + 64'h80000000016A5, + 64'h10000000000B4B, 
+ 64'h200000000010AF, + 64'h40000000000DDE, + 64'h8000000000181A, + 64'h100000000000B65, + 64'h20000000000102D, + 64'h400000000000CD5, + 64'h8000000000024C1, + 64'h1000000000000EF6, + 64'h2000000000001363, + 64'h4000000000000FCD, + 64'h80000000000019E2}; + +// this S-box and permutation P has been taken from the Present Cipher, +// a super lightweight block cipher. use the cipher layers to add additional +// non-linearity to the LFSR output. note one layer does not fully correspond +// to the present cipher round, since the key and rekeying function is not applied here. +// +// See also: +// "PRESENT: An Ultra-Lightweight Block Cipher", A. Bogdanov et al., Ches 2007 +// http://www.lightweightcrypto.org/present/present_ches2007.pdf + +// this is the sbox from the present cipher +localparam logic[15:0][3:0] Sbox4 = {4'h2, 4'h1, 4'h7, 4'h4, + 4'h8, 4'hF, 4'hE, 4'h3, + 4'hD, 4'hA, 4'h0, 4'h9, + 4'hB, 4'h6, 4'h5, 4'hC }; + +// these are the permutation indices of the present cipher +localparam logic[63:0][5:0] Perm = {6'd63, 6'd47, 6'd31, 6'd15, 6'd62, 6'd46, 6'd30, 6'd14, + 6'd61, 6'd45, 6'd29, 6'd13, 6'd60, 6'd44, 6'd28, 6'd12, + 6'd59, 6'd43, 6'd27, 6'd11, 6'd58, 6'd42, 6'd26, 6'd10, + 6'd57, 6'd41, 6'd25, 6'd09, 6'd56, 6'd40, 6'd24, 6'd08, + 6'd55, 6'd39, 6'd23, 6'd07, 6'd54, 6'd38, 6'd22, 6'd06, + 6'd53, 6'd37, 6'd21, 6'd05, 6'd52, 6'd36, 6'd20, 6'd04, + 6'd51, 6'd35, 6'd19, 6'd03, 6'd50, 6'd34, 6'd18, 6'd02, + 6'd49, 6'd33, 6'd17, 6'd01, 6'd48, 6'd32, 6'd16, 6'd00}; + + +function automatic logic [63:0] sbox4_layer(logic [63:0] in); + logic [63:0] out; + //for (logic [4:0] j = '0; j<16; j++) out[j*4 +: 4] = sbox4[in[j*4 +: 4]]; + // this simulates much faster than the loop + out[0*4 +: 4] = Sbox4[in[0*4 +: 4]]; + out[1*4 +: 4] = Sbox4[in[1*4 +: 4]]; + out[2*4 +: 4] = Sbox4[in[2*4 +: 4]]; + out[3*4 +: 4] = Sbox4[in[3*4 +: 4]]; + + out[4*4 +: 4] = Sbox4[in[4*4 +: 4]]; + out[5*4 +: 4] = Sbox4[in[5*4 +: 4]]; + out[6*4 +: 4] = Sbox4[in[6*4 +: 4]]; + out[7*4 +: 4] = 
Sbox4[in[7*4 +: 4]]; + + out[8*4 +: 4] = Sbox4[in[8*4 +: 4]]; + out[9*4 +: 4] = Sbox4[in[9*4 +: 4]]; + out[10*4 +: 4] = Sbox4[in[10*4 +: 4]]; + out[11*4 +: 4] = Sbox4[in[11*4 +: 4]]; + + out[12*4 +: 4] = Sbox4[in[12*4 +: 4]]; + out[13*4 +: 4] = Sbox4[in[13*4 +: 4]]; + out[14*4 +: 4] = Sbox4[in[14*4 +: 4]]; + out[15*4 +: 4] = Sbox4[in[15*4 +: 4]]; + return out; +endfunction : sbox4_layer + +function automatic logic [63:0] perm_layer(logic [63:0] in); + logic [63:0] out; + // for (logic [7:0] j = '0; j<64; j++) out[perm[j]] = in[j]; + // this simulates much faster than the loop + out[Perm[0]] = in[0]; + out[Perm[1]] = in[1]; + out[Perm[2]] = in[2]; + out[Perm[3]] = in[3]; + out[Perm[4]] = in[4]; + out[Perm[5]] = in[5]; + out[Perm[6]] = in[6]; + out[Perm[7]] = in[7]; + out[Perm[8]] = in[8]; + out[Perm[9]] = in[9]; + + out[Perm[10]] = in[10]; + out[Perm[11]] = in[11]; + out[Perm[12]] = in[12]; + out[Perm[13]] = in[13]; + out[Perm[14]] = in[14]; + out[Perm[15]] = in[15]; + out[Perm[16]] = in[16]; + out[Perm[17]] = in[17]; + out[Perm[18]] = in[18]; + out[Perm[19]] = in[19]; + + out[Perm[20]] = in[20]; + out[Perm[21]] = in[21]; + out[Perm[22]] = in[22]; + out[Perm[23]] = in[23]; + out[Perm[24]] = in[24]; + out[Perm[25]] = in[25]; + out[Perm[26]] = in[26]; + out[Perm[27]] = in[27]; + out[Perm[28]] = in[28]; + out[Perm[29]] = in[29]; + + out[Perm[30]] = in[30]; + out[Perm[31]] = in[31]; + out[Perm[32]] = in[32]; + out[Perm[33]] = in[33]; + out[Perm[34]] = in[34]; + out[Perm[35]] = in[35]; + out[Perm[36]] = in[36]; + out[Perm[37]] = in[37]; + out[Perm[38]] = in[38]; + out[Perm[39]] = in[39]; + + out[Perm[40]] = in[40]; + out[Perm[41]] = in[41]; + out[Perm[42]] = in[42]; + out[Perm[43]] = in[43]; + out[Perm[44]] = in[44]; + out[Perm[45]] = in[45]; + out[Perm[46]] = in[46]; + out[Perm[47]] = in[47]; + out[Perm[48]] = in[48]; + out[Perm[49]] = in[49]; + + out[Perm[50]] = in[50]; + out[Perm[51]] = in[51]; + out[Perm[52]] = in[52]; + out[Perm[53]] = in[53]; + out[Perm[54]] = 
in[54]; + out[Perm[55]] = in[55]; + out[Perm[56]] = in[56]; + out[Perm[57]] = in[57]; + out[Perm[58]] = in[58]; + out[Perm[59]] = in[59]; + + out[Perm[60]] = in[60]; + out[Perm[61]] = in[61]; + out[Perm[62]] = in[62]; + out[Perm[63]] = in[63]; + return out; +endfunction : perm_layer + +//////////////////////////////////////////////////////////////////////// +// lfsr +//////////////////////////////////////////////////////////////////////// + +logic [LfsrWidth-1:0] lfsr_d, lfsr_q; +assign lfsr_d = + (en_i) ? (lfsr_q>>1) ^ ({LfsrWidth{lfsr_q[0]}} & Masks[LfsrWidth][LfsrWidth-1:0]) : lfsr_q; + +always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + //$display("%b %h", en_i, lfsr_d); + if (!rst_ni) begin + lfsr_q <= LfsrWidth'(RstVal); + end else begin + lfsr_q <= lfsr_d; + end +end + +//////////////////////////////////////////////////////////////////////// +// block cipher layers +//////////////////////////////////////////////////////////////////////// + +if (CipherLayers > unsigned'(0)) begin : g_cipher_layers + logic [63:0] ciph_layer; + localparam int unsigned NumRepl = ((64+LfsrWidth)/LfsrWidth); + + always_comb begin : p_ciph_layer + automatic logic [63:0] tmp; + tmp = 64'({NumRepl{lfsr_q}}); + for(int unsigned k = 0; k < CipherLayers; k++) begin + tmp = perm_layer(sbox4_layer(tmp)); + end + ciph_layer = tmp; + end + + // additional output reg after cipher + if (CipherReg) begin : g_cipher_reg + logic [OutWidth-1:0] out_d, out_q; + + assign out_d = (en_i) ? 
ciph_layer[OutWidth-1:0] : out_q; + assign out_o = out_q[OutWidth-1:0]; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + out_q <= '0; + end else begin + out_q <= out_d; + end + end + // no outreg + end else begin : g_no_out_reg + assign out_o = ciph_layer[OutWidth-1:0]; + end + +// no block cipher +end else begin : g_no_cipher_layers + assign out_o = lfsr_q[OutWidth-1:0]; +end + +//////////////////////////////////////////////////////////////////////// +// assertions +//////////////////////////////////////////////////////////////////////// + +// pragma translate_off +initial begin + // these are the LUT limits + assert(OutWidth <= LfsrWidth) else + $fatal(1,"OutWidth must be smaller equal the LfsrWidth."); + assert(RstVal > unsigned'(0)) else + $fatal(1,"RstVal must be nonzero."); + assert((LfsrWidth >= $low(Masks)) && (LfsrWidth <= $high(Masks))) else + $fatal(1,"Unsupported LfsrWidth."); + assert(Masks[LfsrWidth][LfsrWidth-1]) else + $fatal(1, "LFSR mask is not correct. The MSB must be 1." ); + assert((CipherLayers > 0) && (LfsrWidth == 64) || (CipherLayers == 0)) else + $fatal(1, "Use additional cipher layers only in conjunction with an LFSR width of 64 bit." ); +end + +`ifndef VERILATOR + all_zero: assert property ( + @(posedge clk_i) disable iff (!rst_ni) en_i |-> lfsr_d) + else $fatal(1,"Lfsr must not be all-zero."); +`endif +// pragma translate_on + +endmodule // lfsr diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv new file mode 100644 index 0000000..3fc93c7 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv @@ -0,0 +1,68 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba, ETH Zurich +// Date: 5.11.2018 +// Description: 16-bit LFSR + +// -------------- +// 16-bit LFSR +// -------------- +// +// Description: 16-bit shift register; while en_i is set it shifts left and shifts in the XNOR of bits 15, 12, 5 and 1 +// +module lfsr_16bit #( + parameter logic [15:0] SEED = 8'b0, + parameter int unsigned WIDTH = 16 +)( + input logic clk_i, + input logic rst_ni, + input logic en_i, + output logic [WIDTH-1:0] refill_way_oh, + output logic [$clog2(WIDTH)-1:0] refill_way_bin +); + + localparam int unsigned LogWidth = $clog2(WIDTH); + + logic [15:0] shift_d, shift_q; + + + always_comb begin + + automatic logic shift_in; + shift_in = !(shift_q[15] ^ shift_q[12] ^ shift_q[5] ^ shift_q[1]); + + shift_d = shift_q; + + if (en_i) + shift_d = {shift_q[14:0], shift_in}; + + // output assignment: the low LogWidth bits of shift_q pick the one-hot way; refill_way_bin is driven from shift_q (only its low-order bits fit the port) + refill_way_oh = 'b0; + refill_way_oh[shift_q[LogWidth-1:0]] = 1'b1; + refill_way_bin = shift_q; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ + if(~rst_ni) begin + shift_q <= SEED; + end else begin + shift_q <= shift_d; + end + end + + //pragma translate_off + initial begin + assert (WIDTH <= 16) + else $fatal(1, "WIDTH needs to be less than 16 because of the 16-bit LFSR"); + end + //pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv new file mode 100644 index 0000000..60fdf19 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv @@ -0,0 +1,61 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Igor Loi - University of Bologna +// Author: Florian Zaruba, ETH Zurich +// Date: 12.11.2017 +// Description: 8-bit LFSR + +/// 8 bit Linear Feedback Shift register: while en_i is set it shifts left and shifts in the XNOR of bits 7, 3, 2 and 1; resets to SEED +module lfsr_8bit #( + parameter logic [7:0] SEED = 8'b0, + parameter int unsigned WIDTH = 8 +) ( + input logic clk_i, + input logic rst_ni, + input logic en_i, + output logic [ WIDTH-1:0] refill_way_oh, + output logic [$clog2(WIDTH)-1:0] refill_way_bin +); + + localparam int unsigned LogWidth = $clog2(WIDTH); + + logic [7:0] shift_d, shift_q; + + always_comb begin + + automatic logic shift_in; + shift_in = !(shift_q[7] ^ shift_q[3] ^ shift_q[2] ^ shift_q[1]); + + shift_d = shift_q; + + if (en_i) shift_d = {shift_q[6:0], shift_in}; + + // output assignment: the low LogWidth bits of shift_q pick the one-hot way; refill_way_bin is driven from shift_q (only its low-order bits fit the port) + refill_way_oh = 'b0; + refill_way_oh[shift_q[LogWidth - 1:0]] = 1'b1; + refill_way_bin = shift_q; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ + if (~rst_ni) begin + shift_q <= SEED; + end else begin + shift_q <= shift_d; + end + end + + //pragma translate_off + initial begin + assert (WIDTH <= 8) else $fatal(1, "WIDTH needs to be less than 8 because of the 8-bit LFSR"); + end + //pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv new file mode 100644 index 0000000..424eb2e --- /dev/null 
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv @@ -0,0 +1,112 @@ +// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. + +/// A trailing zero counter / leading zero counter. +/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB) +/// Set MODE to 1 for leading zero counter => cnt_o is the number of leading zeros (from the MSB) +/// If the input does not contain a one (i.e. all bits are zero), `empty_o` is asserted. Additionally `cnt_o` contains +/// the maximum number of zeros - 1. For example: +/// in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0) +/// in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0) +/// in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0) +/// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only). +/// This speeds up simulation significantly. +module lzc #( + /// The width of the input vector. + parameter int unsigned WIDTH = 2, + /// Mode selection: 0 -> trailing zero, 1 -> leading zero + parameter bit MODE = 1'b0, + /// Dependent parameter. Do **not** change! + /// + /// Width of the output signal with the zero count. + parameter int unsigned CNT_WIDTH = cf_math_pkg::idx_width(WIDTH) +) ( + /// Input vector to be counted. + input logic [WIDTH-1:0] in_i, + /// Count of the leading / trailing zeros. 
+ output logic [CNT_WIDTH-1:0] cnt_o, + /// Counter is empty: Asserted if all bits in in_i are zero. + output logic empty_o +); + + if (WIDTH == 1) begin : gen_degenerate_lzc + + assign cnt_o[0] = !in_i[0]; + assign empty_o = !in_i[0]; + + end else begin : gen_lzc + + localparam int unsigned NumLevels = $clog2(WIDTH); + + // pragma translate_off + initial begin + assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide"); + end + // pragma translate_on + + logic [WIDTH-1:0][NumLevels-1:0] index_lut; + logic [2**NumLevels-1:0] sel_nodes; + logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes; + + logic [WIDTH-1:0] in_tmp; + + // reverse vector if required + always_comb begin : flip_vector + for (int unsigned i = 0; i < WIDTH; i++) begin + in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i]; + end + end + + for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut + assign index_lut[j] = (NumLevels)'(unsigned'(j)); + end + + for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : g_levels + if (unsigned'(level) == NumLevels - 1) begin : g_last_level + for (genvar k = 0; k < 2 ** level; k++) begin : g_level + // if two successive indices are still in the vector... + if (unsigned'(k) * 2 < WIDTH - 1) begin : g_reduce + assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2] | in_tmp[k * 2 + 1]; + assign index_nodes[2 ** level - 1 + k] = (in_tmp[k * 2] == 1'b1) + ? index_lut[k * 2] : + index_lut[k * 2 + 1]; + end + // if only the first index is still in the vector... 
+ if (unsigned'(k) * 2 == WIDTH - 1) begin : g_base + assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2]; + assign index_nodes[2 ** level - 1 + k] = index_lut[k * 2]; + end + // if index is out of range + if (unsigned'(k) * 2 > WIDTH - 1) begin : g_out_of_range + assign sel_nodes[2 ** level - 1 + k] = 1'b0; + assign index_nodes[2 ** level - 1 + k] = '0; + end + end + end else begin : g_not_last_level + for (genvar l = 0; l < 2 ** level; l++) begin : g_level + assign sel_nodes[2 ** level - 1 + l] = + sel_nodes[2 ** (level + 1) - 1 + l * 2] | sel_nodes[2 ** (level + 1) - 1 + l * 2 + 1]; + assign index_nodes[2 ** level - 1 + l] = (sel_nodes[2 ** (level + 1) - 1 + l * 2] == 1'b1) + ? index_nodes[2 ** (level + 1) - 1 + l * 2] : + index_nodes[2 ** (level + 1) - 1 + l * 2 + 1]; + end + end + end + + assign cnt_o = NumLevels > unsigned'(0) ? index_nodes[0] : {($clog2(WIDTH)) {1'b0}}; + assign empty_o = NumLevels > unsigned'(0) ? ~sel_nodes[0] : ~(|in_i); + + end : gen_lzc + +endmodule : lzc diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv b/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv new file mode 100644 index 0000000..72b9b71 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv @@ -0,0 +1,60 @@ +// Copyright (C) 2013-2018 ETH Zurich, University of Bologna +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+
+// Author: Manuel Eggimann
+
+// Description: Hamming-weight (number of set bits) of the input vector, computed
+// with a balanced binary adder tree that is built by recursive instantiation.
+// Any unsigned INPUT_WIDTH larger or equal 2 is legal. The input is zero-padded
+// internally to the next power of two before it is split in halves. The
+// output result width is ceil(log2(INPUT_WIDTH))+1.
+
+module popcount #(
+    parameter int unsigned INPUT_WIDTH = 256,
+    localparam int unsigned PopcountWidth = $clog2(INPUT_WIDTH)+1
+) (
+    input logic [INPUT_WIDTH-1:0]     data_i,
+    output logic [PopcountWidth-1:0]  popcount_o
+);
+
+   localparam int unsigned PaddedWidth = 1 << $clog2(INPUT_WIDTH);
+
+   logic [PaddedWidth-1:0]           padded_input;
+   logic [PopcountWidth-2:0]         left_child_result, right_child_result;
+
+   // Zero-extend the input to the next power of two. The cast pads the upper
+   // bits with zeros because data_i is unsigned.
+   always_comb begin
+     padded_input = PaddedWidth'(data_i);
+   end
+
+   // Recursion: widths 1 and 2 terminate, anything wider splits into two halves
+   if (INPUT_WIDTH == 1) begin : single_node
+     assign left_child_result  = 1'b0;
+     assign right_child_result = padded_input[0];
+   end else if (INPUT_WIDTH == 2) begin : leaf_node
+     assign left_child_result  = padded_input[1];
+     assign right_child_result = padded_input[0];
+   end else begin : non_leaf_node
+     popcount #(.INPUT_WIDTH(PaddedWidth / 2))
+         left_child(
+                    .data_i     (padded_input[PaddedWidth-1:PaddedWidth/2]),
+                    .popcount_o (left_child_result));
+
+     popcount #(.INPUT_WIDTH(PaddedWidth / 2))
+         right_child(
+                     .data_i     (padded_input[PaddedWidth/2-1:0]),
+                     .popcount_o (right_child_result));
+   end
+
+   // Sum of both halves; the result is one bit wider than each operand
+   assign popcount_o = right_child_result + left_child_result;
+
+endmodule : popcount
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
new file mode 100644
index 0000000..90301c8
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
@@ -0,0
+1,348 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner, ETH Zurich
+//         Wolfgang Roenninger, ETH Zurich
+// Date: 02.04.2019
+// Description: logarithmic arbitration tree with round robin arbitration scheme.
+
+/// The rr_arb_tree employs non-starving round robin-arbitration - i.e., the priorities
+/// rotate each cycle.
+///
+/// ## Fair vs. unfair Arbitration
+///
+/// This refers to fair throughput distribution when not all inputs have active requests.
+/// This module has an internal state `rr_q` which defines the highest priority input. (When
+/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
+/// choose the input with the same index as currently defined by the state if it has an active
+/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
+/// to distinguish between two methods of calculating the next state.
+/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
+///           state being calculated without the context of the active request, leading to an
+///           unfair throughput distribution if not all inputs have active requests.
+/// * `1'b1`: The next state jumps to the next unserved request with higher index.
+///           This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
+///           `req_i` signal with all indices which will have a higher priority in the next state.
+///           The trailing zero count defines the input index with the next highest priority after
+///           the current one is served. When the upper is empty the lower `lzc` provides the
+///           wrapped index if there are outstanding requests with lower or same priority.
+/// The implications of throughput fairness on the module timing are:
+/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
+///   that in this module the input to register path scales with Log(Log(`NumIn`)).
+/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to output
+///   timing path of this module also scales with Log(`NumIn`).
+/// This implies that in this module the input to output path is always longer than the input to
+/// register path. As the output data usually also terminates in a register the parameter `FairArb`
+/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
+/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
+/// However these are small in respect of the data multiplexers needed, as the width of the `req_i`
+/// signal is usually less than `DataWidth`.
+module rr_arb_tree #(
+  /// Number of inputs to be arbitrated.
+  parameter int unsigned NumIn      = 64,
+  /// Data width of the payload in bits. Not needed if `DataType` is overwritten.
+  parameter int unsigned DataWidth  = 32,
+  /// Data type of the payload, can be overwritten with custom type. This default is the only use of `DataWidth`.
+  parameter type         DataType   = logic [DataWidth-1:0],
+  /// The `ExtPrio` option allows to override the internal round robin counter via the
+  /// `rr_i` signal. This can be useful in case multiple arbiters need to have
+  /// rotating priorities that are operating in lock-step. If static priority arbitration
+  /// is needed, just connect `rr_i` to '0.
+  ///
+  /// Set to 1'b1 to enable.
+  parameter bit          ExtPrio    = 1'b0,
+  /// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
+  /// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
+  /// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
+  /// delay and area.
+  ///
+  /// Set to `1'b1` to treat req/gnt as vld/rdy.
+  parameter bit          AxiVldRdy  = 1'b0,
+  /// The `LockIn` option prevents the arbiter from changing the arbitration
+  /// decision when the arbiter is disabled. I.e., the index of the first request
+  /// that wins the arbitration will be locked in case the destination is not
+  /// able to grant the request in the same cycle.
+  ///
+  /// Set to `1'b1` to enable.
+  parameter bit          LockIn     = 1'b0,
+  /// When set, ensures that throughput gets distributed evenly between all inputs.
+  ///
+  /// Set to `1'b0` to disable.
+  parameter bit          FairArb    = 1'b1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Width of the arbitration priority signal and the arbitrated index.
+  parameter int unsigned IdxWidth   = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Type for defining the arbitration priority and arbitrated index signal.
+  parameter type         idx_t      = logic [IdxWidth-1:0]
+) (
+  /// Clock, positive edge triggered.
+  input  logic                clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                rst_ni,
+  /// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
+  input  logic                flush_i,
+  /// External round-robin priority. Only used if `ExtPrio` is `1'b1`.
+  input  idx_t                rr_i,
+  /// Input requests arbitration.
+  input  logic    [NumIn-1:0] req_i,
+  /* verilator lint_off UNOPTFLAT */
+  /// Input request is granted.
+  output logic    [NumIn-1:0] gnt_o,
+  /* verilator lint_on UNOPTFLAT */
+  /// Input data for arbitration.
+  input  DataType [NumIn-1:0] data_i,
+  /// Output request is valid.
+  output logic                req_o,
+  /// Output request is granted.
+  input  logic                gnt_i,
+  /// Output data.
+  output DataType             data_o,
+  /// Index from which input the data came from.
+  output idx_t                idx_o
+);
+
+  // pragma translate_off
+  `ifndef VERILATOR
+  `ifndef XSIM
+  // Default SVA reset
+  default disable iff (!rst_ni || flush_i);
+  `endif
+  `endif
+  // pragma translate_on
+
+  // just pass through in this corner case
+  if (NumIn == unsigned'(1)) begin : gen_pass_through
+    assign req_o    = req_i[0];
+    assign gnt_o[0] = gnt_i;
+    assign data_o   = data_i[0];
+    assign idx_o    = '0;
+  // non-degenerate cases
+  end else begin : gen_arbiter
+    localparam int unsigned NumLevels = unsigned'($clog2(NumIn));
+
+    /* verilator lint_off UNOPTFLAT */
+    idx_t    [2**NumLevels-2:0] index_nodes; // used to propagate the indices
+    DataType [2**NumLevels-2:0] data_nodes;  // used to propagate the data
+    logic    [2**NumLevels-2:0] gnt_nodes;   // used to propagate the grant to masters
+    logic    [2**NumLevels-2:0] req_nodes;   // used to propagate the requests to slave
+    /* lint_off */ // NOTE(review): presumably `verilator lint_on UNOPTFLAT` was intended here - confirm
+    idx_t                       rr_q;        // current highest-priority index (register or rr_i)
+    logic [NumIn-1:0]           req_d;       // requests fed into the tree (req_i or the locked copy)
+
+    // the final arbitration decision can be taken from the root of the tree
+    assign req_o        = req_nodes[0];
+    assign data_o       = data_nodes[0];
+    assign idx_o        = index_nodes[0];
+
+    if (ExtPrio) begin : gen_ext_rr
+      assign rr_q       = rr_i;
+      assign req_d      = req_i;
+    end else begin : gen_int_rr
+      idx_t rr_d;
+
+      // lock arbiter decision in case we got at least one req and no acknowledge
+      if (LockIn) begin : gen_lock
+        logic  lock_d, lock_q;
+        logic [NumIn-1:0] req_q;
+
+        assign lock_d     = req_o & ~gnt_i;
+        assign req_d      = (lock_q) ? req_q : req_i;
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
+          if (!rst_ni) begin
+            lock_q <= '0;
+          end else begin
+            if (flush_i) begin
+              lock_q <= '0;
+            end else begin
+              lock_q <= lock_d;
+            end
+          end
+        end
+
+        // pragma translate_off
+        `ifndef VERILATOR
+          lock: assert property(
+            @(posedge clk_i) LockIn |-> req_o &&
+                             (!gnt_i && !flush_i) |=> idx_o == $past(idx_o)) else
+                $fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
+                            ready.");
+
+          logic [NumIn-1:0] req_tmp;
+          assign req_tmp = req_q & req_i;
+          lock_req: assume property(
+            @(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
+                $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
+                            enabled.");
+        `endif
+        // pragma translate_on
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
+          if (!rst_ni) begin
+            req_q  <= '0;
+          end else begin
+            if (flush_i) begin
+              req_q  <= '0;
+            end else begin
+              req_q  <= req_d;
+            end
+          end
+        end
+      end else begin : gen_no_lock
+        assign req_d = req_i;
+      end
+
+      if (FairArb) begin : gen_fair_arb
+        logic [NumIn-1:0] upper_mask,  lower_mask;
+        idx_t             upper_idx,   lower_idx,   next_idx;
+        logic             upper_empty, lower_empty;
+
+        for (genvar i = 0; i < NumIn; i++) begin : gen_mask
+          assign upper_mask[i] = (i >  rr_q) ? req_d[i] : 1'b0;
+          assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0;
+        end
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE  ( 1'b0  )
+        ) i_lzc_upper (
+          .in_i    ( upper_mask  ),
+          .cnt_o   ( upper_idx   ),
+          .empty_o ( upper_empty )
+        );
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE  ( 1'b0  )
+        ) i_lzc_lower (
+          .in_i    ( lower_mask  ),
+          .cnt_o   ( lower_idx   ),
+          .empty_o ( /*unused*/  )
+        );
+
+        assign next_idx = upper_empty      ? lower_idx : upper_idx;
+        assign rr_d     = (gnt_i && req_o) ? next_idx  : rr_q;
+
+      end else begin : gen_unfair_arb
+        assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
+      end
+
+      // this holds the highest priority
+      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
+        if (!rst_ni) begin
+          rr_q   <= '0;
+        end else begin
+          if (flush_i) begin
+            rr_q <= '0;
+          end else begin
+            rr_q <= rr_d;
+          end
+        end
+      end
+    end
+
+    assign gnt_nodes[0] = gnt_i;
+
+    // arbiter tree
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
+      for (genvar l = 0; l < 2**level; l++) begin : gen_level
+        // local select signal
+        logic sel;
+        // index calcs
+        localparam int unsigned Idx0 = 2**level-1+l;// current node
+        localparam int unsigned Idx1 = 2**(level+1)-1+l*2; // first child; the second child is Idx1+1
+        //////////////////////////////////////////////////////////////
+        // uppermost level where data is fed in from the inputs
+        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
+            assign req_nodes[Idx0]   = req_d[l*2] | req_d[l*2+1];
+
+            // arbitration: round robin (take the odd input if the even one is idle, or if it requests and the rr_q bit is set)
+            assign sel =  ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
+
+            assign index_nodes[Idx0] = idx_t'(sel);
+            assign data_nodes[Idx0]  = (sel) ? data_i[l*2+1] : data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2])   & ~sel;
+            assign gnt_o[l*2+1]      = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
+            assign req_nodes[Idx0]   = req_d[l*2];
+            assign index_nodes[Idx0] = '0;// always zero in this case
+            assign data_nodes[Idx0]  = data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
+          end
+          // if index is out of range, fill up with zeros (will get pruned)
+          if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
+            assign req_nodes[Idx0]   = 1'b0;
+            assign index_nodes[Idx0] = idx_t'('0);
+            assign data_nodes[Idx0]  = DataType'('0);
+          end
+        //////////////////////////////////////////////////////////////
+        // general case for other levels within the tree
+        end else begin : gen_other_levels
+          assign req_nodes[Idx0]   = req_nodes[Idx1] | req_nodes[Idx1+1];
+
+          // arbitration: round robin (same rule as above, applied to the two child nodes)
+          assign sel =  ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];
+
+          assign index_nodes[Idx0] = (sel) ?
+            idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
+            idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});
+
+          assign data_nodes[Idx0]  = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
+          assign gnt_nodes[Idx1]   = gnt_nodes[Idx0] & ~sel;
+          assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
+        end
+        //////////////////////////////////////////////////////////////
+      end
+    end
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    `ifndef XSIM
+    initial begin : p_assert
+      assert(NumIn)
+        else $fatal(1, "Input must be at least one element wide.");
+      assert(!(LockIn && ExtPrio))
+        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
+    end
+
+    hot_one : assert property(
+      @(posedge clk_i) $onehot0(gnt_o))
+        else $fatal (1, "Grant signal must be hot1 or zero.");
+
+    gnt0 : assert property(
+      @(posedge clk_i) |gnt_o |-> gnt_i)
+        else $fatal (1, "Grant out implies grant in.");
+
+    gnt1 : assert property(
+      @(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
+        else $fatal (1, "Req out and grant in implies grant out.");
+
+    gnt_idx : assert property(
+      @(posedge clk_i) req_o |->  gnt_i |-> gnt_o[idx_o])
+        else $fatal (1, "Idx_o / gnt_o do not match.");
+
+    req0 : assert property(
+      @(posedge clk_i) |req_i |-> req_o)
+        else $fatal (1, "Req in implies req out.");
+
+    req1 : assert property(
+      @(posedge clk_i) req_o |-> |req_i)
+        else $fatal (1, "Req out implies req in.");
+    `endif
+    `endif
+    // pragma translate_on
+  end
+
+endmodule : rr_arb_tree
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv
new file mode 100644
index 0000000..a7dccc6
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv
@@ -0,0 +1,30 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi
+// Description: wrapper around rstgen_bypass that reuses rst_ni as the test-mode reset.
+module rstgen (
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic test_mode_i,
+    output logic rst_no,
+    output logic init_no
+);
+
+    rstgen_bypass i_rstgen_bypass (
+        .clk_i,
+        .rst_ni,
+        .rst_test_mode_ni ( rst_ni ), // no dedicated test reset pin on this wrapper
+        .test_mode_i,
+        .rst_no,
+        .init_no
+    );
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
new file mode 100644
index 0000000..c51ee83
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Florian Zaruba
+// Description: This module is a reset synchronizer with a dedicated reset bypass pin for testmode reset.
+// Pro Tip: The wise Dr. Schaffner recommends at least 4 registers!
+
+module rstgen_bypass #(
+    parameter int unsigned NumRegs = 4 // depth of the synchronizer chain, at least 1
+) (
+    input  logic clk_i,            // clock of the synchronizer chain
+    input  logic rst_ni,           // functional reset, active low (used when test_mode_i is 0)
+    input  logic rst_test_mode_ni, // bypass reset, active low (used when test_mode_i is 1)
+    input  logic test_mode_i,      // selects the bypass path
+    output logic rst_no,           // reset output, active low
+    output logic init_no           // last synchronizer stage, forced to 1 in test mode
+);
+
+    // internal reset
+    logic rst_n;
+
+    logic [NumRegs-1:0] synch_regs_q;
+    // bypass mode
+    always_comb begin
+        if (test_mode_i == 1'b0) begin
+            rst_n   = rst_ni;
+            rst_no  = synch_regs_q[NumRegs-1];
+            init_no = synch_regs_q[NumRegs-1];
+        end else begin
+            rst_n   = rst_test_mode_ni;
+            rst_no  = rst_test_mode_ni;
+            init_no = 1'b1;
+        end
+    end
+    // Shift a constant 1 through the chain after the asynchronous reset is released.
+    always_ff @(posedge clk_i or negedge rst_n) begin
+        if (~rst_n) begin
+            synch_regs_q <= '0;
+        end else begin // the cast drops the MSB, so NumRegs == 1 needs no [NumRegs-2:0] part-select
+            synch_regs_q <= NumRegs'({synch_regs_q, 1'b1});
+        end
+    end
+    // pragma translate_off
+    `ifndef VERILATOR
+    initial begin : p_assertions
+        if (NumRegs < 1) $fatal(1, "At least one register is required.");
+    end
+    `endif
+    // pragma translate_on
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv b/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv
new file mode 100644
index 0000000..7193fbc
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv
@@ -0,0 +1,53 @@
+
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author:
+//
+// Description: Shift register of parametric depth for an arbitrary payload type
+
+module shift_reg #(
+    parameter type dtype         = logic,
+    parameter int unsigned Depth = 1
+)(
+    input  logic clk_i,    // Clock
+    input  logic rst_ni,   // Asynchronous reset active low
+    input  dtype d_i,
+    output dtype d_o
+);
+
+    // Depth 0: purely combinational feed-through
+    if (Depth == 0) begin : gen_pass_through
+        assign d_o = d_i;
+    // Depth 1: the output itself is the only flop
+    end else if (Depth == 1) begin : gen_register
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (!rst_ni) begin
+                d_o <= '0;
+            end else begin
+                d_o <= d_i;
+            end
+        end
+    // Depth > 1: chain of Depth stages, newest sample enters at index 0
+    end else if (Depth > 1) begin : gen_shift_reg
+        dtype [Depth-1:0] reg_d, reg_q;
+        assign reg_d = {reg_q[Depth-2:0], d_i};
+        assign d_o   = reg_q[Depth-1];
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (!rst_ni) begin
+                reg_q <= '0;
+            end else begin
+                reg_q <= reg_d;
+            end
+        end
+    end
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv
new file mode 100644
index 0000000..80ff37f
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv
@@ -0,0 +1,46 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+
+/// Non-flushable front end for `spill_register_flushable`, kept for backward
+/// compatibility: the flush input of the wrapped instance is tied to zero.
+module spill_register #(
+  parameter type T = logic,
+  parameter bit Bypass = 1'b0     // make this spill register transparent
+) (
+  input  logic clk_i   ,
+  input  logic rst_ni  ,
+  input  logic valid_i ,
+  output logic ready_o ,
+  input  T     data_i  ,
+  output logic valid_o ,
+  input  logic ready_i ,
+  output T     data_o
+);
+
+  spill_register_flushable #(
+    .T      ( T      ),
+    .Bypass ( Bypass )
+  ) spill_register_flushable_i (
+    .clk_i   ( clk_i   ),
+    .rst_ni  ( rst_ni  ),
+    .valid_i ( valid_i ),
+    .flush_i ( 1'b0    ),
+    .ready_o ( ready_o ),
+    .data_i  ( data_i  ),
+    .valid_o ( valid_o ),
+    .ready_i ( ready_i ),
+    .data_o  ( data_o  )
+  );
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv
new file mode 100644
index 0000000..c03ad27
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv
@@ -0,0 +1,105 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+
+/// A register with handshakes that completely cuts any combinational paths
+/// between the input and output. This spill register can be flushed.
+module spill_register_flushable #(
+  parameter type T = logic,
+  parameter bit Bypass = 1'b0     // make this spill register transparent
+) (
+  input  logic clk_i   ,
+  input  logic rst_ni  ,
+  input  logic valid_i ,
+  input  logic flush_i ,
+  output logic ready_o ,
+  input  T     data_i  ,
+  output logic valid_o ,
+  input  logic ready_i ,
+  output T     data_o
+);
+
+  if (Bypass) begin : gen_bypass
+    assign data_o  = data_i;
+    assign valid_o = valid_i;
+    assign ready_o = ready_i;
+  end else begin : gen_spill_reg
+    // Register A: first storage slot, loaded directly from the input.
+    T a_data_q;
+    logic a_full_q;
+    logic a_fill, a_drain;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_data
+      if (!rst_ni)
+        a_data_q <= '0;
+      else if (a_fill)
+        a_data_q <= data_i;
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_full
+      if (!rst_ni)
+        a_full_q <= 1'b0;
+      else if (a_fill || a_drain)
+        a_full_q <= a_fill;
+    end
+
+    // Register B: second storage slot, loaded from register A.
+    T b_data_q;
+    logic b_full_q;
+    logic b_fill, b_drain;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_data
+      if (!rst_ni)
+        b_data_q <= '0;
+      else if (b_fill)
+        b_data_q <= a_data_q;
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_full
+      if (!rst_ni)
+        b_full_q <= 1'b0;
+      else if (b_fill || b_drain)
+        b_full_q <= b_fill;
+    end
+
+    // A is filled on an accepted input handshake that is not masked by a flush.
+    // A is drained when it is full while B is empty, or when a flush is requested.
+    assign a_fill  = valid_i && ready_o && !flush_i;
+    assign a_drain = flush_i || (a_full_q && !b_full_q);
+
+    // B is filled when A drains while the downstream side is not ready and no
+    // flush is requested. B is drained when it is full and the downstream side
+    // is ready, or when a flush is requested.
+    assign b_fill  = a_drain && !ready_i && !flush_i;
+    assign b_drain = flush_i || (b_full_q && ready_i);
+
+    // Input is accepted as long as at least one of the two registers is free.
+    // Note: flush_i and valid_i must not be high at the same time,
+    // otherwise an invalid handshake may occur
+    assign ready_o = !(a_full_q && b_full_q);
+
+    // Output is valid as long as at least one of the two registers holds data.
+    assign valid_o = a_full_q || b_full_q;
+
+    // Register B, when full, is presented before register A.
+    assign data_o = b_full_q ? b_data_q : a_data_q;
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    flush_valid : assert property (
+      @(posedge clk_i) disable iff (~rst_ni) (flush_i |-> ~valid_i)) else
+      $warning("Trying to flush and feed the spill register simultaneously. You will lose data!");
+    `endif
+    // pragma translate_on
+  end
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv
new file mode 100644
index 0000000..c8ca2a8
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv
@@ -0,0 +1,49 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream. Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred.
The
+// arbitration scheme is selected by `ARBITER` and implemented in `stream_arbiter_flushable`.
+// This wrapper instantiates it with the flush input tied to zero.
+module stream_arbiter #(
+    parameter type      DATA_T = logic,   // Vivado requires a default value for type parameters.
+    parameter integer   N_INP = -1,       // Synopsys DC requires a default value for parameters.
+    parameter           ARBITER = "rr"    // "rr" or "prio"
+) (
+    input  logic              clk_i,
+    input  logic              rst_ni,
+
+    input  DATA_T [N_INP-1:0] inp_data_i,
+    input  logic  [N_INP-1:0] inp_valid_i,
+    output logic  [N_INP-1:0] inp_ready_o,
+
+    output DATA_T             oup_data_o,
+    output logic              oup_valid_o,
+    input  logic              oup_ready_i
+);
+
+  stream_arbiter_flushable #(
+    .DATA_T  ( DATA_T  ),
+    .N_INP   ( N_INP   ),
+    .ARBITER ( ARBITER )
+  ) i_arb (
+    .clk_i,
+    .rst_ni,
+    .flush_i ( 1'b0 ),
+    .inp_data_i,
+    .inp_valid_i,
+    .inp_ready_o,
+    .oup_data_o,
+    .oup_valid_o,
+    .oup_ready_i
+  );
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
new file mode 100644
index 0000000..32946e6
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
@@ -0,0 +1,82 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+ +// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready +// handshaking with dependency rules as in AXI4) to a single output stream. Once `oup_valid_o` is +// asserted, `oup_data_o` remains invariant until the output handshake has occurred. The +// arbitration scheme is fair round-robin tree, see `rr_arb_tree` for details. + +module stream_arbiter_flushable #( + parameter type DATA_T = logic, // Vivado requires a default value for type parameters. + parameter integer N_INP = -1, // Synopsys DC requires a default value for parameters. + parameter ARBITER = "rr" // "rr" or "prio" +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + + input DATA_T [N_INP-1:0] inp_data_i, + input logic [N_INP-1:0] inp_valid_i, + output logic [N_INP-1:0] inp_ready_o, + + output DATA_T oup_data_o, + output logic oup_valid_o, + input logic oup_ready_i +); + + if (ARBITER == "rr") begin : gen_rr_arb + rr_arb_tree #( + .NumIn (N_INP), + .DataType (DATA_T), + .ExtPrio (1'b0), + .AxiVldRdy (1'b1), + .LockIn (1'b1) + ) i_arbiter ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ('0), + .req_i (inp_valid_i), + .gnt_o (inp_ready_o), + .data_i (inp_data_i), + .gnt_i (oup_ready_i), + .req_o (oup_valid_o), + .data_o (oup_data_o), + .idx_o () + ); + + end else if (ARBITER == "prio") begin : gen_prio_arb + rr_arb_tree #( + .NumIn (N_INP), + .DataType (DATA_T), + .ExtPrio (1'b1), + .AxiVldRdy (1'b1), + .LockIn (1'b1) + ) i_arbiter ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ('0), + .req_i (inp_valid_i), + .gnt_o (inp_ready_o), + .data_i (inp_data_i), + .gnt_i (oup_ready_i), + .req_o (oup_valid_o), + .data_o (oup_data_o), + .idx_o () + ); + + end else begin : gen_arb_error + // pragma translate_off + $fatal(1, "Invalid value for parameter 'ARBITER'!"); + // pragma translate_on + end + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv new 
file mode 100644 index 0000000..5051b6c --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv @@ -0,0 +1,132 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba, zarubaf@iis.ee.ethz.ch +// Description: Delay (or randomize) AXI-like handshaking + +module stream_delay #( + parameter bit StallRandom = 0, + parameter int FixedDelay = 1, + parameter type payload_t = logic +)( + input logic clk_i, + input logic rst_ni, + + input payload_t payload_i, + output logic ready_o, + input logic valid_i, + + output payload_t payload_o, + input logic ready_i, + output logic valid_o +); + + if (FixedDelay == 0 && !StallRandom) begin : gen_pass_through + assign ready_o = ready_i; + assign valid_o = valid_i; + assign payload_o = payload_i; + end else begin : gen_delay + + localparam int unsigned CounterBits = 4; + + typedef enum logic [1:0] { + Idle, Valid, Ready + } state_e; + + state_e state_d, state_q; + + logic load; + logic [3:0] count_out; + logic en; + + logic [CounterBits-1:0] counter_load; + + assign payload_o = payload_i; + + always_comb begin + state_d = state_q; + valid_o = 1'b0; + ready_o = 1'b0; + load = 1'b0; + en = 1'b0; + + unique case (state_q) + Idle: begin + if (valid_i) begin + load = 1'b1; + state_d = Valid; + // Just one cycle delay + if (FixedDelay == 1 || (StallRandom && 
counter_load == 1)) begin + state_d = Ready; + end + + if (StallRandom && counter_load == 0) begin + valid_o = 1'b1; + ready_o = ready_i; + if (ready_i) state_d = Idle; + else state_d = Ready; + end + end + end + Valid: begin + en = 1'b1; + if (count_out == 0) begin + state_d = Ready; + end + end + + Ready: begin + valid_o = 1'b1; + ready_o = ready_i; + if (ready_i) state_d = Idle; + end + default : /* default */; + endcase + + end + + if (StallRandom) begin : gen_random_stall + lfsr_16bit #( + .WIDTH ( 16 ) + ) i_lfsr_16bit ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( load ), + .refill_way_oh ( ), + .refill_way_bin ( counter_load ) + ); + end else begin : gen_fixed_delay + assign counter_load = FixedDelay; + end + + counter #( + .WIDTH ( CounterBits ) + ) i_counter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( 1'b0 ), + .en_i ( en ), + .load_i ( load ), + .down_i ( 1'b1 ), + .d_i ( counter_load ), + .q_o ( count_out ), + .overflow_o ( ) + ); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= Idle; + end else begin + state_q <= state_d; + end + end + end + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv new file mode 100644 index 0000000..69ad309 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv @@ -0,0 +1,36 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Connects the input stream (valid-ready) handshake to one of `N_OUP` output stream handshakes. +/// +/// This module has no data ports because stream data does not need to be demultiplexed: the data of +/// the input stream can just be applied at all output streams. +module stream_demux #( + /// Number of connected outputs. + parameter int unsigned N_OUP = 32'd1, + /// Dependent parameters, DO NOT OVERRIDE! + parameter int unsigned LOG_N_OUP = (N_OUP > 32'd1) ? unsigned'($clog2(N_OUP)) : 1'b1 +) ( + input logic inp_valid_i, + output logic inp_ready_o, + + input logic [LOG_N_OUP-1:0] oup_sel_i, + + output logic [N_OUP-1:0] oup_valid_o, + input logic [N_OUP-1:0] oup_ready_i +); + + always_comb begin + oup_valid_o = '0; + oup_valid_o[oup_sel_i] = inp_valid_i; + end + assign inp_ready_o = oup_ready_i[oup_sel_i]; + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv new file mode 100644 index 0000000..34607d9 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv @@ -0,0 +1,46 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Stream multiplexer: connects the output to one of `N_INP` data streams with valid-ready +/// handshaking. + +module stream_mux #( + parameter type DATA_T = logic, // Vivado requires a default value for type parameters. + parameter integer N_INP = 0, // Synopsys DC requires a default value for value parameters. + /// Dependent parameters, DO NOT OVERRIDE! + parameter integer LOG_N_INP = $clog2(N_INP) +) ( + input DATA_T [N_INP-1:0] inp_data_i, + input logic [N_INP-1:0] inp_valid_i, + output logic [N_INP-1:0] inp_ready_o, + + input logic [LOG_N_INP-1:0] inp_sel_i, + + output DATA_T oup_data_o, + output logic oup_valid_o, + input logic oup_ready_i +); + + always_comb begin + inp_ready_o = '0; + inp_ready_o[inp_sel_i] = oup_ready_i; + end + assign oup_data_o = inp_data_i[inp_sel_i]; + assign oup_valid_o = inp_valid_i[inp_sel_i]; + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (N_INP >= 1) else $fatal (1, "The number of inputs must be at least 1!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv new file mode 100644 index 0000000..f529d6a --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv @@ -0,0 +1,57 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Register with a simple stream-like ready/valid handshake. +/// This register does not cut combinatorial paths on all control signals; if you need a complete +/// cut, use the `spill_register`. +module stream_register #( + parameter type T = logic // Vivado requires a default value for type parameters. +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous active-low reset + input logic clr_i, // Synchronous clear + input logic testmode_i, // Test mode to bypass clock gating + // Input port + input logic valid_i, + output logic ready_o, + input T data_i, + // Output port + output logic valid_o, + input logic ready_i, + output T data_o +); + + logic fifo_empty, + fifo_full; + + fifo_v2 #( + .FALL_THROUGH (1'b0), + .DATA_WIDTH ($bits(T)), + .DEPTH (1), + .dtype (T) + ) i_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (clr_i), + .testmode_i (testmode_i), + .full_o (fifo_full), + .empty_o (fifo_empty), + .alm_full_o ( ), + .alm_empty_o ( ), + .data_i (data_i), + .push_i (valid_i & ~fifo_full), + .data_o (data_o), + .pop_i (ready_i & ~fifo_empty) + ); + + assign ready_o = ~fifo_full; + assign valid_o = ~fifo_empty; + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv b/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv new file mode 100644 index 0000000..80e7356 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv @@ -0,0 +1,21 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 29.10.2018 +// Description: Dummy circuit to mitigate Open Pin warnings + +/* verilator lint_off UNUSED */ +module unread ( + input logic d_i +); + +endmodule +/* verilator lint_on UNUSED */ diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv new file mode 100644 index 0000000..ac04b9b --- /dev/null +++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv @@ -0,0 +1,62 @@ +// Copyright 2022 Thales Research and Technology +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses +// +// Inferable, Asynchronous Dual-Port RAM, there are a write port and a read port +// +// +// This module is designed to work with both Xilinx and Microchip FPGA tools by following the respective +// guidelines: +// - Xilinx UG901 Vivado Design Suite User Guide: Synthesis +// - Inferring Microchip PolarFire RAM Blocks +// +// Intel FPGA (Altera) doesn't seem to support asynchronous RAM +// +// Current Maintainers:: Sébastien Jacq - sjthales on github.com + + +module AsyncDpRam +#( + parameter ADDR_WIDTH = 10, + parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower + parameter DATA_WIDTH = 32 +)( + input logic Clk_CI, + + // Write port + input logic WrEn_SI, + input logic [ADDR_WIDTH-1:0] WrAddr_DI, + input logic [DATA_WIDTH-1:0] WrData_DI, + + // Read port + input logic [ADDR_WIDTH-1:0] RdAddr_DI, + output logic [DATA_WIDTH-1:0] RdData_DO +); + + logic [DATA_WIDTH-1:0] mem [DATA_DEPTH-1:0]= '{default:0}; + + // WRITE + always_ff @(posedge Clk_CI) + begin + if (WrEn_SI) begin + mem[WrAddr_DI] <= WrData_DI; + end + end + + // READ + assign RdData_DO = mem[RdAddr_DI]; + + //////////////////////////// + // assertions + //////////////////////////// + + // pragma translate_off + assert property + (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH))) + else $error("depth out of bounds"); + // pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv new file mode 100644 index 0000000..ee6fd1a --- /dev/null +++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv @@ -0,0 +1,66 @@ +// Copyright 2023 Thales Research and Technology +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this 
file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses +// +// Inferable, Asynchronous Three-Ports RAM, there are a write port and two read ports +// +// +// This module is designed to work with both Xilinx and Microchip FPGA tools by following the respective +// guidelines: +// - Xilinx UG901 Vivado Design Suite User Guide: Synthesis +// - Inferring Microchip PolarFire RAM Blocks +// +// Intel FPGA (Altera) doesn't seem to support asynchronous RAM +// +// Current Maintainers:: Sébastien Jacq - sjthales on github.com + + +module AsyncThreePortRam +#( + parameter ADDR_WIDTH = 10, + parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower + parameter DATA_WIDTH = 32 +)( + input logic Clk_CI, + + // Write port + input logic WrEn_SI, + input logic [ADDR_WIDTH-1:0] WrAddr_DI, + input logic [DATA_WIDTH-1:0] WrData_DI, + + // Read ports + input logic [ADDR_WIDTH-1:0] RdAddr_DI_0, + input logic [ADDR_WIDTH-1:0] RdAddr_DI_1, + + output logic [DATA_WIDTH-1:0] RdData_DO_0, + output logic [DATA_WIDTH-1:0] RdData_DO_1 +); + + logic [DATA_WIDTH-1:0] mem [DATA_DEPTH-1:0]= '{default:0}; + + // WRITE + always_ff @(posedge Clk_CI) + begin + if (WrEn_SI) begin + mem[WrAddr_DI] <= WrData_DI; + end + end + + // READ + assign RdData_DO_0 = mem[RdAddr_DI_0]; + assign RdData_DO_1 = mem[RdAddr_DI_1]; + + //////////////////////////// + // assertions + //////////////////////////// + + // pragma translate_off + assert property + (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH))) + else $error("depth out of bounds"); + // pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv new file mode 100644 index 0000000..e3efb12 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv @@ 
-0,0 +1,182 @@ +// Copyright 2014 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/** + * Inferable, Synchronous Dual-Port RAM + * + * This module is designed to work with both Xilinx and Altera tools by following the respective + * guidelines: + * - Xilinx UG901 Vivado Design Suite User Guide: Synthesis (p. 106) + * - Altera Quartus II Handbook Volume 1: Design and Synthesis (p. 768) + * + * Current Maintainers: + * - Michael Schaffner + */ + +// this automatically switches the behavioral description +// pragma translate_off +`define SIMULATION +// pragma translate_on + +module SyncDpRam +#( + parameter ADDR_WIDTH = 10, + parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower + parameter DATA_WIDTH = 32, + parameter OUT_REGS = 0, + parameter SIM_INIT = 0 // for simulation only, will not be synthesized + // 0: no init, 1: zero init, 2: random init + // note: on verilator, 2 is not supported. define the VERILATOR macro to work around. 
+)( + input logic Clk_CI, + input logic Rst_RBI, + // port A + input logic CSelA_SI, + input logic WrEnA_SI, + input logic [DATA_WIDTH-1:0] WrDataA_DI, + input logic [ADDR_WIDTH-1:0] AddrA_DI, + output logic [DATA_WIDTH-1:0] RdDataA_DO, + // port B + input logic CSelB_SI, + input logic WrEnB_SI, + input logic [DATA_WIDTH-1:0] WrDataB_DI, + input logic [ADDR_WIDTH-1:0] AddrB_DI, + output logic [DATA_WIDTH-1:0] RdDataB_DO +); + + //////////////////////////// + // signals, localparams + //////////////////////////// + + logic [DATA_WIDTH-1:0] RdDataA_DN; + logic [DATA_WIDTH-1:0] RdDataA_DP; + logic [DATA_WIDTH-1:0] RdDataB_DN; + logic [DATA_WIDTH-1:0] RdDataB_DP; + logic [DATA_WIDTH-1:0] Mem_DP [DATA_DEPTH-1:0]; + + //////////////////////////// + // XILINX/ALTERA implementation + //////////////////////////// + + `ifdef SIMULATION + always_ff @(posedge Clk_CI) + begin + automatic logic [DATA_WIDTH-1:0] val; + if(Rst_RBI == 1'b0 && SIM_INIT>0) begin + for(int k=0; k0) begin : g_outreg + always_ff @(posedge Clk_CI or negedge Rst_RBI) begin + if(Rst_RBI == 1'b0) + begin + RdDataA_DP <= 0; + RdDataB_DP <= 0; + end + else + begin + RdDataA_DP <= RdDataA_DN; + RdDataB_DP <= RdDataB_DN; + end + end + end + endgenerate // g_outreg + + // output reg bypass + generate + if (OUT_REGS==0) begin : g_oureg_byp + assign RdDataA_DP = RdDataA_DN; + assign RdDataB_DP = RdDataB_DN; + end + endgenerate// g_oureg_byp + + assign RdDataA_DO = RdDataA_DP; + assign RdDataB_DO = RdDataB_DP; + + //////////////////////////// + // assertions + //////////////////////////// + + // pragma translate_off + assert property + (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH))) + else $error("depth out of bounds"); + assert property + (@(posedge Clk_CI) (CSelA_SI & CSelB_SI & WrEnA_SI & WrEnB_SI) |-> (AddrA_DI != AddrB_DI)) + else $error("A and B write to the same address"); + // pragma translate_on + +endmodule // SyncDpRam diff --git 
a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv new file mode 100644 index 0000000..bc7ed5c --- /dev/null +++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv @@ -0,0 +1,94 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +module cluster_clock_and2 ( + input logic clk0_i, + input logic clk1_i, + output logic clk_o +); + + tc_clk_and2 i_tc_clk_and2 ( + .clk0_i, + .clk1_i, + .clk_o + ); + +endmodule + +module cluster_clock_buffer ( + input logic clk_i, + output logic clk_o +); + + tc_clk_buffer i_tc_clk_buffer ( + .clk_i, + .clk_o + ); + +endmodule + +// Description: Behavioral model of an integrated clock-gating cell (ICG) +module cluster_clock_gating ( + input logic clk_i, + input logic en_i, + input logic test_en_i, + output logic clk_o +); + + tc_clk_gating i_tc_clk_gating ( + .clk_i, + .en_i, + .test_en_i, + .clk_o + ); + +endmodule + +module cluster_clock_inverter ( + input logic clk_i, + output logic clk_o +); + + tc_clk_inverter i_tc_clk_inverter ( + .clk_i, + .clk_o + ); + +endmodule + +module cluster_clock_mux2 ( + input logic clk0_i, + input logic clk1_i, + input logic clk_sel_i, + output logic clk_o +); + + tc_clk_mux2 i_tc_clk_mux2 ( + .clk0_i, + .clk1_i, + .clk_sel_i, + .clk_o + ); + +endmodule + +module cluster_clock_xor2 ( + input logic clk0_i, + input logic clk1_i, + output logic clk_o +); + + tc_clk_xor2 i_tc_clk_xor2 ( + .clk0_i, + .clk1_i, + .clk_o + ); + +endmodule diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv new file mode 100644 index 0000000..53ad07f --- /dev/null +++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv @@ -0,0 +1,107 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +module pulp_clock_and2 ( + input logic clk0_i, + input logic clk1_i, + output logic clk_o +); + + tc_clk_and2 i_tc_clk_and2 ( + .clk0_i, + .clk1_i, + .clk_o + ); + +endmodule + +module pulp_clock_buffer ( + input logic clk_i, + output logic clk_o +); + + tc_clk_buffer i_tc_clk_buffer ( + .clk_i, + .clk_o + ); + +endmodule + +// Description: Behavioral model of an integrated clock-gating cell (ICG) +module pulp_clock_gating ( + input logic clk_i, + input logic en_i, + input logic test_en_i, + output logic clk_o +); + + tc_clk_gating i_tc_clk_gating ( + .clk_i, + .en_i, + .test_en_i, + .clk_o + ); + +endmodule + +module pulp_clock_inverter ( + input logic clk_i, + output logic clk_o +); + + tc_clk_inverter i_tc_clk_inverter ( + .clk_i, + .clk_o + ); + +endmodule + +module pulp_clock_mux2 ( + input logic clk0_i, + input logic clk1_i, + input logic clk_sel_i, + output logic clk_o +); + + tc_clk_mux2 i_tc_clk_mux2 ( + .clk0_i, + .clk1_i, + .clk_sel_i, + .clk_o + ); + +endmodule + +module pulp_clock_xor2 ( + input logic clk0_i, + input logic clk1_i, + output logic clk_o +); + + tc_clk_xor2 i_tc_clk_xor2 ( + .clk0_i, + .clk1_i, + .clk_o + ); + +endmodule + +`ifndef SYNTHESIS +module pulp_clock_delay( + input logic in_i, + output logic out_o +); + + assign #(300ps) out_o = in_i; + +endmodule +`endif + + diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv new file mode 100644 index 0000000..3ab329e --- /dev/null +++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv @@ -0,0 +1,120 @@ +// Copyright 
2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +module tc_clk_and2 ( + input logic clk0_i, + input logic clk1_i, + output logic clk_o +); + + assign clk_o = clk0_i & clk1_i; + +endmodule + +module tc_clk_buffer ( + input logic clk_i, + output logic clk_o +); + + assign clk_o = clk_i; + +endmodule + +// Description: Behavioral model of an integrated clock-gating cell (ICG) +module tc_clk_gating #( + /// This paramaeter is a hint for tool/technology specific mappings of this + /// tech_cell. It indicates wether this particular clk gate instance is + /// required for functional correctness or just instantiated for power + /// savings. If IS_FUNCTIONAL == 0, technology specific mappings might + /// replace this cell with a feedthrough connection without any gating. + parameter bit IS_FUNCTIONAL = 1'b1 +)( + input logic clk_i, + input logic en_i, + input logic test_en_i, + output logic clk_o +); + + logic clk_en; + + always_latch begin + if (clk_i == 1'b0) clk_en <= en_i | test_en_i; + end + + assign clk_o = clk_i & clk_en; + +endmodule + +module tc_clk_inverter ( + input logic clk_i, + output logic clk_o +); + + assign clk_o = ~clk_i; + +endmodule + +// Warning: Typical clock mux cells of a technologies std cell library ARE NOT +// GLITCH FREE!! The only difference to a regular multiplexer cell is that they +// feature balanced rise- and fall-times. 
In other words: SWITCHING FROM ONE +// CLOCK TO THE OTHER CAN INTRODUCE GLITCHES. ALSO, GLITCHES ON THE SELECT LINE +// DIRECTLY TRANSLATE TO GLITCHES ON THE OUTPUT CLOCK!! This cell is only +// intended to be used for quasi-static switching between clocks when one of the +// clocks is anyway inactive or if the downstream logic remains gated or in +// reset state during the transition phase. If you need dynamic switching +// between arbitrary input clocks without introducing glitches, have a look at +// the clk_mux_glitch_free cell in the pulp-platform/common_cells repository. +module tc_clk_mux2 ( + input logic clk0_i, + input logic clk1_i, + input logic clk_sel_i, + output logic clk_o +); + + assign clk_o = (clk_sel_i) ? clk1_i : clk0_i; + +endmodule + +module tc_clk_xor2 ( + input logic clk0_i, + input logic clk1_i, + output logic clk_o +); + + assign clk_o = clk0_i ^ clk1_i; + +endmodule + +module tc_clk_or2 ( + input logic clk0_i, + input logic clk1_i, + output logic clk_o +); + + assign clk_o = clk0_i | clk1_i; + +endmodule + +`ifndef SYNTHESIS +module tc_clk_delay #( + parameter int unsigned Delay = 300ps +) ( + input logic in_i, + output logic out_o +); + +// pragma translate_off +`ifndef VERILATOR + assign #(Delay) out_o = in_i; +`endif +// pragma translate_on + +endmodule +`endif diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv new file mode 100644 index 0000000..b702a11 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv @@ -0,0 +1,245 @@ +// Copyright (c) 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Wolfgang Roenninger + +// Description: Functional module of a generic SRAM +// +// Parameters: +// - NumWords: Number of words in the macro. Address width can be calculated with: +// `AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1` +// The module issues a warning if there is a request on an address which is +// not in range. +// - DataWidth: Width of the ports `wdata_i` and `rdata_o`. +// - ByteWidth: Width of a byte, the byte enable signal `be_i` can be calculated with the +// ceiling division `ceil(DataWidth, ByteWidth)`. +// - NumPorts: Number of read and write ports. Each is a full port. Ports with a higher +// index read and write after the ones with lower indices. +// - Latency: Read latency, the read data is available this many cycles after a request. +// - SimInit: Macro simulation initialization. Values are: +// "zeros": Each bit gets initialized with 1'b0. +// "ones": Each bit gets initialized with 1'b1. +// "random": Each bit gets random initialized with 1'b0 or 1'b1. +// "none": Each bit gets initialized with 1'bx. (default) +// - PrintSimCfg: Prints at the beginning of the simulation a `Hello` message with +// the instantiated parameters and signal widths. +// - ImplKey: Key by which an instance can refer to a specific implementation (e.g. macro). +// May be used to look up additional parameters for implementation (e.g. generator, +// line width, muxing) in an external reference, such as a configuration file. 
+//
+// Ports:
+// - `clk_i`:   Clock
+// - `rst_ni`:  Asynchronous reset, active low
+// - `req_i`:   Request, active high
+// - `we_i`:    Write request, active high
+// - `addr_i`:  Request address
+// - `wdata_i`: Write data, has to be valid on request
+// - `be_i`:    Byte enable, active high
+// - `rdata_o`: Read data, valid `Latency` cycles after a request with `we_i` low.
+//
+// Behaviour:
+// - Address collision:  When several ports write to the same address in the same cycle,
+//                       the write operation starts at the port with the lowest port
+//                       index, each port will overwrite the changes made by the previous ports
+//                       according to how the respective `be_i` signal is set.
+// - Read data on write: This implementation will not produce a read data output on the signal
+//                       `rdata_o` when `req_i` and `we_i` are asserted. The output data is stable
+//                       on write requests.
+
+module tc_sram #(
+  parameter int unsigned NumWords     = 32'd1024, // Number of Words in data array
+  parameter int unsigned DataWidth    = 32'd128,  // Data signal width
+  parameter int unsigned ByteWidth    = 32'd8,    // Width of a data byte
+  parameter int unsigned NumPorts     = 32'd2,    // Number of read and write ports
+  parameter int unsigned Latency      = 32'd1,    // Latency when the read data is available
+  parameter              SimInit      = "none",   // Simulation initialization
+  parameter bit          PrintSimCfg  = 1'b0,     // Print configuration
+  parameter              ImplKey      = "none",   // Reference to specific implementation
+  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
+  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
+  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
+  parameter type         addr_t    = logic [AddrWidth-1:0],
+  parameter type         data_t    = logic [DataWidth-1:0],
+  parameter type         be_t      = logic [BeWidth-1:0]
+) (
+  input  logic                 clk_i,      // Clock
+  input  logic                 rst_ni,     // Asynchronous reset active low
+  // input ports
+  input  logic  [NumPorts-1:0] req_i,      // request
+  input  logic  [NumPorts-1:0] we_i,       // write enable
+  input  addr_t [NumPorts-1:0] addr_i,     // request address
+  input  data_t [NumPorts-1:0] wdata_i,    // write data
+  input  be_t   [NumPorts-1:0] be_i,       // write byte enable
+  // output ports
+  output data_t [NumPorts-1:0] rdata_o     // read data
+);
+
+  // memory array
+  data_t sram [NumWords-1:0];
+  // hold the read address when no read access is made
+  addr_t [NumPorts-1:0] r_addr_q;
+
+  // SRAM simulation initialization
+  // `init_val` is computed once at time zero and copied into `sram` (and the read pipeline)
+  // while reset is active, unless SimInit == "none".
+  data_t init_val[NumWords-1:0];
+  initial begin : proc_sram_init
+    for (int unsigned i = 0; i < NumWords; i++) begin
+      case (SimInit)
+        "zeros":  init_val[i] = {DataWidth{1'b0}};
+        "ones":   init_val[i] = {DataWidth{1'b1}};
+        "random": begin
+          // Draw every bit independently. A replication such as `{DataWidth{$urandom()}}`
+          // evaluates `$urandom()` only once, so words wider than 32 bit would merely
+          // repeat a single 32-bit pattern instead of being random in each bit.
+          for (int unsigned b = 0; b < DataWidth; b++) begin
+            init_val[i][b] = $urandom_range(1, 0);
+          end
+        end
+        default:  init_val[i] = {DataWidth{1'bx}};
+      endcase
+    end
+  end
+
+  // set the read output if requested
+  // The read data at the highest array index is set combinational.
+  // It gets then delayed for a number of cycles until it gets available at the output at
+  // array index 0.
+
+  // read data output assignment
+  data_t [NumPorts-1:0][Latency-1:0] rdata_q, rdata_d;
+  if (Latency == 32'd0) begin : gen_no_read_lat
+    // zero latency: the addressed word (or the last read address when idle / writing)
+    // is forwarded combinationally
+    for (genvar i = 0; i < NumPorts; i++) begin : gen_port
+      assign rdata_o[i] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]];
+    end
+  end else begin : gen_read_lat
+
+    always_comb begin
+      for (int unsigned i = 0; i < NumPorts; i++) begin
+        // stage 0 of the pipeline drives the output
+        rdata_o[i] = rdata_q[i][0];
+        // every stage takes over the value of the next higher stage
+        for (int unsigned j = 0; j < (Latency-1); j++) begin
+          rdata_d[i][j] = rdata_q[i][j+1];
+        end
+        // the highest stage is fed from the array; without a read request the word at the
+        // held read address is re-read so that the output stays stable
+        rdata_d[i][Latency-1] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]];
+      end
+    end
+  end
+
+  // In case simulation initialization is disabled (SimInit == 'none'), don't assign to the sram
+  // content at all. This improves simulation performance in tools like verilator
+  if (SimInit == "none") begin
+    // write memory array without initialization
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        // only the held read addresses are reset, `sram` and `rdata_q` keep their content
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          r_addr_q[i] <= {AddrWidth{1'b0}};
+        end
+      end else begin
+        // read value latch happens before new data is written to the sram
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (Latency != 0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= rdata_d[i][j];
+            end
+          end
+        end
+        // there is a request for the SRAM, latch the required register
+        // Ports are visited in ascending index order, so on a write collision the
+        // non-blocking assignment of the highest port index wins per enabled byte.
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (req_i[i]) begin
+            if (we_i[i]) begin
+              // update value when write is set at clock
+              for (int unsigned j = 0; j < BeWidth; j++) begin
+                if (be_i[i][j]) begin
+                  sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth];
+                end
+              end
+            end else begin
+              // otherwise update read address for subsequent non request cycles
+              r_addr_q[i] <= addr_i[i];
+            end
+          end // if req_i
+        end // for ports
+      end // if !rst_ni
+    end
+  end else begin
+    // write memory array
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        // reset restores the complete array to its simulation init value
+        sram <= init_val;
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          r_addr_q[i] <= {AddrWidth{1'b0}};
+          // initialize the read output register for each port
+          if (Latency != 32'd0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= init_val[{AddrWidth{1'b0}}];
+            end
+          end
+        end
+      end else begin
+        // read value latch happens before new data is written to the sram
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (Latency != 0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= rdata_d[i][j];
+            end
+          end
+        end
+        // there is a request for the SRAM, latch the required register
+        // Ports are visited in ascending index order, so on a write collision the
+        // non-blocking assignment of the highest port index wins per enabled byte.
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (req_i[i]) begin
+            if (we_i[i]) begin
+              // update value when write is set at clock
+              for (int unsigned j = 0; j < BeWidth; j++) begin
+                if (be_i[i][j]) begin
+                  sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth];
+                end
+              end
+            end else begin
+              // otherwise update read address for subsequent non request cycles
+              r_addr_q[i] <= addr_i[i];
+            end
+          end // if req_i
+        end // for ports
+      end // if !rst_ni
+    end
+  end
+
+// Validate parameters.
+// pragma translate_off
+`ifndef VERILATOR
+`ifndef TARGET_SYNTHESIS
+  initial begin: p_assertions
+    assert ($bits(addr_i) == NumPorts * AddrWidth) else $fatal(1, "AddrWidth problem on `addr_i`");
+    assert ($bits(wdata_i) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `wdata_i`");
+    assert ($bits(be_i) == NumPorts * BeWidth) else $fatal(1, "BeWidth problem on `be_i`" );
+    assert ($bits(rdata_o) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `rdata_o`");
+    assert (NumWords >= 32'd1) else $fatal(1, "NumWords has to be > 0");
+    assert (DataWidth >= 32'd1) else $fatal(1, "DataWidth has to be > 0");
+    assert (ByteWidth >= 32'd1) else $fatal(1, "ByteWidth has to be > 0");
+    assert (NumPorts >= 32'd1) else $fatal(1, "The number of ports must be at least 1!");
+  end
+  initial begin: p_sim_hello
+    if (PrintSimCfg) begin
+      $display("#################################################################################");
+      $display("tc_sram functional instantiated with the configuration:" );
+      $display("Instance: %m" );
+      $display("Number of ports (dec): %0d", NumPorts );
+      $display("Number of words (dec): %0d", NumWords );
+      $display("Address width (dec): %0d", AddrWidth );
+      $display("Data width (dec): %0d", DataWidth );
+      $display("Byte width (dec): %0d", ByteWidth );
+      $display("Byte enable width (dec): %0d", BeWidth );
+      $display("Latency Cycles (dec): %0d", Latency );
+      $display("Simulation init (str): %0s", SimInit );
+      $display("#################################################################################");
+    end
+  end
+  // warn about requests whose address lies outside of the implemented word range
+  for (genvar i = 0; i < NumPorts; i++) begin : gen_assertions
+    assert property ( @(posedge clk_i) disable iff (!rst_ni)
+        (req_i[i] |-> (addr_i[i] < NumWords))) else
+      $warning("Request address %0h not mapped, port %0d, expect random write or read behavior!",
+          addr_i[i], i);
+  end
+
+`endif
+`endif
+// pragma translate_on
+endmodule