From 687e921994f5eca185acb93f14f29f805c8996bf Mon Sep 17 00:00:00 2001
From: Jonathan Kimmitt <jonathan.kimmitt@pcsemi.com>
Date: Tue, 23 Apr 2024 10:42:46 +0100
Subject: [PATCH] Test correct and incorrect type parameters from CVA6

---
 .../common/local/util/instr_tracer.sv         |  223 ++
 .../common/local/util/instr_tracer_if.sv      |   67 +
 test/type_param/common/local/util/sram.sv     |  107 +
 .../common/local/util/tc_sram_wrapper.sv      |   60 +
 test/type_param/core/acc_dispatcher.sv        |  423 ++
 .../core/acc_dispatcher_corrected.sv          |  423 ++
 test/type_param/core/alu.sv                   |  359 ++
 test/type_param/core/amo_buffer.sv            |   82 +
 test/type_param/core/ariane_regfile_ff.sv     |   83 +
 test/type_param/core/ariane_regfile_fpga.sv   |  125 +
 test/type_param/core/axi_shim.sv              |  310 ++
 test/type_param/core/branch_unit.sv           |  106 +
 .../core/cache_subsystem/axi_adapter.sv       |  520 +++
 .../core/cache_subsystem/cache_ctrl.sv        |  475 +++
 .../cva6_hpdcache_if_adapter.sv               |  200 +
 .../cva6_hpdcache_subsystem.sv                |  609 +++
 .../cva6_hpdcache_subsystem_axi_arbiter.sv    |  586 +++
 .../core/cache_subsystem/cva6_icache.sv       |  584 +++
 .../cva6_icache_axi_wrapper.sv                |  202 +
 .../hpdcache/rtl/include/hpdcache_typedef.svh |   62 +
 .../rtl/src/common/hpdcache_data_downsize.sv  |  181 +
 .../rtl/src/common/hpdcache_data_upsize.sv    |  181 +
 .../hpdcache/rtl/src/common/hpdcache_demux.sv |   69 +
 .../rtl/src/common/hpdcache_fifo_reg.sv       |  167 +
 .../hpdcache/rtl/src/common/hpdcache_fxarb.sv |   85 +
 .../hpdcache/rtl/src/common/hpdcache_mux.sv   |   79 +
 .../src/common/hpdcache_prio_1hot_encoder.sv  |   43 +
 .../hpdcache_regbank_wbyteenable_1rw.sv       |   63 +
 .../src/common/hpdcache_regbank_wmask_1rw.sv  |   61 +
 .../hpdcache/rtl/src/common/hpdcache_rrarb.sv |  121 +
 .../hpdcache/rtl/src/common/hpdcache_sram.sv  |   56 +
 .../src/common/hpdcache_sram_wbyteenable.sv   |   58 +
 .../rtl/src/common/hpdcache_sram_wmask.sv     |   58 +
 .../rtl/src/common/hpdcache_sync_buffer.sv    |   89 +
 .../common/macros/behav/hpdcache_sram_1rw.sv  |   60 +
 .../behav/hpdcache_sram_wbyteenable_1rw.sv    |   63 +
 .../macros/behav/hpdcache_sram_wmask_1rw.sv   |   61 +
 .../hpdcache/rtl/src/hpdcache.sv              |  658 ++++
 .../hpdcache/rtl/src/hpdcache_amo.sv          |   67 +
 .../hpdcache/rtl/src/hpdcache_cmo.sv          |  250 ++
 .../hpdcache/rtl/src/hpdcache_core_arbiter.sv |  171 +
 .../hpdcache/rtl/src/hpdcache_ctrl.sv         |  760 ++++
 .../hpdcache/rtl/src/hpdcache_ctrl_pe.sv      |  620 +++
 .../hpdcache/rtl/src/hpdcache_memarray.sv     |  120 +
 .../hpdcache/rtl/src/hpdcache_memctrl.sv      |  656 ++++
 .../hpdcache/rtl/src/hpdcache_miss_handler.sv |  659 ++++
 .../hpdcache/rtl/src/hpdcache_mshr.sv         |  385 ++
 .../rtl/src/hpdcache_mshr_to_cache_set.sv     |  105 +
 .../hpdcache/rtl/src/hpdcache_pkg.sv          |  623 +++
 .../hpdcache/rtl/src/hpdcache_plru.sv         |  138 +
 .../hpdcache/rtl/src/hpdcache_rtab.sv         |  666 ++++
 .../hpdcache/rtl/src/hpdcache_uncached.sv     |  965 +++++
 .../hpdcache/rtl/src/hpdcache_wbuf.sv         |  678 ++++
 .../hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv |  228 ++
 .../rtl/src/hwpf_stride/hwpf_stride.sv        |  374 ++
 .../rtl/src/hwpf_stride/hwpf_stride_arb.sv    |  117 +
 .../rtl/src/hwpf_stride/hwpf_stride_pkg.sv    |   68 +
 .../src/hwpf_stride/hwpf_stride_snooper.sv    |   38 +
 .../src/hwpf_stride/hwpf_stride_wrapper.sv    |  265 ++
 .../utils/hpdcache_mem_req_read_arbiter.sv    |  103 +
 .../utils/hpdcache_mem_req_write_arbiter.sv   |  193 +
 .../rtl/src/utils/hpdcache_mem_resp_demux.sv  |  108 +
 .../rtl/src/utils/hpdcache_mem_to_axi_read.sv |   95 +
 .../src/utils/hpdcache_mem_to_axi_write.sv    |  148 +
 .../core/cache_subsystem/miss_handler.sv      |  826 ++++
 .../cache_subsystem/std_cache_subsystem.sv    |  315 ++
 .../core/cache_subsystem/std_nbdcache.sv      |  279 ++
 .../core/cache_subsystem/tag_cmp.sv           |  106 +
 .../core/cache_subsystem/wt_axi_adapter.sv    |  712 ++++
 .../cache_subsystem/wt_cache_subsystem.sv     |  233 ++
 .../core/cache_subsystem/wt_dcache.sv         |  360 ++
 .../core/cache_subsystem/wt_dcache_ctrl.sv    |  299 ++
 .../core/cache_subsystem/wt_dcache_mem.sv     |  428 +++
 .../cache_subsystem/wt_dcache_missunit.sv     |  645 ++++
 .../core/cache_subsystem/wt_dcache_wbuffer.sv |  635 +++
 test/type_param/core/commit_stage.sv          |  298 ++
 test/type_param/core/compressed_decoder.sv    |  935 +++++
 test/type_param/core/controller.sv            |  194 +
 test/type_param/core/csr_buffer.sv            |   76 +
 test/type_param/core/csr_regfile.sv           | 1646 ++++++++
 test/type_param/core/cva6.sv                  | 1401 +++++++
 .../cva6_accel_first_pass_decoder_stub.sv     |   31 +
 test/type_param/core/cva6_rvfi.sv             |  294 ++
 test/type_param/core/cva6_rvfi_probes.sv      |   81 +
 .../cvxif_example_coprocessor.sv              |  155 +
 .../cvxif_example/include/cvxif_instr_pkg.sv  |   47 +
 .../core/cvxif_example/instr_decoder.sv       |   49 +
 test/type_param/core/cvxif_fu.sv              |  112 +
 test/type_param/core/decoder.sv               | 1397 +++++++
 test/type_param/core/ex_stage.sv              |  413 ++
 test/type_param/core/fpu_wrap.sv              |  568 +++
 test/type_param/core/frontend/bht.sv          |  215 ++
 test/type_param/core/frontend/btb.sv          |  185 +
 test/type_param/core/frontend/frontend.sv     |  516 +++
 test/type_param/core/frontend/instr_queue.sv  |  459 +++
 test/type_param/core/frontend/instr_scan.sv   |   83 +
 test/type_param/core/frontend/ras.sv          |   71 +
 test/type_param/core/id_stage.sv              |  143 +
 test/type_param/core/include/acc_pkg.sv       |   47 +
 test/type_param/core/include/ariane_pkg.sv    |  994 +++++
 test/type_param/core/include/config_pkg.sv    |  181 +
 .../include/cv64a6_imafdc_sv39_config_pkg.sv  |  150 +
 .../cva6_hpdcache_default_config_pkg.sv       |  123 +
 test/type_param/core/include/cvxif_pkg.sv     |  110 +
 .../core/include/instr_tracer_pkg.sv          |  202 +
 test/type_param/core/include/riscv_pkg.sv     |  851 ++++
 test/type_param/core/include/std_cache_pkg.sv |   98 +
 test/type_param/core/include/wt_cache_pkg.sv  |  344 ++
 test/type_param/core/instr_realign.sv         |  361 ++
 test/type_param/core/issue_read_operands.sv   |  604 +++
 test/type_param/core/issue_stage.sv           |  199 +
 test/type_param/core/load_store_unit.sv       |  493 +++
 test/type_param/core/load_unit.sv             |  534 +++
 test/type_param/core/lsu_bypass.sv            |  122 +
 .../type_param/core/mmu_sv32/cva6_mmu_sv32.sv |  565 +++
 .../type_param/core/mmu_sv32/cva6_ptw_sv32.sv |  400 ++
 .../core/mmu_sv32/cva6_shared_tlb_sv32.sv     |  367 ++
 .../type_param/core/mmu_sv32/cva6_tlb_sv32.sv |  281 ++
 test/type_param/core/mmu_sv39/mmu.sv          |  519 +++
 test/type_param/core/mmu_sv39/ptw.sv          |  409 ++
 test/type_param/core/mmu_sv39/tlb.sv          |  290 ++
 test/type_param/core/mult.sv                  |  149 +
 test/type_param/core/multiplier.sv            |  158 +
 test/type_param/core/perf_counters.sv         |  226 ++
 test/type_param/core/pmp/src/pmp.sv           |   94 +
 test/type_param/core/pmp/src/pmp_entry.sv     |  125 +
 test/type_param/core/scoreboard.sv            |  452 +++
 test/type_param/core/serdiv.sv                |  269 ++
 test/type_param/core/store_buffer.sv          |  291 ++
 test/type_param/core/store_unit.sv            |  300 ++
 .../corev_apu/axi_mem_if/src/axi2mem.sv       |  301 ++
 test/type_param/corev_apu/bootrom/bootrom.sv  |  225 ++
 .../corev_apu/clint/axi_lite_interface.sv     |  170 +
 test/type_param/corev_apu/clint/clint.sv      |  294 ++
 .../corev_apu/fpga/src/apb_timer/apb_timer.sv |   88 +
 .../corev_apu/fpga/src/apb_timer/timer.sv     |  145 +
 .../corev_apu/fpga/src/axi2apb/src/axi2apb.sv |  449 +++
 .../fpga/src/axi2apb/src/axi2apb_64_32.sv     |  745 ++++
 .../fpga/src/axi_slice/src/axi_ar_buffer.sv   |   74 +
 .../fpga/src/axi_slice/src/axi_aw_buffer.sv   |   74 +
 .../fpga/src/axi_slice/src/axi_b_buffer.sv    |   54 +
 .../fpga/src/axi_slice/src/axi_r_buffer.sv    |   60 +
 .../src/axi_slice/src/axi_single_slice.sv     |   51 +
 .../fpga/src/axi_slice/src/axi_slice.sv       |  311 ++
 .../fpga/src/axi_slice/src/axi_slice_wrap.sv  |  115 +
 .../fpga/src/axi_slice/src/axi_w_buffer.sv    |   55 +
 .../include/register_interface/assign.svh     |   46 +
 .../include/register_interface/typedef.svh    |   38 +
 .../register_interface/src/apb_to_reg.sv      |   39 +
 .../register_interface/src/reg_intf.sv        |   43 +
 .../riscv-dbg/debug_rom/debug_rom.sv          |   66 +
 .../corev_apu/riscv-dbg/src/dm_csrs.sv        |  634 +++
 .../corev_apu/riscv-dbg/src/dm_mem.sv         |  523 +++
 .../corev_apu/riscv-dbg/src/dm_pkg.sv         |  436 +++
 .../corev_apu/riscv-dbg/src/dm_sba.sv         |  170 +
 .../corev_apu/riscv-dbg/src/dm_top.sv         |  218 ++
 .../corev_apu/riscv-dbg/src/dmi_cdc.sv        |   73 +
 .../corev_apu/riscv-dbg/src/dmi_jtag.sv       |  271 ++
 .../corev_apu/riscv-dbg/src/dmi_jtag_tap.sv   |  349 ++
 .../corev_apu/rv_plic/rtl/plic_regmap.sv      |  357 ++
 .../corev_apu/rv_plic/rtl/plic_top.sv         |  157 +
 .../corev_apu/rv_plic/rtl/rv_plic_gateway.sv  |   60 +
 .../corev_apu/rv_plic/rtl/rv_plic_target.sv   |  125 +
 test/type_param/corev_apu/src/ariane.sv       |   86 +
 .../src/axi_riscv_atomics/src/axi_res_tbl.sv  |   93 +
 .../axi_riscv_atomics/src/axi_riscv_amos.sv   | 1004 +++++
 .../src/axi_riscv_amos_alu.sv                 |   78 +
 .../src/axi_riscv_atomics.sv                  |  400 ++
 .../src/axi_riscv_atomics_wrap.sv             |  151 +
 .../axi_riscv_atomics/src/axi_riscv_lrsc.sv   |  509 +++
 .../src/axi_riscv_lrsc_wrap.sv                |  148 +
 .../type_param/corev_apu/tb/ariane_axi_pkg.sv |  109 +
 .../corev_apu/tb/ariane_axi_soc_pkg.sv        |  102 +
 .../corev_apu/tb/ariane_peripherals.sv        |  619 +++
 .../type_param/corev_apu/tb/ariane_soc_pkg.sv |   68 +
 .../corev_apu/tb/ariane_testharness.sv        |  807 ++++
 test/type_param/corev_apu/tb/axi_intf.sv      |  311 ++
 .../corev_apu/tb/common/mock_uart.sv          |  120 +
 test/type_param/corev_apu/tb/common/uart.sv   |  104 +
 test/type_param/corev_apu/tb/rvfi_tracer.sv   |  134 +
 test/type_param/sv2v.sh                       |  249 ++
 test/type_param/sv2v_corrected.sh             |  249 ++
 .../openhwgroup/cvfpu/src/fpnew_cast_multi.sv |  794 ++++
 .../openhwgroup/cvfpu/src/fpnew_classifier.sv |   74 +
 .../cvfpu/src/fpnew_divsqrt_multi.sv          |  366 ++
 .../vendor/openhwgroup/cvfpu/src/fpnew_fma.sv |  690 ++++
 .../openhwgroup/cvfpu/src/fpnew_fma_multi.sv  |  839 ++++
 .../openhwgroup/cvfpu/src/fpnew_noncomp.sv    |  415 ++
 .../cvfpu/src/fpnew_opgroup_block.sv          |  244 ++
 .../cvfpu/src/fpnew_opgroup_fmt_slice.sv      |  292 ++
 .../cvfpu/src/fpnew_opgroup_multifmt_slice.sv |  449 +++
 .../vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv |  495 +++
 .../openhwgroup/cvfpu/src/fpnew_rounding.sv   |   76 +
 .../vendor/openhwgroup/cvfpu/src/fpnew_top.sv |  185 +
 .../src/fpu_div_sqrt_mvp/hdl/control_mvp.sv   | 3413 +++++++++++++++++
 .../fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv |   83 +
 .../fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv  |  180 +
 .../hdl/iteration_div_sqrt_mvp.sv             |   61 +
 .../fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv |  470 +++
 .../src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv |  104 +
 .../fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv    |  425 ++
 .../pulp-platform/axi/include/axi/assign.svh  |  541 +++
 .../pulp-platform/axi/include/axi/typedef.svh |  211 +
 .../pulp-platform/axi/src/axi_atop_filter.sv  |  444 +++
 .../vendor/pulp-platform/axi/src/axi_cut.sv   |  265 ++
 .../pulp-platform/axi/src/axi_delayer.sv      |  198 +
 .../vendor/pulp-platform/axi/src/axi_demux.sv |  786 ++++
 .../pulp-platform/axi/src/axi_err_slv.sv      |  261 ++
 .../pulp-platform/axi/src/axi_id_prepend.sv   |  161 +
 .../vendor/pulp-platform/axi/src/axi_join.sv  |   37 +
 .../pulp-platform/axi/src/axi_multicut.sv     |  237 ++
 .../vendor/pulp-platform/axi/src/axi_mux.sv   |  522 +++
 .../vendor/pulp-platform/axi/src/axi_pkg.sv   |  423 ++
 .../pulp-platform/axi/src/axi_to_axi_lite.sv  |  323 ++
 .../vendor/pulp-platform/axi/src/axi_xbar.sv  |  324 ++
 .../include/common_cells/registers.svh        |  221 ++
 .../common_cells/src/addr_decode.sv           |  161 +
 .../common_cells/src/cdc_2phase.sv            |  175 +
 .../common_cells/src/cf_math_pkg.sv           |   61 +
 .../pulp-platform/common_cells/src/counter.sv |   43 +
 .../common_cells/src/delta_counter.sv         |   74 +
 .../common_cells/src/deprecated/fifo_v1.sv    |   57 +
 .../common_cells/src/deprecated/fifo_v2.sv    |   79 +
 .../common_cells/src/exp_backoff.sv           |   98 +
 .../pulp-platform/common_cells/src/fifo_v3.sv |  191 +
 .../pulp-platform/common_cells/src/lfsr.sv    |  315 ++
 .../common_cells/src/lfsr_16bit.sv            |   68 +
 .../common_cells/src/lfsr_8bit.sv             |   61 +
 .../pulp-platform/common_cells/src/lzc.sv     |  112 +
 .../common_cells/src/popcount.sv              |   60 +
 .../common_cells/src/rr_arb_tree.sv           |  348 ++
 .../pulp-platform/common_cells/src/rstgen.sv  |   30 +
 .../common_cells/src/rstgen_bypass.sv         |   57 +
 .../common_cells/src/shift_reg.sv             |   53 +
 .../common_cells/src/spill_register.sv        |   46 +
 .../src/spill_register_flushable.sv           |  105 +
 .../common_cells/src/stream_arbiter.sv        |   49 +
 .../src/stream_arbiter_flushable.sv           |   82 +
 .../common_cells/src/stream_delay.sv          |  132 +
 .../common_cells/src/stream_demux.sv          |   36 +
 .../common_cells/src/stream_mux.sv            |   46 +
 .../common_cells/src/stream_register.sv       |   57 +
 .../pulp-platform/common_cells/src/unread.sv  |   21 +
 .../fpga-support/rtl/AsyncDpRam.sv            |   62 +
 .../fpga-support/rtl/AsyncThreePortRam.sv     |   66 +
 .../fpga-support/rtl/SyncDpRam.sv             |  182 +
 .../src/deprecated/cluster_clk_cells.sv       |   94 +
 .../src/deprecated/pulp_clk_cells.sv          |  107 +
 .../tech_cells_generic/src/rtl/tc_clk.sv      |  120 +
 .../tech_cells_generic/src/rtl/tc_sram.sv     |  245 ++
 250 files changed, 71133 insertions(+)
 create mode 100644 test/type_param/common/local/util/instr_tracer.sv
 create mode 100644 test/type_param/common/local/util/instr_tracer_if.sv
 create mode 100644 test/type_param/common/local/util/sram.sv
 create mode 100644 test/type_param/common/local/util/tc_sram_wrapper.sv
 create mode 100644 test/type_param/core/acc_dispatcher.sv
 create mode 100644 test/type_param/core/acc_dispatcher_corrected.sv
 create mode 100644 test/type_param/core/alu.sv
 create mode 100644 test/type_param/core/amo_buffer.sv
 create mode 100644 test/type_param/core/ariane_regfile_ff.sv
 create mode 100644 test/type_param/core/ariane_regfile_fpga.sv
 create mode 100644 test/type_param/core/axi_shim.sv
 create mode 100644 test/type_param/core/branch_unit.sv
 create mode 100644 test/type_param/core/cache_subsystem/axi_adapter.sv
 create mode 100644 test/type_param/core/cache_subsystem/cache_ctrl.sv
 create mode 100644 test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv
 create mode 100644 test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv
 create mode 100644 test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv
 create mode 100644 test/type_param/core/cache_subsystem/cva6_icache.sv
 create mode 100644 test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv
 create mode 100755 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv
 create mode 100755 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv
 create mode 100755 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv
 create mode 100755 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv
 create mode 100644 test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv
 create mode 100644 test/type_param/core/cache_subsystem/miss_handler.sv
 create mode 100644 test/type_param/core/cache_subsystem/std_cache_subsystem.sv
 create mode 100644 test/type_param/core/cache_subsystem/std_nbdcache.sv
 create mode 100644 test/type_param/core/cache_subsystem/tag_cmp.sv
 create mode 100644 test/type_param/core/cache_subsystem/wt_axi_adapter.sv
 create mode 100644 test/type_param/core/cache_subsystem/wt_cache_subsystem.sv
 create mode 100644 test/type_param/core/cache_subsystem/wt_dcache.sv
 create mode 100644 test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv
 create mode 100644 test/type_param/core/cache_subsystem/wt_dcache_mem.sv
 create mode 100644 test/type_param/core/cache_subsystem/wt_dcache_missunit.sv
 create mode 100644 test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv
 create mode 100644 test/type_param/core/commit_stage.sv
 create mode 100644 test/type_param/core/compressed_decoder.sv
 create mode 100644 test/type_param/core/controller.sv
 create mode 100644 test/type_param/core/csr_buffer.sv
 create mode 100644 test/type_param/core/csr_regfile.sv
 create mode 100644 test/type_param/core/cva6.sv
 create mode 100644 test/type_param/core/cva6_accel_first_pass_decoder_stub.sv
 create mode 100644 test/type_param/core/cva6_rvfi.sv
 create mode 100644 test/type_param/core/cva6_rvfi_probes.sv
 create mode 100644 test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv
 create mode 100644 test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv
 create mode 100644 test/type_param/core/cvxif_example/instr_decoder.sv
 create mode 100644 test/type_param/core/cvxif_fu.sv
 create mode 100644 test/type_param/core/decoder.sv
 create mode 100644 test/type_param/core/ex_stage.sv
 create mode 100644 test/type_param/core/fpu_wrap.sv
 create mode 100644 test/type_param/core/frontend/bht.sv
 create mode 100644 test/type_param/core/frontend/btb.sv
 create mode 100644 test/type_param/core/frontend/frontend.sv
 create mode 100644 test/type_param/core/frontend/instr_queue.sv
 create mode 100644 test/type_param/core/frontend/instr_scan.sv
 create mode 100644 test/type_param/core/frontend/ras.sv
 create mode 100644 test/type_param/core/id_stage.sv
 create mode 100644 test/type_param/core/include/acc_pkg.sv
 create mode 100644 test/type_param/core/include/ariane_pkg.sv
 create mode 100644 test/type_param/core/include/config_pkg.sv
 create mode 100644 test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv
 create mode 100644 test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv
 create mode 100644 test/type_param/core/include/cvxif_pkg.sv
 create mode 100644 test/type_param/core/include/instr_tracer_pkg.sv
 create mode 100644 test/type_param/core/include/riscv_pkg.sv
 create mode 100644 test/type_param/core/include/std_cache_pkg.sv
 create mode 100644 test/type_param/core/include/wt_cache_pkg.sv
 create mode 100644 test/type_param/core/instr_realign.sv
 create mode 100644 test/type_param/core/issue_read_operands.sv
 create mode 100644 test/type_param/core/issue_stage.sv
 create mode 100644 test/type_param/core/load_store_unit.sv
 create mode 100644 test/type_param/core/load_unit.sv
 create mode 100644 test/type_param/core/lsu_bypass.sv
 create mode 100644 test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv
 create mode 100644 test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv
 create mode 100644 test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv
 create mode 100644 test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv
 create mode 100644 test/type_param/core/mmu_sv39/mmu.sv
 create mode 100644 test/type_param/core/mmu_sv39/ptw.sv
 create mode 100644 test/type_param/core/mmu_sv39/tlb.sv
 create mode 100644 test/type_param/core/mult.sv
 create mode 100644 test/type_param/core/multiplier.sv
 create mode 100644 test/type_param/core/perf_counters.sv
 create mode 100644 test/type_param/core/pmp/src/pmp.sv
 create mode 100644 test/type_param/core/pmp/src/pmp_entry.sv
 create mode 100644 test/type_param/core/scoreboard.sv
 create mode 100644 test/type_param/core/serdiv.sv
 create mode 100644 test/type_param/core/store_buffer.sv
 create mode 100644 test/type_param/core/store_unit.sv
 create mode 100644 test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv
 create mode 100644 test/type_param/corev_apu/bootrom/bootrom.sv
 create mode 100644 test/type_param/corev_apu/clint/axi_lite_interface.sv
 create mode 100644 test/type_param/corev_apu/clint/clint.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/apb_timer/timer.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv
 create mode 100644 test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv
 create mode 100644 test/type_param/corev_apu/register_interface/include/register_interface/assign.svh
 create mode 100644 test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh
 create mode 100644 test/type_param/corev_apu/register_interface/src/apb_to_reg.sv
 create mode 100644 test/type_param/corev_apu/register_interface/src/reg_intf.sv
 create mode 100644 test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv
 create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv
 create mode 100755 test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv
 create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv
 create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv
 create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dm_top.sv
 create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv
 create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv
 create mode 100644 test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv
 create mode 100644 test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv
 create mode 100644 test/type_param/corev_apu/rv_plic/rtl/plic_top.sv
 create mode 100644 test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv
 create mode 100644 test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv
 create mode 100644 test/type_param/corev_apu/src/ariane.sv
 create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv
 create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv
 create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv
 create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv
 create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv
 create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv
 create mode 100644 test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv
 create mode 100644 test/type_param/corev_apu/tb/ariane_axi_pkg.sv
 create mode 100644 test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv
 create mode 100644 test/type_param/corev_apu/tb/ariane_peripherals.sv
 create mode 100644 test/type_param/corev_apu/tb/ariane_soc_pkg.sv
 create mode 100644 test/type_param/corev_apu/tb/ariane_testharness.sv
 create mode 100644 test/type_param/corev_apu/tb/axi_intf.sv
 create mode 100644 test/type_param/corev_apu/tb/common/mock_uart.sv
 create mode 100644 test/type_param/corev_apu/tb/common/uart.sv
 create mode 100644 test/type_param/corev_apu/tb/rvfi_tracer.sv
 create mode 100755 test/type_param/sv2v.sh
 create mode 100755 test/type_param/sv2v_corrected.sh
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
 create mode 100644 test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh
 create mode 100644 test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_join.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv
 create mode 100644 test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/counter.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv
 create mode 100644 test/type_param/vendor/pulp-platform/common_cells/src/unread.sv
 create mode 100644 test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv
 create mode 100644 test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv
 create mode 100644 test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv
 create mode 100644 test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv
 create mode 100644 test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv
 create mode 100644 test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv
 create mode 100644 test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv

diff --git a/test/type_param/common/local/util/instr_tracer.sv b/test/type_param/common/local/util/instr_tracer.sv
new file mode 100644
index 0000000..17c11e5
--- /dev/null
+++ b/test/type_param/common/local/util/instr_tracer.sv
@@ -0,0 +1,223 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Main Class
+
+`ifndef VERILATOR
+//pragma translate_off
+`include "ex_trace_item.svh"
+`include "instr_trace_item.svh"
+
+module instr_tracer (
+  instr_tracer_if   tracer_if,
+  input logic[riscv::XLEN-1:0] hart_id_i
+);
+
+  // instructions accepted at decode, waiting to be issued (moved to issue_queue on issue_ack)
+  logic [31:0] decode_queue [$];
+  // issued instructions, waiting to be committed (popped on commit_ack)
+  logic [31:0] issue_queue [$];
+  // scoreboard entries captured at issue, pushed/popped together with issue_queue
+  ariane_pkg::scoreboard_entry_t issue_sbe_queue [$];
+  ariane_pkg::scoreboard_entry_t issue_sbe;  // entry popped for the instruction being committed
+  // resolved branches, popped when a CTRL_FLOW instruction commits
+  ariane_pkg::bp_resolve_t bp [$];
+  // shadow copies of the register files written through we_gpr / we_fpr
+  logic [63:0] gp_reg_file [32];
+  logic [63:0] fp_reg_file [32];
+  // 64 bit clock tick count, incremented once per traced cycle
+  longint unsigned clk_ticks;
+  int f, commit_log;  // file descriptors: trace file and commit log
+  // address tracking
+  // queues of store/load physical addresses; address_mapping holds the one last popped at commit
+  logic [63:0] store_mapping[$], load_mapping[$], address_mapping;
+
+  // static uvm_cmdline_processor uvcl = uvm_cmdline_processor::get_inst();
+
+  function void create_file(logic [63:0] hart_id);
+    string fn, fn_commit_log;
+    // hart_id is integral, so format it with %0d; the real-number %f format expects a real argument
+    $sformat(fn, "trace_hart_%0d.log", hart_id);
+    $sformat(fn_commit_log, "trace_hart_%0d_commit.log", hart_id);
+    $display("[TRACER] Output filename is: %s", fn);
+    f = $fopen(fn,"w");  // per-hart trace file, written by printInstr/printException
+    if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) commit_log = $fopen(fn_commit_log, "w");
+  endfunction : create_file
+
+  task trace();
+    automatic logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction;
+    automatic ariane_pkg::scoreboard_entry_t commit_instruction;
+    // zero-initialize every entry of both shadow register files
+    gp_reg_file  = '{default:0};
+    fp_reg_file  = '{default:0};
+
+    forever begin
+      automatic ariane_pkg::bp_resolve_t bp_instruction = '0;
+      // wait for the next clocking event; unless rstn is exactly 1, drop all state and skip the cycle
+      @(tracer_if.pck) if (tracer_if.pck.rstn !== 1'b1) begin
+        flush();
+        continue;
+      end
+
+      // increment clock tick
+      clk_ticks++;
+
+      // -------------------
+      // Instruction Decode
+      // -------------------
+      // an instruction is recorded when fetch_valid and fetch_ack are both set
+      if (tracer_if.pck.fetch_valid && tracer_if.pck.fetch_ack) begin
+        decode_instruction = tracer_if.pck.instruction;
+        decode_queue.push_back(decode_instruction);
+      end
+      // -------------------
+      // Instruction Issue
+      // -------------------
+      // we got a new issue ack, so put the element from the decode queue to
+      // the issue queue (skipped while un-issued instructions are being flushed)
+      if (tracer_if.pck.issue_ack && !tracer_if.pck.flush_unissued) begin
+        issue_instruction = decode_queue.pop_front();
+        issue_queue.push_back(issue_instruction);
+        // also save the scoreboard entry to a separate issue queue
+        issue_sbe_queue.push_back(ariane_pkg::scoreboard_entry_t'(tracer_if.pck.issue_sbe));
+      end
+
+      // --------------------
+      // Address Translation (record physical addresses of valid stores and non-killed loads)
+      // --------------------
+      if (tracer_if.pck.st_valid) begin
+        store_mapping.push_back(tracer_if.pck.st_paddr);
+      end
+
+      if (tracer_if.pck.ld_valid && !tracer_if.pck.ld_kill) begin
+        load_mapping.push_back(tracer_if.pck.ld_paddr);
+      end
+      // ----------------------
+      // Store resolved branches
+      // ----------------------
+      if (tracer_if.pck.resolve_branch.valid) begin
+        bp.push_back(tracer_if.pck.resolve_branch);
+      end
+      // --------------
+      //  Commit
+      // --------------
+      // handle both commit ports; each acknowledged port retires one instruction
+      for (int i = 0; i < 2; i++) begin
+        if (tracer_if.pck.commit_ack[i]) begin
+          commit_instruction = ariane_pkg::scoreboard_entry_t'(tracer_if.pck.commit_instr[i]);
+          issue_commit_instruction = issue_queue.pop_front();
+          issue_sbe = issue_sbe_queue.pop_front();
+          // loads/stores pop their recorded physical address; other instructions leave address_mapping unchanged
+          if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::LOAD)
+            address_mapping = load_mapping.pop_front();
+          else if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::STORE)
+            address_mapping = store_mapping.pop_front();
+
+          if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::CTRL_FLOW)
+            bp_instruction = bp.pop_front();
+          // the scoreboard's issue entry still contains the immediate value as a result
+          // check if the write back is valid, if not we need to source the result from the register file
+          // (FP or GP shadow file, chosen by is_rd_fpr) as the most recent version of this register will be there.
+          if (tracer_if.pck.we_gpr[i] || tracer_if.pck.we_fpr[i]) begin
+            printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
+          end else if (ariane_pkg::is_rd_fpr(commit_instruction.op)) begin
+            printInstr(issue_sbe, issue_commit_instruction, fp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
+          end else begin
+            printInstr(issue_sbe, issue_commit_instruction, gp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
+          end
+        end
+      end
+      // --------------
+      // Exceptions (a BREAKPOINT cause raised while in debug mode is not logged)
+      // --------------
+      if (tracer_if.pck.exception.valid && !(tracer_if.pck.debug_mode && tracer_if.pck.exception.cause == riscv::BREAKPOINT)) begin
+        // log the exception together with the PC seen on commit port 0
+        printException(tracer_if.pck.commit_instr[0].pc, tracer_if.pck.exception.cause, tracer_if.pck.exception.tval);
+      end
+      // ----------------------
+      // Commit Registers
+      // ----------------------
+      // update shadow reg files here; GPR writes to address 0 are ignored
+      for (int i = 0; i < 2; i++) begin
+        if (tracer_if.pck.we_gpr[i] && tracer_if.pck.waddr[i] != 5'b0) begin
+          gp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
+        end else if (tracer_if.pck.we_fpr[i]) begin
+          fp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
+        end
+      end
+      // --------------
+      // Flush Signals
+      // --------------
+      // flush un-issued instructions
+      if (tracer_if.pck.flush_unissued) begin
+        flushDecode();
+      end
+      // flush whole pipeline
+      if (tracer_if.pck.flush) begin
+        flush();
+      end
+    end
+
+  endtask
+
+  // Discard every instruction that was decoded but has not been issued yet.
+  function void flushDecode ();
+    decode_queue.delete();
+  endfunction
+
+  // Drop all in-flight tracer state; trace() calls this while in reset and on a pipeline flush.
+  function void flush ();
+    decode_queue.delete();  // same effect as flushDecode()
+    // issued instructions together with their scoreboard entries
+    issue_queue.delete();
+    issue_sbe_queue.delete();
+    // recorded store/load addresses and resolved branches
+    store_mapping.delete();
+    load_mapping.delete();
+    bp.delete();
+  endfunction
+
+  function void printInstr(ariane_pkg::scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [riscv::PLEN-1:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, ariane_pkg::bp_resolve_t bp);
+    // build the trace record from the arguments, the tick count and both shadow register files
+    automatic instr_trace_item trace_item = new ($time, clk_ticks, sbe, instr, gp_reg_file, fp_reg_file, result, paddr, priv_lvl, debug_mode, bp);
+    automatic string trace_line = trace_item.printInstr();
+    // outside debug mode, also emit the record built by riscv::spikeCommitLog when that log is enabled
+    if (!debug_mode && ariane_pkg::ENABLE_SPIKE_COMMIT_LOG)
+      $fwrite(commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result, ariane_pkg::is_rd_fpr(sbe.op)));
+    $fwrite(f, {trace_line, "\n"});  // the formatted line always goes to the trace file
+  endfunction
+
+  function void printException(logic [riscv::VLEN-1:0] pc, logic [63:0] cause, logic [63:0] tval);
+    automatic ex_trace_item ex_item = new (pc, cause, tval);  // record describing this exception
+    automatic string ex_line = ex_item.printException();      // its formatted text
+    $fwrite(f, {ex_line, "\n"});                               // appended as one line of the trace file
+  endfunction
+
+  function void close();  // close whichever of the two log files holds a non-zero descriptor
+    if (f != 0) $fclose(f);
+    if (commit_log != 0 && ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) $fclose(commit_log);
+  endfunction
+
+
+  initial begin
+    #15ns;  // NOTE(review): presumably lets hart_id_i settle before it is sampled - confirm
+    create_file(hart_id_i);  // open the per-hart log file(s)
+    trace();  // never returns: the task loops forever on the clocking event
+  end
+
+  final begin
+    close();  // close the log files when simulation ends
+  end
+
+endmodule : instr_tracer
+//pragma translate_on
+`endif
diff --git a/test/type_param/common/local/util/instr_tracer_if.sv b/test/type_param/common/local/util/instr_tracer_if.sv
new file mode 100644
index 0000000..5015cfd
--- /dev/null
+++ b/test/type_param/common/local/util/instr_tracer_if.sv
@@ -0,0 +1,67 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Interface
+
+`ifndef VERILATOR
+`ifndef INSTR_TRACER_IF_SV
+`define INSTR_TRACER_IF_SV
+interface instr_tracer_if (
+        input clk
+    );
+    // Bundle of probe signals sampled by the instruction tracer on every rising edge of clk.
+    logic             rstn;
+    logic             flush_unissued;
+    logic             flush;
+    // Decode
+    logic [31:0]      instruction;
+    logic             fetch_valid;
+    logic             fetch_ack;
+    // Issue stage
+    logic                           issue_ack; // issue acknowledged
+    ariane_pkg::scoreboard_entry_t  issue_sbe; // issue scoreboard entry
+    // WB stage (one entry per commit port)
+    logic [1:0][4:0]  waddr;
+    logic [1:0][63:0] wdata;
+    logic [1:0]       we_gpr;
+    logic [1:0]       we_fpr;
+    // commit stage (two commit ports)
+    ariane_pkg::scoreboard_entry_t [1:0] commit_instr; // commit instruction
+    logic                          [1:0] commit_ack;
+    // address translation
+    // stores
+    logic                         st_valid;
+    logic [riscv::PLEN-1:0]       st_paddr;
+    // loads
+    logic                         ld_valid;
+    logic                         ld_kill;
+    logic [riscv::PLEN-1:0]       ld_paddr;
+    // resolved branch (valid flag plus resolution data)
+    ariane_pkg::bp_resolve_t resolve_branch;
+    // exceptions
+    ariane_pkg::exception_t  exception;
+    // current privilege level
+    riscv::priv_lvl_t  priv_lvl;
+    logic              debug_mode;
+    // the tracer only has a passive interface: every signal is an input of the clocking block below
+
+    //pragma translate_off
+    clocking pck @(posedge clk);
+        input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr,
+              st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, resolve_branch,
+              wdata, we_gpr, we_fpr, commit_instr, commit_ack, exception, priv_lvl, debug_mode;
+    endclocking
+    //pragma translate_on
+
+endinterface
+`endif
+`endif
diff --git a/test/type_param/common/local/util/sram.sv b/test/type_param/common/local/util/sram.sv
new file mode 100644
index 0000000..4c0f2d2
--- /dev/null
+++ b/test/type_param/common/local/util/sram.sv
@@ -0,0 +1,107 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba    <zarubaf@iis.ee.ethz.ch>, ETH Zurich
+//         Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 15.08.2018
+// Description: SRAM wrapper for FPGA (requires the fpga-support submodule)
+//
+// Note: the wrapped module contains two different implementations for
+// ALTERA and XILINX tools, since these follow different coding styles for
+// inferrable RAMS with byte enable. define `FPGA_TARGET_XILINX or
+// `FPGA_TARGET_ALTERA in your build environment (default is ALTERA)
+
+module sram #(
+    parameter DATA_WIDTH = 64,
+    parameter USER_WIDTH = 1,
+    parameter USER_EN    = 0,
+    parameter NUM_WORDS  = 1024,
+    parameter SIM_INIT   = "none",
+    parameter OUT_REGS   = 0     // output registers in FPGA macro (read lat = 2); NOTE(review): not referenced in this module
+)(
+   input  logic                          clk_i,
+   input  logic                          rst_ni,
+   input  logic                          req_i,
+   input  logic                          we_i,
+   input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
+   input  logic [USER_WIDTH-1:0]         wuser_i,
+   input  logic [DATA_WIDTH-1:0]         wdata_i,
+   input  logic [(DATA_WIDTH+7)/8-1:0]   be_i,
+   output logic [USER_WIDTH-1:0]         ruser_o,
+   output logic [DATA_WIDTH-1:0]         rdata_o
+);
+
+localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64;    // data width rounded up to a multiple of 64
+localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED; // To be fine tuned to reduce memory size
+localparam BE_WIDTH_ALIGNED   = (((DATA_WIDTH+7)/8+7)/8)*8; // byte-enable width rounded up to a multiple of 8
+
+logic [DATA_WIDTH_ALIGNED-1:0]  wdata_aligned;
+logic [USER_WIDTH_ALIGNED-1:0]  wuser_aligned;
+logic [BE_WIDTH_ALIGNED-1:0]    be_aligned;
+logic [DATA_WIDTH_ALIGNED-1:0]  rdata_aligned;
+logic [USER_WIDTH_ALIGNED-1:0]  ruser_aligned;
+
+// pad write data/user/byte-enables to the aligned widths and trim the read values back
+always_comb begin : p_align
+    wdata_aligned                    ='0;
+    wuser_aligned                    ='0;
+    be_aligned                       ='0;
+    wdata_aligned[DATA_WIDTH-1:0]    = wdata_i;
+    wuser_aligned[USER_WIDTH-1:0]    = wuser_i;
+    be_aligned[BE_WIDTH_ALIGNED-1:0] = be_i;  // full-width target: the narrower be_i is zero-extended
+
+    rdata_o = rdata_aligned[DATA_WIDTH-1:0];
+    ruser_o = ruser_aligned[USER_WIDTH-1:0];
+end
+
+  for (genvar k = 0; k<(DATA_WIDTH+63)/64; k++) begin : gen_cut  // one 64-bit memory per slice
+      // unused byte-enable segments (8bits) are culled by the tool
+      tc_sram_wrapper #(
+        .NumWords(NUM_WORDS),           // Number of Words in data array
+        .DataWidth(64),                 // Data signal width
+        .ByteWidth(32'd8),              // Width of a data byte
+        .NumPorts(32'd1),               // Number of read and write ports
+        .Latency(32'd1),                // Latency when the read data is available
+        .SimInit(SIM_INIT),             // Simulation initialization
+        .PrintSimCfg(1'b0)              // Print configuration
+      ) i_tc_sram_wrapper (
+          .clk_i    ( clk_i                     ),
+          .rst_ni   ( rst_ni                    ),
+          .req_i    ( req_i                     ),
+          .we_i     ( we_i                      ),
+          .be_i     ( be_aligned[k*8 +: 8]      ),
+          .wdata_i  ( wdata_aligned[k*64 +: 64] ),
+          .addr_i   ( addr_i                    ),
+          .rdata_o  ( rdata_aligned[k*64 +: 64] )
+      );
+      if (USER_EN > 0) begin : gen_mem_user  // optional second memory holding the user bits
+        tc_sram_wrapper #(
+          .NumWords(NUM_WORDS),           // Number of Words in data array
+          .DataWidth(64),                 // Data signal width
+          .ByteWidth(32'd8),              // Width of a data byte
+          .NumPorts(32'd1),               // Number of read and write ports
+          .Latency(32'd1),                // Latency when the read data is available
+          .SimInit(SIM_INIT),             // Simulation initialization
+          .PrintSimCfg(1'b0)              // Print configuration
+        ) i_tc_sram_wrapper_user (
+            .clk_i    ( clk_i                     ),
+            .rst_ni   ( rst_ni                    ),
+            .req_i    ( req_i                     ),
+            .we_i     ( we_i                      ),
+            .be_i     ( be_aligned[k*8 +: 8]      ),  // shares the byte enables of the data memory
+            .wdata_i  ( wuser_aligned[k*64 +: 64] ),
+            .addr_i   ( addr_i                    ),
+            .rdata_o  ( ruser_aligned[k*64 +: 64] )
+        );
+      end else begin
+        assign ruser_aligned[k*64 +: 64] = '0;  // no user memory: user read data is tied to zero
+      end
+  end
+endmodule : sram
diff --git a/test/type_param/common/local/util/tc_sram_wrapper.sv b/test/type_param/common/local/util/tc_sram_wrapper.sv
new file mode 100644
index 0000000..ae3287d
--- /dev/null
+++ b/test/type_param/common/local/util/tc_sram_wrapper.sv
@@ -0,0 +1,60 @@
+// Copyright 2022 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Jean-Roch COULON - Thales
+
+module tc_sram_wrapper #(
+  parameter int unsigned NumWords     = 32'd1024, // Number of Words in data array
+  parameter int unsigned DataWidth    = 32'd128,  // Data signal width
+  parameter int unsigned ByteWidth    = 32'd8,    // Width of a data byte
+  parameter int unsigned NumPorts     = 32'd2,    // Number of read and write ports
+  parameter int unsigned Latency      = 32'd1,    // Latency when the read data is available
+  parameter              SimInit      = "none",   // Simulation initialization
+  parameter bit          PrintSimCfg  = 1'b0,     // Print configuration
+  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
+  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,  // at least one address bit
+  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
+  parameter type         addr_t    = logic [AddrWidth-1:0],  // per-port address vector
+  parameter type         data_t    = logic [DataWidth-1:0],  // per-port data word
+  parameter type         be_t      = logic [BeWidth-1:0]     // per-port byte-enable vector
+) (
+  input  logic                 clk_i,      // Clock
+  input  logic                 rst_ni,     // Asynchronous reset active low
+  // input ports
+  input  logic  [NumPorts-1:0] req_i,      // request
+  input  logic  [NumPorts-1:0] we_i,       // write enable
+  input  addr_t [NumPorts-1:0] addr_i,     // request address
+  input  data_t [NumPorts-1:0] wdata_i,    // write data
+  input  be_t   [NumPorts-1:0] be_i,       // write byte enable
+  // output ports
+  output data_t [NumPorts-1:0] rdata_o     // read data
+);
+// Every parameter and port is forwarded unchanged to the tc_sram instance between the two pragmas.
+// synthesis translate_off
+
+  tc_sram #(
+    .NumWords(NumWords),
+    .DataWidth(DataWidth),
+    .ByteWidth(ByteWidth),
+    .NumPorts(NumPorts),
+    .Latency(Latency),
+    .SimInit(SimInit),
+    .PrintSimCfg(PrintSimCfg)
+  ) i_tc_sram (
+      .clk_i    ( clk_i   ),
+      .rst_ni   ( rst_ni  ),
+      .req_i    ( req_i   ),
+      .we_i     ( we_i    ),
+      .be_i     ( be_i    ),
+      .wdata_i  ( wdata_i ),
+      .addr_i   ( addr_i  ),
+      .rdata_o  ( rdata_o )
+    );
+
+// synthesis translate_on
+// Tools that honour the pragmas above see no instance here, which leaves rdata_o undriven for them.
+endmodule
diff --git a/test/type_param/core/acc_dispatcher.sv b/test/type_param/core/acc_dispatcher.sv
new file mode 100644
index 0000000..8b5998a
--- /dev/null
+++ b/test/type_param/core/acc_dispatcher.sv
@@ -0,0 +1,423 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors: Matheus Cavalcante, ETH Zurich
+//          Nils Wistoff, ETH Zurich
+// Date: 20.11.2020
+// Description: Functional unit that dispatches CVA6 instructions to accelerators.
+
+module acc_dispatcher
+  import ariane_pkg::*;
+  import riscv::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg    = config_pkg::cva6_cfg_empty,
+    parameter type                   acc_req_t  = acc_pkg::accelerator_req_t,
+    parameter type                   acc_resp_t = acc_pkg::accelerator_resp_t,
+    parameter type                   acc_cfg_t  = logic,
+    parameter acc_cfg_t              AccCfg     = '0
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    // Interface with the CSR regfile
+    input logic acc_cons_en_i,  // Accelerator memory consistent mode
+    output logic acc_fflags_valid_o,
+    output logic [4:0] acc_fflags_o,
+    // Interface with the CSRs
+    input priv_lvl_t ld_st_priv_lvl_i,
+    input logic sum_i,
+    input pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][PLEN-3:0] pmpaddr_i,
+    input logic [2:0] fcsr_frm_i,
+    output logic dirty_v_state_o,
+    // Interface with the issue stage
+    input scoreboard_entry_t issue_instr_i,
+    input logic issue_instr_hs_i,
+    output logic issue_stall_o,
+    input fu_data_t fu_data_i,
+    input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
+    output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
+    output xlen_t acc_result_o,
+    output logic acc_valid_o,
+    output exception_t acc_exception_o,
+    // Interface with the execute stage
+    output logic acc_valid_ex_o,  // FU executed
+    // Interface with the commit stage
+    input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
+    input logic commit_st_barrier_i,  // A store barrier was commited
+    // Interface with the load/store unit
+    output logic acc_stall_st_pending_o,
+    input logic acc_no_st_pending_i,
+    input dcache_req_i_t [2:0] dcache_req_ports_i,
+    // Interface with the controller
+    output logic ctrl_halt_o,
+    input logic flush_unissued_instr_i,
+    input logic flush_ex_i,
+    output logic flush_pipeline_o,
+    // Interface with cache subsystem
+    output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
+    input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
+    input logic inval_ready_i,
+    output logic inval_valid_o,
+    output logic [63:0] inval_addr_o,
+    // Accelerator interface
+    output acc_req_t acc_req_o,
+    input acc_resp_t acc_resp_i
+);
+
+  `include "common_cells/registers.svh"
+
+  import cf_math_pkg::idx_width;
+
+  /***********************
+   *  Common signals     *
+   ***********************/
+
+  logic acc_ready;
+  logic acc_valid_d, acc_valid_q;
+
+  /**************************
+   *  Accelerator issue     *
+   **************************/
+
+  // Issue accelerator instructions
+  `FF(acc_valid_q, acc_valid_d, '0)
+
+  assign acc_valid_ex_o = acc_valid_q;
+  assign acc_valid_d    = ~issue_instr_i.ex.valid &
+                          issue_instr_hs_i &
+                          (issue_instr_i.fu == ACCEL) &
+                          ~flush_unissued_instr_i;
+
+  // Accelerator load/store pending signals
+  logic acc_no_ld_pending;
+  logic acc_no_st_pending;
+
+  // Stall issue stage in three cases:
+  always_comb begin : stall_issue
+    unique case (issue_instr_i.fu)
+      ACCEL:
+      // 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
+      issue_stall_o = ~acc_ready;
+      LOAD:
+      // 2. We're issuing a scalar load but there is an inflight accelerator store.
+      issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
+      STORE:
+      // 3. We're issuing a scalar store but there is an inflight accelerator load or store.
+      issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
+      default: issue_stall_o = 1'b0;
+    endcase
+  end
+
+  /***********************
+   *  Instruction queue  *
+   ***********************/
+
+  localparam InstructionQueueDepth = 3;
+
+  fu_data_t                                        acc_data;
+  fu_data_t                                        acc_insn_queue_o;
+  logic                                            acc_insn_queue_pop;
+  logic                                            acc_insn_queue_empty;
+  logic     [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
+  logic                                            acc_commit;
+  logic     [                   TRANS_ID_BITS-1:0] acc_commit_trans_id;
+
+  assign acc_data = acc_valid_ex_o ? fu_data_i : '0;
+
+  fifo_v3 #(
+      .DEPTH       (InstructionQueueDepth),
+      .FALL_THROUGH(1'b1),
+      .dtype       (fu_data_t)
+  ) i_acc_insn_queue (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (flush_ex_i),
+      .testmode_i(1'b0),
+      .data_i    (fu_data_i),
+      .push_i    (acc_valid_q),
+      .full_o    (  /* Unused */),
+      .data_o    (acc_insn_queue_o),
+      .pop_i     (acc_insn_queue_pop),
+      .empty_o   (acc_insn_queue_empty),
+      .usage_o   (acc_insn_queue_usage)
+  );
+
+  // We are ready if the instruction queue is able to accept at least one more entry.
+  assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
+
+  /**********************************
+   *  Non-speculative instructions  *
+   **********************************/
+
+  // Keep track of the instructions that were received by the dispatcher.
+  logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q;
+  `FF(insn_pending_q, insn_pending_d, '0)
+
+  // Only non-speculative instructions can be issued to the accelerators.
+  // The following block keeps track of which transaction IDs reached the
+  // top of the scoreboard, and are therefore no longer speculative.
+  logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
+  `FF(insn_ready_q, insn_ready_d, '0)
+
+  always_comb begin : p_non_speculative_ff
+    // Maintain state
+    insn_pending_d = insn_pending_q;
+    insn_ready_d   = insn_ready_q;
+
+    // We received a new instruction
+    if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
+    // Flush all received instructions
+    if (flush_ex_i) insn_pending_d = '0;
+
+    // An accelerator instruction is no longer speculative.
+    if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
+      insn_ready_d[acc_commit_trans_id]   = 1'b1;
+      insn_pending_d[acc_commit_trans_id] = 1'b0;
+    end
+
+    // An accelerator instruction was issued.
+    if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
+  end : p_non_speculative_ff
+
+  /*************************
+   *  Accelerator request  *
+   *************************/
+
+  acc_pkg::accelerator_req_t acc_req;
+  logic                      acc_req_valid;
+  logic                      acc_req_ready;
+
+  acc_pkg::accelerator_req_t acc_req_int;
+  fall_through_register #(
+      .T(acc_pkg::accelerator_req_t)
+  ) i_accelerator_req_register (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clr_i     (1'b0),
+      .testmode_i(1'b0),
+      .data_i    (acc_req),
+      .valid_i   (acc_req_valid),
+      .ready_o   (acc_req_ready),
+      .data_o    (acc_req_int),
+      .valid_o   (acc_req_o.req_valid),
+      .ready_i   (acc_resp_i.req_ready)
+  );
+
+  assign acc_req_o.insn          = acc_req_int.insn;
+  assign acc_req_o.rs1           = acc_req_int.rs1;
+  assign acc_req_o.rs2           = acc_req_int.rs2;
+  assign acc_req_o.frm           = acc_req_int.frm;
+  assign acc_req_o.trans_id      = acc_req_int.trans_id;
+  assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
+  assign acc_req_o.acc_cons_en   = acc_cons_en_i;
+  assign acc_req_o.inval_ready   = inval_ready_i;
+
+  always_comb begin : accelerator_req_dispatcher
+    // Do not fetch from the instruction queue
+    acc_insn_queue_pop = 1'b0;
+
+    // Default values
+    acc_req            = '0;
+    acc_req_valid      = 1'b0;
+
+    // Unpack fu_data_t into accelerator_req_t
+    if (!acc_insn_queue_empty) begin
+      acc_req = '{
+          // Instruction is forwarded from the decoder as an immediate
+          // -
+          // frm rounding information is up to date during a valid request to the accelerator
+          // The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
+          // do not take place until the accelerator answers (Ariane commits in-order)
+          insn    :
+          acc_insn_queue_o.imm[
+          31
+          :
+          0
+          ],
+          rs1     : acc_insn_queue_o.operand_a,
+          rs2     : acc_insn_queue_o.operand_b,
+          frm     : fpnew_pkg::roundmode_e'(fcsr_frm_i),
+          trans_id: acc_insn_queue_o.trans_id,
+          default: '0
+      };
+      // Wait until the instruction is no longer speculative.
+      acc_req_valid      = insn_ready_q[acc_insn_queue_o.trans_id] ||
+                           (acc_commit && insn_pending_q[acc_commit_trans_id]);
+      acc_insn_queue_pop = acc_req_valid && acc_req_ready;
+    end
+  end
+
+  /**************************
+   *  Accelerator response  *
+   **************************/
+
+  logic acc_ld_disp;
+  logic acc_st_disp;
+
+  // Unpack the accelerator response
+  assign acc_trans_id_o       = acc_resp_i.trans_id;
+  assign acc_result_o         = acc_resp_i.result;
+  assign acc_valid_o          = acc_resp_i.resp_valid;
+  assign acc_exception_o      = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
+  assign acc_fflags_valid_o   = acc_resp_i.fflags_valid;
+  assign acc_fflags_o         = acc_resp_i.fflags;
+  // Always ready to receive responses
+  assign acc_req_o.resp_ready = 1'b1;
+
+  // Signal dispatched load/store to issue stage
+  assign acc_ld_disp          = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
+  assign acc_st_disp          = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
+
+  // Cache invalidation
+  assign inval_valid_o        = acc_resp_i.inval_valid;
+  assign inval_addr_o         = acc_resp_i.inval_addr;
+
+  /**************************
+   *  Accelerator commit    *
+   **************************/
+
+  // Instruction can be issued to the (in-order) back-end if
+  // it reached the top of the scoreboard and it hasn't been
+  // issued yet
+  always_comb begin : accelerator_commit
+    acc_commit = 1'b0;
+    if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1;
+    if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)
+      acc_commit = 1'b1;
+  end
+
+  // Dirty the V state if we are committing anything related to the vector accelerator
+  always_comb begin : dirty_v_state
+    dirty_v_state_o = 1'b0;
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL);
+    end
+  end
+
+  assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id
+                                                        : commit_instr_i[1].trans_id;
+
+  /**************************
+   *  Accelerator barriers  *
+   **************************/
+
+  // On a store barrier (i.e. any barrier that requires preceding stores to complete
+  // before continuing execution), halt execution while there are pending stores in
+  // the accelerator pipeline.
+  logic wait_acc_store_d, wait_acc_store_q;
+  `FF(wait_acc_store_q, wait_acc_store_d, '0)
+
+  // Set on store barrier. Clear when no store is pending.
+  assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
+  assign ctrl_halt_o      = wait_acc_store_q;
+
+  /**************************
+   *  Load/Store tracking   *
+   **************************/
+
+  // Loads
+  logic       acc_spec_loads_overflow;
+  logic [2:0] acc_spec_loads_pending;
+  logic       acc_disp_loads_overflow;
+  logic [2:0] acc_disp_loads_pending;
+
+  assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0);
+
+  // Count speculative loads. These can still be flushed.
+  counter #(
+      .WIDTH          (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_spec_loads (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (flush_ex_i),
+      .en_i      ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
+      .load_i    (1'b0),
+      .down_i    (acc_ld_disp),
+      .d_i       ('0),
+      .q_o       (acc_spec_loads_pending),
+      .overflow_o(acc_spec_loads_overflow)
+  );
+
+  // Count dispatched loads. These cannot be flushed anymore.
+  counter #(
+      .WIDTH          (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_disp_loads (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (1'b0),
+      .en_i      (acc_ld_disp ^ acc_resp_i.load_complete),
+      .load_i    (1'b0),
+      .down_i    (acc_resp_i.load_complete),
+      .d_i       ('0),
+      .q_o       (acc_disp_loads_pending),
+      .overflow_o(acc_disp_loads_overflow)
+  );
+
+  acc_dispatcher_no_load_overflow :
+  assert property (
+      @(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
+  else $error("[acc_dispatcher] Too many pending loads.");
+
+  // Stores
+  logic       acc_spec_stores_overflow;
+  logic [2:0] acc_spec_stores_pending;
+  logic       acc_disp_stores_overflow;
+  logic [2:0] acc_disp_stores_pending;
+
+  assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0);
+
+  // Count speculative stores. These can still be flushed.
+  counter #(
+      .WIDTH          (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_spec_stores (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (flush_ex_i),
+      .en_i      ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
+      .load_i    (1'b0),
+      .down_i    (acc_st_disp),
+      .d_i       ('0),
+      .q_o       (acc_spec_stores_pending),
+      .overflow_o(acc_spec_stores_overflow)
+  );
+
+  // Count dispatched stores. These cannot be flushed anymore.
+  counter #(
+      .WIDTH          (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_disp_stores (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (1'b0),
+      .en_i      (acc_st_disp ^ acc_resp_i.store_complete),
+      .load_i    (1'b0),
+      .down_i    (acc_resp_i.store_complete),
+      .d_i       ('0),
+      .q_o       (acc_disp_stores_pending),
+      .overflow_o(acc_disp_stores_overflow)
+  );
+
+  acc_dispatcher_no_store_overflow :
+  assert property (
+      @(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
+  else $error("[acc_dispatcher] Too many pending stores.");
+
+  /**************************
+   * Tie Off Unused Signals *
+   **************************/
+
+  assign acc_stall_st_pending_o = 1'b0;
+  assign flush_pipeline_o       = 1'b0;
+  assign acc_dcache_req_ports_o = '0;
+
+endmodule : acc_dispatcher
diff --git a/test/type_param/core/acc_dispatcher_corrected.sv b/test/type_param/core/acc_dispatcher_corrected.sv
new file mode 100644
index 0000000..6f9c8fc
--- /dev/null
+++ b/test/type_param/core/acc_dispatcher_corrected.sv
@@ -0,0 +1,423 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors: Matheus Cavalcante, ETH Zurich
+//          Nils Wistoff, ETH Zurich
+// Date: 20.11.2020
+// Description: Functional unit that dispatches CVA6 instructions to accelerators.
+
+module acc_dispatcher  // Queues CVA6 accelerator instructions and forwards them to the accelerator once they are no longer speculative
+  import ariane_pkg::*;
+  import riscv::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg    = config_pkg::cva6_cfg_empty,
+    parameter type                   acc_req_t  = acc_pkg::accelerator_req_t,  // not referenced in this module body; acc_req_o uses acc_pkg::accelerator_req_t directly
+    parameter type                   acc_resp_t = acc_pkg::accelerator_resp_t,  // not referenced in this module body; acc_resp_i uses acc_pkg::accelerator_resp_t directly
+    parameter type                   acc_cfg_t  = logic,  // only used as the type of AccCfg
+    parameter acc_cfg_t              AccCfg     = '0  // not referenced in this module body
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    // Interface with the CSR regfile
+    input logic acc_cons_en_i,  // Accelerator memory consistent mode
+    output logic acc_fflags_valid_o,
+    output logic [4:0] acc_fflags_o,
+    // Interface with the CSRs
+    input priv_lvl_t ld_st_priv_lvl_i,
+    input logic sum_i,
+    input pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][PLEN-3:0] pmpaddr_i,
+    input logic [2:0] fcsr_frm_i,
+    output logic dirty_v_state_o,
+    // Interface with the issue stage
+    input scoreboard_entry_t issue_instr_i,
+    input logic issue_instr_hs_i,
+    output logic issue_stall_o,
+    input fu_data_t fu_data_i,
+    input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
+    output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
+    output xlen_t acc_result_o,
+    output logic acc_valid_o,
+    output exception_t acc_exception_o,
+    // Interface with the execute stage
+    output logic acc_valid_ex_o,  // FU executed
+    // Interface with the commit stage
+    input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
+    input logic commit_st_barrier_i,  // A store barrier was committed
+    // Interface with the load/store unit
+    output logic acc_stall_st_pending_o,
+    input logic acc_no_st_pending_i,
+    input dcache_req_i_t [2:0] dcache_req_ports_i,
+    // Interface with the controller
+    output logic ctrl_halt_o,
+    input logic flush_unissued_instr_i,
+    input logic flush_ex_i,
+    output logic flush_pipeline_o,
+    // Interface with cache subsystem
+    output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
+    input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
+    input logic inval_ready_i,
+    output logic inval_valid_o,
+    output logic [63:0] inval_addr_o,
+    // Accelerator interface
+    output acc_pkg::accelerator_req_t acc_req_o,
+    input acc_pkg::accelerator_resp_t acc_resp_i
+);
+
+  `include "common_cells/registers.svh"
+
+  import cf_math_pkg::idx_width;
+
+  /***********************
+   *  Common signals     *
+   ***********************/
+
+  logic acc_ready;
+  logic acc_valid_d, acc_valid_q;
+
+  /**************************
+   *  Accelerator issue     *
+   **************************/
+
+  // Issue accelerator instructions
+  `FF(acc_valid_q, acc_valid_d, '0)
+
+  assign acc_valid_ex_o = acc_valid_q;
+  assign acc_valid_d    = ~issue_instr_i.ex.valid &
+                          issue_instr_hs_i &
+                          (issue_instr_i.fu == ACCEL) &
+                          ~flush_unissued_instr_i;
+
+  // Accelerator load/store pending signals
+  logic acc_no_ld_pending;
+  logic acc_no_st_pending;
+
+  // Stall issue stage in three cases:
+  always_comb begin : stall_issue
+    unique case (issue_instr_i.fu)
+      ACCEL:
+      // 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
+      issue_stall_o = ~acc_ready;
+      LOAD:
+      // 2. We're issuing a scalar load but there is an inflight accelerator store.
+      issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
+      STORE:
+      // 3. We're issuing a scalar store but there is an inflight accelerator load or store.
+      issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
+      default: issue_stall_o = 1'b0;
+    endcase
+  end
+
+  /***********************
+   *  Instruction queue  *
+   ***********************/
+
+  localparam InstructionQueueDepth = 3;
+
+  fu_data_t                                        acc_data;
+  fu_data_t                                        acc_insn_queue_o;
+  logic                                            acc_insn_queue_pop;
+  logic                                            acc_insn_queue_empty;
+  logic     [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
+  logic                                            acc_commit;
+  logic     [                   TRANS_ID_BITS-1:0] acc_commit_trans_id;
+
+  assign acc_data = acc_valid_ex_o ? fu_data_i : '0;  // gated by acc_valid_ex_o (= acc_valid_q, the registered acc_valid_d)
+
+  fifo_v3 #(
+      .DEPTH       (InstructionQueueDepth),
+      .FALL_THROUGH(1'b1),
+      .dtype       (fu_data_t)
+  ) i_acc_insn_queue (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (flush_ex_i),
+      .testmode_i(1'b0),
+      .data_i    (fu_data_i),
+      .push_i    (acc_valid_q),
+      .full_o    (  /* Unused */),
+      .data_o    (acc_insn_queue_o),
+      .pop_i     (acc_insn_queue_pop),
+      .empty_o   (acc_insn_queue_empty),
+      .usage_o   (acc_insn_queue_usage)
+  );
+
+  // Ready while usage < Depth-1, i.e. at least two entries are free. NOTE(review): the spare slot presumably covers the instruction held in acc_valid_q - confirm.
+  assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
+
+  /**********************************
+   *  Non-speculative instructions  *
+   **********************************/
+
+  // Keep track of the instructions that were received by the dispatcher.
+  logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q;
+  `FF(insn_pending_q, insn_pending_d, '0)
+
+  // Only non-speculative instructions can be issued to the accelerators.
+  // The following block keeps track of which transaction IDs reached the
+  // top of the scoreboard, and are therefore no longer speculative.
+  logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
+  `FF(insn_ready_q, insn_ready_d, '0)
+
+  always_comb begin : p_non_speculative_ff
+    // Maintain state
+    insn_pending_d = insn_pending_q;
+    insn_ready_d   = insn_ready_q;
+
+    // We received a new instruction
+    if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
+    // Flush all received instructions
+    if (flush_ex_i) insn_pending_d = '0;
+
+    // An accelerator instruction is no longer speculative.
+    if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
+      insn_ready_d[acc_commit_trans_id]   = 1'b1;
+      insn_pending_d[acc_commit_trans_id] = 1'b0;
+    end
+
+    // An accelerator instruction was issued.
+    if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
+  end : p_non_speculative_ff
+
+  /*************************
+   *  Accelerator request  *
+   *************************/
+
+  acc_pkg::accelerator_req_t acc_req;
+  logic                      acc_req_valid;
+  logic                      acc_req_ready;
+
+  acc_pkg::accelerator_req_t acc_req_int;
+  fall_through_register #(
+      .T(acc_pkg::accelerator_req_t)
+  ) i_accelerator_req_register (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clr_i     (1'b0),
+      .testmode_i(1'b0),
+      .data_i    (acc_req),
+      .valid_i   (acc_req_valid),
+      .ready_o   (acc_req_ready),
+      .data_o    (acc_req_int),
+      .valid_o   (acc_req_o.req_valid),
+      .ready_i   (acc_resp_i.req_ready)
+  );
+
+  assign acc_req_o.insn          = acc_req_int.insn;
+  assign acc_req_o.rs1           = acc_req_int.rs1;
+  assign acc_req_o.rs2           = acc_req_int.rs2;
+  assign acc_req_o.frm           = acc_req_int.frm;
+  assign acc_req_o.trans_id      = acc_req_int.trans_id;
+  assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
+  assign acc_req_o.acc_cons_en   = acc_cons_en_i;
+  assign acc_req_o.inval_ready   = inval_ready_i;
+
+  always_comb begin : accelerator_req_dispatcher
+    // Do not fetch from the instruction queue
+    acc_insn_queue_pop = 1'b0;
+
+    // Default values
+    acc_req            = '0;
+    acc_req_valid      = 1'b0;
+
+    // Unpack fu_data_t into accelerator_req_t
+    if (!acc_insn_queue_empty) begin
+      acc_req = '{
+          // Instruction is forwarded from the decoder as an immediate
+          // -
+          // frm rounding information is up to date during a valid request to the accelerator
+          // The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
+          // do not take place until the accelerator answers (Ariane commits in-order)
+          insn    :
+          acc_insn_queue_o.imm[
+          31
+          :
+          0
+          ],
+          rs1     : acc_insn_queue_o.operand_a,
+          rs2     : acc_insn_queue_o.operand_b,
+          frm     : fpnew_pkg::roundmode_e'(fcsr_frm_i),
+          trans_id: acc_insn_queue_o.trans_id,
+          default: '0
+      };
+      // Wait until the instruction is no longer speculative.
+      acc_req_valid      = insn_ready_q[acc_insn_queue_o.trans_id] ||
+                           (acc_commit && insn_pending_q[acc_commit_trans_id]);  // same condition that sets insn_ready_d, used here without the register delay
+      acc_insn_queue_pop = acc_req_valid && acc_req_ready;
+    end
+  end
+
+  /**************************
+   *  Accelerator response  *
+   **************************/
+
+  logic acc_ld_disp;
+  logic acc_st_disp;
+
+  // Unpack the accelerator response
+  assign acc_trans_id_o       = acc_resp_i.trans_id;
+  assign acc_result_o         = acc_resp_i.result;
+  assign acc_valid_o          = acc_resp_i.resp_valid;
+  assign acc_exception_o      = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
+  assign acc_fflags_valid_o   = acc_resp_i.fflags_valid;
+  assign acc_fflags_o         = acc_resp_i.fflags;
+  // Always ready to receive responses
+  assign acc_req_o.resp_ready = 1'b1;
+
+  // Signal dispatched load/store to issue stage
+  assign acc_ld_disp          = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
+  assign acc_st_disp          = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
+
+  // Cache invalidation
+  assign inval_valid_o        = acc_resp_i.inval_valid;
+  assign inval_addr_o         = acc_resp_i.inval_addr;
+
+  /**************************
+   *  Accelerator commit    *
+   **************************/
+
+  // Instruction can be issued to the (in-order) back-end if
+  // it reached the top of the scoreboard and it hasn't been
+  // issued yet
+  always_comb begin : accelerator_commit
+    acc_commit = 1'b0;
+    if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1;
+    if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)
+      acc_commit = 1'b1;  // NOTE(review): commit port index 1 is hard-coded; assumes CVA6Cfg.NrCommitPorts >= 2 - confirm
+  end
+
+  // Dirty the V state if we are committing anything related to the vector accelerator
+  always_comb begin : dirty_v_state
+    dirty_v_state_o = 1'b0;
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL);
+    end
+  end
+
+  assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id
+                                                        : commit_instr_i[1].trans_id;
+
+  /**************************
+   *  Accelerator barriers  *
+   **************************/
+
+  // On a store barrier (i.e. any barrier that requires preceding stores to complete
+  // before continuing execution), halt execution while there are pending stores in
+  // the accelerator pipeline.
+  logic wait_acc_store_d, wait_acc_store_q;
+  `FF(wait_acc_store_q, wait_acc_store_d, '0)
+
+  // Set on store barrier. Clear when no store is pending.
+  assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
+  assign ctrl_halt_o      = wait_acc_store_q;
+
+  /**************************
+   *  Load/Store tracking   *
+   **************************/
+
+  // Loads
+  logic       acc_spec_loads_overflow;
+  logic [2:0] acc_spec_loads_pending;
+  logic       acc_disp_loads_overflow;
+  logic [2:0] acc_disp_loads_pending;
+
+  assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0);
+
+  // Count speculative loads. These can still be flushed.
+  counter #(
+      .WIDTH          (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_spec_loads (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (flush_ex_i),
+      .en_i      ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),  // enabled when exactly one of: load issued / load dispatched
+      .load_i    (1'b0),
+      .down_i    (acc_ld_disp),
+      .d_i       ('0),
+      .q_o       (acc_spec_loads_pending),
+      .overflow_o(acc_spec_loads_overflow)
+  );
+
+  // Count dispatched loads. These cannot be flushed anymore.
+  counter #(
+      .WIDTH          (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_disp_loads (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (1'b0),
+      .en_i      (acc_ld_disp ^ acc_resp_i.load_complete),  // enabled when exactly one of: load dispatched / load completed
+      .load_i    (1'b0),
+      .down_i    (acc_resp_i.load_complete),
+      .d_i       ('0),
+      .q_o       (acc_disp_loads_pending),
+      .overflow_o(acc_disp_loads_overflow)
+  );
+
+  acc_dispatcher_no_load_overflow :
+  assert property (
+      @(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
+  else $error("[acc_dispatcher] Too many pending loads.");
+
+  // Stores
+  logic       acc_spec_stores_overflow;
+  logic [2:0] acc_spec_stores_pending;
+  logic       acc_disp_stores_overflow;
+  logic [2:0] acc_disp_stores_pending;
+
+  assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0);
+
+  // Count speculative stores. These can still be flushed.
+  counter #(
+      .WIDTH          (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_spec_stores (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (flush_ex_i),
+      .en_i      ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),  // enabled when exactly one of: store issued / store dispatched
+      .load_i    (1'b0),
+      .down_i    (acc_st_disp),
+      .d_i       ('0),
+      .q_o       (acc_spec_stores_pending),
+      .overflow_o(acc_spec_stores_overflow)
+  );
+
+  // Count dispatched stores. These cannot be flushed anymore.
+  counter #(
+      .WIDTH          (3),
+      .STICKY_OVERFLOW(0)
+  ) i_acc_disp_stores (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (1'b0),
+      .en_i      (acc_st_disp ^ acc_resp_i.store_complete),  // enabled when exactly one of: store dispatched / store completed
+      .load_i    (1'b0),
+      .down_i    (acc_resp_i.store_complete),
+      .d_i       ('0),
+      .q_o       (acc_disp_stores_pending),
+      .overflow_o(acc_disp_stores_overflow)
+  );
+
+  acc_dispatcher_no_store_overflow :
+  assert property (
+      @(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
+  else $error("[acc_dispatcher] Too many pending stores.");
+
+  /**************************
+   * Tie Off Unused Signals *
+   **************************/
+
+  assign acc_stall_st_pending_o = 1'b0;
+  assign flush_pipeline_o       = 1'b0;
+  assign acc_dcache_req_ports_o = '0;
+
+endmodule : acc_dispatcher
diff --git a/test/type_param/core/alu.sv b/test/type_param/core/alu.sv
new file mode 100644
index 0000000..a928725
--- /dev/null
+++ b/test/type_param/core/alu.sv
@@ -0,0 +1,359 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Matthias Baer <baermatt@student.ethz.ch>
+// Author: Igor Loi <igor.loi@unibo.it>
+// Author: Andreas Traber <atraber@student.ethz.ch>
+// Author: Lukas Mueller <lukasmue@student.ethz.ch>
+// Author: Florian Zaruba <zaruabf@iis.ee.ethz.ch>
+//
+// Date: 19.03.2017
+// Description: Ariane ALU based on RI5CY's ALU
+
+
+module alu  // Integer ALU: adder, shifter, comparator, plus optional bit-manipulation (CVA6Cfg.RVB) and Zicond (CVA6Cfg.ZiCondExtEn) results
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic         clk_i,            // Clock (not referenced in this module body)
+    input  logic         rst_ni,           // Asynchronous reset active low (not referenced in this module body)
+    input  fu_data_t     fu_data_i,
+    output riscv::xlen_t result_o,
+    output logic         alu_branch_res_o
+);
+
+  riscv::xlen_t                   operand_a_rev;
+  logic         [           31:0] operand_a_rev32;
+  logic         [  riscv::XLEN:0] operand_b_neg;
+  logic         [riscv::XLEN+1:0] adder_result_ext_o;
+  logic                           less;  // handles both signed and unsigned forms
+  logic         [           31:0] rolw;  // Rotate Left Word
+  logic         [           31:0] rorw;  // Rotate Right Word
+  logic [31:0] orcbw, rev8w;
+  logic [  $clog2(riscv::XLEN) : 0] cpop;  // Count Population
+  logic [$clog2(riscv::XLEN)-1 : 0] lz_tz_count;  // Count Leading/Trailing Zeros (used for CLZ and CTZ)
+  logic [                      4:0] lz_tz_wcount;  // Count Leading/Trailing Zeros Word (used for CLZW and CTZW)
+  logic lz_tz_empty, lz_tz_wempty;
+  riscv::xlen_t orcbw_result, rev8w_result;
+
+  // bit reverse operand_a for left shifts and bit counting
+  generate
+    genvar k;
+    for (k = 0; k < riscv::XLEN; k++)
+      assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k];
+
+    for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
+  endgenerate
+
+  // ------
+  // Adder
+  // ------
+  logic adder_op_b_negate;
+  logic adder_z_flag;
+  logic [riscv::XLEN:0] adder_in_a, adder_in_b;
+  riscv::xlen_t adder_result;
+  logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx;
+
+  always_comb begin
+    adder_op_b_negate = 1'b0;
+
+    unique case (fu_data_i.operation)
+      // ADDER OPS
+      EQ, NE, SUB, SUBW, ANDN, ORN, XNOR: adder_op_b_negate = 1'b1;
+      default: ;
+    endcase
+  end
+
+  always_comb begin
+    operand_a_bitmanip = fu_data_i.operand_a;
+
+    if (CVA6Cfg.RVB) begin
+      if (riscv::IS_XLEN64) begin
+        unique case (fu_data_i.operation)
+          SH1ADDUW:           operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
+          SH2ADDUW:           operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
+          SH3ADDUW:           operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
+          CTZW:               operand_a_bitmanip = operand_a_rev32;
+          ADDUW, CPOPW, CLZW: operand_a_bitmanip = fu_data_i.operand_a[31:0];
+          default:            ;
+        endcase
+      end
+      unique case (fu_data_i.operation)
+        SH1ADD:  operand_a_bitmanip = fu_data_i.operand_a << 1;
+        SH2ADD:  operand_a_bitmanip = fu_data_i.operand_a << 2;
+        SH3ADD:  operand_a_bitmanip = fu_data_i.operand_a << 3;
+        CTZ:     operand_a_bitmanip = operand_a_rev;
+        default: ;
+      endcase
+    end
+  end
+
+  // prepare operand a: the extra 1'b1 LSB meets operand b's extra LSB (= adder_op_b_negate) and yields the +1 carry-in when b is inverted
+  assign adder_in_a         = {operand_a_bitmanip, 1'b1};
+
+  // prepare operand b
+  assign operand_b_neg      = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN + 1{adder_op_b_negate}};
+  assign adder_in_b         = operand_b_neg;
+
+  // actual adder
+  assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
+  assign adder_result       = adder_result_ext_o[riscv::XLEN:1];
+  assign adder_z_flag       = ~|adder_result;
+
+  // get the right branch comparison result
+  always_comb begin : branch_resolve
+    // set comparison by default
+    alu_branch_res_o = 1'b1;
+    case (fu_data_i.operation)
+      EQ:       alu_branch_res_o = adder_z_flag;
+      NE:       alu_branch_res_o = ~adder_z_flag;
+      LTS, LTU: alu_branch_res_o = less;
+      GES, GEU: alu_branch_res_o = ~less;
+      default:  alu_branch_res_o = 1'b1;
+    endcase
+  end
+
+  // ---------
+  // Shifts
+  // ---------
+
+  // TODO: this can probably be optimized significantly
+  logic                         shift_left;  // should we shift left
+  logic                         shift_arithmetic;
+
+  riscv::xlen_t                 shift_amt;  // amount of shift, to the right
+  riscv::xlen_t                 shift_op_a;  // input of the shifter
+  logic         [         31:0] shift_op_a32;  // input to the 32 bit shift operation
+
+  riscv::xlen_t                 shift_result;
+  logic         [         31:0] shift_result32;
+
+  logic         [riscv::XLEN:0] shift_right_result;
+  logic         [         32:0] shift_right_result32;
+
+  riscv::xlen_t                 shift_left_result;
+  logic         [         31:0] shift_left_result32;
+
+  assign shift_amt = fu_data_i.operand_b;
+
+  assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW);
+
+  assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW);
+
+  // right shifts, we let the synthesizer optimize this
+  logic [riscv::XLEN:0] shift_op_a_64;
+  logic [32:0] shift_op_a_32;
+
+  // choose the bit reversed or the normal input for shift operand a
+  assign shift_op_a           = shift_left ? operand_a_rev : fu_data_i.operand_a;
+  assign shift_op_a32         = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
+
+  assign shift_op_a_64        = {shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a};
+  assign shift_op_a_32        = {shift_arithmetic & shift_op_a[31], shift_op_a32};
+
+  assign shift_right_result   = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);
+
+  assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
+  // bit reverse the shift_right_result for left shifts
+  genvar j;
+  generate
+    for (j = 0; j < riscv::XLEN; j++)
+      assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j];
+
+    for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j];
+
+  endgenerate
+
+  assign shift_result   = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0];
+  assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0];
+
+  // ------------
+  // Comparisons
+  // ------------
+
+  always_comb begin
+    logic sgn;
+    sgn = 1'b0;
+
+    if ((fu_data_i.operation == SLTS) ||
+            (fu_data_i.operation == LTS)  ||
+            (fu_data_i.operation == GES)  ||
+            (fu_data_i.operation == MAX)  ||
+            (fu_data_i.operation == MIN))
+      sgn = 1'b1;
+
+    less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) <
+            $signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b}));
+  end
+
+  if (CVA6Cfg.RVB) begin : gen_bitmanip
+    // Count Population + Count population Word
+
+    popcount #(
+        .INPUT_WIDTH(riscv::XLEN)
+    ) i_cpop_count (
+        .data_i    (operand_a_bitmanip),
+        .popcount_o(cpop)
+    );
+
+    // Count Leading/Trailing Zeros
+    // XLEN-wide
+    lzc #(
+        .WIDTH(riscv::XLEN),
+        .MODE (1)
+    ) i_clz_64b (
+        .in_i(operand_a_bitmanip),
+        .cnt_o(lz_tz_count),
+        .empty_o(lz_tz_empty)
+    );
+    if (riscv::IS_XLEN64) begin
+      //32b
+      lzc #(
+          .WIDTH(32),
+          .MODE (1)
+      ) i_clz_32b (
+          .in_i(operand_a_bitmanip[31:0]),
+          .cnt_o(lz_tz_wcount),
+          .empty_o(lz_tz_wempty)
+      );
+    end
+  end
+
+  if (CVA6Cfg.RVB) begin : gen_orcbw_rev8w_results
+    assign orcbw = {
+      {8{|fu_data_i.operand_a[31:24]}},
+      {8{|fu_data_i.operand_a[23:16]}},
+      {8{|fu_data_i.operand_a[15:8]}},
+      {8{|fu_data_i.operand_a[7:0]}}
+    };
+    assign rev8w = {
+      {fu_data_i.operand_a[7:0]},
+      {fu_data_i.operand_a[15:8]},
+      {fu_data_i.operand_a[23:16]},
+      {fu_data_i.operand_a[31:24]}
+    };
+    if (riscv::IS_XLEN64) begin : gen_64b
+      assign orcbw_result = {
+        {8{|fu_data_i.operand_a[63:56]}},
+        {8{|fu_data_i.operand_a[55:48]}},
+        {8{|fu_data_i.operand_a[47:40]}},
+        {8{|fu_data_i.operand_a[39:32]}},
+        orcbw
+      };
+      assign rev8w_result = {
+        rev8w,
+        {fu_data_i.operand_a[39:32]},
+        {fu_data_i.operand_a[47:40]},
+        {fu_data_i.operand_a[55:48]},
+        {fu_data_i.operand_a[63:56]}
+      };
+    end else begin : gen_32b
+      assign orcbw_result = orcbw;
+      assign rev8w_result = rev8w;
+    end
+  end
+
+  // -----------
+  // Result MUX
+  // -----------
+  always_comb begin
+    result_o = '0;  // default; the case statements below run in order, so a later match overrides an earlier assignment
+    if (riscv::IS_XLEN64) begin
+      unique case (fu_data_i.operation)
+        // Add word: Ignore the upper bits and sign extend to 64 bit
+        ADDW, SUBW: result_o = {{riscv::XLEN - 32{adder_result[31]}}, adder_result[31:0]};
+        SH1ADDUW, SH2ADDUW, SH3ADDUW: result_o = adder_result;
+        // Shifts 32 bit
+        SLLW, SRLW, SRAW: result_o = {{riscv::XLEN - 32{shift_result32[31]}}, shift_result32[31:0]};
+        default: ;
+      endcase
+    end
+    unique case (fu_data_i.operation)
+      // Standard Operations
+      ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1];
+      ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1];
+      XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1];
+      // Adder Operations
+      ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD: result_o = adder_result;
+      // Shift Operations
+      SLL, SRL, SRA: result_o = (riscv::IS_XLEN64) ? shift_result : shift_result32;
+      // Comparison Operations
+      SLTS, SLTU: result_o = {{riscv::XLEN - 1{1'b0}}, less};
+      default: ;  // default case to suppress unique warning
+    endcase
+
+    if (CVA6Cfg.RVB) begin
+      // One-hot mask of bit (operand_b & (XLEN-1)); consumed by the single-bit instructions (BCLR/BEXT/BINV/BSET) below
+      bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN - 1));
+      // rolw, roriw, rorw
+      rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
+      rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
+      if (riscv::IS_XLEN64) begin
+        unique case (fu_data_i.operation)
+          CLZW, CTZW:
+          result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN - 5{1'b0}}, lz_tz_wcount};  // 32 when lz_tz_wempty is set (presumably an all-zero word - confirm against lzc empty_o)
+          ROLW: result_o = {{riscv::XLEN - 32{rolw[31]}}, rolw};
+          RORW, RORIW: result_o = {{riscv::XLEN - 32{rorw[31]}}, rorw};
+          default: ;
+        endcase
+      end
+      unique case (fu_data_i.operation)
+        // Integer minimum/maximum
+        MAX:  result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
+        MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
+        MIN:  result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
+        MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
+
+        // Single bit instructions operations
+        BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
+        BEXT, BEXTI: result_o = {{riscv::XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
+        BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
+        BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;
+
+        // Count Leading/Trailing Zeros
+        CLZ, CTZ:
+        result_o = (lz_tz_empty) ? ({{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count} + 1) :
+            {{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count};
+
+        // Count population
+        CPOP, CPOPW: result_o = {{(riscv::XLEN - ($clog2(riscv::XLEN) + 1)) {1'b0}}, cpop};
+
+        // Sign and Zero Extend
+        SEXTB: result_o = {{riscv::XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
+        SEXTH: result_o = {{riscv::XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
+        ZEXTH: result_o = {{riscv::XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]};
+
+        // Bitwise Rotation
+        ROL:
+        result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0])));
+
+        ROR, RORI:
+        result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0])));
+
+        ORCB: result_o = orcbw_result;
+        REV8: result_o = rev8w_result;
+
+        default:
+        if (fu_data_i.operation == SLLIUW && riscv::IS_XLEN64)
+          result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0];  // Left Shift 32 bit unsigned
+      endcase
+    end
+    if (CVA6Cfg.ZiCondExtEn) begin
+      unique case (fu_data_i.operation)
+        CZERO_EQZ:
+        result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0;  // move zero to rd if rs2 is equal to zero else rs1
+        CZERO_NEZ:
+        result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a; // move zero to rd if rs2 is nonzero else rs1
+        default: ;  // default case to suppress unique warning
+      endcase
+    end
+  end
+endmodule
diff --git a/test/type_param/core/amo_buffer.sv b/test/type_param/core/amo_buffer.sv
new file mode 100644
index 0000000..24a98dd
--- /dev/null
+++ b/test/type_param/core/amo_buffer.sv
@@ -0,0 +1,82 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 20.09.2018
+// Description: Buffers AMO requests
+// This unit buffers atomic memory operations for the cache subsystem.
+// Furthermore it handles interfacing with the commit stage
+
+module amo_buffer #(  // holds one AMO in a DEPTH-1 FIFO and presents it to the cache subsystem
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,   // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i, // pipeline flush
+    // AMO request side: the fields below are packed and pushed into the FIFO while valid_i is high
+    input logic valid_i,  // AMO is valid (FIFO push)
+    output logic ready_o,  // AMO unit is ready (driven by the FIFO's empty_o)
+    input ariane_pkg::amo_t amo_op_i,  // AMO Operation
+    input  logic [riscv::PLEN-1:0]      paddr_i,            // physical address of store which needs to be placed in the queue
+    input riscv::xlen_t data_i,  // data which is placed in the queue
+    input logic [1:0] data_size_i,  // type of request we are making (e.g.: bytes to write)
+    // D$
+    output ariane_pkg::amo_req_t amo_req_o,  // request to cache subsystem
+    input ariane_pkg::amo_resp_t amo_resp_i,  // response from cache subsystem (ack pops the FIFO)
+    // Auxiliary signals
+    input logic amo_valid_commit_i,  // We have a valid AMO in the commit stage
+    input logic no_st_pending_i  // there is currently no store pending anymore
+);
+  logic flush_amo_buffer;  // flush strobe applied to the FIFO
+  logic amo_valid;  // connected to the FIFO's full_o
+  // payload stored in the FIFO for each buffered AMO
+  typedef struct packed {
+    ariane_pkg::amo_t       op;
+    logic [riscv::PLEN-1:0] paddr;
+    riscv::xlen_t           data;
+    logic [1:0]             size;
+  } amo_op_t;
+
+  amo_op_t amo_data_in, amo_data_out;
+
+  // validate this request as soon as all stores have drained and the AMO is in the commit stage
+  assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid;
+  assign amo_req_o.amo_op = amo_data_out.op;
+  assign amo_req_o.size = amo_data_out.size;
+  assign amo_req_o.operand_a = {{64 - riscv::PLEN{1'b0}}, amo_data_out.paddr};  // address, zero-extended to 64 bit
+  assign amo_req_o.operand_b = {{64 - riscv::XLEN{1'b0}}, amo_data_out.data};  // data, zero-extended to 64 bit
+  // pack the incoming request fields into the FIFO input word
+  assign amo_data_in.op = amo_op_i;
+  assign amo_data_in.data = data_i;
+  assign amo_data_in.paddr = paddr_i;
+  assign amo_data_in.size = data_size_i;
+
+  // only flush if we are currently not committing the AMO
+  // e.g.: it is not speculative anymore
+  assign flush_amo_buffer = flush_i & !amo_valid_commit_i;
+  // single-entry FIFO: pushed on valid_i, popped when the cache subsystem acknowledges
+  fifo_v3 #(
+      .DEPTH(1),
+      .dtype(amo_op_t)
+  ) i_amo_fifo (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (flush_amo_buffer),
+      .testmode_i(1'b0),
+      .full_o    (amo_valid),
+      .empty_o   (ready_o),
+      .usage_o   (),                  // left open
+      .data_i    (amo_data_in),
+      .push_i    (valid_i),
+      .data_o    (amo_data_out),
+      .pop_i     (amo_resp_i.ack)
+  );
+
+endmodule
diff --git a/test/type_param/core/ariane_regfile_ff.sv b/test/type_param/core/ariane_regfile_ff.sv
new file mode 100644
index 0000000..ae5cbeb
--- /dev/null
+++ b/test/type_param/core/ariane_regfile_ff.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Engineer:       Francesco Conti - f.conti@unibo.it
+//
+// Additional contributions by:
+//                 Markus Wegmann - markus.wegmann@technokrat.ch
+//
+// Design Name:    RISC-V register file
+// Project Name:   zero-riscy
+// Language:       SystemVerilog
+//
+// Description:    Register file with 31 or 15x 32 bit wide registers.
+//                 Register 0 is fixed to 0. This register file is based on
+//                 flip flops.
+//
+
+module ariane_regfile #(  // flip-flop based register file with combinational read ports
+    parameter config_pkg::cva6_cfg_t CVA6Cfg       = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           DATA_WIDTH    = 32,
+    parameter int unsigned           NR_READ_PORTS = 2,
+    parameter bit                    ZERO_REG_ZERO = 0  // when set, entry 0 is overwritten with zero on every clock
+) (
+    // clock and reset
+    input  logic                                             clk_i,
+    input  logic                                             rst_ni,
+    // disable clock gates for testing (not referenced inside this module)
+    input  logic                                             test_en_i,
+    // read port
+    input  logic [        NR_READ_PORTS-1:0][           4:0] raddr_i,
+    output logic [        NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
+    // write port
+    input  logic [CVA6Cfg.NrCommitPorts-1:0][           4:0] waddr_i,
+    input  logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
+    input  logic [CVA6Cfg.NrCommitPorts-1:0]                 we_i
+);
+
+  localparam ADDR_WIDTH = 5;
+  localparam NUM_WORDS = 2 ** ADDR_WIDTH;
+
+  logic [            NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;
+  logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec;
+
+  // one-hot write-enable decode: we_dec[j][i] is set when port j writes word i
+  always_comb begin : we_decoder
+    for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
+      for (int unsigned i = 0; i < NUM_WORDS; i++) begin
+        if (waddr_i[j] == i) we_dec[j][i] = we_i[j];
+        else we_dec[j][i] = 1'b0;
+      end
+    end
+  end
+
+  // loops cover all NUM_WORDS entries (including 0); R0 is kept at zero by the ZERO_REG_ZERO override below
+  always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
+    if (~rst_ni) begin
+      mem <= '{default: '0};
+    end else begin
+      for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin  // later ports override earlier ones on the same word
+        for (int unsigned i = 0; i < NUM_WORDS; i++) begin
+          if (we_dec[j][i]) begin
+            mem[i] <= wdata_i[j];
+          end
+        end
+        if (ZERO_REG_ZERO) begin
+          mem[0] <= '0;  // last assignment wins, so any write to word 0 is discarded
+        end
+      end
+    end
+  end
+  // read ports: unregistered lookup of mem by raddr_i
+  for (genvar i = 0; i < NR_READ_PORTS; i++) begin
+    assign rdata_o[i] = mem[raddr_i[i]];
+  end
+
+endmodule
diff --git a/test/type_param/core/ariane_regfile_fpga.sv b/test/type_param/core/ariane_regfile_fpga.sv
new file mode 100644
index 0000000..22d5aaa
--- /dev/null
+++ b/test/type_param/core/ariane_regfile_fpga.sv
@@ -0,0 +1,125 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Engineer:       Francesco Conti - f.conti@unibo.it
+//
+// Additional contributions by:
+//                 Markus Wegmann - markus.wegmann@technokrat.ch
+//                 Noam Gallmann - gnoam@live.com
+//                 Felipe Lisboa Malaquias
+//                 Henry Suzukawa
+//
+//
+// Description:    This register file is optimized for implementation on
+//                 FPGAs. The register file features one distributed RAM block per implemented
+//                 sync-write port, each with a parametrized number of async-read ports.
+//                 Read-accesses are multiplexed from the relevant block depending on which block
+//                 was last written to. For that purpose an additional array of registers is
+//                 maintained keeping track of write accesses.
+//
+
+module ariane_regfile_fpga #(  // register file built from one RAM block per write port plus a block selector
+    parameter config_pkg::cva6_cfg_t CVA6Cfg       = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           DATA_WIDTH    = 32,
+    parameter int unsigned           NR_READ_PORTS = 2,
+    parameter bit                    ZERO_REG_ZERO = 0  // when set, reads of address 0 return zero (see output MUX)
+) (
+    // clock and reset
+    input  logic                                             clk_i,
+    input  logic                                             rst_ni,
+    // disable clock gates for testing (not referenced inside this module)
+    input  logic                                             test_en_i,
+    // read port
+    input  logic [        NR_READ_PORTS-1:0][           4:0] raddr_i,
+    output logic [        NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
+    // write port
+    input  logic [CVA6Cfg.NrCommitPorts-1:0][           4:0] waddr_i,
+    input  logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
+    input  logic [CVA6Cfg.NrCommitPorts-1:0]                 we_i
+);
+
+  localparam ADDR_WIDTH = 5;
+  localparam NUM_WORDS = 2 ** ADDR_WIDTH;
+  localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts);  // at least 1 bit
+
+  // Distributed RAM usually supports one write port per block - duplicate for each write port.
+  logic [            NUM_WORDS-1:0][        DATA_WIDTH-1:0] mem             [CVA6Cfg.NrCommitPorts];
+
+  logic [CVA6Cfg.NrCommitPorts-1:0][         NUM_WORDS-1:0] we_dec;
+  logic [            NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel;
+  logic [            NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q;
+
+  // write address decoder (for block selector)
+  always_comb begin
+    for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
+      for (int unsigned i = 0; i < NUM_WORDS; i++) begin
+        if (waddr_i[j] == i) begin
+          we_dec[j][i] = we_i[j];
+        end else begin
+          we_dec[j][i] = 1'b0;
+        end
+      end
+    end
+  end
+
+  // update block selector:
+  // signal mem_block_sel records where the current valid value is stored.
+  // if multiple ports try to write to the same address simultaneously, the port with the highest
+  // index has priority.
+  always_comb begin
+    mem_block_sel = mem_block_sel_q;
+    for (int i = 0; i < NUM_WORDS; i++) begin
+      for (int j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
+        if (we_dec[j][i] == 1'b1) begin
+          mem_block_sel[i] = LOG_NR_WRITE_PORTS'(j);
+        end
+      end
+    end
+  end
+
+  // block selector flops
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      mem_block_sel_q <= '0;
+    end else begin
+      mem_block_sel_q <= mem_block_sel;
+    end
+  end
+
+  // distributed RAM blocks (no reset; block j is written only by write port j)
+  logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts];
+  for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block
+    always_ff @(posedge clk_i) begin
+      if (we_i[j]) begin  // address 0 is not filtered here (the old `~waddr_i[j] != 0` term was always true after 32-bit widening); ZERO_REG_ZERO is applied on the read MUX
+        mem[j][waddr_i[j]] <= wdata_i[j];
+      end
+    end
+    for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read
+      assign mem_read[j][k] = mem[j][raddr_i[k]];
+    end
+  end
+
+  // output MUX: pick the block that last wrote the addressed word, or force zero for x0 if ZERO_REG_ZERO
+  logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr;
+  for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port
+    assign block_addr[k] = mem_block_sel_q[raddr_i[k]];
+    assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k];
+  end
+
+  // random initialization of the memory to suppress assert warnings on Questa.
+  initial begin
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      for (int j = 0; j < NUM_WORDS; j++) begin
+        mem[i][j] = $random();
+      end
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/axi_shim.sv b/test/type_param/core/axi_shim.sv
new file mode 100644
index 0000000..8e1cfa8
--- /dev/null
+++ b/test/type_param/core/axi_shim.sv
@@ -0,0 +1,310 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:  axi_shim.sv
+ * Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>
+ *         Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   1.8.2018
+ *
+ * Description: Manages communication with the AXI Bus. Note that this unit does not
+ *              buffer requests and register the signals.
+ *
+ */
+
+
+module axi_shim #(  // unbuffered shim between simple rd_*/wr_* request ports and an AXI request/response struct pair
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2 (NOTE(review): the assertion at the end only checks >= 1)
+    parameter type axi_req_t = logic,
+    parameter type axi_rsp_t = logic
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    // read channel
+    // request
+    input logic rd_req_i,
+    output logic rd_gnt_o,
+    input logic [CVA6Cfg.AxiAddrWidth-1:0] rd_addr_i,
+    input logic [$clog2(AxiNumWords)-1:0] rd_blen_i,  // axi convention: LEN-1
+    input logic [2:0] rd_size_i,
+    input  logic [CVA6Cfg.AxiIdWidth-1:0]   rd_id_i,   // use same ID for reads, or make sure you only have one outstanding read tx
+    input logic rd_lock_i,
+    // read response (we have to unconditionally sink the response)
+    input logic rd_rdy_i,
+    output logic rd_last_o,
+    output logic rd_valid_o,
+    output logic [CVA6Cfg.AxiDataWidth-1:0] rd_data_o,
+    output logic [CVA6Cfg.AxiUserWidth-1:0] rd_user_o,
+    output logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_o,
+    output logic rd_exokay_o,  // indicates whether exclusive tx succeeded
+    // write channel
+    input logic wr_req_i,
+    output logic wr_gnt_o,
+    input logic [CVA6Cfg.AxiAddrWidth-1:0] wr_addr_i,
+    input logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] wr_data_i,
+    input logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] wr_user_i,
+    input logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] wr_be_i,
+    input logic [$clog2(AxiNumWords)-1:0] wr_blen_i,  // axi convention: LEN-1
+    input logic [2:0] wr_size_i,
+    input logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_i,
+    input logic wr_lock_i,
+    input logic [5:0] wr_atop_i,
+    // write response
+    input logic wr_rdy_i,
+    output logic wr_valid_o,
+    output logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_o,
+    output logic wr_exokay_o,  // indicates whether exclusive tx succeeded
+    // AXI port
+    output axi_req_t axi_req_o,
+    input axi_rsp_t axi_resp_i
+);
+  localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1;  // beat counter width, at least 1 bit
+
+  ///////////////////////////////////////////////////////
+  // write channel
+  ///////////////////////////////////////////////////////
+
+  enum logic [3:0] {
+    IDLE,
+    WAIT_AW_READY,
+    WAIT_LAST_W_READY,
+    WAIT_LAST_W_READY_AW_READY,
+    WAIT_AW_READY_BURST
+  }
+      wr_state_q, wr_state_d;
+
+  // AXI tx counter
+  logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q;
+  logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en;
+
+  assign wr_single_req = (wr_blen_i == 0);
+
+  // address
+  assign axi_req_o.aw.burst = axi_pkg::BURST_INCR;  // Use BURST_INCR for AXI regular transaction
+  assign axi_req_o.aw.addr = wr_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
+  assign axi_req_o.aw.size = wr_size_i;
+  assign axi_req_o.aw.len = wr_blen_i;
+  assign axi_req_o.aw.id = wr_id_i;
+  assign axi_req_o.aw.prot = 3'b0;
+  assign axi_req_o.aw.region = 4'b0;
+  assign axi_req_o.aw.lock = wr_lock_i;
+  assign axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE;
+  assign axi_req_o.aw.qos = 4'b0;
+  assign axi_req_o.aw.atop = wr_atop_i;
+  assign axi_req_o.aw.user = '0;
+
+  // data: the beat counter selects which word of the request is currently driven
+  assign axi_req_o.w.data = wr_data_i[wr_cnt_q];
+  assign axi_req_o.w.user = wr_user_i[wr_cnt_q];
+  assign axi_req_o.w.strb = wr_be_i[wr_cnt_q];
+  assign axi_req_o.w.last = wr_cnt_done;
+
+  // write response
+  assign wr_exokay_o = (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY);
+  assign axi_req_o.b_ready = wr_rdy_i;
+  assign wr_valid_o = axi_resp_i.b_valid;
+  assign wr_id_o = axi_resp_i.b.id;
+
+  // tx counter: counts up by one per accepted beat (only when burst writes are enabled), cleared by wr_cnt_clr
+  assign wr_cnt_done = (wr_cnt_q == wr_blen_i);
+  assign wr_cnt_d            = (wr_cnt_clr) ? '0 : (wr_cnt_en && CVA6Cfg.AxiBurstWriteEn) ? wr_cnt_q + 1 : wr_cnt_q;
+
+  always_comb begin : p_axi_write_fsm
+    // default
+    wr_state_d         = wr_state_q;
+
+    axi_req_o.aw_valid = 1'b0;
+    axi_req_o.w_valid  = 1'b0;
+    wr_gnt_o           = 1'b0;
+
+    wr_cnt_en          = 1'b0;
+    wr_cnt_clr         = 1'b0;
+
+    case (wr_state_q)
+      ///////////////////////////////////
+      IDLE: begin
+        // we have an incoming request
+        if (wr_req_i) begin
+          // present the address and the first data beat in the same cycle
+          axi_req_o.aw_valid = 1'b1;
+          axi_req_o.w_valid  = 1'b1;
+
+          if (CVA6Cfg.AxiBurstWriteEn && !wr_single_req) begin
+            wr_cnt_en = axi_resp_i.w_ready;
+
+            case ({
+              axi_resp_i.aw_ready, axi_resp_i.w_ready
+            })
+              2'b11:   wr_state_d = WAIT_LAST_W_READY;
+              2'b01:   wr_state_d = WAIT_LAST_W_READY_AW_READY;
+              2'b10:   wr_state_d = WAIT_LAST_W_READY;
+              default: ;
+            endcase
+          end else if (wr_single_req) begin  // its a single write
+            wr_cnt_clr = 1'b1;
+            // single req can be granted here
+            wr_gnt_o   = axi_resp_i.aw_ready & axi_resp_i.w_ready;
+            case ({
+              axi_resp_i.aw_ready, axi_resp_i.w_ready
+            })
+              2'b01:   wr_state_d = WAIT_AW_READY;
+              2'b10:   wr_state_d = WAIT_LAST_W_READY;
+              default: wr_state_d = IDLE;
+            endcase
+            // its a request for the whole cache line
+          end
+        end
+      end
+      ///////////////////////////////////
+      // ~> from single write
+      WAIT_AW_READY: begin
+        axi_req_o.aw_valid = 1'b1;
+
+        if (axi_resp_i.aw_ready) begin
+          wr_state_d = IDLE;
+          wr_gnt_o   = 1'b1;
+        end
+      end
+      ///////////////////////////////////
+      // ~> from write, there is an outstanding write
+      WAIT_LAST_W_READY: begin
+        axi_req_o.w_valid = 1'b1;
+
+        if (CVA6Cfg.AxiBurstWriteEn && axi_resp_i.w_ready && !wr_cnt_done) begin
+          wr_cnt_en = 1'b1;
+        end else if (wr_cnt_done) begin  // this is the last write
+          if (axi_resp_i.w_ready) begin
+            wr_state_d = IDLE;
+            wr_cnt_clr = 1'b1;
+            wr_gnt_o   = 1'b1;
+          end
+        end
+      end
+      ///////////////////////////////////
+      default: begin
+        ///////////////////////////////////
+        // ~> we need to wait for an aw_ready and there is at least one outstanding write
+        if (CVA6Cfg.AxiBurstWriteEn) begin
+          if (wr_state_q == WAIT_LAST_W_READY_AW_READY) begin
+            axi_req_o.w_valid  = 1'b1;
+            axi_req_o.aw_valid = 1'b1;
+            // decode {aw_ready, w_ready}
+            case ({
+              axi_resp_i.aw_ready, axi_resp_i.w_ready
+            })
+              // only w_ready: a data beat was accepted, the address is still pending
+              2'b01: begin
+                // are there any outstanding transactions?
+                if (wr_cnt_done) begin
+                  wr_state_d = WAIT_AW_READY_BURST;
+                  wr_cnt_clr = 1'b1;
+                end else begin
+                  // yes, so advance the beat counter and stay here
+                  wr_cnt_en = 1'b1;
+                end
+              end
+              2'b10:   wr_state_d = WAIT_LAST_W_READY;
+              2'b11: begin
+                // we are finished
+                if (wr_cnt_done) begin
+                  wr_state_d = IDLE;
+                  wr_gnt_o   = 1'b1;
+                  wr_cnt_clr = 1'b1;
+                  // there are outstanding transactions
+                end else begin
+                  wr_state_d = WAIT_LAST_W_READY;
+                  wr_cnt_en  = 1'b1;
+                end
+              end
+              default: ;
+            endcase
+          end  ///////////////////////////////////
+               // ~> all data has already been sent, we are only waiting for the aw_ready
+          else if (wr_state_q == WAIT_AW_READY_BURST) begin
+            axi_req_o.aw_valid = 1'b1;
+
+            if (axi_resp_i.aw_ready) begin
+              wr_state_d = IDLE;
+              wr_gnt_o   = 1'b1;
+            end
+          end
+        end else begin
+          wr_state_d = IDLE;
+        end
+      end
+    endcase
+  end
+
+
+  ///////////////////////////////////////////////////////
+  // read channel
+  ///////////////////////////////////////////////////////
+
+  // address
+  // in case of a wrapping transfer we can simply begin at the address, if we want to request a cache-line
+  // with an incremental transfer we need to output the corresponding base address of the cache line
+  assign axi_req_o.ar.burst  = axi_pkg::BURST_INCR;  // Use BURST_INCR for AXI regular transaction
+  assign axi_req_o.ar.addr   = rd_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
+  assign axi_req_o.ar.size   = rd_size_i;
+  assign axi_req_o.ar.len    = rd_blen_i;
+  assign axi_req_o.ar.id     = rd_id_i;
+  assign axi_req_o.ar.prot   = 3'b0;
+  assign axi_req_o.ar.region = 4'b0;
+  assign axi_req_o.ar.lock   = rd_lock_i;
+  assign axi_req_o.ar.cache  = axi_pkg::CACHE_MODIFIABLE;
+  assign axi_req_o.ar.qos    = 4'b0;
+  assign axi_req_o.ar.user   = '0;
+
+  // make the read request (combinational pass-through, granted when the AR channel is ready)
+  assign axi_req_o.ar_valid  = rd_req_i;
+  assign rd_gnt_o            = rd_req_i & axi_resp_i.ar_ready;
+
+  // return path
+  assign axi_req_o.r_ready   = rd_rdy_i;
+  assign rd_data_o           = axi_resp_i.r.data;
+  if (ariane_pkg::AXI_USER_EN) begin
+    assign rd_user_o = axi_resp_i.r.user;
+  end else begin
+    assign rd_user_o = '0;
+  end
+  assign rd_last_o   = axi_resp_i.r.last;
+  assign rd_valid_o  = axi_resp_i.r_valid;
+  assign rd_id_o     = axi_resp_i.r.id;
+  assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY);
+
+
+  // ----------------
+  // Registers
+  // ----------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      // reset the write FSM to IDLE and clear the beat counter
+      wr_state_q <= IDLE;
+      wr_cnt_q   <= '0;
+    end else begin
+      wr_state_q <= wr_state_d;
+      wr_cnt_q   <= wr_cnt_d;
+    end
+  end
+
+  // ----------------
+  // Assertions
+  // ----------------
+
+  //pragma translate_off
+  initial begin
+    assert (AxiNumWords >= 1)
+    else $fatal(1, "[axi adapter] AxiNumWords must be >= 1");
+    assert (CVA6Cfg.AxiIdWidth >= 2)
+    else $fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide");
+  end
+  //pragma translate_on
+
+endmodule  // axi_shim
diff --git a/test/type_param/core/branch_unit.sv b/test/type_param/core/branch_unit.sv
new file mode 100644
index 0000000..dfcb1c7
--- /dev/null
+++ b/test/type_param/core/branch_unit.sv
@@ -0,0 +1,106 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 09.05.2017
+// Description: Branch target calculation and comparison
+
+module branch_unit #(  // combinational branch/jump target computation, mispredict detection and misalignment exception
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // not referenced inside this module
+    input logic rst_ni,  // not referenced inside this module
+    input logic debug_mode_i,  // not referenced inside this module
+    input ariane_pkg::fu_data_t fu_data_i,
+    input logic [riscv::VLEN-1:0] pc_i,  // PC of instruction
+    input logic is_compressed_instr_i,
+    input  logic                      fu_valid_i,             // any functional unit is valid, check that there is no accidental mis-predict (NOTE(review): not referenced inside this module)
+    input logic branch_valid_i,
+    input logic branch_comp_res_i,  // branch comparison result from ALU
+    output logic [riscv::VLEN-1:0] branch_result_o,
+
+    input ariane_pkg::branchpredict_sbe_t branch_predict_i,  // this is the address we predicted
+    output ariane_pkg::bp_resolve_t               resolved_branch_o,      // this is the actual address we are targeting
+    output logic resolve_branch_o,  // to ID to clear that we resolved the branch and we can
+                                    // accept new entries to the scoreboard
+    output ariane_pkg::exception_t branch_exception_o  // branch exception out
+);
+  logic [riscv::VLEN-1:0] target_address;
+  logic [riscv::VLEN-1:0] next_pc;
+
+  // here we handle the various possibilities of mis-predicts
+  always_comb begin : mispredict_handler
+    // set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC
+    automatic logic [riscv::VLEN-1:0] jump_base;
+    // TODO(zarubaf): The ALU can be used to calculate the branch target
+    jump_base = (fu_data_i.operation == ariane_pkg::JALR) ? fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i;
+
+    target_address = {riscv::VLEN{1'b0}};
+    resolve_branch_o = 1'b0;
+    resolved_branch_o.target_address = {riscv::VLEN{1'b0}};
+    resolved_branch_o.is_taken = 1'b0;
+    resolved_branch_o.valid = branch_valid_i;
+    resolved_branch_o.is_mispredict = 1'b0;
+    resolved_branch_o.cf_type = branch_predict_i.cf;
+    // calculate next PC, depending on whether the instruction is compressed or not this may be different
+    // TODO(zarubaf): We already calculate this a couple of times, maybe re-use?
+    next_pc                          = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4});
+    // calculate target address: VLEN-bit addition of jump base and immediate
+    target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0]));
+    // on a JALR we are supposed to reset the LSB to 0 (according to the specification)
+    if (fu_data_i.operation == ariane_pkg::JALR) target_address[0] = 1'b0;
+    // the result of this unit is next_pc, i.e. the address of the instruction following this one
+    branch_result_o = next_pc;
+    resolved_branch_o.pc = pc_i;
+    // There are only two sources of mispredicts:
+    // 1. Branches
+    // 2. Jumps to register addresses
+    if (branch_valid_i) begin
+      // write target address which goes to PC Gen
+      resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
+      resolved_branch_o.is_taken = branch_comp_res_i;
+      // check the outcome of the branch speculation
+      if (ariane_pkg::op_is_branch(fu_data_i.operation)) begin
+        // Set the `cf_type` of the output as `branch`, this will update the BHT.
+        resolved_branch_o.cf_type = ariane_pkg::Branch;
+        // If the ALU comparison does not agree with the BHT prediction set the resolution as mispredicted.
+        resolved_branch_o.is_mispredict  = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch);
+      end
+      if (fu_data_i.operation == ariane_pkg::JALR
+          // check if the address of the jump register is correct and that we actually predicted
+          && (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin
+        resolved_branch_o.is_mispredict = 1'b1;
+        // update BTB only if this wasn't a return
+        if (branch_predict_i.cf != ariane_pkg::Return)
+          resolved_branch_o.cf_type = ariane_pkg::JumpR;
+      end
+      // to resolve the branch in ID
+      resolve_branch_o = 1'b1;
+    end
+  end
+  // use ALU exception signal for storing instruction fetch exceptions if
+  // the target address is not aligned to a 2 byte boundary
+  // (or to a 4 byte boundary when CVA6Cfg.RVC is not set)
+  logic jump_taken;
+  always_comb begin : exception_handling
+    // tval is always driven with the sign-extended pc_i; only `valid` depends on the misalignment check
+    // Do a jump if it is either unconditional jump (JAL | JALR) or `taken` conditional jump
+    jump_taken = !(ariane_pkg::op_is_branch(fu_data_i.operation)) ||
+        ((ariane_pkg::op_is_branch(fu_data_i.operation)) && branch_comp_res_i);
+    branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED;
+    branch_exception_o.valid = 1'b0;
+    branch_exception_o.tval = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};
+    // Only throw instruction address misaligned exception if this is indeed a `taken` conditional branch or
+    // an unconditional jump
+    if (branch_valid_i && (target_address[0] || (!CVA6Cfg.RVC && target_address[1])) && jump_taken) begin
+      branch_exception_o.valid = 1'b1;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/cache_subsystem/axi_adapter.sv b/test/type_param/core/cache_subsystem/axi_adapter.sv
new file mode 100644
index 0000000..0b8f9eb
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/axi_adapter.sv
@@ -0,0 +1,520 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:  axi_adapter.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   1.8.2018
+ *
+ * Description: Manages communication with the AXI Bus
+ */
+//import std_cache_pkg::*;
+
+module axi_adapter #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned DATA_WIDTH = 256,  // total bit width of wdata_i / rdata_o, split into AxiDataWidth-wide beats
+    parameter logic        CRITICAL_WORD_FIRST   = 0, // the AXI subsystem needs to support wrapping reads for this feature
+    parameter int unsigned CACHELINE_BYTE_OFFSET = 8,  // low ar.addr bits zeroed for non-single reads when CRITICAL_WORD_FIRST is 0
+    parameter type axi_req_t = logic,  // type of axi_req_o
+    parameter type axi_rsp_t = logic  // type of axi_resp_i
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni, // Asynchronous reset active low
+
+    input logic req_i,  // request valid, evaluated while the FSM is in IDLE
+    input ariane_pkg::ad_req_t type_i,  // SINGLE_REQ for one beat, anything else requests a burst
+    input ariane_pkg::amo_t amo_i,  // atomic operation of the request (bursts assert AMO_NONE)
+    output logic gnt_o,  // high when the request has been accepted
+    input logic [riscv::XLEN-1:0] addr_i,  // request address, drives aw.addr / ar.addr
+    input logic we_i,  // 1: write, 0: read
+    input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i,  // write data, one entry per beat
+    input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i,  // byte strobes, one entry per beat
+    input logic [1:0] size_i,  // forwarded as {1'b0, size_i} to aw.size / ar.size
+    input logic [CVA6Cfg.AxiIdWidth-1:0] id_i,  // forwarded to aw.id / ar.id
+    // read port
+    output logic valid_o,  // response valid: read data returned or write acknowledged
+    output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o,
+    output logic [CVA6Cfg.AxiIdWidth-1:0] id_o,  // ID accompanying the response
+    // critical word - read port
+    output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o,
+    output logic critical_word_valid_o,
+    // AXI port
+    output axi_req_t axi_req_o,
+    input axi_rsp_t axi_resp_i
+);
+  localparam BURST_SIZE = (DATA_WIDTH / CVA6Cfg.AxiDataWidth) - 1;  // beats per full-width transfer minus one (used as AXI len)
+  localparam ADDR_INDEX = ($clog2(
+      DATA_WIDTH / CVA6Cfg.AxiDataWidth
+  ) > 0) ? $clog2(
+      DATA_WIDTH / CVA6Cfg.AxiDataWidth
+  ) : 1;  // bits needed to index a beat, clamped to at least 1
+  localparam MAX_OUTSTANDING_AW = CVA6Cfg.MaxOutstandingStores;
+  localparam MAX_OUTSTANDING_AW_CNT_WIDTH = $clog2(
+      MAX_OUTSTANDING_AW + 1
+  ) > 0 ? $clog2(
+      MAX_OUTSTANDING_AW + 1
+  ) : 1;  // counter width able to hold MAX_OUTSTANDING_AW, clamped to at least 1
+
+  typedef logic [MAX_OUTSTANDING_AW_CNT_WIDTH-1:0] outstanding_aw_cnt_t;
+
+  enum logic [3:0] {
+    IDLE,
+    WAIT_B_VALID,
+    WAIT_AW_READY,
+    WAIT_LAST_W_READY,
+    WAIT_LAST_W_READY_AW_READY,
+    WAIT_AW_READY_BURST,
+    WAIT_R_VALID,
+    WAIT_R_VALID_MULTIPLE,
+    COMPLETE_READ,
+    WAIT_AMO_R_VALID
+  }
+      state_q, state_d;
+
+  // counter for AXI transfers
+  logic [ADDR_INDEX-1:0] cnt_d, cnt_q;
+  logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0]
+      cache_line_d, cache_line_q;
+  // save the address for a read, as we allow for non-cacheline aligned accesses
+  logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0] addr_offset_d, addr_offset_q;  // NOTE(review): one bit per beat, wider than the ADDR_INDEX-bit slice stored here - confirm intent
+  logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q;
+  logic [ADDR_INDEX-1:0] index;
+  // save the atomic operation and size
+  ariane_pkg::amo_t amo_d, amo_q;
+  logic [1:0] size_d, size_q;
+  // outstanding write transactions counter
+  outstanding_aw_cnt_t outstanding_aw_cnt_q, outstanding_aw_cnt_d;
+  logic any_outstanding_aw;
+
+  assign any_outstanding_aw = outstanding_aw_cnt_q != '0;
+
+  always_comb begin : axi_fsm
+    // Default assignments
+    axi_req_o.aw_valid  = 1'b0;
+    // Cast to AXI address width
+    axi_req_o.aw.addr   = addr_i;
+    axi_req_o.aw.prot   = 3'b0;
+    axi_req_o.aw.region = 4'b0;
+    axi_req_o.aw.len    = 8'b0;
+    axi_req_o.aw.size   = {1'b0, size_i};  // 1, 2, 4 or 8 bytes
+    axi_req_o.aw.burst  = axi_pkg::BURST_INCR;  // Use BURST_INCR for AXI regular transaction
+    axi_req_o.aw.lock   = 1'b0;
+    axi_req_o.aw.cache  = axi_pkg::CACHE_MODIFIABLE;
+    axi_req_o.aw.qos    = 4'b0;
+    axi_req_o.aw.id     = id_i;
+    axi_req_o.aw.atop   = atop_from_amo(amo_i);
+    axi_req_o.aw.user   = '0;
+
+    axi_req_o.ar_valid  = 1'b0;
+    // Cast to AXI address width
+    axi_req_o.ar.addr   = addr_i;
+    // in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line
+    // with an incremental transfer we need to output the corresponding base address of the cache line
+    if (!CRITICAL_WORD_FIRST && type_i != ariane_pkg::SINGLE_REQ) begin
+      axi_req_o.ar.addr[CACHELINE_BYTE_OFFSET-1:0] = '0;
+    end
+    axi_req_o.ar.prot = 3'b0;
+    axi_req_o.ar.region = 4'b0;
+    axi_req_o.ar.len = 8'b0;
+    axi_req_o.ar.size = {1'b0, size_i};  // 1, 2, 4 or 8 bytes
+    axi_req_o.ar.burst  = (CRITICAL_WORD_FIRST ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR); // wrapping transfer in case of a critical word first strategy
+    axi_req_o.ar.lock = 1'b0;
+    axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE;
+    axi_req_o.ar.qos = 4'b0;
+    axi_req_o.ar.id = id_i;
+    axi_req_o.ar.user = '0;
+
+    axi_req_o.w_valid = 1'b0;
+    axi_req_o.w.data = wdata_i[0];
+    axi_req_o.w.strb = be_i[0];
+    axi_req_o.w.last = 1'b0;
+    axi_req_o.w.user = '0;
+
+    axi_req_o.b_ready = 1'b0;
+    axi_req_o.r_ready = 1'b0;
+
+    gnt_o = 1'b0;
+    valid_o = 1'b0;
+    id_o = axi_resp_i.r.id;
+
+    critical_word_o = axi_resp_i.r.data;
+    critical_word_valid_o = 1'b0;
+    rdata_o = cache_line_q;
+
+    state_d = state_q;
+    cnt_d = cnt_q;
+    cache_line_d = cache_line_q;
+    addr_offset_d = addr_offset_q;
+    id_d = id_q;
+    amo_d = amo_q;
+    size_d = size_q;
+    index = '0;
+
+    outstanding_aw_cnt_d = outstanding_aw_cnt_q;
+
+    case (state_q)
+
+      IDLE: begin
+        cnt_d = '0;
+        // we have an incoming request
+        if (req_i) begin
+          // is this a read or write?
+          // write
+          if (we_i) begin
+            // multiple outstanding write transactions are only
+            // allowed if they are guaranteed not to be reordered
+            // i.e. same ID
+            if (!any_outstanding_aw || ((id_i == id_q) && (amo_i == ariane_pkg::AMO_NONE))) begin
+              // the data is valid
+              axi_req_o.aw_valid = 1'b1;
+              axi_req_o.w_valid  = 1'b1;
+              // store-conditional requires exclusive access
+              axi_req_o.aw.lock  = amo_i == ariane_pkg::AMO_SC;
+              // its a single write
+              if (type_i == ariane_pkg::SINGLE_REQ) begin
+                // only a single write so the data is already the last one
+                axi_req_o.w.last = 1'b1;
+                // single req can be granted here
+                gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
+                case ({
+                  axi_resp_i.aw_ready, axi_resp_i.w_ready
+                })
+                  2'b11:   state_d = WAIT_B_VALID;
+                  2'b01:   state_d = WAIT_AW_READY;
+                  2'b10:   state_d = WAIT_LAST_W_READY;
+                  default: state_d = IDLE;
+                endcase
+
+                if (axi_resp_i.aw_ready) begin
+                  id_d   = id_i;
+                  amo_d  = amo_i;
+                  size_d = size_i;
+                end
+
+                // its a request for the whole cache line
+              end else begin
+                // bursts of AMOs unsupported
+                assert (amo_i == ariane_pkg::AMO_NONE)
+                else $fatal("Bursts of atomic operations are not supported");
+
+                axi_req_o.aw.len = BURST_SIZE[7:0];  // AXI len: number of beats minus one
+                axi_req_o.w.data = wdata_i[0];
+                axi_req_o.w.strb = be_i[0];
+
+                if (axi_resp_i.w_ready) cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1;
+                else cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
+
+                case ({
+                  axi_resp_i.aw_ready, axi_resp_i.w_ready
+                })
+                  2'b11:   state_d = WAIT_LAST_W_READY;
+                  2'b01:   state_d = WAIT_LAST_W_READY_AW_READY;
+                  2'b10:   state_d = WAIT_LAST_W_READY;
+                  default: ;
+                endcase
+              end
+            end
+            // read
+          end else begin
+            // reads are only issued while no write transaction is outstanding
+            if (!any_outstanding_aw) begin
+
+              axi_req_o.ar_valid = 1'b1;
+              // load-reserved requires exclusive access
+              axi_req_o.ar.lock = amo_i == ariane_pkg::AMO_LR;
+
+              gnt_o = axi_resp_i.ar_ready;
+              if (type_i != ariane_pkg::SINGLE_REQ) begin
+                assert (amo_i == ariane_pkg::AMO_NONE)
+                else $fatal("Bursts of atomic operations are not supported");
+
+                axi_req_o.ar.len = BURST_SIZE[7:0];
+                cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
+              end
+
+              if (axi_resp_i.ar_ready) begin
+                state_d = (type_i == ariane_pkg::SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE;
+                addr_offset_d = addr_i[ADDR_INDEX-1+3:3];  // NOTE(review): bit 3 as LSB presumes 8-byte beats (64-bit AxiDataWidth) - confirm
+              end
+            end
+          end
+        end
+      end
+
+      // ~> from single write
+      WAIT_AW_READY: begin
+        axi_req_o.aw_valid = 1'b1;
+
+        if (axi_resp_i.aw_ready) begin
+          gnt_o   = 1'b1;
+          state_d = WAIT_B_VALID;
+          id_d    = id_i;
+          amo_d   = amo_i;
+          size_d  = size_i;
+        end
+      end
+
+      // ~> burst write: AW has not been accepted yet and W beats are still being sent
+      WAIT_LAST_W_READY_AW_READY: begin
+        axi_req_o.w_valid = 1'b1;
+        axi_req_o.w.last  = (cnt_q == '0);
+        if (type_i == ariane_pkg::SINGLE_REQ) begin
+          axi_req_o.w.data = wdata_i[0];
+          axi_req_o.w.strb = be_i[0];
+        end else begin
+          axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
+          axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
+        end
+        axi_req_o.aw_valid = 1'b1;
+        // we are here because we want to write a cache line
+        axi_req_o.aw.len   = BURST_SIZE[7:0];
+        // decode which of {aw_ready, w_ready} were asserted this cycle
+        case ({
+          axi_resp_i.aw_ready, axi_resp_i.w_ready
+        })
+          // only the W beat was accepted, AW is still pending
+          2'b01: begin
+            // any beats left to send?
+            if (cnt_q == 0) state_d = WAIT_AW_READY_BURST;
+            else  // yes, so reduce the count and stay here
+              cnt_d = cnt_q - 1;
+          end
+          2'b10:   state_d = WAIT_LAST_W_READY;  // AW accepted, current W beat still pending
+          2'b11: begin
+            // we are finished
+            if (cnt_q == 0) begin
+              state_d = WAIT_B_VALID;
+              gnt_o   = 1'b1;
+              // there are beats left to send
+            end else begin
+              state_d = WAIT_LAST_W_READY;
+              cnt_d   = cnt_q - 1;
+            end
+          end
+          default: ;
+        endcase
+
+      end
+
+      // ~> all data has already been sent, we are only waiting for the aw_ready
+      WAIT_AW_READY_BURST: begin
+        axi_req_o.aw_valid = 1'b1;
+        axi_req_o.aw.len   = BURST_SIZE[7:0];
+
+        if (axi_resp_i.aw_ready) begin
+          state_d = WAIT_B_VALID;
+          gnt_o   = 1'b1;
+        end
+      end
+
+      // ~> AW has been accepted, the remaining W beat(s) still have to be sent
+      WAIT_LAST_W_READY: begin
+        axi_req_o.w_valid = 1'b1;
+
+        if (type_i != ariane_pkg::SINGLE_REQ) begin
+          axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
+          axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
+        end
+
+        // this is the last write
+        if (cnt_q == '0) begin
+          axi_req_o.w.last = 1'b1;
+          if (axi_resp_i.w_ready) begin
+            state_d = WAIT_B_VALID;
+            gnt_o   = 1'b1;
+          end
+        end else if (axi_resp_i.w_ready) begin
+          cnt_d = cnt_q - 1;
+        end
+      end
+
+      // ~> finish write transaction
+      WAIT_B_VALID: begin
+        id_o = axi_resp_i.b.id;
+
+        // Write is valid
+        if (axi_resp_i.b_valid && !any_outstanding_aw) begin
+          axi_req_o.b_ready = 1'b1;
+
+          // some atomics must wait for read data
+          // we only accept it after accepting bvalid
+          if (amo_returns_data(amo_q)) begin
+            if (axi_resp_i.r_valid) begin
+              // return read data if valid
+              valid_o           = 1'b1;
+              axi_req_o.r_ready = 1'b1;
+              state_d           = IDLE;
+              rdata_o           = axi_resp_i.r.data;
+            end else begin
+              // wait otherwise
+              state_d = WAIT_AMO_R_VALID;
+            end
+          end else begin
+            valid_o = 1'b1;
+            state_d = IDLE;
+
+            // store-conditional response
+            if (amo_q == ariane_pkg::AMO_SC) begin
+              if (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY) begin
+                // success -> return 0
+                rdata_o = 'b0;
+              end else begin
+                // failure -> when request is 64-bit, return 1;
+                // when request is 32-bit place a 1 in both upper
+                // and lower half words. The right word will be
+                // realigned/masked externally
+                rdata_o = size_q == 2'b10 ? (1'b1 << 32) | 64'b1 : 64'b1;
+              end
+            end
+          end
+          // if the request was not an atomic we can possibly issue
+          // other requests while waiting for the response
+        end else begin
+          if ((amo_q == ariane_pkg::AMO_NONE) && (outstanding_aw_cnt_q != MAX_OUTSTANDING_AW)) begin
+            state_d = IDLE;
+            outstanding_aw_cnt_d = outstanding_aw_cnt_q + 1;
+          end
+        end
+      end
+
+      // ~> some atomics wait for read data
+      WAIT_AMO_R_VALID: begin
+        // acknowledge data and terminate atomic
+        if (axi_resp_i.r_valid) begin
+          axi_req_o.r_ready = 1'b1;
+          state_d           = IDLE;
+          valid_o           = 1'b1;
+          rdata_o           = axi_resp_i.r.data;
+        end
+      end
+
+      // ~> cacheline read, single read
+      WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin
+        if (CRITICAL_WORD_FIRST) index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0] - cnt_q);
+        else index = BURST_SIZE[ADDR_INDEX-1:0] - cnt_q;
+
+        // read data is always accepted in these states
+        axi_req_o.r_ready = 1'b1;
+        // a read beat is being returned
+        if (axi_resp_i.r_valid) begin
+          if (CRITICAL_WORD_FIRST) begin
+            // this is the first word of a cacheline read, e.g.: the word which was causing the miss
+            if (state_q == WAIT_R_VALID_MULTIPLE && cnt_q == BURST_SIZE) begin
+              critical_word_valid_o = 1'b1;
+              critical_word_o       = axi_resp_i.r.data;
+            end
+          end else begin
+            // check if the address offset matches - then we are getting the critical word
+            if (index == addr_offset_q) begin
+              critical_word_valid_o = 1'b1;
+              critical_word_o       = axi_resp_i.r.data;
+            end
+          end
+
+          // this is the last read
+          if (axi_resp_i.r.last) begin
+            id_d    = axi_resp_i.r.id;
+            state_d = COMPLETE_READ;
+          end
+
+          // save the word
+          if (state_q == WAIT_R_VALID_MULTIPLE) begin
+            cache_line_d[index] = axi_resp_i.r.data;
+
+          end else cache_line_d[0] = axi_resp_i.r.data;
+
+          // Decrease the counter
+          cnt_d = cnt_q - 1;
+        end
+      end
+      // ~> read is complete
+      COMPLETE_READ: begin
+        valid_o = 1'b1;
+        state_d = IDLE;
+        id_o    = id_q;
+      end
+
+      default: state_d = IDLE;
+    endcase
+
+    // This process handles B responses when accepting
+    // multiple outstanding write transactions
+    if (any_outstanding_aw && axi_resp_i.b_valid) begin
+      axi_req_o.b_ready = 1'b1;
+      valid_o = 1'b1;
+      // Right hand side contains non-registered signal as we want
+      // to preserve a possible increment from the WAIT_B_VALID state
+      outstanding_aw_cnt_d = outstanding_aw_cnt_d - 1;
+    end
+  end
+
+  // ----------------
+  // Registers
+  // ----------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      // asynchronous reset: FSM starts in IDLE, all registers cleared, no AMO pending
+      state_q              <= IDLE;
+      cnt_q                <= '0;
+      cache_line_q         <= '0;
+      addr_offset_q        <= '0;
+      id_q                 <= '0;
+      amo_q                <= ariane_pkg::AMO_NONE;
+      size_q               <= '0;
+      outstanding_aw_cnt_q <= '0;
+    end else begin
+      state_q              <= state_d;
+      cnt_q                <= cnt_d;
+      cache_line_q         <= cache_line_d;
+      addr_offset_q        <= addr_offset_d;
+      id_q                 <= id_d;
+      amo_q                <= amo_d;
+      size_q               <= size_d;
+      outstanding_aw_cnt_q <= outstanding_aw_cnt_d;
+    end
+  end
+
+  function automatic axi_pkg::atop_t atop_from_amo(ariane_pkg::amo_t amo);  // maps an ariane_pkg AMO opcode to the value driven on aw.atop
+    axi_pkg::atop_t result = 6'b000000;
+    // opcodes without an explicit case item (e.g. AMO_LR / AMO_SC) take the default branch and yield all zeros
+    unique case (amo)
+      ariane_pkg::AMO_NONE: result = {axi_pkg::ATOP_NONE, 4'b0000};
+      ariane_pkg::AMO_SWAP: result = {axi_pkg::ATOP_ATOMICSWAP};
+      ariane_pkg::AMO_ADD:
+      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
+      ariane_pkg::AMO_AND:  // NOTE(review): AXI CLR clears the bits set in the operand; presumably the operand is inverted elsewhere - confirm
+      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
+      ariane_pkg::AMO_OR:
+      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
+      ariane_pkg::AMO_XOR:
+      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
+      ariane_pkg::AMO_MAX:
+      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
+      ariane_pkg::AMO_MAXU:
+      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
+      ariane_pkg::AMO_MIN:
+      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
+      ariane_pkg::AMO_MINU:
+      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
+      ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000};  // Unsupported
+      ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000};  // Unsupported
+      default: result = 6'b000000;
+    endcase
+
+    return result;
+  endfunction
+
+  function automatic logic amo_returns_data(ariane_pkg::amo_t amo);  // 1 when the atop class of this AMO is a load or a swap/compare
+    axi_pkg::atop_t atop = atop_from_amo(amo);
+    logic           is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD;
+    logic           is_swap_or_cmp = atop[5:4] == axi_pkg::ATOP_ATOMICSWAP[5:4];  // only the two top bits are compared
+    return is_load || is_swap_or_cmp;
+  endfunction
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/cache_ctrl.sv b/test/type_param/core/cache_subsystem/cache_ctrl.sv
new file mode 100644
index 0000000..e8770d2
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/cache_ctrl.sv
@@ -0,0 +1,475 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// File:   cache_ctrl.sv
+// Author: Florian Zaruba <zarubaf@ethz.ch>
+// Date:   14.10.2017
+//
+// Copyright (C) 2017 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// Description: Cache controller
+
+
+module cache_ctrl
+  import ariane_pkg::*;
+  import std_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,
+    input logic bypass_i,  // 1: bypass the cache, 0: cache is enabled
+    output logic busy_o,
+    // Core request ports
+    input dcache_req_i_t req_port_i,
+    output dcache_req_o_t req_port_o,
+    // SRAM interface
+    output logic [DCACHE_SET_ASSOC-1:0] req_o,  // req is valid
+    output logic [DCACHE_INDEX_WIDTH-1:0] addr_o,  // address into cache array
+    input logic gnt_i,
+    output cache_line_t data_o,
+    output cl_be_t be_o,
+    output logic [DCACHE_TAG_WIDTH-1:0] tag_o,  //valid one cycle later
+    input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
+    output logic we_o,
+    input logic [DCACHE_SET_ASSOC-1:0] hit_way_i,
+    // Miss handling
+    output miss_req_t miss_req_o,
+    // return
+    input logic miss_gnt_i,
+    input  logic                                 active_serving_i, // the miss unit is currently active for this unit, serving the miss
+    input logic [63:0] critical_word_i,
+    input logic critical_word_valid_i,
+    // bypass ports
+    input logic bypass_gnt_i,
+    input logic bypass_valid_i,
+    input logic [63:0] bypass_data_i,
+    // check MSHR for aliasing
+    output logic [55:0] mshr_addr_o,
+    input logic mshr_addr_matches_i,
+    input logic mshr_index_matches_i
+);
+
+  enum logic [3:0] {
+    IDLE,               // 0
+    WAIT_TAG,           // 1
+    WAIT_TAG_BYPASSED,  // 2
+    WAIT_GNT,           // 3
+    WAIT_GNT_SAVED,     // 4
+    STORE_REQ,          // 5
+    WAIT_REFILL_VALID,  // 6
+    WAIT_REFILL_GNT,    // 7
+    WAIT_TAG_SAVED,     // 8
+    WAIT_MSHR,          // 9
+    WAIT_CRITICAL_WORD  // 10
+  }
+      state_d, state_q;
+  // request fields captured from req_port_i and held while the request is being served
+  typedef struct packed {
+    logic [DCACHE_INDEX_WIDTH-1:0] index;
+    logic [DCACHE_TAG_WIDTH-1:0]   tag;
+    logic [DCACHE_TID_WIDTH-1:0]   id;
+    logic [7:0]                    be;
+    logic [1:0]                    size;
+    logic                          we;
+    logic [63:0]                   wdata;
+    logic                          bypass;  // set for bypass mode and for non-cacheable addresses
+    logic                          killed;  // sticky: OR-ed with req_port_i.kill_req every cycle
+  } mem_req_t;
+
+  logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;  // hit way remembered for the write issued in STORE_REQ
+
+  mem_req_t mem_req_d, mem_req_q;
+
+  assign busy_o = (state_q != IDLE);  // busy whenever the FSM is not idle
+  assign tag_o  = mem_req_d.tag;  // driven from the _d value, so a tag captured this cycle is visible immediately
+
+  logic [DCACHE_LINE_WIDTH-1:0] cl_i;  // data of the hitting way, selected in way_select
+
+  always_comb begin : way_select
+    cl_i = '0;  // all zeros when no way hits
+    for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) if (hit_way_i[i]) cl_i = data_i[i].data;  // later assignments win, so the highest hitting index is selected
+
+    // cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
+  end
+
+  // --------------
+  // Cache FSM
+  // --------------
+  always_comb begin : cache_ctrl_fsm
+    automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
+    // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
+    // cache-line offset -> multiple of 64
+    cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6;  // shift by 6 to the left
+    // default assignments
+    state_d = state_q;
+    mem_req_d = mem_req_q;
+    hit_way_d = hit_way_q;
+    // output assignments
+    req_port_o.data_gnt = 1'b0;
+    req_port_o.data_rvalid = 1'b0;
+    req_port_o.data_rdata = '0;
+    req_port_o.data_rid = mem_req_q.id;
+    miss_req_o = '0;
+    mshr_addr_o = '0;
+    // Memory array communication
+    req_o = '0;
+    addr_o = req_port_i.address_index;
+    data_o = '0;
+    be_o = '0;
+    we_o = '0;
+
+    mem_req_d.killed |= req_port_i.kill_req;
+
+    case (state_q)
+
+      IDLE: begin
+        // a new request arrived
+        if (req_port_i.data_req && !flush_i) begin
+          // request the cache line - we can do this speculatively
+          req_o = '1;
+
+          // save index, be and we
+          mem_req_d.index = req_port_i.address_index;
+          mem_req_d.id    = req_port_i.data_id;
+          mem_req_d.be    = req_port_i.data_be;
+          mem_req_d.size  = req_port_i.data_size;
+          mem_req_d.we    = req_port_i.data_we;
+          mem_req_d.wdata = req_port_i.data_wdata;
+          mem_req_d.killed = req_port_i.kill_req;
+
+          // Bypass mode, check for uncacheable address here as well
+          if (bypass_i) begin
+            state_d = WAIT_TAG_BYPASSED;
+            // grant this access only if it was a load
+            req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
+            mem_req_d.bypass = 1'b1;
+            // ------------------
+            // Cache is enabled
+            // ------------------
+          end else begin
+            // Wait that we have access on the memory array
+            if (gnt_i) begin
+              state_d = WAIT_TAG;
+              mem_req_d.bypass = 1'b0;
+              // only for a read
+              if (!req_port_i.data_we) req_port_o.data_gnt = 1'b1;
+            end
+          end
+        end
+      end
+
+      // cache enabled and waiting for tag
+      WAIT_TAG, WAIT_TAG_SAVED: begin
+        // check that the client really wants to do the request and that we have a valid tag
+        if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin
+          // save tag if we didn't already save it
+          if (state_q != WAIT_TAG_SAVED) begin
+            mem_req_d.tag = req_port_i.address_tag;
+          end
+          // we speculatively request another transfer
+          if (req_port_i.data_req && !flush_i) begin
+            req_o = '1;
+          end
+          // ------------
+          // HIT CASE
+          // ------------
+          if (|hit_way_i) begin
+            // we can request another cache-line if this was a load
+            if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
+              state_d             = WAIT_TAG;  // switch back to WAIT_TAG
+              mem_req_d.index     = req_port_i.address_index;
+              mem_req_d.id        = req_port_i.data_id;
+              mem_req_d.be        = req_port_i.data_be;
+              mem_req_d.size      = req_port_i.data_size;
+              mem_req_d.we        = req_port_i.data_we;
+              mem_req_d.wdata     = req_port_i.data_wdata;
+              mem_req_d.killed    = req_port_i.kill_req;
+              mem_req_d.bypass    = 1'b0;
+
+              req_port_o.data_gnt = gnt_i;
+
+              if (!gnt_i) begin
+                state_d = IDLE;
+              end
+            end else begin
+              state_d = IDLE;
+            end
+
+            // this is timing critical
+            req_port_o.data_rdata = cl_i[cl_offset+:64];
+
+            // report data for a read
+            if (!mem_req_q.we) begin
+              req_port_o.data_rvalid = ~mem_req_q.killed;
+              // else this was a store so we need an extra step to handle it
+            end else begin
+              state_d   = STORE_REQ;
+              hit_way_d = hit_way_i;
+            end
+            // ------------
+            // MISS CASE
+            // ------------
+          end else begin
+            // make a miss request
+            state_d = WAIT_REFILL_GNT;
+          end
+          // ----------------------------------------------
+          // Check MSHR - Miss Status Handling Register
+          // ----------------------------------------------
+          mshr_addr_o = {tag_o, mem_req_q.index};
+          // 1. We've got a match on MSHR while we are going down the
+          //    store path. This means that the miss controller is
+          //    currently evicting our cache-line. As the store is
+          //    non-atomic we need to constantly check whether we are
+          //    matching the address the miss handler is serving.
+          //    Furthermore we need to check for the whole index
+          //    because a completely different memory line could alias
+          //    with the cache-line we are evicting.
+          // 2. The second case is where we are currently loading and
+          //    the address matches the exact CL the miss controller
+          //    is currently serving. That means we need to wait for
+          //    the miss controller to finish its request before we
+          //    can continue to serve this CL. Otherwise we will fetch
+          //    the cache-line again and potentially lose any
+          //    content we've written so far. This as a consequence
+          //    means we can't have a hit on the CL, which means the
+          //    req_port_o.data_rvalid will be de-asserted.
+          if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
+            state_d = WAIT_MSHR;
+          end
+
+          // -------------------------
+          // Check for cache-ability
+          // -------------------------
+          if (!config_pkg::is_inside_cacheable_regions(
+                  CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}}
+              )) begin
+            mem_req_d.bypass = 1'b1;
+            state_d = WAIT_REFILL_GNT;
+          end
+
+          // we are still waiting for a valid tag
+        end else begin
+          // request cache line for saved index
+          addr_o = mem_req_q.index;
+          req_o  = '1;
+
+          // check that we still have a memory grant
+          if (!gnt_i) begin
+            state_d = WAIT_GNT;
+          end
+        end
+      end
+
+      // ~> we already granted the request but lost the memory grant while waiting for the tag
+      WAIT_GNT, WAIT_GNT_SAVED: begin
+        // request cache line for saved index
+        addr_o = mem_req_q.index;
+        req_o  = '1;
+
+        // if we get a valid tag while waiting for the memory grant, save it
+        if (req_port_i.tag_valid) begin
+          mem_req_d.tag = req_port_i.address_tag;
+          state_d = WAIT_GNT_SAVED;
+        end
+
+        // we have a memory grant again ~> go back to WAIT_TAG
+        if (gnt_i) begin
+          state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
+        end
+      end
+
+      // ~> we are here as we need a second round of memory access for a store
+      STORE_REQ: begin
+        // check if the MSHR still doesn't match
+        mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
+
+        // We need to re-check for MSHR aliasing here as the store requires at least
+        // two memory look-ups on a single-ported SRAM and therefore is non-atomic
+        if (!mshr_index_matches_i) begin
+          // store data, write dirty bit
+          req_o                      = hit_way_q;
+          addr_o                     = mem_req_q.index;
+          we_o                       = 1'b1;
+
+          be_o.vldrty                = hit_way_q;
+
+          // set the correct byte enable
+          be_o.data[cl_offset>>3+:8] = mem_req_q.be;
+          data_o.data[cl_offset+:64] = mem_req_q.wdata;
+          // ~> change the state
+          data_o.dirty               = 1'b1;
+          data_o.valid               = 1'b1;
+
+          // got a grant ~> this is finished now
+          if (gnt_i) begin
+            req_port_o.data_gnt = 1'b1;
+            state_d = IDLE;
+          end
+        end else begin
+          state_d = WAIT_MSHR;
+        end
+      end  // case: STORE_REQ
+
+      // we've got a match on MSHR ~> miss unit is currently serving a request
+      WAIT_MSHR: begin
+        mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
+        // we can start a new request
+        if (!mshr_index_matches_i) begin
+          req_o  = '1;
+
+          addr_o = mem_req_q.index;
+
+          if (gnt_i) state_d = WAIT_TAG_SAVED;
+        end
+      end
+
+      // it's for sure a miss
+      WAIT_TAG_BYPASSED: begin
+        // check that the client really wants to do the request and that we have a valid tag
+        if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
+          // save tag
+          mem_req_d.tag = req_port_i.address_tag;
+          state_d = WAIT_REFILL_GNT;
+        end
+      end
+
+      // ~> wait for grant from miss unit
+      WAIT_REFILL_GNT: begin
+
+        mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
+
+        miss_req_o.valid = 1'b1;
+        miss_req_o.bypass = mem_req_q.bypass;
+        miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
+        miss_req_o.be = mem_req_q.be;
+        miss_req_o.size = mem_req_q.size;
+        miss_req_o.we = mem_req_q.we;
+        miss_req_o.wdata = mem_req_q.wdata;
+
+        // got a grant so go to valid
+        if (bypass_gnt_i) begin
+          state_d = WAIT_REFILL_VALID;
+          // if this was a write we still need to give a grant to the store unit.
+          // We can also avoid waiting for the response valid, this signal is
+          // currently not used by the store unit
+          if (mem_req_q.we) begin
+            req_port_o.data_gnt = 1'b1;
+            state_d = IDLE;
+          end
+        end
+
+        if (miss_gnt_i && !mem_req_q.we) state_d = WAIT_CRITICAL_WORD;
+        else if (miss_gnt_i) begin
+          state_d = IDLE;
+          req_port_o.data_gnt = 1'b1;
+        end
+
+        // it can be the case that the miss unit is currently serving a
+        // request which matches ours
+        // so we need to check the MSHR for matching continuously
+        // if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
+        if (mshr_addr_matches_i && !active_serving_i) begin
+          state_d = WAIT_MSHR;
+        end
+      end
+
+      // ~> wait for critical word to arrive
+      WAIT_CRITICAL_WORD: begin
+        // speculatively request another word
+        if (req_port_i.data_req) begin
+          // request the cache line
+          req_o = '1;
+        end
+
+        if (critical_word_valid_i) begin
+          req_port_o.data_rvalid = ~mem_req_q.killed;
+          req_port_o.data_rdata  = critical_word_i;
+          // we can make another request
+          if (req_port_i.data_req && !flush_i) begin
+            // save index, be and we
+            mem_req_d.index = req_port_i.address_index;
+            mem_req_d.id    = req_port_i.data_id;
+            mem_req_d.be    = req_port_i.data_be;
+            mem_req_d.size  = req_port_i.data_size;
+            mem_req_d.we    = req_port_i.data_we;
+            mem_req_d.wdata = req_port_i.data_wdata;
+            mem_req_d.killed = req_port_i.kill_req;
+
+            state_d = IDLE;
+
+            // Wait until we have access on the memory array
+            if (gnt_i) begin
+              state_d = WAIT_TAG;
+              mem_req_d.bypass = 1'b0;
+              req_port_o.data_gnt = 1'b1;
+            end
+          end else begin
+            state_d = IDLE;
+          end
+        end
+      end
+      // ~> wait until the bypass request is valid
+      WAIT_REFILL_VALID: begin
+        // got a valid answer
+        if (bypass_valid_i) begin
+          req_port_o.data_rdata = bypass_data_i;
+          req_port_o.data_rvalid = ~mem_req_q.killed;
+          state_d = IDLE;
+        end
+      end
+    endcase
+
+    if (req_port_i.kill_req) begin
+      req_port_o.data_rvalid = 1'b1;
+      if (!(state_q inside {WAIT_REFILL_GNT, WAIT_CRITICAL_WORD})) begin
+        state_d = IDLE;
+      end
+    end
+  end
+
+  // --------------
+  // Registers
+  // --------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      state_q   <= IDLE;
+      mem_req_q <= '0;
+      hit_way_q <= '0;
+    end else begin
+      state_q   <= state_d;
+      mem_req_q <= mem_req_d;
+      hit_way_q <= hit_way_d;
+    end
+  end
+
+  //pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    assert (DCACHE_LINE_WIDTH == 128)
+    else
+      $error(
+          "Cacheline width has to be 128 for the moment. But only small changes required in data select logic"
+      );
+  end
+  // if the full MSHR address matches so should also match the partial one
+  partial_full_mshr_match :
+  assert property(@(posedge  clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i)
+  else $fatal(1, "partial mshr index doesn't match");
+  // there should never be a valid answer when the MSHR matches and we are not being served
+  no_valid_on_mshr_match :
+  assert property(@(posedge  clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req)
+  else $fatal(1, "rvalid_o should not be set on MSHR match");
+`endif
+  //pragma translate_on
+endmodule
diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv
new file mode 100644
index 0000000..3115403
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv
@@ -0,0 +1,200 @@
+// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
+//                Alternatives (CEA)
+//
+// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Authors: Cesar Fuguet
+// Date: February, 2023
+// Description: Interface adapter for the CVA6 core
+module cva6_hpdcache_if_adapter  // bridges one CVA6 dcache port to one HPDcache requester port
+  import hpdcache_pkg::*;
+
+//  Parameters
+//  {{{
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg      = config_pkg::cva6_cfg_empty,  // passed to is_inside_cacheable_regions()
+    parameter bit                    is_load_port = 1'b1  // 1: load_port_gen branch, 0: store_amo_gen branch
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    //  Clock and active-low reset pins
+    input logic clk_i,
+    input logic rst_ni,
+
+    //  Port ID
+    input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i,
+
+    //  Request/response ports from/to the CVA6 core
+    input  ariane_pkg::dcache_req_i_t cva6_req_i,
+    output ariane_pkg::dcache_req_o_t cva6_req_o,
+    input  ariane_pkg::amo_req_t      cva6_amo_req_i,
+    output ariane_pkg::amo_resp_t     cva6_amo_resp_o,
+
+    //  Request port to the L1 Dcache
+    output logic                        hpdcache_req_valid_o,
+    input  logic                        hpdcache_req_ready_i,
+    output hpdcache_pkg::hpdcache_req_t hpdcache_req_o,
+    output logic                        hpdcache_req_abort_o,
+    output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o,
+    output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o,
+
+    //  Response port from the L1 Dcache
+    input logic                        hpdcache_rsp_valid_i,
+    input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i
+);
+  //  }}}
+
+  //  Internal nets and registers
+  //  {{{
+  logic forward_store, forward_amo;  // driven (and checked) only inside store_amo_gen
+  logic hpdcache_req_is_uncacheable;  // inverted result of the cacheable-region lookup
+  //  }}}
+
+  //  Request forwarding
+  //  {{{
+  generate
+    //  LOAD request (issued with phys_indexed=0; tag, PMA and abort travel on the separate ports)
+    //  {{{
+    if (is_load_port == 1'b1) begin : load_port_gen
+      assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
+          CVA6Cfg,
+          {
+            {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}}
+            , cva6_req_i.address_tag
+            , {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
+          }
+      );
+
+      //    Request forwarding
+      assign hpdcache_req_valid_o = cva6_req_i.data_req,
+          hpdcache_req_o.addr_offset = cva6_req_i.address_index,
+          hpdcache_req_o.wdata = '0,
+          hpdcache_req_o.op = hpdcache_pkg::HPDCACHE_REQ_LOAD,
+          hpdcache_req_o.be = cva6_req_i.data_be,
+          hpdcache_req_o.size = cva6_req_i.data_size,
+          hpdcache_req_o.sid = hpdcache_req_sid_i,
+          hpdcache_req_o.tid = cva6_req_i.data_id,
+          hpdcache_req_o.need_rsp = 1'b1,
+          hpdcache_req_o.phys_indexed = 1'b0,
+          hpdcache_req_o.addr_tag = '0,  // unused on virtually indexed request
+          hpdcache_req_o.pma = '0;  // unused on virtually indexed request
+
+      assign hpdcache_req_abort_o = cva6_req_i.kill_req,
+          hpdcache_req_tag_o = cva6_req_i.address_tag,
+          hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable,
+          hpdcache_req_pma_o.io = 1'b0;
+
+      //    Response forwarding (cva6_amo_resp_o is not driven in this branch)
+      assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i,
+          cva6_req_o.data_rdata = hpdcache_rsp_i.rdata,
+          cva6_req_o.data_rid = hpdcache_rsp_i.tid,
+          cva6_req_o.data_gnt = hpdcache_req_ready_i;
+    end  //  }}}
+
+         //  {{{
+    else begin : store_amo_gen
+      //  STORE/AMO request (phys_indexed=1; AMO fields are selected whenever forward_amo is set)
+      hpdcache_req_addr_t   amo_addr;
+      hpdcache_req_offset_t amo_addr_offset;
+      hpdcache_tag_t        amo_tag;
+      logic amo_is_word, amo_is_word_hi;
+      hpdcache_req_data_t        amo_data;
+      hpdcache_req_be_t          amo_data_be;
+      hpdcache_req_op_t          amo_op;
+      logic               [31:0] amo_resp_word;
+
+      //  AMO logic
+      //  {{{
+      always_comb begin : amo_op_comb
+        amo_addr = cva6_amo_req_i.operand_a;
+        amo_addr_offset = amo_addr[0+:HPDCACHE_REQ_OFFSET_WIDTH];
+        amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH+:HPDCACHE_TAG_WIDTH];
+        amo_is_word = (cva6_amo_req_i.size == 2'b10);  // size 2'b10 selects the 32-bit path
+        amo_is_word_hi = cva6_amo_req_i.operand_a[2];  // address bit 2 picks the upper 32-bit half
+
+        amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0+:32]}} : cva6_amo_req_i.operand_b;  // word operand replicated into both halves
+
+        amo_data_be = amo_is_word_hi ? 8'hf0 : amo_is_word ? 8'h0f : 8'hff;  // NOTE(review): operand_a[2] wins even when size is not word; presumably non-word AMOs are 8-byte aligned, confirm
+
+        unique case (cva6_amo_req_i.amo_op)
+          ariane_pkg::AMO_LR:   amo_op = HPDCACHE_REQ_AMO_LR;
+          ariane_pkg::AMO_SC:   amo_op = HPDCACHE_REQ_AMO_SC;
+          ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP;
+          ariane_pkg::AMO_ADD:  amo_op = HPDCACHE_REQ_AMO_ADD;
+          ariane_pkg::AMO_AND:  amo_op = HPDCACHE_REQ_AMO_AND;
+          ariane_pkg::AMO_OR:   amo_op = HPDCACHE_REQ_AMO_OR;
+          ariane_pkg::AMO_XOR:  amo_op = HPDCACHE_REQ_AMO_XOR;
+          ariane_pkg::AMO_MAX:  amo_op = HPDCACHE_REQ_AMO_MAX;
+          ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU;
+          ariane_pkg::AMO_MIN:  amo_op = HPDCACHE_REQ_AMO_MIN;
+          ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU;
+          default:              amo_op = HPDCACHE_REQ_LOAD;  // any other amo_op value maps to a plain load
+        endcase
+      end
+
+      assign amo_resp_word  = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32]
+                                                   : hpdcache_rsp_i.rdata[0][0  +: 32];
+      //  }}}
+
+      //  Request forwarding
+      //  {{{
+      assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
+          CVA6Cfg,
+          {
+            {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}}
+            , hpdcache_req_o.addr_tag,
+            {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
+          }
+      );
+
+      assign forward_store = cva6_req_i.data_req, forward_amo = cva6_amo_req_i.req;
+
+      assign hpdcache_req_valid_o = forward_store | forward_amo,
+          hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset : cva6_req_i.address_index,
+          hpdcache_req_o.wdata = forward_amo ? amo_data : cva6_req_i.data_wdata,
+          hpdcache_req_o.op = forward_amo ? amo_op : hpdcache_pkg::HPDCACHE_REQ_STORE,
+          hpdcache_req_o.be = forward_amo ? amo_data_be : cva6_req_i.data_be,
+          hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size : cva6_req_i.data_size,
+          hpdcache_req_o.sid = hpdcache_req_sid_i,
+          hpdcache_req_o.tid = forward_amo ? '1 : '0,  // all-ones tid marks AMO requests; matched on the response path below
+          hpdcache_req_o.need_rsp = forward_amo,  // only AMOs ask the cache for a response
+          hpdcache_req_o.phys_indexed = 1'b1,
+          hpdcache_req_o.addr_tag = forward_amo ? amo_tag : cva6_req_i.address_tag,
+          hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable,
+          hpdcache_req_o.pma.io = 1'b0,
+          hpdcache_req_abort_o = 1'b0,  // unused on physically indexed requests
+          hpdcache_req_tag_o = '0,  // unused on physically indexed requests
+          hpdcache_req_pma_o = '0;  // unused on physically indexed requests
+      //  }}}
+
+      //  Response forwarding
+      //  {{{
+      assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1),
+          cva6_req_o.data_rdata = hpdcache_rsp_i.rdata,
+          cva6_req_o.data_rid = hpdcache_rsp_i.tid,
+          cva6_req_o.data_gnt = hpdcache_req_ready_i;
+
+      assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1),
+          cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word}
+                                                        : hpdcache_rsp_i.rdata[0][63:0];  // word result is sign-extended; NOTE(review): decoded from the live cva6_amo_req_i, presumably held until ack, confirm
+      //  }}}
+
+      //  Assertions (kept in this branch: forward_store/forward_amo are driven only here)
+      //  {{{
+      //    pragma translate_off
+      forward_one_request_assert :
+      assert property (@(posedge clk_i) disable iff (rst_ni !== 1'b1) ($onehot0({forward_store, forward_amo})))
+      else $error("Only one request shall be forwarded");
+      //    pragma translate_on
+      //  }}}
+    end
+    //  }}}
+  endgenerate
+  //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv
new file mode 100644
index 0000000..7e90b91
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv
@@ -0,0 +1,609 @@
+// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
+//                Alternatives (CEA)
+//
+// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Authors: Cesar Fuguet
+// Date: February, 2023
+// Description: CVA6 cache subsystem integrating standard CVA6's
+//              instruction cache and the Core-V High-Performance L1
+//              data cache (CV-HPDcache).
+
+module cva6_hpdcache_subsystem
+//  Parameters
+//  {{{
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int NumPorts = 4,
+    parameter int NrHwPrefetchers = 4,
+    parameter type noc_req_t = logic,
+    parameter type noc_resp_t = logic,
+    parameter type cmo_req_t = logic,
+    parameter type cmo_rsp_t = logic
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input logic clk_i,
+    input logic rst_ni,
+
+    //  I$
+    //  {{{
+    input logic icache_en_i,  // enable icache (or bypass e.g: in debug mode)
+    input logic icache_flush_i,  // flush the icache, flush and kill have to be asserted together
+    output logic icache_miss_o,  // to performance counter
+    // address translation requests
+    input ariane_pkg::icache_areq_t icache_areq_i,  // to/from frontend
+    output ariane_pkg::icache_arsp_t icache_areq_o,
+    // data requests
+    input ariane_pkg::icache_dreq_t icache_dreq_i,  // to/from frontend
+    output ariane_pkg::icache_drsp_t icache_dreq_o,
+    //   }}}
+
+    //  D$
+    //  {{{
+    //    Cache management
+    input logic dcache_enable_i,  // from CSR
+    input logic dcache_flush_i,  // high until acknowledged
+    output logic                       dcache_flush_ack_o,     // send a single cycle acknowledge signal when the cache is flushed
+    output logic dcache_miss_o,  // we missed on a ld/st
+
+    //  AMO interface
+    input  ariane_pkg::amo_req_t                     dcache_amo_req_i,    // from LSU
+    output ariane_pkg::amo_resp_t                    dcache_amo_resp_o,   // to LSU
+    //  CMO interface
+    input  cmo_req_t                                 dcache_cmo_req_i,    // from CMO FU
+    output cmo_rsp_t                                 dcache_cmo_resp_o,   // to CMO FU
+    //  Request ports
+    input  ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i,  // from LSU
+    output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o,  // to LSU
+    //  Write Buffer status
+    output logic                                     wbuffer_empty_o,
+    output logic                                     wbuffer_not_ni_o,
+
+    //  Hardware memory prefetcher configuration
+    input  logic [NrHwPrefetchers-1:0]       hwpf_base_set_i,
+    input  logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i,
+    output logic [NrHwPrefetchers-1:0][63:0] hwpf_base_o,
+    input  logic [NrHwPrefetchers-1:0]       hwpf_param_set_i,
+    input  logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i,
+    output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o,
+    input  logic [NrHwPrefetchers-1:0]       hwpf_throttle_set_i,
+    input  logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i,
+    output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o,
+    output logic [               63:0]       hwpf_status_o,
+    //  }}}
+
+    //  AXI port to upstream memory/peripherals
+    //  {{{
+    output noc_req_t  noc_req_o,
+    input  noc_resp_t noc_resp_i
+    //  }}}
+);
+  //  }}}
+
+  `include "axi/typedef.svh"
+
+  //  I$ instantiation
+  //  {{{
+  logic icache_miss_valid, icache_miss_ready;
+  wt_cache_pkg::icache_req_t icache_miss;
+
+  logic icache_miss_resp_valid;
+  wt_cache_pkg::icache_rtrn_t icache_miss_resp;
+
+  localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1);
+
+  cva6_icache #(
+      .CVA6Cfg(CVA6Cfg),
+      .RdTxId (ICACHE_RDTXID)
+  ) i_cva6_icache (
+      .clk_i         (clk_i),
+      .rst_ni        (rst_ni),
+      .flush_i       (icache_flush_i),
+      .en_i          (icache_en_i),
+      .miss_o        (icache_miss_o),
+      .areq_i        (icache_areq_i),
+      .areq_o        (icache_areq_o),
+      .dreq_i        (icache_dreq_i),
+      .dreq_o        (icache_dreq_o),
+      .mem_rtrn_vld_i(icache_miss_resp_valid),
+      .mem_rtrn_i    (icache_miss_resp),
+      .mem_data_req_o(icache_miss_valid),
+      .mem_data_ack_i(icache_miss_ready),
+      .mem_data_o    (icache_miss)
+  );
+  //  }}}
+
+  //  D$ instantiation
+  //  {{{
+  `include "hpdcache_typedef.svh"
+
+  //    0: Page-Table Walk (PTW)
+  //    1: Load unit
+  //    2: Accelerator load
+  //    3: Store/AMO
+  //    .
+  //    .
+  //    .
+  //    NumPorts: CMO
+  //    NumPorts + 1: Hardware Memory Prefetcher (hwpf)
+  localparam int HPDCACHE_NREQUESTERS = NumPorts + 2;
+
+  typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t;
+  typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t;
+  typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t;
+  typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t;
+  `HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t);
+  `HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t);
+  `HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t);
+  `HPDCACHE_TYPEDEF_MEM_RESP_W_T(hpdcache_mem_resp_w_t, hpdcache_mem_id_t);
+
+  typedef logic [63:0] hwpf_stride_param_t;
+
+  logic                        dcache_req_valid[HPDCACHE_NREQUESTERS-1:0];
+  logic                        dcache_req_ready[HPDCACHE_NREQUESTERS-1:0];
+  hpdcache_pkg::hpdcache_req_t dcache_req      [HPDCACHE_NREQUESTERS-1:0];
+  logic                        dcache_req_abort[HPDCACHE_NREQUESTERS-1:0];
+  hpdcache_pkg::hpdcache_tag_t dcache_req_tag  [HPDCACHE_NREQUESTERS-1:0];
+  hpdcache_pkg::hpdcache_pma_t dcache_req_pma  [HPDCACHE_NREQUESTERS-1:0];
+  logic                        dcache_rsp_valid[HPDCACHE_NREQUESTERS-1:0];
+  hpdcache_pkg::hpdcache_rsp_t dcache_rsp      [HPDCACHE_NREQUESTERS-1:0];
+  logic dcache_read_miss, dcache_write_miss;
+
+  logic                                   [                2:0] snoop_valid;
+  logic                                   [                2:0] snoop_abort;
+  hpdcache_pkg::hpdcache_req_offset_t     [                2:0] snoop_addr_offset;
+  hpdcache_pkg::hpdcache_tag_t            [                2:0] snoop_addr_tag;
+  logic                                   [                2:0] snoop_phys_indexed;
+
+  logic                                                         dcache_cmo_req_is_prefetch;
+
+  logic                                                         dcache_miss_ready;
+  logic                                                         dcache_miss_valid;
+  hpdcache_mem_req_t                                            dcache_miss;
+
+  logic                                                         dcache_miss_resp_ready;
+  logic                                                         dcache_miss_resp_valid;
+  hpdcache_mem_resp_r_t                                         dcache_miss_resp;
+
+  logic                                                         dcache_wbuf_ready;
+  logic                                                         dcache_wbuf_valid;
+  hpdcache_mem_req_t                                            dcache_wbuf;
+
+  logic                                                         dcache_wbuf_data_ready;
+  logic                                                         dcache_wbuf_data_valid;
+  hpdcache_mem_req_w_t                                          dcache_wbuf_data;
+
+  logic                                                         dcache_wbuf_resp_ready;
+  logic                                                         dcache_wbuf_resp_valid;
+  hpdcache_mem_resp_w_t                                         dcache_wbuf_resp;
+
+  logic                                                         dcache_uc_read_ready;
+  logic                                                         dcache_uc_read_valid;
+  hpdcache_mem_req_t                                            dcache_uc_read;
+
+  logic                                                         dcache_uc_read_resp_ready;
+  logic                                                         dcache_uc_read_resp_valid;
+  hpdcache_mem_resp_r_t                                         dcache_uc_read_resp;
+
+  logic                                                         dcache_uc_write_ready;
+  logic                                                         dcache_uc_write_valid;
+  hpdcache_mem_req_t                                            dcache_uc_write;
+
+  logic                                                         dcache_uc_write_data_ready;
+  logic                                                         dcache_uc_write_data_valid;
+  hpdcache_mem_req_w_t                                          dcache_uc_write_data;
+
+  logic                                                         dcache_uc_write_resp_ready;
+  logic                                                         dcache_uc_write_resp_valid;
+  hpdcache_mem_resp_w_t                                         dcache_uc_write_resp;
+
+  hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in;
+  hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out;
+
+  generate
+    ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0];
+
+    for (genvar r = 0; r < (NumPorts - 1); r++) begin : cva6_hpdcache_load_if_adapter_gen
+      assign dcache_req_ports[r] = dcache_req_ports_i[r];
+
+      cva6_hpdcache_if_adapter #(
+          .CVA6Cfg     (CVA6Cfg),
+          .is_load_port(1'b1)
+      ) i_cva6_hpdcache_load_if_adapter (
+          .clk_i,
+          .rst_ni,
+
+          .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(r)),
+
+          .cva6_req_i     (dcache_req_ports[r]),
+          .cva6_req_o     (dcache_req_ports_o[r]),
+          .cva6_amo_req_i ('0),
+          .cva6_amo_resp_o(  /* unused */),
+
+          .hpdcache_req_valid_o(dcache_req_valid[r]),
+          .hpdcache_req_ready_i(dcache_req_ready[r]),
+          .hpdcache_req_o      (dcache_req[r]),
+          .hpdcache_req_abort_o(dcache_req_abort[r]),
+          .hpdcache_req_tag_o  (dcache_req_tag[r]),
+          .hpdcache_req_pma_o  (dcache_req_pma[r]),
+
+          .hpdcache_rsp_valid_i(dcache_rsp_valid[r]),
+          .hpdcache_rsp_i      (dcache_rsp[r])
+      );
+    end
+
+    cva6_hpdcache_if_adapter #(
+        .CVA6Cfg     (CVA6Cfg),
+        .is_load_port(1'b0)
+    ) i_cva6_hpdcache_store_if_adapter (
+        .clk_i,
+        .rst_ni,
+
+        .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts - 1)),
+
+        .cva6_req_i     (dcache_req_ports_i[NumPorts-1]),
+        .cva6_req_o     (dcache_req_ports_o[NumPorts-1]),
+        .cva6_amo_req_i (dcache_amo_req_i),
+        .cva6_amo_resp_o(dcache_amo_resp_o),
+
+        .hpdcache_req_valid_o(dcache_req_valid[NumPorts-1]),
+        .hpdcache_req_ready_i(dcache_req_ready[NumPorts-1]),
+        .hpdcache_req_o      (dcache_req[NumPorts-1]),
+        .hpdcache_req_abort_o(dcache_req_abort[NumPorts-1]),
+        .hpdcache_req_tag_o  (dcache_req_tag[NumPorts-1]),
+        .hpdcache_req_pma_o  (dcache_req_pma[NumPorts-1]),
+
+        .hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts-1]),
+        .hpdcache_rsp_i      (dcache_rsp[NumPorts-1])
+    );
+
+`ifdef HPDCACHE_ENABLE_CMO
+    cva6_hpdcache_cmo_if_adapter #(
+        .cmo_req_t(cmo_req_t),
+        .cmo_rsp_t(cmo_rsp_t)
+    ) i_cva6_hpdcache_cmo_if_adapter (
+        .clk_i,
+        .rst_ni,
+
+        .dcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)),
+
+        .cva6_cmo_req_i (dcache_cmo_req_i),
+        .cva6_cmo_resp_o(dcache_cmo_resp_o),
+
+        .dcache_req_valid_o(dcache_req_valid[NumPorts]),
+        .dcache_req_ready_i(dcache_req_ready[NumPorts]),
+        .dcache_req_o      (dcache_req[NumPorts]),
+        .dcache_req_abort_o(dcache_req_abort[NumPorts]),
+        .dcache_req_tag_o  (dcache_req_tag[NumPorts]),
+        .dcache_req_pma_o  (dcache_req_pma[NumPorts]),
+
+        .dcache_rsp_valid_i(dcache_rsp_valid[NumPorts]),
+        .dcache_rsp_i      (dcache_rsp[NumPorts])
+    );
+`else
+    assign dcache_req_valid[NumPorts] = 1'b0,
+        dcache_req[NumPorts] = '0,
+        dcache_req_abort[NumPorts] = 1'b0,
+        dcache_req_tag[NumPorts] = '0,
+        dcache_req_pma[NumPorts] = '0;
+`endif
+  endgenerate
+
+  //  Snoop load port
+  assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1],
+      snoop_abort[0] = dcache_req_abort[1],
+      snoop_addr_offset[0] = dcache_req[1].addr_offset,
+      snoop_addr_tag[0] = dcache_req_tag[1],
+      snoop_phys_indexed[0] = dcache_req[1].phys_indexed;
+
+  //  Snoop Store/AMO port
+  assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1],
+      snoop_abort[1] = dcache_req_abort[NumPorts-1],
+      snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset,
+      snoop_addr_tag[1] = dcache_req_tag[NumPorts-1],
+      snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed;
+
+`ifdef HPDCACHE_ENABLE_CMO
+  //  Snoop CMO port (in case of read prefetch accesses)
+  assign dcache_cmo_req_is_prefetch = hpdcache_pkg::is_cmo_prefetch(
+      dcache_req[NumPorts].op, dcache_req[NumPorts].size
+  );
+  assign snoop_valid[2]        = dcache_req_valid[NumPorts]
+                               & dcache_req_ready[NumPorts]
+                               & dcache_cmo_req_is_prefetch,
+      snoop_abort[2] = dcache_req_abort[NumPorts],
+      snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset,
+      snoop_addr_tag[2] = dcache_req_tag[NumPorts],
+      snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed;
+`else
+  assign snoop_valid[2] = 1'b0,
+      snoop_abort[2] = 1'b0,
+      snoop_addr_offset[2] = '0,
+      snoop_addr_tag[2] = '0,
+      snoop_phys_indexed[2] = 1'b0;
+`endif
+
+  generate
+    for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen
+      assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]),
+          hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]);
+    end
+  endgenerate
+
+  hwpf_stride_wrapper #(
+      .NUM_HW_PREFETCH(NrHwPrefetchers),
+      .NUM_SNOOP_PORTS(3)
+  ) i_hwpf_stride_wrapper (
+      .clk_i,
+      .rst_ni,
+
+      .hwpf_stride_base_set_i    (hwpf_base_set_i),
+      .hwpf_stride_base_i        (hwpf_base_i),
+      .hwpf_stride_base_o        (hwpf_base_o),
+      .hwpf_stride_param_set_i   (hwpf_param_set_i),
+      .hwpf_stride_param_i       (hwpf_param_i),
+      .hwpf_stride_param_o       (hwpf_param_o),
+      .hwpf_stride_throttle_set_i(hwpf_throttle_set_i),
+      .hwpf_stride_throttle_i    (hwpf_throttle_in),
+      .hwpf_stride_throttle_o    (hwpf_throttle_out),
+      .hwpf_stride_status_o      (hwpf_status_o),
+
+      .snoop_valid_i       (snoop_valid),
+      .snoop_abort_i       (snoop_abort),
+      .snoop_addr_offset_i (snoop_addr_offset),
+      .snoop_addr_tag_i    (snoop_addr_tag),
+      .snoop_phys_indexed_i(snoop_phys_indexed),
+
+      .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts + 1)),
+
+      .hpdcache_req_valid_o(dcache_req_valid[NumPorts+1]),
+      .hpdcache_req_ready_i(dcache_req_ready[NumPorts+1]),
+      .hpdcache_req_o      (dcache_req[NumPorts+1]),
+      .hpdcache_req_abort_o(dcache_req_abort[NumPorts+1]),
+      .hpdcache_req_tag_o  (dcache_req_tag[NumPorts+1]),
+      .hpdcache_req_pma_o  (dcache_req_pma[NumPorts+1]),
+      .hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts+1]),
+      .hpdcache_rsp_i      (dcache_rsp[NumPorts+1])
+  );
+
+  hpdcache #(
+      .NREQUESTERS          (HPDCACHE_NREQUESTERS),
+      .HPDcacheMemIdWidth   (ariane_pkg::MEM_TID_WIDTH),
+      .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
+      .hpdcache_mem_req_t   (hpdcache_mem_req_t),
+      .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
+      .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
+      .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t)
+  ) i_hpdcache (
+      .clk_i,
+      .rst_ni,
+
+      .wbuf_flush_i(dcache_flush_i),
+
+      .core_req_valid_i(dcache_req_valid),
+      .core_req_ready_o(dcache_req_ready),
+      .core_req_i      (dcache_req),
+      .core_req_abort_i(dcache_req_abort),
+      .core_req_tag_i  (dcache_req_tag),
+      .core_req_pma_i  (dcache_req_pma),
+
+      .core_rsp_valid_o(dcache_rsp_valid),
+      .core_rsp_o      (dcache_rsp),
+
+      .mem_req_miss_read_ready_i(dcache_miss_ready),
+      .mem_req_miss_read_valid_o(dcache_miss_valid),
+      .mem_req_miss_read_o      (dcache_miss),
+
+      .mem_resp_miss_read_ready_o(dcache_miss_resp_ready),
+      .mem_resp_miss_read_valid_i(dcache_miss_resp_valid),
+      .mem_resp_miss_read_i      (dcache_miss_resp),
+
+      .mem_req_wbuf_write_ready_i(dcache_wbuf_ready),
+      .mem_req_wbuf_write_valid_o(dcache_wbuf_valid),
+      .mem_req_wbuf_write_o      (dcache_wbuf),
+
+      .mem_req_wbuf_write_data_ready_i(dcache_wbuf_data_ready),
+      .mem_req_wbuf_write_data_valid_o(dcache_wbuf_data_valid),
+      .mem_req_wbuf_write_data_o      (dcache_wbuf_data),
+
+      .mem_resp_wbuf_write_ready_o(dcache_wbuf_resp_ready),
+      .mem_resp_wbuf_write_valid_i(dcache_wbuf_resp_valid),
+      .mem_resp_wbuf_write_i      (dcache_wbuf_resp),
+
+      .mem_req_uc_read_ready_i(dcache_uc_read_ready),
+      .mem_req_uc_read_valid_o(dcache_uc_read_valid),
+      .mem_req_uc_read_o      (dcache_uc_read),
+
+      .mem_resp_uc_read_ready_o(dcache_uc_read_resp_ready),
+      .mem_resp_uc_read_valid_i(dcache_uc_read_resp_valid),
+      .mem_resp_uc_read_i      (dcache_uc_read_resp),
+
+      .mem_req_uc_write_ready_i(dcache_uc_write_ready),
+      .mem_req_uc_write_valid_o(dcache_uc_write_valid),
+      .mem_req_uc_write_o      (dcache_uc_write),
+
+      .mem_req_uc_write_data_ready_i(dcache_uc_write_data_ready),
+      .mem_req_uc_write_data_valid_o(dcache_uc_write_data_valid),
+      .mem_req_uc_write_data_o      (dcache_uc_write_data),
+
+      .mem_resp_uc_write_ready_o(dcache_uc_write_resp_ready),
+      .mem_resp_uc_write_valid_i(dcache_uc_write_resp_valid),
+      .mem_resp_uc_write_i      (dcache_uc_write_resp),
+
+      .evt_cache_write_miss_o(dcache_write_miss),
+      .evt_cache_read_miss_o (dcache_read_miss),
+      .evt_uncached_req_o    (  /* unused */),
+      .evt_cmo_req_o         (  /* unused */),
+      .evt_write_req_o       (  /* unused */),
+      .evt_read_req_o        (  /* unused */),
+      .evt_prefetch_req_o    (  /* unused */),
+      .evt_req_on_hold_o     (  /* unused */),
+      .evt_rtab_rollback_o   (  /* unused */),
+      .evt_stall_refill_o    (  /* unused */),
+      .evt_stall_o           (  /* unused */),
+
+      .wbuf_empty_o(wbuffer_empty_o),
+
+      .cfg_enable_i                       (dcache_enable_i),
+      .cfg_wbuf_threshold_i               (4'd2),
+      .cfg_wbuf_reset_timecnt_on_write_i  (1'b1),
+      .cfg_wbuf_sequential_waw_i          (1'b0),
+      .cfg_wbuf_inhibit_write_coalescing_i(1'b0),
+      .cfg_prefetch_updt_plru_i           (1'b1),
+      .cfg_error_on_cacheable_amo_i       (1'b0),
+      .cfg_rtab_single_entry_i            (1'b0)
+  );
+
+  assign dcache_miss_o = dcache_read_miss, wbuffer_not_ni_o = wbuffer_empty_o;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : dcache_flush_ff
+    if (!rst_ni) dcache_flush_ack_o <= 1'b0;  // asynchronous active-low reset: no ack
+    else dcache_flush_ack_o <= dcache_flush_i & ~dcache_flush_ack_o;  // ack a flush request unless one was acked last cycle
+  end
+
+  //  }}}
+
+  //  AXI arbiter instantiation
+  //  {{{
+  typedef logic [CVA6Cfg.AxiAddrWidth-1:0] axi_addr_t;
+  typedef logic [CVA6Cfg.AxiDataWidth-1:0] axi_data_t;
+  typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] axi_strb_t;
+  typedef logic [CVA6Cfg.AxiIdWidth-1:0] axi_id_t;
+  typedef logic [CVA6Cfg.AxiUserWidth-1:0] axi_user_t;
+  `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, axi_addr_t, axi_id_t, axi_user_t)
+  `AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, axi_data_t, axi_strb_t, axi_user_t)
+  `AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, axi_id_t, axi_user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, axi_addr_t, axi_id_t, axi_user_t)
+  `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t)
+
+  cva6_hpdcache_subsystem_axi_arbiter #(
+      .HPDcacheMemIdWidth   (ariane_pkg::MEM_TID_WIDTH),
+      .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
+      .hpdcache_mem_req_t   (hpdcache_mem_req_t),
+      .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
+      .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
+      .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
+
+      .AxiAddrWidth (CVA6Cfg.AxiAddrWidth),
+      .AxiDataWidth (CVA6Cfg.AxiDataWidth),
+      .AxiIdWidth   (CVA6Cfg.AxiIdWidth),
+      .AxiUserWidth (CVA6Cfg.AxiUserWidth),
+      .axi_ar_chan_t(axi_ar_chan_t),
+      .axi_aw_chan_t(axi_aw_chan_t),
+      .axi_w_chan_t (axi_w_chan_t),
+      .axi_req_t    (noc_req_t),
+      .axi_rsp_t    (noc_resp_t)
+  ) i_axi_arbiter (
+      .clk_i,
+      .rst_ni,
+
+      .icache_miss_valid_i(icache_miss_valid),
+      .icache_miss_ready_o(icache_miss_ready),
+      .icache_miss_i      (icache_miss),
+      .icache_miss_id_i   (hpdcache_mem_id_t'(ICACHE_RDTXID)),
+
+      .icache_miss_resp_valid_o(icache_miss_resp_valid),
+      .icache_miss_resp_o      (icache_miss_resp),
+
+      .dcache_miss_ready_o(dcache_miss_ready),
+      .dcache_miss_valid_i(dcache_miss_valid),
+      .dcache_miss_i      (dcache_miss),
+
+      .dcache_miss_resp_ready_i(dcache_miss_resp_ready),
+      .dcache_miss_resp_valid_o(dcache_miss_resp_valid),
+      .dcache_miss_resp_o      (dcache_miss_resp),
+
+      .dcache_wbuf_ready_o(dcache_wbuf_ready),
+      .dcache_wbuf_valid_i(dcache_wbuf_valid),
+      .dcache_wbuf_i      (dcache_wbuf),
+
+      .dcache_wbuf_data_ready_o(dcache_wbuf_data_ready),
+      .dcache_wbuf_data_valid_i(dcache_wbuf_data_valid),
+      .dcache_wbuf_data_i      (dcache_wbuf_data),
+
+      .dcache_wbuf_resp_ready_i(dcache_wbuf_resp_ready),
+      .dcache_wbuf_resp_valid_o(dcache_wbuf_resp_valid),
+      .dcache_wbuf_resp_o      (dcache_wbuf_resp),
+
+      .dcache_uc_read_ready_o(dcache_uc_read_ready),
+      .dcache_uc_read_valid_i(dcache_uc_read_valid),
+      .dcache_uc_read_i      (dcache_uc_read),
+      .dcache_uc_read_id_i   ('1),
+
+      .dcache_uc_read_resp_ready_i(dcache_uc_read_resp_ready),
+      .dcache_uc_read_resp_valid_o(dcache_uc_read_resp_valid),
+      .dcache_uc_read_resp_o      (dcache_uc_read_resp),
+
+      .dcache_uc_write_ready_o(dcache_uc_write_ready),
+      .dcache_uc_write_valid_i(dcache_uc_write_valid),
+      .dcache_uc_write_i      (dcache_uc_write),
+      .dcache_uc_write_id_i   ('1),
+
+      .dcache_uc_write_data_ready_o(dcache_uc_write_data_ready),
+      .dcache_uc_write_data_valid_i(dcache_uc_write_data_valid),
+      .dcache_uc_write_data_i      (dcache_uc_write_data),
+
+      .dcache_uc_write_resp_ready_i(dcache_uc_write_resp_ready),
+      .dcache_uc_write_resp_valid_o(dcache_uc_write_resp_valid),
+      .dcache_uc_write_resp_o      (dcache_uc_write_resp),
+
+      .axi_req_o (noc_req_o),
+      .axi_resp_i(noc_resp_i)
+  );
+  //  }}}
+
+  //  Assertions
+  //  {{{
+  //  pragma translate_off
+  initial
+    assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS))
+    else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough");
+
+  //  Warn if the OR-reduction of valid fetch data is X. $warning takes no finish_number (only $fatal does).
+  a_invalid_instruction_fetch :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
+  else
+    $warning(
+        "[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
+        icache_dreq_o.vaddr,
+        icache_dreq_o.data
+    );
+
+  //  Warn if a port-2 write with any byte enable set has data whose OR-reduction is X ($warning takes no finish_number).
+  a_invalid_write_data :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX)
+  else
+    $warning(
+        "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
+        {
+          dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index
+        },
+        dcache_req_ports_i[2].data_be,
+        dcache_req_ports_i[2].data_wdata
+    );
+
+  for (genvar j = 0; j < 2; j++) begin : gen_assertion
+    //  Warn if non-killed read data returned on port j has an X OR-reduction ($warning takes no finish_number).
+    a_invalid_read_data :
+    assert property (
+      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
+    else
+      $warning(
+          "[l1 dcache] reading invalid data on port %01d: data=%016X",
+          j,
+          dcache_req_ports_o[j].data_rdata
+      );
+  end
+  //  pragma translate_on
+  //  }}}
+
+endmodule : cva6_hpdcache_subsystem
diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv
new file mode 100644
index 0000000..9eb0a8b
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv
@@ -0,0 +1,586 @@
+// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
+//                Alternatives (CEA)
+//
+// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Authors: Cesar Fuguet
+// Date: February, 2023
+// Description: AXI arbiter for the CVA6 cache subsystem integrating standard
+//              CVA6's instruction cache and the Core-V High-Performance
+//              L1 Dcache (CV-HPDcache).
+
+module cva6_hpdcache_subsystem_axi_arbiter
+//  Parameters
+//  {{{
+#(
+    parameter int HPDcacheMemIdWidth = 8,
+    parameter int HPDcacheMemDataWidth = 512,
+    parameter type hpdcache_mem_req_t = logic,
+    parameter type hpdcache_mem_req_w_t = logic,
+    parameter type hpdcache_mem_resp_r_t = logic,
+    parameter type hpdcache_mem_resp_w_t = logic,
+
+    parameter int unsigned AxiAddrWidth = 1,
+    parameter int unsigned AxiDataWidth = 1,
+    parameter int unsigned AxiIdWidth = 1,
+    parameter int unsigned AxiUserWidth = 1,
+    parameter type axi_ar_chan_t = logic,
+    parameter type axi_aw_chan_t = logic,
+    parameter type axi_w_chan_t = logic,
+    parameter type axi_req_t = logic,
+    parameter type axi_rsp_t = logic,
+
+    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input logic clk_i,
+    input logic rst_ni,
+
+    //  Interfaces from/to I$
+    //  {{{
+    input  logic                      icache_miss_valid_i,
+    output logic                      icache_miss_ready_o,
+    input  wt_cache_pkg::icache_req_t icache_miss_i,
+    input  hpdcache_mem_id_t          icache_miss_id_i,
+
+    output logic                       icache_miss_resp_valid_o,
+    output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o,
+    //  }}}
+
+    //  Interfaces from/to D$
+    //  {{{
+    output logic              dcache_miss_ready_o,
+    input  logic              dcache_miss_valid_i,
+    input  hpdcache_mem_req_t dcache_miss_i,
+
+    input  logic                 dcache_miss_resp_ready_i,
+    output logic                 dcache_miss_resp_valid_o,
+    output hpdcache_mem_resp_r_t dcache_miss_resp_o,
+
+    //      Write-buffer write interface
+    output logic              dcache_wbuf_ready_o,
+    input  logic              dcache_wbuf_valid_i,
+    input  hpdcache_mem_req_t dcache_wbuf_i,
+
+    output logic                dcache_wbuf_data_ready_o,
+    input  logic                dcache_wbuf_data_valid_i,
+    input  hpdcache_mem_req_w_t dcache_wbuf_data_i,
+
+    input  logic                 dcache_wbuf_resp_ready_i,
+    output logic                 dcache_wbuf_resp_valid_o,
+    output hpdcache_mem_resp_w_t dcache_wbuf_resp_o,
+
+    //      Uncached read interface
+    output logic              dcache_uc_read_ready_o,
+    input  logic              dcache_uc_read_valid_i,
+    input  hpdcache_mem_req_t dcache_uc_read_i,
+    input  hpdcache_mem_id_t  dcache_uc_read_id_i,
+
+    input  logic                 dcache_uc_read_resp_ready_i,
+    output logic                 dcache_uc_read_resp_valid_o,
+    output hpdcache_mem_resp_r_t dcache_uc_read_resp_o,
+
+    //      Uncached write interface
+    output logic              dcache_uc_write_ready_o,
+    input  logic              dcache_uc_write_valid_i,
+    input  hpdcache_mem_req_t dcache_uc_write_i,
+    input  hpdcache_mem_id_t  dcache_uc_write_id_i,
+
+    output logic                dcache_uc_write_data_ready_o,
+    input  logic                dcache_uc_write_data_valid_i,
+    input  hpdcache_mem_req_w_t dcache_uc_write_data_i,
+
+    input  logic                 dcache_uc_write_resp_ready_i,
+    output logic                 dcache_uc_write_resp_valid_o,
+    output hpdcache_mem_resp_w_t dcache_uc_write_resp_o,
+    //  }}}
+
+    //  AXI port to upstream memory/peripherals
+    //  {{{
+    output axi_req_t axi_req_o,
+    input  axi_rsp_t axi_resp_i
+    //  }}}
+);
+  //  }}}
+
+  //  Internal type definitions
+  //  {{{
+  typedef struct packed {
+    logic [AxiIdWidth-1:0]   id;
+    logic [AxiDataWidth-1:0] data;
+    axi_pkg::resp_t          resp;
+    logic                    last;
+    logic [AxiUserWidth-1:0] user;
+  } axi_r_chan_t;
+
+  typedef struct packed {
+    logic [AxiIdWidth-1:0]   id;
+    axi_pkg::resp_t          resp;
+    logic [AxiUserWidth-1:0] user;
+  } axi_b_chan_t;
+
+  localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth);
+  typedef hpdcache_mem_id_t [MEM_RESP_RT_DEPTH-1:0] mem_resp_rt_t;
+  typedef logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] icache_resp_data_t;
+  //  }}}
+
+  //  Adapt the I$ interface to the HPDcache memory interface
+  //  {{{
+  localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH / 64;  // 64-bit words per I$ line
+  localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS);  // bits needed to index a word in a line
+  localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);  // log2(bytes per I$ line)
+  localparam int ICACHE_WORD_SIZE = 3;  // request size used for non-cacheable fetches (log2 of 8 bytes)
+  //  Number of memory-interface beats needed to transfer one I$ line (ceiling division)
+  localparam int ICACHE_MEM_REQ_CL_LEN =
+    (ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1) / HPDcacheMemDataWidth;
+  //  Request size for a line refill: one full memory word, or the whole line if the bus is wider
+  localparam int ICACHE_MEM_REQ_CL_SIZE =
+    (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ? $clog2(HPDcacheMemDataWidth / 8) :
+                                                              ICACHE_CL_SIZE;
+
+  //    I$ request
+  hpdcache_mem_req_t icache_miss_req_wdata;
+  logic icache_miss_req_w, icache_miss_req_wok;
+
+  hpdcache_mem_req_t icache_miss_req_rdata;
+  logic icache_miss_req_r, icache_miss_req_rok;
+
+  logic icache_miss_pending_q;
+
+  //  This FIFO has two functionalities:
+  //  -  Stabilize the ready-valid protocol. The ICACHE can abort a valid
+  //     transaction without receiving the corresponding ready signal. This
+  //     behavior is not supported by AXI.
+  //  -  Cut a possible long timing path.
+  hpdcache_fifo_reg #(
+      .FIFO_DEPTH (1),
+      .fifo_data_t(hpdcache_mem_req_t)
+  ) i_icache_miss_req_fifo (
+      .clk_i,
+      .rst_ni,
+
+      .w_i    (icache_miss_req_w),
+      .wok_o  (icache_miss_req_wok),
+      .wdata_i(icache_miss_req_wdata),
+
+      .r_i    (icache_miss_req_r),
+      .rok_o  (icache_miss_req_rok),
+      .rdata_o(icache_miss_req_rdata)
+  );
+
+  assign icache_miss_req_w = icache_miss_valid_i, icache_miss_ready_o = icache_miss_req_wok;
+
+  assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr,
+      icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1,
+      icache_miss_req_wdata.mem_req_size      = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE,
+      icache_miss_req_wdata.mem_req_id = icache_miss_i.tid,
+      icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ,
+      icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0),
+      icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc;
+
+
+  //    I$ response
+  logic icache_miss_resp_w, icache_miss_resp_wok;
+  hpdcache_mem_resp_r_t icache_miss_resp_wdata;
+
+  logic icache_miss_resp_data_w, icache_miss_resp_data_wok;
+  logic icache_miss_resp_data_r, icache_miss_resp_data_rok;
+  icache_resp_data_t icache_miss_resp_data_rdata;
+
+  logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok;
+  logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok;
+  hpdcache_mem_id_t  icache_miss_resp_meta_id;
+
+  icache_resp_data_t icache_miss_rdata;
+
+  generate
+    if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin
+      hpdcache_fifo_reg #(
+          .FIFO_DEPTH (1),
+          .fifo_data_t(hpdcache_mem_id_t)
+      ) i_icache_refill_meta_fifo (
+          .clk_i,
+          .rst_ni,
+
+          .w_i    (icache_miss_resp_meta_w),
+          .wok_o  (icache_miss_resp_meta_wok),
+          .wdata_i(icache_miss_resp_wdata.mem_resp_r_id),
+
+          .r_i    (icache_miss_resp_meta_r),
+          .rok_o  (icache_miss_resp_meta_rok),
+          .rdata_o(icache_miss_resp_meta_id)
+      );
+
+      hpdcache_data_upsize #(
+          .WR_WIDTH(HPDcacheMemDataWidth),
+          .RD_WIDTH(ariane_pkg::ICACHE_LINE_WIDTH),
+          .DEPTH   (1)
+      ) i_icache_hpdcache_data_upsize (
+          .clk_i,
+          .rst_ni,
+
+          .w_i    (icache_miss_resp_data_w),
+          .wlast_i(icache_miss_resp_wdata.mem_resp_r_last),
+          .wok_o  (icache_miss_resp_data_wok),
+          .wdata_i(icache_miss_resp_wdata.mem_resp_r_data),
+
+          .r_i    (icache_miss_resp_data_r),
+          .rok_o  (icache_miss_resp_data_rok),
+          .rdata_o(icache_miss_resp_data_rdata)
+      );
+
+      assign icache_miss_resp_meta_r = 1'b1, icache_miss_resp_data_r = 1'b1;
+
+      assign icache_miss_resp_meta_w = icache_miss_resp_w & icache_miss_resp_wdata.mem_resp_r_last;
+
+      assign icache_miss_resp_data_w = icache_miss_resp_w;
+
+      assign icache_miss_resp_wok = icache_miss_resp_data_wok & (
+               icache_miss_resp_meta_wok | ~icache_miss_resp_wdata.mem_resp_r_last);
+
+      assign icache_miss_rdata = icache_miss_resp_data_rdata;
+
+    end else begin
+      assign icache_miss_resp_data_rok = icache_miss_resp_w;
+      assign icache_miss_resp_meta_rok = icache_miss_resp_w;
+      assign icache_miss_resp_wok = 1'b1;
+      assign icache_miss_resp_meta_id = icache_miss_resp_wdata.mem_resp_r_id;
+      assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data;
+
+      //  For uncacheable accesses, the Icache expects the requested 64-bit word to be right-aligned
+      always_comb begin : icache_miss_resp_data_comb
+        if (!icache_miss_req_rdata.mem_req_cacheable) begin
+          automatic logic [ICACHE_CL_WORD_INDEX - 1:0] icache_miss_word_index;
+          automatic logic [63:0] icache_miss_word;
+          icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3+:ICACHE_CL_WORD_INDEX];  // 64-bit word offset taken from the buffered request address
+          icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64+:64];  // pick that word out of the response data
+          icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word};  // zero-extend: word lands in the low 64 bits
+        end else begin
+          icache_miss_rdata = icache_miss_resp_data_rdata;  // cacheable: forward the response data unchanged
+        end
+      end
+    end
+  endgenerate
+
+  assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok,
+      icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK,
+      icache_miss_resp_o.user = '0,
+      icache_miss_resp_o.inv = '0,
+      icache_miss_resp_o.tid = icache_miss_resp_meta_id,
+      icache_miss_resp_o.data = icache_miss_rdata;
+
+  //  consume the Icache miss on the arrival of the response. The request
+  //  metadata is decoded to forward the correct word in case of uncacheable
+  //  Icache access
+  assign icache_miss_req_r = icache_miss_resp_meta_rok;
+  //  }}}
+
+  //  Read request arbiter
+  //  {{{
+  logic              mem_req_read_ready     [2:0];
+  logic              mem_req_read_valid     [2:0];
+  hpdcache_mem_req_t mem_req_read           [2:0];
+
+  logic              mem_req_read_ready_arb;
+  logic              mem_req_read_valid_arb;
+  hpdcache_mem_req_t mem_req_read_arb;
+
+  assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q,
+      mem_req_read[0] = icache_miss_req_rdata;
+
+  assign dcache_miss_ready_o = mem_req_read_ready[1],
+      mem_req_read_valid[1] = dcache_miss_valid_i,
+      mem_req_read[1] = dcache_miss_i;
+
+  assign dcache_uc_read_ready_o = mem_req_read_ready[2],
+      mem_req_read_valid[2] = dcache_uc_read_valid_i,
+      mem_req_read[2] = dcache_uc_read_i;
+
+  hpdcache_mem_req_read_arbiter #(
+      .N                 (3),
+      .hpdcache_mem_req_t(hpdcache_mem_req_t)
+  ) i_mem_req_read_arbiter (
+      .clk_i,
+      .rst_ni,
+
+      .mem_req_read_ready_o(mem_req_read_ready),
+      .mem_req_read_valid_i(mem_req_read_valid),
+      .mem_req_read_i      (mem_req_read),
+
+      .mem_req_read_ready_i(mem_req_read_ready_arb),
+      .mem_req_read_valid_o(mem_req_read_valid_arb),
+      .mem_req_read_o      (mem_req_read_arb)
+  );
+  //  }}}
+
+  //  Read response demultiplexor
+  //  {{{
+  logic                 mem_resp_read_ready;
+  logic                 mem_resp_read_valid;
+  hpdcache_mem_resp_r_t mem_resp_read;
+
+  logic                 mem_resp_read_ready_arb[2:0];
+  logic                 mem_resp_read_valid_arb[2:0];
+  hpdcache_mem_resp_r_t mem_resp_read_arb      [2:0];
+
+  mem_resp_rt_t         mem_resp_read_rt;
+
+  always_comb begin
+    //  Route read responses by ID: I$ ID -> output 0, uncached-read ID -> output 2, any other ID -> output 1 (D$ miss)
+    for (int id = 0; id < MEM_RESP_RT_DEPTH; id++)
+      mem_resp_read_rt[id] = (id == int'(icache_miss_id_i)) ? 0 :
+                             (id == int'(dcache_uc_read_id_i)) ? 2 : 1;
+  end
+
+  hpdcache_mem_resp_demux #(
+      .N        (3),
+      .resp_t   (hpdcache_mem_resp_r_t),
+      .resp_id_t(hpdcache_mem_id_t)
+  ) i_mem_resp_read_demux (
+      .clk_i,
+      .rst_ni,
+
+      .mem_resp_ready_o(mem_resp_read_ready),
+      .mem_resp_valid_i(mem_resp_read_valid),
+      .mem_resp_id_i   (mem_resp_read.mem_resp_r_id),
+      .mem_resp_i      (mem_resp_read),
+
+      .mem_resp_ready_i(mem_resp_read_ready_arb),
+      .mem_resp_valid_o(mem_resp_read_valid_arb),
+      .mem_resp_o      (mem_resp_read_arb),
+
+      .mem_resp_rt_i(mem_resp_read_rt)
+  );
+
+  assign icache_miss_resp_w = mem_resp_read_valid_arb[0],
+      icache_miss_resp_wdata = mem_resp_read_arb[0],
+      mem_resp_read_ready_arb[0] = icache_miss_resp_wok;
+
+  assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1],
+      dcache_miss_resp_o = mem_resp_read_arb[1],
+      mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i;
+
+  assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2],
+      dcache_uc_read_resp_o = mem_resp_read_arb[2],
+      mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i;
+  //  }}}
+
+  //  Write request arbiter
+  //  {{{
+  logic                mem_req_write_ready          [1:0];
+  logic                mem_req_write_valid          [1:0];
+  hpdcache_mem_req_t   mem_req_write                [1:0];
+
+  logic                mem_req_write_data_ready     [1:0];
+  logic                mem_req_write_data_valid     [1:0];
+  hpdcache_mem_req_w_t mem_req_write_data           [1:0];
+
+  logic                mem_req_write_ready_arb;
+  logic                mem_req_write_valid_arb;
+  hpdcache_mem_req_t   mem_req_write_arb;
+
+  logic                mem_req_write_data_ready_arb;
+  logic                mem_req_write_data_valid_arb;
+  hpdcache_mem_req_w_t mem_req_write_data_arb;
+
+  assign dcache_wbuf_ready_o = mem_req_write_ready[0],
+      mem_req_write_valid[0] = dcache_wbuf_valid_i,
+      mem_req_write[0] = dcache_wbuf_i;
+
+  assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0],
+      mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i,
+      mem_req_write_data[0] = dcache_wbuf_data_i;
+
+  assign dcache_uc_write_ready_o = mem_req_write_ready[1],
+      mem_req_write_valid[1] = dcache_uc_write_valid_i,
+      mem_req_write[1] = dcache_uc_write_i;
+
+  assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1],
+      mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i,
+      mem_req_write_data[1] = dcache_uc_write_data_i;
+
+  hpdcache_mem_req_write_arbiter #(
+      .N                   (2),
+      .hpdcache_mem_req_t  (hpdcache_mem_req_t),
+      .hpdcache_mem_req_w_t(hpdcache_mem_req_w_t)
+  ) i_mem_req_write_arbiter (
+      .clk_i,
+      .rst_ni,
+
+      .mem_req_write_ready_o(mem_req_write_ready),
+      .mem_req_write_valid_i(mem_req_write_valid),
+      .mem_req_write_i      (mem_req_write),
+
+      .mem_req_write_data_ready_o(mem_req_write_data_ready),
+      .mem_req_write_data_valid_i(mem_req_write_data_valid),
+      .mem_req_write_data_i      (mem_req_write_data),
+
+      .mem_req_write_ready_i(mem_req_write_ready_arb),
+      .mem_req_write_valid_o(mem_req_write_valid_arb),
+      .mem_req_write_o      (mem_req_write_arb),
+
+      .mem_req_write_data_ready_i(mem_req_write_data_ready_arb),
+      .mem_req_write_data_valid_o(mem_req_write_data_valid_arb),
+      .mem_req_write_data_o      (mem_req_write_data_arb)
+  );
+  //  }}}
+
+  //  Write response demultiplexor
+  //  {{{
+  logic                 mem_resp_write_ready;
+  logic                 mem_resp_write_valid;
+  hpdcache_mem_resp_w_t mem_resp_write;
+
+  logic                 mem_resp_write_ready_arb[1:0];
+  logic                 mem_resp_write_valid_arb[1:0];
+  hpdcache_mem_resp_w_t mem_resp_write_arb      [1:0];
+
+  mem_resp_rt_t         mem_resp_write_rt;
+
+  always_comb begin
+    //  Route write responses by ID: uncached-write ID -> output 1, any other ID -> output 0 (write buffer)
+    for (int id = 0; id < MEM_RESP_RT_DEPTH; id++)
+      mem_resp_write_rt[id] = (id == int'(dcache_uc_write_id_i)) ? 1 : 0;
+  end
+
+  hpdcache_mem_resp_demux #(
+      .N        (2),
+      .resp_t   (hpdcache_mem_resp_w_t),
+      .resp_id_t(hpdcache_mem_id_t)
+  ) i_hpdcache_mem_resp_write_demux (
+      .clk_i,
+      .rst_ni,
+
+      .mem_resp_ready_o(mem_resp_write_ready),
+      .mem_resp_valid_i(mem_resp_write_valid),
+      .mem_resp_id_i   (mem_resp_write.mem_resp_w_id),
+      .mem_resp_i      (mem_resp_write),
+
+      .mem_resp_ready_i(mem_resp_write_ready_arb),
+      .mem_resp_valid_o(mem_resp_write_valid_arb),
+      .mem_resp_o      (mem_resp_write_arb),
+
+      .mem_resp_rt_i(mem_resp_write_rt)
+  );
+
+  assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0],
+      dcache_wbuf_resp_o = mem_resp_write_arb[0],
+      mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i;
+
+  assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1],
+      dcache_uc_write_resp_o = mem_resp_write_arb[1],
+      mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i;
+  //  }}}
+
+  //  I$ miss pending
+  //  {{{
+  always_ff @(posedge clk_i or negedge rst_ni) begin : icache_miss_pending_ff
+    //  Set when the buffered I$ request is presented and the read arbiter is ready for it (this masks
+    //  its valid afterwards); held until the request is popped from the FIFO when the response arrives.
+    if (!rst_ni) icache_miss_pending_q <= 1'b0;
+    else
+      icache_miss_pending_q <= (icache_miss_pending_q & ~(icache_miss_req_rok & icache_miss_req_r)) |
+                               (~icache_miss_pending_q & icache_miss_req_rok & mem_req_read_ready[0]);
+  end
+  // }}}
+
+  //  AXI adapters
+  //  {{{
+  axi_req_t axi_req;
+  axi_rsp_t axi_resp;
+
+  hpdcache_mem_to_axi_write #(
+      .hpdcache_mem_req_t   (hpdcache_mem_req_t),
+      .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
+      .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
+      .aw_chan_t            (axi_aw_chan_t),
+      .w_chan_t             (axi_w_chan_t),
+      .b_chan_t             (axi_b_chan_t)
+  ) i_hpdcache_mem_to_axi_write (
+      .req_ready_o(mem_req_write_ready_arb),
+      .req_valid_i(mem_req_write_valid_arb),
+      .req_i      (mem_req_write_arb),
+
+      .req_data_ready_o(mem_req_write_data_ready_arb),
+      .req_data_valid_i(mem_req_write_data_valid_arb),
+      .req_data_i      (mem_req_write_data_arb),
+
+      .resp_ready_i(mem_resp_write_ready),
+      .resp_valid_o(mem_resp_write_valid),
+      .resp_o      (mem_resp_write),
+
+      .axi_aw_valid_o(axi_req.aw_valid),
+      .axi_aw_o      (axi_req.aw),
+      .axi_aw_ready_i(axi_resp.aw_ready),
+
+      .axi_w_valid_o(axi_req.w_valid),
+      .axi_w_o      (axi_req.w),
+      .axi_w_ready_i(axi_resp.w_ready),
+
+      .axi_b_valid_i(axi_resp.b_valid),
+      .axi_b_i      (axi_resp.b),
+      .axi_b_ready_o(axi_req.b_ready)
+  );
+
+  hpdcache_mem_to_axi_read #(
+      .hpdcache_mem_req_t   (hpdcache_mem_req_t),
+      .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
+      .ar_chan_t            (axi_ar_chan_t),
+      .r_chan_t             (axi_r_chan_t)
+  ) i_hpdcache_mem_to_axi_read (
+      .req_ready_o(mem_req_read_ready_arb),
+      .req_valid_i(mem_req_read_valid_arb),
+      .req_i      (mem_req_read_arb),
+
+      .resp_ready_i(mem_resp_read_ready),
+      .resp_valid_o(mem_resp_read_valid),
+      .resp_o      (mem_resp_read),
+
+      .axi_ar_valid_o(axi_req.ar_valid),
+      .axi_ar_o      (axi_req.ar),
+      .axi_ar_ready_i(axi_resp.ar_ready),
+
+      .axi_r_valid_i(axi_resp.r_valid),
+      .axi_r_i      (axi_resp.r),
+      .axi_r_ready_o(axi_req.r_ready)
+  );
+
+  assign axi_req_o = axi_req;
+  assign axi_resp  = axi_resp_i;
+  //  }}}
+
+  //  Assertions
+  //  {{{
+  //  pragma translate_off
+  initial  //  memory-interface IDs must fit in the AXI ID field
+    assert (HPDcacheMemIdWidth <= AxiIdWidth)
+    else $fatal(1, "HPDcacheMemIdWidth shall be less or equal to AxiIdWidth");
+  initial  //  ID width must cover the MSHR set + way index widths plus one extra bit
+    assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1))
+    else
+      $fatal(
+          1, "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"
+      );
+  initial  //  ID width must cover the write-buffer directory pointer width plus one extra bit
+    assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1))
+    else
+      $fatal(
+          1, "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"
+      );
+  initial  //  $fatal requires a finish_number as its first argument; 1 matches the parent subsystem's usage
+    assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH)
+    else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Icache line");
+  initial  //  memory data bus may not be wider than a D$ line
+    assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH)
+    else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line");
+  //  pragma translate_on
+  //  }}}
+
+endmodule : cva6_hpdcache_subsystem_axi_arbiter
diff --git a/test/type_param/core/cache_subsystem/cva6_icache.sv b/test/type_param/core/cache_subsystem/cva6_icache.sv
new file mode 100644
index 0000000..37dd8d1
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/cva6_icache.sv
@@ -0,0 +1,584 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 15.08.2018
+// Description: Instruction cache that is compatible with openpiton.
+//
+// Some notes:
+//
+// 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped
+//    to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width
+//    of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be
+//    consumed unconditionally.
+//
+// 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored)
+//
+// 3) NC accesses to I/O space are expected to return 32bit from memory.
+//
+
+
+module cva6_icache
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    /// ID to be used for read transactions
+    parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0
+) (
+    input logic clk_i,
+    input logic rst_ni,
+
+    /// flush the icache, flush and kill have to be asserted together
+    input  logic         flush_i,
+    /// enable icache
+    input  logic         en_i,
+    /// to performance counter
+    output logic         miss_o,
+    // address translation requests
+    input  icache_areq_t areq_i,
+    output icache_arsp_t areq_o,
+    // data requests
+    input  icache_dreq_t dreq_i,
+    output icache_drsp_t dreq_o,
+    // refill port
+    input  logic         mem_rtrn_vld_i,
+    input  icache_rtrn_t mem_rtrn_i,
+    output logic         mem_data_req_o,
+    input  logic         mem_data_ack_i,
+    output icache_req_t  mem_data_o
+);
+
+  // functions
+  // convert a binary way index into a one-hot way mask
+  function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh(
+      input logic [L1I_WAY_WIDTH-1:0] in);
+    logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] way_mask = '0;
+    way_mask[in] = 1'b1;
+    return way_mask;
+  endfunction
+
+  // signals
+  logic cache_en_d, cache_en_q;  // cache is enabled
+  logic [riscv::VLEN-1:0] vaddr_d, vaddr_q;
+  logic                        paddr_is_nc;  // asserted if physical address is non-cacheable
+  logic [ICACHE_SET_ASSOC-1:0] cl_hit;  // hit from tag compare
+  logic                        cache_rden;  // triggers cache lookup
+  logic                        cache_wren;  // triggers write to cacheline
+  logic
+      cmp_en_d,
+      cmp_en_q;  // enable tag comparison in next cycle. used to cut long path due to NC signal.
+  logic flush_d, flush_q;  // used to register and signal pending flushes
+
+  // replacement strategy
+  logic                                update_lfsr;  // shift the LFSR
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way;  // first non-valid encountered
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way;  // random index for replacement
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way;  // way to replace
+  logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q;  // way to replace (onehot)
+  logic all_ways_valid;  // we need to switch repl strategy since all are valid
+
+  // invalidations / flushing
+  logic inv_en;  // incoming invalidations
+  logic inv_d, inv_q;  // invalidation in progress
+  logic flush_en, flush_done;  // used to flush cache entries
+  logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q;  // used to flush cache entries
+
+  // mem arrays
+  logic                           cl_we;  // write enable to memory array
+  logic [   ICACHE_SET_ASSOC-1:0] cl_req;  // request to memory array
+  logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index;  // this is a cache-line index, to memory array
+  logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q;  // offset in cache line
+  logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q;  // this is the cache tag
+  logic [ICACHE_TAG_WIDTH-1:0]          cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
+  logic [ICACHE_LINE_WIDTH-1:0]         cl_rdata     [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
+  logic [ICACHE_USER_LINE_WIDTH-1:0]    cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache
+  logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel;  // selected word from each cacheline
+  logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user;  // selected word from each cacheline
+  logic [ICACHE_SET_ASSOC-1:0] vld_req;  // bit enable for valid regs
+  logic vld_we;  // valid bits write enable
+  logic [ICACHE_SET_ASSOC-1:0] vld_wdata;  // valid bits to write
+  logic [ICACHE_SET_ASSOC-1:0] vld_rdata;  // valid bits coming from valid regs
+  logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr;  // valid bit
+
+  // controller FSM
+  typedef enum logic [2:0] {
+    FLUSH,  // clears all valid bits
+    IDLE,  // wait for an incoming request
+    READ,  // check whether we have a hit
+    MISS,  // wait until the memory transaction returns
+    KILL_ATRANS,  // killed address translation: wait until paddr is valid
+    KILL_MISS  // killed miss: wait until memory responds
+  } state_e;
+  state_e state_d, state_q;  // state_q resets to FLUSH
+
+  ///////////////////////////////////////////////////////
+  // address -> cl_index mapping, interface plumbing
+  ///////////////////////////////////////////////////////
+
+  // extract tag from physical address, check if NC
+  assign cl_tag_d  = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;
+
+  // noncacheable if request goes to I/O space, or if cache is disabled
+  assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}
+  ));
+
+  // pass exception through
+  assign dreq_o.ex = areq_i.fetch_exception;
+
+  // latch this in case we have to stall later on
+  // make sure this is 32bit aligned
+  assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q;
+  assign areq_o.fetch_vaddr = {vaddr_q[riscv::VLEN-1:2], 2'b0};
+
+  // split virtual address into index and offset to address cache arrays
+  assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];
+
+
+  if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
+    // if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory
+    assign cl_offset_d = ( dreq_o.ready & dreq_i.req)      ? {dreq_i.vaddr[ICACHE_OFFSET_WIDTH-1:2], 2'b0} :
+                         ( paddr_is_nc  & mem_data_req_o ) ? {{ICACHE_OFFSET_WIDTH-1{1'b0}}, cl_offset_q[2]}<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case
+        cl_offset_q;
+    // request word address instead of cl address in case of NC access
+    assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} :                                         // align to 64bit
+        {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
+  end else begin : gen_piton_offset
+    // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
+    // since the piton cache system replicates the data, we can always index the full CL
+    assign cl_offset_d = (dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr >> 2, 2'b0} : cl_offset_q;
+
+    // request word address instead of cl address in case of NC access
+    assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} :                                         // align to 32bit
+        {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
+  end
+
+
+  assign mem_data_o.tid = RdTxId;
+
+  assign mem_data_o.nc  = paddr_is_nc;
+  // way that is being replaced
+  assign mem_data_o.way = repl_way;
+  assign dreq_o.vaddr   = vaddr_q;
+
+  // invalidations take two cycles
+  assign inv_d          = inv_en;
+
+  ///////////////////////////////////////////////////////
+  // main control logic
+  ///////////////////////////////////////////////////////
+  logic addr_ni;
+  assign addr_ni = config_pkg::is_inside_nonidempotent_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, areq_i.fetch_paddr}
+  );
+  always_comb begin : p_fsm
+    // default assignments: hold the state, keep all strobes deasserted
+    state_d = state_q;
+    cache_en_d   = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush
+    flush_en = 1'b0;
+    cmp_en_d = 1'b0;
+    cache_rden = 1'b0;
+    cache_wren = 1'b0;
+    inv_en = 1'b0;
+    flush_d = flush_q | flush_i;  // register incoming flush
+
+    // interfaces
+    dreq_o.ready = 1'b0;
+    areq_o.fetch_req = 1'b0;
+    dreq_o.valid = 1'b0;
+    mem_data_req_o = 1'b0;
+    // performance counter
+    miss_o = 1'b0;
+
+    // handle invalidations unconditionally
+    // note: invals are mutually exclusive with
+    // ifills, since both arrive over the same IF
+    // however, we need to make sure below that we
+    // do not trigger a cache readout at the same time...
+    if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin
+      inv_en = 1'b1;
+    end
+
+    unique case (state_q)
+      //////////////////////////////////
+      // this clears all valid bits
+      FLUSH: begin
+        flush_en = 1'b1;
+        if (flush_done) begin
+          state_d = IDLE;
+          flush_d = 1'b0;
+          // if the cache was not enabled set this
+          cache_en_d = en_i;
+        end
+      end
+      //////////////////////////////////
+      // wait for an incoming request
+      IDLE: begin
+        // only enable tag comparison if cache is enabled
+        cmp_en_d = cache_en_q;
+
+        // handle pending flushes, or perform cache clear upon enable
+        if (flush_d || (en_i && !cache_en_q)) begin
+          state_d = FLUSH;
+          // wait for incoming requests
+        end else begin
+          // mem requests are for sure invals here
+          if (!mem_rtrn_vld_i) begin
+            dreq_o.ready = 1'b1;
+            // we have a new request
+            if (dreq_i.req) begin
+              cache_rden = 1'b1;
+              state_d    = READ;
+            end
+          end
+          if (dreq_i.kill_s1) begin
+            state_d = IDLE;  // takes precedence over the READ transition above
+          end
+        end
+      end
+      //////////////////////////////////
+      // check whether we have a hit
+      // in case the cache is disabled,
+      // or in case the address is NC, we
+      // reuse the miss mechanism to handle
+      // the request
+      READ: begin
+        areq_o.fetch_req = '1;
+        // only enable tag comparison if cache is enabled
+        cmp_en_d    = cache_en_q;
+        // readout speculatively
+        cache_rden  = cache_en_q;
+
+        if (areq_i.fetch_valid && (!dreq_i.spec || ((CVA6Cfg.NonIdemPotenceEn && !addr_ni) || (!CVA6Cfg.NonIdemPotenceEn)))) begin
+          // check if we have to flush
+          if (flush_d) begin
+            state_d = IDLE;
+            // we have a hit or an exception output valid result
+          end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin
+            dreq_o.valid = ~dreq_i.kill_s2;  // just don't output in this case
+            state_d      = IDLE;
+
+            // we can accept another request
+            // and stay here, but only if no inval is coming in
+            // note: we are not expecting ifill return packets here...
+            if (!mem_rtrn_vld_i) begin
+              dreq_o.ready = 1'b1;
+              if (dreq_i.req) begin
+                state_d = READ;
+              end
+            end
+            // if a request is being killed at this stage,
+            // we have to bail out and wait for the address translation to complete
+            if (dreq_i.kill_s1) begin
+              state_d = IDLE;
+            end
+            // we have a miss / NC transaction
+          end else if (dreq_i.kill_s2) begin
+            state_d = IDLE;
+          end else if (!inv_q) begin
+            cmp_en_d = 1'b0;
+            // only count this as a miss if the cache is enabled, and
+            // the address is cacheable
+            // send out ifill request
+            mem_data_req_o = 1'b1;
+            if (mem_data_ack_i) begin
+              miss_o  = ~paddr_is_nc;
+              state_d = MISS;
+            end
+          end
+          // bail out if this request is being killed (and we missed on the TLB)
+        end else if (dreq_i.kill_s2 || flush_d) begin
+          state_d = KILL_ATRANS;
+        end
+      end
+      //////////////////////////////////
+      // wait until the memory transaction
+      // returns. do not write to memory
+      // if the nc bit is set.
+      MISS: begin
+        // note: this is mutually exclusive with ICACHE_INV_REQ,
+        // so we do not have to check for invals here
+        if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
+          state_d = IDLE;
+          // only return data if request is not being killed
+          if (!(dreq_i.kill_s2 || flush_d)) begin
+            dreq_o.valid = 1'b1;
+            // only write to cache if this address is cacheable
+            cache_wren   = ~paddr_is_nc;
+          end
+          // bail out if this request is being killed
+        end else if (dreq_i.kill_s2 || flush_d) begin
+          state_d = KILL_MISS;
+        end
+      end
+      //////////////////////////////////
+      // killed address translation,
+      // wait until paddr is valid, and go
+      // back to idle
+      KILL_ATRANS: begin
+        areq_o.fetch_req = '1;
+        if (areq_i.fetch_valid) begin
+          state_d = IDLE;
+        end
+      end
+      //////////////////////////////////
+      // killed miss,
+      // wait until memory responds and
+      // go back to idle
+      KILL_MISS: begin
+        if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
+          state_d = IDLE;
+        end
+      end
+      default: begin
+        // we should never get here
+        state_d = FLUSH;
+      end
+    endcase  // state_q
+  end
+
+  ///////////////////////////////////////////////////////
+  // valid bit invalidation and replacement strategy
+  ///////////////////////////////////////////////////////
+
+  // note: it cannot happen that we get an invalidation + a cl replacement
+  // in the same cycle as these requests arrive via the same interface
+  // flushes take precedence over invalidations (it is ok if we ignore
+  // the inval since the cache is cleared anyway)
+
+  assign flush_cnt_d = (flush_done) ? '0 : (flush_en) ? flush_cnt_q + 1 : flush_cnt_q;
+
+  assign flush_done = (flush_cnt_q == (ICACHE_NUM_WORDS - 1));
+
+  // invalidation/clearing address
+  // flushing takes precedence over invals
+  assign vld_addr = (flush_en)       ? flush_cnt_q        :
+                    (inv_en)         ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] :
+                                       cl_index;
+
+  assign vld_req  = (flush_en || cache_rden)        ? '1                                    :
+                    (mem_rtrn_i.inv.all && inv_en)  ? '1                                    :
+                    (mem_rtrn_i.inv.vld && inv_en)  ? icache_way_bin2oh(
+      mem_rtrn_i.inv.way
+  ) : repl_way_oh_q;
+
+  assign vld_wdata = (cache_wren) ? '1 : '0;
+
+  assign vld_we = (cache_wren | inv_en | flush_en);
+  // assign vld_req   = (vld_we | cache_rden);
+
+
+  // choose random replacement if all ways are valid
+  assign update_lfsr = cache_wren & all_ways_valid;
+  assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
+  assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;
+
+  // enable signals for memory arrays
+  assign cl_req = (cache_rden) ? '1 : (cache_wren) ? repl_way_oh_q : '0;
+  assign cl_we = cache_wren;
+
+
+  // find invalid cache line
+  lzc #(
+      .WIDTH(ICACHE_SET_ASSOC)
+  ) i_lzc (
+      .in_i   (~vld_rdata),
+      .cnt_o  (inv_way),
+      .empty_o(all_ways_valid)
+  );
+
+  // generate random cacheline index
+  lfsr #(
+      .LfsrWidth(8),
+      .OutWidth ($clog2(ariane_pkg::ICACHE_SET_ASSOC))
+  ) i_lfsr (
+      .clk_i (clk_i),
+      .rst_ni(rst_ni),
+      .en_i  (update_lfsr),
+      .out_o (rnd_way)
+  );
+
+
+  ///////////////////////////////////////////////////////
+  // tag comparison, hit generation
+  ///////////////////////////////////////////////////////
+
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx;
+
+  for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
+    assign cl_hit[i]  = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
+    assign cl_sel[i]  = cl_rdata[i][{cl_offset_q, 3'b0}+:FETCH_WIDTH];
+    assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
+  end
+
+
+  lzc #(
+      .WIDTH(ICACHE_SET_ASSOC)
+  ) i_lzc_hit (
+      .in_i   (cl_hit),
+      .cnt_o  (hit_idx),
+      .empty_o()
+  );
+
+  always_comb begin
+    if (cmp_en_q) begin
+      dreq_o.data = cl_sel[hit_idx];
+      dreq_o.user = cl_user[hit_idx];
+    end else begin
+      dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:FETCH_WIDTH];
+      dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // memory arrays and regs
+  ///////////////////////////////////////////////////////
+
+
+  logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[ICACHE_SET_ASSOC-1:0];
+
+  for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram
+    // Tag RAM
+    sram #(
+        // tag + valid bit
+        .DATA_WIDTH(ICACHE_TAG_WIDTH + 1),
+        .NUM_WORDS (ICACHE_NUM_WORDS)
+    ) tag_sram (
+        .clk_i  (clk_i),
+        .rst_ni (rst_ni),
+        .req_i  (vld_req[i]),
+        .we_i   (vld_we),
+        .addr_i (vld_addr),
+        // we can always use the saved tag here since it takes a
+        // couple of cycle until we write to the cache upon a miss
+        .wuser_i('0),
+        .wdata_i({vld_wdata[i], cl_tag_q}),
+        .be_i   ('1),
+        .ruser_o(),
+        .rdata_o(cl_tag_valid_rdata[i])
+    );
+
+    assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0];
+    assign vld_rdata[i]    = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH];
+
+    // Data RAM
+    sram #(
+        .USER_WIDTH(ICACHE_USER_LINE_WIDTH),
+        .DATA_WIDTH(ICACHE_LINE_WIDTH),
+        .USER_EN   (ariane_pkg::FETCH_USER_EN),
+        .NUM_WORDS (ICACHE_NUM_WORDS)
+    ) data_sram (
+        .clk_i  (clk_i),
+        .rst_ni (rst_ni),
+        .req_i  (cl_req[i]),
+        .we_i   (cl_we),
+        .addr_i (cl_index),
+        .wuser_i(mem_rtrn_i.user),
+        .wdata_i(mem_rtrn_i.data),
+        .be_i   ('1),
+        .ruser_o(cl_ruser[i]),
+        .rdata_o(cl_rdata[i])
+    );
+  end
+
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      cl_tag_q      <= '0;
+      flush_cnt_q   <= '0;
+      vaddr_q       <= '0;
+      cmp_en_q      <= '0;
+      cache_en_q    <= '0;  // cache starts disabled; it is only enabled when a flush completes
+      flush_q       <= '0;
+      state_q       <= FLUSH;  // start in FLUSH so that all valid bits get cleared after reset
+      cl_offset_q   <= '0;
+      repl_way_oh_q <= '0;
+      inv_q         <= '0;
+    end else begin
+      cl_tag_q      <= cl_tag_d;
+      flush_cnt_q   <= flush_cnt_d;
+      vaddr_q       <= vaddr_d;
+      cmp_en_q      <= cmp_en_d;
+      cache_en_q    <= cache_en_d;
+      flush_q       <= flush_d;
+      state_q       <= state_d;
+      cl_offset_q   <= cl_offset_d;
+      repl_way_oh_q <= repl_way_oh_d;
+      inv_q         <= inv_d;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+  repl_inval0 :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
+  else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
+
+  repl_inval1 :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren)
+  else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
+
+  invalid_state :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS}))
+  else $fatal(1, "[l1 icache] fsm reached an invalid state");
+
+  hot1 :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(
+      cl_hit
+  ))
+  else $fatal(1, "[l1 icache] cl_hit signal must be hot1");
+
+  // this is only used for verification!
+  logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
+  logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
+  logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
+    if (!rst_ni) begin
+      vld_mirror <= '{default: '0};
+      tag_mirror <= '{default: '0};
+    end else begin
+      for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin
+        if (vld_req[i] & vld_we) begin
+          vld_mirror[vld_addr][i] <= vld_wdata[i];
+          tag_mirror[vld_addr][i] <= cl_tag_q;
+        end
+      end
+    end
+  end
+
+  for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_dupl
+    assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata);
+  end
+
+  tag_write_duplicate :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
+  else $fatal(1, "[l1 icache] cannot allocate a CL that is already present in the cache");
+
+
+  initial begin
+    // assert wrong parameterizations
+    assert (ICACHE_INDEX_WIDTH <= 12)
+    else $fatal(1, "[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // cva6_icache
diff --git a/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv b/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv
new file mode 100644
index 0000000..7579fe4
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv
@@ -0,0 +1,202 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Nils Wistoff <nwistoff@iis.ee.ethz.ch>, ETH Zurich
+// Date: 07.09.2020
+// Description: wrapper module to connect the L1I$ to a 64bit AXI bus.
+//
+
+module cva6_icache_axi_wrapper
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter type axi_req_t = logic,
+    parameter type axi_rsp_t = logic
+) (
+    input logic             clk_i,
+    input logic             rst_ni,
+    input riscv::priv_lvl_t priv_lvl_i,
+
+    input logic flush_i,  // flush the icache, flush and kill have to be asserted together
+    input logic en_i,  // enable icache
+    output logic miss_o,  // to performance counter
+    // address translation requests
+    input icache_areq_t areq_i,
+    output icache_arsp_t areq_o,
+    // data requests
+    input icache_dreq_t dreq_i,
+    output icache_drsp_t dreq_o,
+    // AXI refill port
+    output axi_req_t axi_req_o,
+    input axi_rsp_t axi_resp_i
+);
+
+  localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH  > DCACHE_LINE_WIDTH)  +
+                           (DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ;
+
+  logic                                    icache_mem_rtrn_vld;
+  icache_rtrn_t                            icache_mem_rtrn;
+  logic                                    icache_mem_data_req;
+  logic                                    icache_mem_data_ack;
+  icache_req_t                             icache_mem_data;
+
+  logic                                    axi_rd_req;
+  logic                                    axi_rd_gnt;
+  logic         [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr;
+  logic         [ $clog2(AxiNumWords)-1:0] axi_rd_blen;
+  logic         [                     2:0] axi_rd_size;
+  logic         [  CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in;
+  logic                                    axi_rd_rdy;
+  logic                                    axi_rd_lock;
+  logic                                    axi_rd_last;
+  logic                                    axi_rd_valid;
+  logic         [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
+  logic         [  CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out;
+  logic                                    axi_rd_exokay;
+
+  logic req_valid_d, req_valid_q;
+  icache_req_t req_data_d, req_data_q;
+  logic first_d, first_q;
+  logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
+      rd_shift_d, rd_shift_q;
+
+  // Hold the read request until the AXI grant (required by AXI, not guaranteed by the icache).
+  assign req_valid_d           = ~axi_rd_gnt & (icache_mem_data_req | req_valid_q);
+
+  // Update read request information on a new request
+  assign req_data_d            = (icache_mem_data_req) ? icache_mem_data : req_data_q;
+
+  // We have a new or pending read request
+  assign axi_rd_req            = icache_mem_data_req | req_valid_q;
+  assign axi_rd_addr           = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr);
+
+  // Fetch a full cache line (in beats of one AXI data word) on a miss, or one beat when bypassed
+  assign axi_rd_blen           = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
+  assign axi_rd_size           = $clog2(CVA6Cfg.AxiDataWidth / 8);  // Maximum
+  assign axi_rd_id_in          = req_data_d.tid;
+  assign axi_rd_rdy            = 1'b1;
+  assign axi_rd_lock           = 1'b0;
+
+  // Immediately acknowledge read request. This is an implicit requirement for the icache.
+  assign icache_mem_data_ack   = icache_mem_data_req;
+
+  // Return data as soon as last word arrives
+  assign icache_mem_rtrn_vld   = axi_rd_valid & axi_rd_last;
+  assign icache_mem_rtrn.data  = rd_shift_d;
+  assign icache_mem_rtrn.user  = '0;  // not returned on this path (shim rd_user_o is left open)
+  assign icache_mem_rtrn.tid   = req_data_q.tid;
+  assign icache_mem_rtrn.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
+  assign icache_mem_rtrn.inv   = '0;
+
+  // -------
+  // I-Cache
+  // -------
+  cva6_icache #(
+      // use ID 0 for icache reads
+      .CVA6Cfg(CVA6Cfg),
+      .RdTxId (0)
+  ) i_cva6_icache (
+      .clk_i         (clk_i),
+      .rst_ni        (rst_ni),
+      .flush_i       (flush_i),
+      .en_i          (en_i),
+      .miss_o        (miss_o),
+      .areq_i        (areq_i),
+      .areq_o        (areq_o),
+      .dreq_i        (dreq_i),
+      .dreq_o        (dreq_o),
+      .mem_rtrn_vld_i(icache_mem_rtrn_vld),
+      .mem_rtrn_i    (icache_mem_rtrn),
+      .mem_data_req_o(icache_mem_data_req),
+      .mem_data_ack_i(icache_mem_data_ack),
+      .mem_data_o    (icache_mem_data)
+  );
+
+  // --------
+  // AXI shim
+  // --------
+  axi_shim #(
+      .CVA6Cfg    (CVA6Cfg),
+      .AxiNumWords(AxiNumWords),
+      .axi_req_t  (axi_req_t),
+      .axi_rsp_t  (axi_rsp_t)
+  ) i_axi_shim (
+      .clk_i      (clk_i),
+      .rst_ni     (rst_ni),
+      .rd_req_i   (axi_rd_req),
+      .rd_gnt_o   (axi_rd_gnt),
+      .rd_addr_i  (axi_rd_addr),
+      .rd_blen_i  (axi_rd_blen),
+      .rd_size_i  (axi_rd_size),
+      .rd_id_i    (axi_rd_id_in),
+      .rd_rdy_i   (axi_rd_rdy),
+      .rd_lock_i  (axi_rd_lock),
+      .rd_last_o  (axi_rd_last),
+      .rd_valid_o (axi_rd_valid),
+      .rd_data_o  (axi_rd_data),
+      .rd_user_o  (),
+      .rd_id_o    (axi_rd_id_out),
+      .rd_exokay_o(axi_rd_exokay),
+      .wr_req_i   ('0),
+      .wr_gnt_o   (),
+      .wr_addr_i  ('0),
+      .wr_data_i  ('0),
+      .wr_user_i  ('0),
+      .wr_be_i    ('0),
+      .wr_blen_i  ('0),
+      .wr_size_i  ('0),
+      .wr_id_i    ('0),
+      .wr_lock_i  ('0),
+      .wr_atop_i  ('0),
+      .wr_rdy_i   ('0),
+      .wr_valid_o (),
+      .wr_id_o    (),
+      .wr_exokay_o(),
+      .axi_req_o  (axi_req_o),
+      .axi_resp_i (axi_resp_i)
+  );
+
+  // Buffer burst data in a shift register: a new beat enters at the top word, older beats move down
+  always_comb begin : p_axi_rtrn_shift
+    first_d    = first_q;
+    rd_shift_d = rd_shift_q;
+
+    if (axi_rd_valid) begin
+      first_d = axi_rd_last;  // after a last beat, the next valid beat is the first of a new read
+      if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
+        rd_shift_d = axi_rd_data;  // one beat covers the whole line, nothing to shift
+      end else begin
+        rd_shift_d = {axi_rd_data, rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]};
+      end
+
+      // The first beat is also copied to word 0, so a single word transaction ends up at offset 0
+      if (first_q) begin
+        rd_shift_d[0] = axi_rd_data;
+      end
+    end
+  end
+
+  // Registers: pending request flag, latched request, first-beat flag and burst shift register
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
+    if (!rst_ni) begin
+      req_valid_q <= 1'b0;
+      req_data_q  <= '0;
+      first_q     <= 1'b1;  // the first valid beat after reset is treated as a first beat
+      rd_shift_q  <= '0;
+    end else begin
+      req_valid_q <= req_valid_d;
+      req_data_q  <= req_data_d;
+      first_q     <= first_d;
+      rd_shift_q  <= rd_shift_d;
+    end
+  end
+
+endmodule  // cva6_icache_axi_wrapper
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh b/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh
new file mode 100644
index 0000000..5e92a79
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh
@@ -0,0 +1,62 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : February, 2023
+ *  Description   : HPDcache Types' Definition
+ *  History       :
+ */
+`ifndef __HPDCACHE_TYPEDEF_SVH__
+`define __HPDCACHE_TYPEDEF_SVH__
+//  Packed struct __name__ of mem_req_* fields (types: addr_t, id_t); caller supplies the ';'.
+`define HPDCACHE_TYPEDEF_MEM_REQ_T(__name__, addr_t, id_t) \
+    typedef struct packed { \
+        addr_t                                mem_req_addr; \
+        hpdcache_pkg::hpdcache_mem_len_t      mem_req_len; \
+        hpdcache_pkg::hpdcache_mem_size_t     mem_req_size; \
+        id_t                                  mem_req_id; \
+        hpdcache_pkg::hpdcache_mem_command_e  mem_req_command; \
+        hpdcache_pkg::hpdcache_mem_atomic_e   mem_req_atomic; \
+        logic                                 mem_req_cacheable; \
+    } __name__
+//  Packed struct __name__ of mem_resp_r_* fields (types: id_t, data_t); caller supplies the ';'.
+`define HPDCACHE_TYPEDEF_MEM_RESP_R_T(__name__, id_t, data_t) \
+    typedef struct packed { \
+        hpdcache_pkg::hpdcache_mem_error_e    mem_resp_r_error; \
+        id_t                                  mem_resp_r_id; \
+        data_t                                mem_resp_r_data; \
+        logic                                 mem_resp_r_last; \
+    } __name__
+//  Packed struct __name__ of mem_req_w_* fields (types: data_t, be_t); caller supplies the ';'.
+`define HPDCACHE_TYPEDEF_MEM_REQ_W_T(__name__, data_t, be_t) \
+    typedef struct packed { \
+        data_t                                mem_req_w_data; \
+        be_t                                  mem_req_w_be; \
+        logic                                 mem_req_w_last; \
+    } __name__
+//  Packed struct __name__ of mem_resp_w_* fields (ID type: id_t); caller supplies the ';'.
+`define HPDCACHE_TYPEDEF_MEM_RESP_W_T(__name__, id_t) \
+    typedef struct packed { \
+        logic                                 mem_resp_w_is_atomic; \
+        hpdcache_pkg::hpdcache_mem_error_e    mem_resp_w_error; \
+        id_t                                  mem_resp_w_id; \
+    } __name__
+
+`endif
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv
new file mode 100644
index 0000000..d3e0a11
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv
@@ -0,0 +1,181 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : November 22, 2022
+ *  Description   : Refill data downsize
+ *  History       :
+ */
+module hpdcache_data_downsize
+//  {{{
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter int WR_WIDTH = 0,  // width in bits of a written word (wdata_i)
+    parameter int RD_WIDTH = 0,  // width in bits of a read word (rdata_o)
+    parameter int DEPTH    = 0,  // number of WR_WIDTH-wide entries in the buffer
+
+    localparam type wdata_t = logic [WR_WIDTH-1:0],
+    localparam type rdata_t = logic [RD_WIDTH-1:0]
+)
+//  }}}
+//  Ports
+//  {{{
+(
+    input  logic   clk_i,
+    input  logic   rst_ni,   // asynchronous reset, active low
+
+    input  logic   w_i,      // write request; accepted when w_i && wok_o
+    output logic   wok_o,    // high while the buffer is not full
+    input  wdata_t wdata_i,
+
+    input  logic   r_i,      // read request; accepted when r_i && rok_o
+    output logic   rok_o,    // high while the buffer is not empty
+    output rdata_t rdata_o   // current RD_WIDTH-wide word of the entry at the read pointer
+);
+//  }}}
+//  Architecture: FIFO of DEPTH wide entries, each one drained as RD_WORDS narrow words
+//  {{{
+    //  Local definitions
+    //  {{{
+    localparam int RD_WORDS = WR_WIDTH/RD_WIDTH;  // read words contained in one written entry
+    localparam int PTR_WIDTH = $clog2(DEPTH);
+    localparam int WORDCNT_WIDTH = $clog2(RD_WORDS);
+    typedef logic [PTR_WIDTH-1:0]  bufptr_t;
+    typedef logic [WORDCNT_WIDTH-1:0]  wordptr_t;
+    typedef logic [PTR_WIDTH:0]  occupancy_t;  // one extra bit so the value DEPTH fits
+    //  }}}
+
+    //  Internal registers and signals
+    //  {{{
+    rdata_t [DEPTH-1:0][RD_WORDS-1:0]  buf_q;  // entry storage, addressable per read word
+    bufptr_t  wrptr_q, wrptr_d;  // entry that the next accepted write fills
+    bufptr_t  rdptr_q, rdptr_d;  // entry that is currently being read out
+    occupancy_t  used_q, used_d;  // number of occupied entries
+    wordptr_t [DEPTH-1:0]  words_q, words_d;  // per entry: read words left after the current one
+    logic words_set;  // an entry is written this cycle (also the write enable of buf_q)
+    logic  full, empty;
+    //  }}}
+
+    //  Control-Path
+    //  {{{
+    assign full = (hpdcache_uint'(used_q) == DEPTH),
+           empty = (used_q == 0),
+           wok_o = ~full,
+           rok_o = ~empty;
+
+    always_comb
+    begin : ctrl_comb
+        automatic logic used_inc, used_dec;
+        automatic logic words_dec;
+
+        rdptr_d = rdptr_q;  // defaults: keep pointers, no occupancy or counter update
+        wrptr_d = wrptr_q;
+        used_dec = 1'b0;
+        used_inc = 1'b0;
+        words_dec = 1'b0;
+        words_set = 1'b0;
+
+        if (w_i && wok_o) begin  // accepted write: occupy one entry and arm its word counter
+            used_inc  = 1'b1;
+            words_set = 1'b1;
+            if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin  // wrap the write pointer
+                wrptr_d = 0;
+            end else begin
+                wrptr_d = wrptr_q + 1;
+            end
+        end
+
+        if (r_i && rok_o) begin  // accepted read: step to the next word or release the entry
+            words_dec = (words_q[rdptr_q] > 0);
+            if (words_q[rdptr_q] == 0) begin  // last word of the entry has just been read
+                used_dec = 1'b1;
+                if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin  // wrap the read pointer
+                    rdptr_d = 0;
+                end else begin
+                    rdptr_d = rdptr_q + 1;
+                end
+            end
+        end
+
+        case ({used_inc, used_dec})
+            2'b10  : used_d = used_q + 1;
+            2'b01  : used_d = used_q - 1;
+            default: used_d = used_q;  // neither, or a write and a release in the same cycle
+        endcase
+
+        words_d = words_q;
+        if (words_set) begin  // a fresh entry has RD_WORDS-1 words left after the first one
+            words_d[wrptr_q] = wordptr_t'(RD_WORDS - 1);
+        end
+        if (words_dec) begin
+            words_d[rdptr_q] = words_q[rdptr_q] - 1;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : ctrl_ff
+        if (!rst_ni) begin
+            rdptr_q <= 0;
+            wrptr_q <= 0;
+            used_q <= 0;
+            words_q <= 0;
+        end else begin
+            rdptr_q <= rdptr_d;
+            wrptr_q <= wrptr_d;
+            used_q <= used_d;
+            words_q <= words_d;
+        end
+    end
+    //  }}}
+
+    //  Data-Path
+    //  {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : buf_ff
+        if (!rst_ni) begin
+            buf_q <= '0;
+        end else begin
+            if (words_set) begin  // store the whole wide word into the entry at wrptr_q
+                buf_q[wrptr_q] <= wdata_i;
+            end
+        end
+    end
+    //  Word index counts up as words_q counts down, so word 0 (lowest bits) is read first
+    assign rdata_o = buf_q[rdptr_q][RD_WORDS - hpdcache_uint'(words_q[rdptr_q]) - 1];
+    //  }}}
+
+    //  Assertions: parameter sanity checks, run once from an initial block
+    //  {{{
+    //  pragma translate_off
+    initial
+    begin : initial_assertions
+        assert  (DEPTH     >        0)       else $error("DEPTH must be greater than 0");
+        assert  (WR_WIDTH  >        0)       else $error("WR_WIDTH must be greater than 0");
+        assert  (RD_WIDTH  >        0)       else $error("RD_WIDTH must be greater than 0");
+        assert  (RD_WIDTH  < WR_WIDTH)       else $error("RD_WIDTH must be less to WR_WIDTH");
+        assert ((WR_WIDTH  % RD_WIDTH) == 0) else $error("WR_WIDTH must be a multiple RD_WIDTH");
+    end
+    //  pragma translate_on
+    //  }}}
+//  }}}
+endmodule
+//  }}}
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv
new file mode 100644
index 0000000..c4af81c
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv
@@ -0,0 +1,181 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : November 22, 2022
+ *  Description   : Refill data upsize
+ *  History       :
+ */
+module hpdcache_data_upsize
+//  {{{
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter int WR_WIDTH = 0,  // width in bits of a written word (wdata_i)
+    parameter int RD_WIDTH = 0,  // width in bits of a read word (rdata_o)
+    parameter int DEPTH    = 0,  // number of RD_WIDTH-wide entries in the buffer
+
+    localparam type wdata_t = logic [WR_WIDTH-1:0],
+    localparam type rdata_t = logic [RD_WIDTH-1:0]
+)
+//  }}}
+//  Ports
+//  {{{
+(
+    input  logic   clk_i,
+    input  logic   rst_ni,   // asynchronous reset, active low
+
+    input  logic   w_i,      // write request; accepted when w_i && wok_o
+    input  logic   wlast_i,  // closes the entry being filled, even if it is not yet complete
+    output logic   wok_o,    // high while the buffer is not full
+    input  wdata_t wdata_i,
+
+    input  logic   r_i,      // read request; accepted when r_i && rok_o
+    output logic   rok_o,    // high while at least one closed entry is stored
+    output rdata_t rdata_o   // whole entry at the read pointer
+);
+//  }}}
+//  Architecture: FIFO of DEPTH wide entries, each one assembled from WR_WORDS narrow words
+//  {{{
+    //  Local definitions
+    //  {{{
+    localparam int WR_WORDS = RD_WIDTH/WR_WIDTH;  // written words needed to fill one entry
+    localparam int PTR_WIDTH = $clog2(DEPTH);
+    localparam int WORDCNT_WIDTH = $clog2(WR_WORDS);
+    typedef logic [PTR_WIDTH-1:0]  bufptr_t;
+    typedef logic [WORDCNT_WIDTH-1:0]  wordptr_t;
+    typedef logic [PTR_WIDTH:0]  occupancy_t;  // one extra bit so the value DEPTH fits
+    //  }}}
+
+    //  Internal registers and signals
+    //  {{{
+    wdata_t [DEPTH-1:0][WR_WORDS-1:0] buf_q;  // entry storage, addressable per written word
+    bufptr_t  wrptr_q, wrptr_d;  // entry that is currently being filled
+    bufptr_t  rdptr_q, rdptr_d;  // entry that the next accepted read returns
+    occupancy_t  used_q, used_d;  // number of closed (readable) entries
+    wordptr_t [DEPTH-1:0]  words_q, words_d;  // per entry: word slot the next write goes to
+    logic  full, empty;
+    logic  shift;  // write enable of buf_q (a word is stored in place, nothing is shifted)
+    //  }}}
+
+    //  Control-Path
+    //  {{{
+    assign full = (hpdcache_uint'(used_q) == DEPTH),
+           empty = (used_q == 0),
+           wok_o = ~full,
+           rok_o = ~empty;
+
+    always_comb
+    begin : ctrl_comb
+        automatic logic used_inc, used_dec;
+        automatic logic words_inc, words_reset;
+
+        wrptr_d = wrptr_q;  // defaults: keep pointers and counters, no storage write
+        rdptr_d = rdptr_q;
+        words_d = words_q;
+        used_dec = 1'b0;
+        used_inc = 1'b0;
+        words_reset = 1'b0;
+        words_inc = 1'b0;
+        shift = 1'b0;
+
+        if (w_i && wok_o) begin  // accepted write: store the word, maybe close the entry
+            shift = 1'b1;
+            words_inc = (hpdcache_uint'(words_q[wrptr_q]) < (WR_WORDS-1));
+            if (hpdcache_uint'(words_q[wrptr_q]) == (WR_WORDS-1) || wlast_i) begin
+                used_inc = 1'b1;  // entry is full or was flagged as last: make it readable
+                if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin  // wrap the write pointer
+                    wrptr_d = 0;
+                end else begin
+                    wrptr_d = wrptr_q + 1;
+                end
+            end
+        end
+
+        if (r_i && rok_o) begin  // accepted read: release the entry and rewind its word slot
+            used_dec = 1'b1;
+            words_reset = 1'b1;
+            if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin  // wrap the read pointer
+                rdptr_d = 0;
+            end else begin
+                rdptr_d = rdptr_q + 1;
+            end
+        end
+
+        case ({used_inc, used_dec})
+            2'b10  : used_d = used_q + 1;
+            2'b01  : used_d = used_q - 1;
+            default: used_d = used_q;  // neither, or an entry closed and one released together
+        endcase
+
+        if (words_inc)   words_d[wrptr_q] = words_q[wrptr_q] + 1;
+        if (words_reset) words_d[rdptr_q] = 0;
+    end
+
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : ctrl_ff
+        if (!rst_ni) begin
+            rdptr_q <= 0;
+            wrptr_q <= 0;
+            used_q <= 0;
+            words_q <= '0;
+        end else begin
+            rdptr_q <= rdptr_d;
+            wrptr_q <= wrptr_d;
+            used_q <= used_d;
+            words_q <= words_d;
+        end
+    end
+    //  }}}
+
+    //  Data-Path
+    //  {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : buf_ff
+        if (!rst_ni) begin
+            buf_q <= '0;
+        end else begin
+            if (shift) begin  // only the addressed word slot is updated
+                buf_q[wrptr_q][words_q[wrptr_q]] <= wdata_i;
+            end
+        end
+    end
+    //  Slots are cleared only by reset: after an early wlast_i the unwritten slots hold old data
+    assign rdata_o = buf_q[rdptr_q];
+    //  }}}
+
+    //  Assertions: parameter sanity checks, run once from an initial block
+    //  {{{
+    //  pragma translate_off
+    initial
+    begin : initial_assertions
+        assert  (DEPTH     >        0)       else $error("DEPTH must be greater than 0");
+        assert  (WR_WIDTH  >        0)       else $error("WR_WIDTH must be greater than 0");
+        assert  (RD_WIDTH  >        0)       else $error("RD_WIDTH must be greater than 0");
+        assert  (WR_WIDTH  < RD_WIDTH)       else $error("WR_WIDTH must be less to RD_WIDTH");
+        assert ((RD_WIDTH  % WR_WIDTH) == 0) else $error("RD_WIDTH must be a multiple WR_WIDTH");
+    end
+    //  pragma translate_on
+    //  }}}
+//  }}}
+endmodule
+//  }}}
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv
new file mode 100644
index 0000000..3be21e0
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv
@@ -0,0 +1,69 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Simple demultiplexer
+ *  History       :
+ */
+module hpdcache_demux
+//  Parameters
+//  {{{
+#(
+    //  Number of outputs
+    parameter  int unsigned NOUTPUT     = 0,
+
+    //  Width in bits of the input and of each output
+    parameter  int unsigned DATA_WIDTH  = 0,
+
+    //  Selector signal is one-hot encoded (1) or binary encoded (0)
+    parameter  bit          ONE_HOT_SEL = 0,
+
+    //  Compute the width of the selection signal
+    localparam int unsigned NOUTPUT_LOG2 = $clog2(NOUTPUT),
+    localparam int unsigned SEL_WIDTH    = ONE_HOT_SEL ? NOUTPUT : NOUTPUT_LOG2,
+
+    localparam type data_t = logic [DATA_WIDTH-1:0],
+    localparam type sel_t  = logic [SEL_WIDTH-1:0]
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input  data_t               data_i,  // value routed to the selected output(s)
+    input  sel_t                sel_i,   // output selector, encoding given by ONE_HOT_SEL
+    output data_t [NOUTPUT-1:0] data_o   // selected output carries data_i, all others are zero
+);
+//  }}}
+
+    generate
+        always_comb
+        begin : demux_comb
+            for (int unsigned i = 0; i < NOUTPUT; i++) begin
+                if (!ONE_HOT_SEL) begin  // binary selector: output i is active when sel_i == i
+                    data_o[i] = (sel_t'(i) == sel_i) ? data_i : '0;
+                end else begin  // one-hot selector: every output whose select bit is set is active
+                    data_o[i] =  sel_i[i]            ? data_i : '0;
+                end
+            end
+        end
+    endgenerate
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv
new file mode 100644
index 0000000..ba3be5f
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv
@@ -0,0 +1,167 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : FIFO buffer (using registers)
+ *  History       :
+ */
+module hpdcache_fifo_reg
+    //  Parameters
+    //  {{{
+#(
+    parameter int unsigned FIFO_DEPTH = 0,     // number of entries
+    parameter bit FEEDTHROUGH = 1'b0,          // 1: combinational bypass paths are enabled
+    parameter type fifo_data_t = logic         // type of one stored entry
+)
+    //  }}}
+    //  Ports
+    //  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,      // asynchronous reset, active low
+    input  logic                  w_i,         // write request
+    output logic                  wok_o,       // a write can be accepted
+    input  fifo_data_t            wdata_i,
+    input  logic                  r_i,         // read request
+    output logic                  rok_o,       // rdata_o is valid
+    output fifo_data_t            rdata_o
+);
+    //  }}}
+    //  NOTE: FIFO_DEPTH == 0 matches neither generate branch below, so the outputs are undriven
+    /*
+     *  Single-entry FIFO buffer -> synchronization buffer
+     */
+    if (FIFO_DEPTH == 1) begin : gen_sync_buffer
+        hpdcache_sync_buffer #(
+            .FEEDTHROUGH     (FEEDTHROUGH),
+            .data_t          (fifo_data_t)
+        ) i_sync_buffer (
+            .clk_i,
+            .rst_ni,
+            .w_i,
+            .wok_o,
+            .wdata_i,
+            .r_i,
+            .rok_o,
+            .rdata_o
+        );
+
+    /*
+     *  Multi-entry FIFO buffer (circular buffer with read and write pointers)
+     */
+    end else if (FIFO_DEPTH > 0) begin : gen_fifo
+        //  Declaration of constants, types and functions
+        //  {{{
+        typedef logic unsigned [$clog2(FIFO_DEPTH)-1:0] fifo_addr_t;
+        //  }}}
+
+        //  Declaration of internal wires and registers
+        //  {{{
+        fifo_data_t [FIFO_DEPTH-1:0] fifo_mem_q;
+        fifo_addr_t rptr_q, rptr_d; // read pointer
+        fifo_addr_t wptr_q, wptr_d; // write pointer
+        logic       crossover_q, crossover_d; // write pointer has wrapped, read pointer not yet
+        logic       rexec, wexec;       // a stored entry is popped / a new entry is stored
+        logic       rptr_max, wptr_max; // pointer is at the last index and wraps next
+        logic       match_ptr;
+        logic       empty, full;
+        //  }}}
+
+        //  Global control signals
+        //  {{{
+        assign match_ptr = (wptr_q == rptr_q);
+        //  Equal pointers mean empty or full; the crossover flag tells the two apart
+        assign empty = match_ptr & ~crossover_q,
+               full  = match_ptr &  crossover_q;
+        //  With FEEDTHROUGH: readable while empty if written, writable while full if read
+        assign rok_o = ~empty | (FEEDTHROUGH & w_i),
+               wok_o = ~full  | (FEEDTHROUGH & r_i);
+        //  With FEEDTHROUGH a write is not stored when empty and read in the same cycle (bypass)
+        assign rexec = r_i & ~empty,
+               wexec = w_i & (( FEEDTHROUGH & ((empty & ~r_i) | (full & r_i) | (~full & ~empty))) |
+                              (~FEEDTHROUGH & ~full));
+
+        //  }}}
+
+        //  Control of read and write pointers
+        //  {{{
+        assign rptr_max = (rptr_q == fifo_addr_t'(FIFO_DEPTH-1));
+        assign wptr_max = (wptr_q == fifo_addr_t'(FIFO_DEPTH-1));
+
+        always_comb
+        begin : fifo_ctrl_comb
+            rptr_d = rptr_q;  // defaults: hold the current state
+            wptr_d = wptr_q;
+            crossover_d = crossover_q;
+
+            if (rexec) begin
+                rptr_d = rptr_max ? 0 : rptr_q + 1;
+            end
+
+            if (wexec) begin
+                wptr_d = wptr_max ? 0 : wptr_q + 1;
+            end
+            //  Flag is set when the write pointer wraps, cleared when the read pointer wraps
+            if (wexec && wptr_max) begin
+                crossover_d = 1'b1;
+            end else if (rexec && rptr_max) begin
+                crossover_d = 1'b0;
+            end
+        end
+        //  }}}
+
+        //  FIFO buffer memory management (the storage itself has no reset)
+        //  {{{
+        always_ff @(posedge clk_i)
+        begin
+            if (wexec) fifo_mem_q[wptr_q] <= wdata_i;
+        end
+        //  When empty in FEEDTHROUGH mode the write data is forwarded combinationally
+        assign rdata_o = FEEDTHROUGH && empty ? wdata_i : fifo_mem_q[rptr_q];
+        //  }}}
+
+        //  Setting of internal state
+        //  {{{
+        always_ff @(posedge clk_i or negedge rst_ni)
+        begin
+            if (!rst_ni) begin
+                rptr_q      <= 0;
+                wptr_q      <= 0;
+                crossover_q <= 1'b0;
+            end else begin
+                rptr_q      <= rptr_d;
+                wptr_q      <= wptr_d;
+                crossover_q <= crossover_d;
+            end
+        end
+        //  }}}
+
+        //  Assertions: pointer order must be consistent with the crossover flag
+        //  {{{
+        //  pragma translate_off
+        rptr_ahead_wptr_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
+                ((rptr_q <= wptr_q) && !crossover_q) ||
+                ((rptr_q >= wptr_q) &&  crossover_q)) else
+                $error("fifo: read pointer is ahead of the write pointer");
+        //  pragma translate_on
+        //  }}}
+    end
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv
new file mode 100644
index 0000000..bbd8d90
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv
@@ -0,0 +1,85 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Fixed-Priority Arbiter
+ *  History       :
+ */
+module hpdcache_fxarb
+    //  Parameters
+    //  {{{
+#(
+    //    Number of requesters
+    parameter int unsigned N = 0
+)
+    //  }}}
+    //  Ports
+    //  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,      // asynchronous reset, active low
+    input  logic [N-1:0]          req_i,       // request vector, one bit per requester
+    output logic [N-1:0]          gnt_o,       // grant vector
+    input  logic                  ready_i      // while low with a request, the grant is held
+);
+    //  }}}
+
+    //  Declaration of internal wires and registers
+    //  {{{
+    logic [N-1:0]       gnt_q, gnt;  // latched grant / grant computed from req_i this cycle
+    logic               wait_q;      // a latched grant is being held until ready_i is high
+    //  }}}
+
+    //  Compute the grant vector
+    //  {{{
+    hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i (.val_i(req_i), .val_o(gnt));
+    //  }}}
+
+    //  Compute the output grant vector (the latched grant takes over while waiting)
+    //  {{{
+    assign gnt_o = wait_q ? gnt_q : gnt;
+    //  }}}
+
+    //  Setting of internal state
+    //  {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin
+        if (!rst_ni) begin
+            wait_q <= 1'b0;
+            gnt_q  <= '0;
+        end else begin
+            wait_q <= ~ready_i & (wait_q | (|req_i));  // start/keep waiting while not ready
+            if (!ready_i && !wait_q && (|req_i)) begin  // entering the wait: latch the grant
+                gnt_q <= gnt;
+            end
+        end
+    end
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot0(gnt_o)) else $error("arbiter: granting more than one requester");
+    //  pragma translate_on
+    //  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv
new file mode 100644
index 0000000..d78e1eb
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv
@@ -0,0 +1,79 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Author(s)     : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Simple multiplexor
+ *  History       :
+ */
+module hpdcache_mux
+    //  Parameters
+    //  {{{
+#(
+    //  Number of inputs
+    parameter  int unsigned NINPUT      = 0,
+
+    //  Width in bits of each input
+    parameter  int unsigned DATA_WIDTH  = 0,
+
+    //  Selector signal is one-hot encoded (1) or binary encoded (0)
+    parameter  bit          ONE_HOT_SEL = 0,
+
+    //  Compute the width of the selection signal
+    localparam int unsigned NINPUT_LOG2 = $clog2(NINPUT),
+    localparam int unsigned SEL_WIDTH   = ONE_HOT_SEL ? NINPUT : NINPUT_LOG2,
+
+    localparam type data_t = logic [DATA_WIDTH-1:0],
+    localparam type sel_t  = logic [SEL_WIDTH-1:0]
+)
+    //  }}}
+
+    //  Ports
+    //  {{{
+(
+    input  data_t [NINPUT-1:0] data_i,  // candidate inputs
+    input  sel_t               sel_i,   // input selector, encoding given by ONE_HOT_SEL
+    output data_t              data_o   // selected input, zero when nothing is selected
+);
+    //  }}}
+
+    generate
+        //  Selector is one-hot encoded
+        if (ONE_HOT_SEL == 1) begin
+            always_comb
+            begin : data_out_mux_comb
+                data_o = '0;
+                for (int unsigned i = 0; i < NINPUT; i++) begin  // OR of all inputs with bit set
+                    data_o |= sel_i[i] ? data_i[i] : '0;
+                end
+            end
+
+        //  Selector is binary encoded
+        end else begin
+            always_comb
+            begin : data_out_mux_comb
+                data_o = '0;
+                for (int unsigned i = 0; i < NINPUT; i++) begin  // only index sel_i contributes
+                    data_o |= (i == int'(sel_i)) ? data_i[i] : '0;
+                end
+            end
+        end
+    endgenerate
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv
new file mode 100644
index 0000000..36fe5bc
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv
@@ -0,0 +1,43 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Priority One-hot Encoder
+ *  History       :
+ */
+module hpdcache_prio_1hot_encoder
+    //  Parameters
+#(
+    parameter int unsigned N = 0  // width of the input and output vectors
+)
+    //  Ports
+(
+    input  logic [N-1:0] val_i,  // input vector, bit 0 has the highest priority
+    output logic [N-1:0] val_o   // val_i with only its lowest set bit kept (zero if none set)
+);
+
+    generate
+        assign val_o[0] = val_i[0];
+        for (genvar i = 1; i < int'(N); i++) begin : prio_gen
+            assign val_o[i] = val_i[i] & ~(|val_i[i-1:0]);  // set only if no lower bit is set
+        end
+    endgenerate
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv
new file mode 100644
index 0000000..184e6fb
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv
@@ -0,0 +1,63 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : March, 2020
+ *  Description   : 1RW register bank with write byte enable
+ *  History       :
+ */
+module hpdcache_regbank_wbyteenable_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,          // width in bits of addr
+    parameter int unsigned DATA_SIZE = 0,          // width in bits of one word
+    parameter int unsigned DEPTH = 2**ADDR_SIZE    // number of words in the array
+)
+(
+    input  logic                   clk,
+    input  logic                   rst_n,        // not used in this module body
+    input  logic                   cs,           // chip select: no access happens while low
+    input  logic                   we,           // write enable (effective only with cs)
+    input  logic [ADDR_SIZE-1:0]   addr,
+    input  logic [DATA_SIZE-1:0]   wdata,
+    input  logic [DATA_SIZE/8-1:0] wbyteenable,  // bit i enables the write of wdata[i*8 +: 8]
+    output logic [DATA_SIZE-1:0]   rdata         // registered, updated on every access with cs
+);
+
+    /*
+     *  Internal memory array declaration
+     */
+    typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
+    mem_t mem;
+
+    /*
+     *  Process to update or read the memory array
+     */
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs == 1'b1) begin
+            if (we == 1'b1) begin
+                for (int i = 0; i < DATA_SIZE/8; i++) begin  // bytes are written independently
+                    if (wbyteenable[i]) mem[addr][i*8 +: 8] <= wdata[i*8 +: 8];
+                end
+            end
+            rdata <= mem[addr];  // non-blocking: on a write cycle this is the pre-write word
+        end
+    end : mem_update_ff
+endmodule : hpdcache_regbank_wbyteenable_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv
new file mode 100644
index 0000000..e185bc4
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv
@@ -0,0 +1,61 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : March, 2020
+ *  Description   : 1RW register bank with write bit mask
+ *  History       :
+ */
+module hpdcache_regbank_wmask_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,       // width of addr, in bits
+    parameter int unsigned DATA_SIZE = 0,       // width of one stored word, in bits
+    parameter int unsigned DEPTH = 2**ADDR_SIZE // number of words in the array
+)
+(
+    input  logic                  clk,          // mem and rdata update on the rising edge
+    input  logic                  rst_n,        // not referenced inside this module
+    input  logic                  cs,           // nothing is written or read unless this is 1
+    input  logic                  we,           // with cs: merge wdata into mem[addr] under wmask
+    input  logic [ADDR_SIZE-1:0]  addr,         // word index shared by the write and the read
+    input  logic [DATA_SIZE-1:0]  wdata,        // write data
+    input  logic [DATA_SIZE-1:0]  wmask,        // per bit: 1 takes wdata, 0 keeps the stored bit
+    output logic [DATA_SIZE-1:0]  rdata         // registered; keeps its value while cs is 0
+);
+
+    /*
+     *  Storage: an unpacked array of DEPTH words, each DATA_SIZE bits wide (no reset)
+     */
+    typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
+    mem_t mem;
+
+    /*
+     *  Clocked process: optional bit-masked write, then registered read of mem[addr]
+     */
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs == 1'b1) begin
+            if (we == 1'b1) begin
+                mem[addr] <= (mem[addr] & ~wmask) | (wdata & wmask); // read-modify-write merge
+            end
+            rdata <= mem[addr]; // non-blocking: on a write cycle this samples the pre-write word
+        end
+    end : mem_update_ff
+endmodule : hpdcache_regbank_wmask_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv
new file mode 100644
index 0000000..9595725
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv
@@ -0,0 +1,121 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/**
+ *  Author(s)     : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Round-Robin Arbiter
+ *                  Based on design from
+ *                  http://www.rtlery.com/articles/how-design-round-robin-arbiter
+ *  History       :
+ */
+module hpdcache_rrarb
+    //  Parameters
+    //  {{{
+#(
+    //    Number of requesters (also the width of req_i and gnt_o); must be overridden to > 0
+    parameter int unsigned N = 0
+)
+    //  }}}
+    //  Ports
+    //  {{{
+(
+    input  logic                  clk_i,    // wait_q and gnt_q update on the rising edge
+    input  logic                  rst_ni,   // asynchronous reset, active low
+    input  logic [N-1:0]          req_i,    // request vector, one bit per requester
+    output logic [N-1:0]          gnt_o,    // grant vector: gnt, or the held gnt_q while waiting
+    input  logic                  ready_i   // low with a request pending/held sets wait_q
+);
+    //  }}}
+
+    //  Declaration of internal wires and registers
+    //  {{{
+    logic [N-1:0]       gnt_q, gnt;               // registered grant / grant computed this cycle
+    logic [N-1:0]       nxt;                      // gnt_q rotated left by one position
+    logic               wait_q;                   // 1 while a grant is held waiting for ready_i
+    logic [N-1:0]       mask, gnt_msk, gnt_nomsk; // mask and the two encoder outputs
+    logic               pending;                  // at least one bit of req_i is set
+    genvar              gen_i;
+    //  }}}
+
+    //  Elaboration-time assertions
+    //  {{{
+    //  pragma translate_off
+    generate
+        if (N == 0) $error("N must be greater than 0");
+    endgenerate
+    //  pragma translate_on
+    //  }}}
+
+    //  Compute the thermometer mask vector: ones from the set bit of nxt up to bit N-1
+    //  {{{
+    generate
+        if (N > 1) begin : gen_nxt_gt_1
+            assign nxt = {gnt_q[N-2:0], gnt_q[N-1]}; // rotate left, MSB wraps to bit 0
+        end else begin : gen_nxt_1
+            assign nxt = gnt_q[0];                   // single requester: nothing to rotate
+        end
+
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : gen_mask
+            assign mask[gen_i] = |nxt[gen_i:0];      // 1 if any bit of nxt at or below gen_i is set
+        end
+    endgenerate
+    //  }}}
+
+    //  Compute the grant vector: encode masked requests, fall back to all requests if none
+    //  {{{
+    hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i   (.val_i(req_i & mask), .val_o(gnt_msk));
+    hpdcache_prio_1hot_encoder #(.N(N)) prio_nomsk_i (.val_i(req_i)       , .val_o(gnt_nomsk));
+    assign gnt = |gnt_msk ? gnt_msk : gnt_nomsk;
+    //  }}}
+
+    //  Compute the output grant vector: while waiting, keep presenting the registered grant
+    //  {{{
+    assign gnt_o = wait_q ? gnt_q : gnt;
+    //  }}}
+
+    //  Setting of internal state
+    //  {{{
+    assign pending = |req_i;
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin
+        if (!rst_ni) begin
+            wait_q <= 1'b0;
+            gnt_q  <= {1'b1, {N-1{1'b0}}}; // only the MSB set, so nxt is bit 0 and mask is all ones
+        end else begin
+            wait_q <= ~ready_i & (wait_q | pending); // start/keep waiting until ready_i is high
+            if (!wait_q && pending) begin            // capture a grant only when not already waiting
+                gnt_q <= gnt;
+            end
+        end
+    end
+    //  }}}
+
+    //  Assertions (checked at posedge clk_i, disabled while rst_ni is low)
+    //  {{{
+    //  pragma translate_off
+    gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot0(gnt)) else $error("arbiter: granting more than one requester");
+    gnt_q_exactly_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot(gnt_q)) else $error("arbiter: grant state is not one-hot");
+    //  pragma translate_on
+    //  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv
new file mode 100644
index 0000000..d4cab7d
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv
@@ -0,0 +1,56 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : March, 2020
+ *  Description   : Wrapper for Behavioral SRAM macros
+ *  History       :
+ */
+module hpdcache_sram
+#(
+    parameter int unsigned ADDR_SIZE = 0,       // width of addr, in bits
+    parameter int unsigned DATA_SIZE = 0,       // width of wdata and rdata, in bits
+    parameter int unsigned DEPTH = 2**ADDR_SIZE // passed through to ram_i
+)
+(
+    input  logic                  clk,          // every port below is forwarded to ram_i
+    input  logic                  rst_n,
+    input  logic                  cs,
+    input  logic                  we,
+    input  logic [ADDR_SIZE-1:0]  addr,
+    input  logic [DATA_SIZE-1:0]  wdata,
+    output logic [DATA_SIZE-1:0]  rdata
+);
+    //  Pure wrapper: one hpdcache_sram_1rw instance, no logic of its own.
+    hpdcache_sram_1rw #(
+        .ADDR_SIZE(ADDR_SIZE),
+        .DATA_SIZE(DATA_SIZE),
+        .DEPTH(DEPTH)
+    ) ram_i (
+        .clk,   // implicit named connections: each port binds to the same-named signal
+        .rst_n,
+        .cs,
+        .we,
+        .addr,
+        .wdata,
+        .rdata
+    );
+
+endmodule : hpdcache_sram
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv
new file mode 100644
index 0000000..43bdb45
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv
@@ -0,0 +1,58 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : March, 2020
+ *  Description   : Wrapper for 1RW SRAM macros implementing a write byte enable
+ *  History       :
+ */
+module hpdcache_sram_wbyteenable
+#(
+    parameter int unsigned ADDR_SIZE = 0,       // width of addr, in bits
+    parameter int unsigned DATA_SIZE = 0,       // width of wdata and rdata, in bits
+    parameter int unsigned DEPTH = 2**ADDR_SIZE // passed through to ram_i
+)
+(
+    input  logic                   clk,         // every port below is forwarded to ram_i
+    input  logic                   rst_n,
+    input  logic                   cs,
+    input  logic                   we,
+    input  logic [ADDR_SIZE-1:0]   addr,
+    input  logic [DATA_SIZE-1:0]   wdata,
+    input  logic [DATA_SIZE/8-1:0] wbyteenable, // DATA_SIZE/8 bits wide (one per whole byte)
+    output logic [DATA_SIZE-1:0]   rdata
+);
+    //  Pure wrapper: one hpdcache_sram_wbyteenable_1rw instance, no logic of its own.
+    hpdcache_sram_wbyteenable_1rw #(
+        .ADDR_SIZE(ADDR_SIZE),
+        .DATA_SIZE(DATA_SIZE),
+        .DEPTH(DEPTH)
+    ) ram_i (
+        .clk,   // implicit named connections: each port binds to the same-named signal
+        .rst_n,
+        .cs,
+        .we,
+        .addr,
+        .wdata,
+        .wbyteenable,
+        .rdata
+    );
+
+endmodule : hpdcache_sram_wbyteenable
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv
new file mode 100644
index 0000000..a4771e3
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv
@@ -0,0 +1,58 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : March, 2020
+ *  Description   : Wrapper for 1RW SRAM macros implementing write bit mask
+ *  History       :
+ */
+module hpdcache_sram_wmask
+#(
+    parameter int unsigned ADDR_SIZE = 0,       // width of addr, in bits
+    parameter int unsigned DATA_SIZE = 0,       // width of wdata, wmask and rdata, in bits
+    parameter int unsigned DEPTH = 2**ADDR_SIZE // passed through to ram_i
+)
+(
+    input  logic                  clk,          // every port below is forwarded to ram_i
+    input  logic                  rst_n,
+    input  logic                  cs,
+    input  logic                  we,
+    input  logic [ADDR_SIZE-1:0]  addr,
+    input  logic [DATA_SIZE-1:0]  wdata,
+    input  logic [DATA_SIZE-1:0]  wmask,        // same width as wdata (one mask bit per data bit)
+    output logic [DATA_SIZE-1:0]  rdata
+);
+    //  Pure wrapper: one hpdcache_sram_wmask_1rw instance, no logic of its own.
+    hpdcache_sram_wmask_1rw #(
+        .ADDR_SIZE(ADDR_SIZE),
+        .DATA_SIZE(DATA_SIZE),
+        .DEPTH(DEPTH)
+    ) ram_i (
+        .clk,   // implicit named connections: each port binds to the same-named signal
+        .rst_n,
+        .cs,
+        .we,
+        .addr,
+        .wdata,
+        .wmask,
+        .rdata
+    );
+
+endmodule : hpdcache_sram_wmask
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv
new file mode 100644
index 0000000..863c588
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv
@@ -0,0 +1,89 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : October, 2023
+ *  Description   : Synchronization buffer
+ *  History       :
+ */
+module hpdcache_sync_buffer
+    //  Parameters
+    //  {{{
+#(
+    parameter bit FEEDTHROUGH = 1'b0,   // 1: wdata_i may bypass the register while it is empty
+    parameter type data_t = logic       // type of the buffered payload
+)
+    //  }}}
+    //  Ports
+    //  {{{
+(
+    input  logic        clk_i,      // buf_q and valid_q update on the rising edge
+    input  logic        rst_ni,     // asynchronous, active low; clears valid_q only
+    input  logic        w_i,        // write request
+    output logic        wok_o,      // 1 when empty, or when FEEDTHROUGH and r_i are both 1
+    input  data_t       wdata_i,    // data to store (or to bypass when FEEDTHROUGH)
+    input  logic        r_i,        // read request; clears valid_q unless buf_we is also 1
+    output logic        rok_o,      // 1 when full, or when FEEDTHROUGH and w_i are both 1
+    output data_t       rdata_o     // buf_q, or wdata_i when FEEDTHROUGH and empty
+);
+    //  }}}
+
+    //  Declaration of internal wires and registers
+    //  {{{
+    data_t      buf_q;              // the single storage register (not reset)
+    logic       buf_we;             // load buf_q from wdata_i on this clock edge
+    logic       valid_q, valid_d;   // occupancy flag and its next value
+    //  }}}
+
+    //  Global control signals
+    //  {{{
+    assign rok_o  =  valid_q | (FEEDTHROUGH & w_i),
+           wok_o  = ~valid_q | (FEEDTHROUGH & r_i);
+    //  FEEDTHROUGH=0: load when empty. FEEDTHROUGH=1: load when valid_q equals r_i.
+    assign buf_we = w_i & ((FEEDTHROUGH & ~(valid_q ^ r_i)) | (~FEEDTHROUGH & ~valid_q));
+    //  }}}
+
+    //  Control of buffer: full after a load, otherwise stays full until a read
+    //  {{{
+    assign valid_d = buf_we | (valid_q & ~r_i);
+    //  }}}
+
+    //  Buffer register (no reset) and read-data selection
+    //  {{{
+    always_ff @(posedge clk_i)
+    begin
+        if (buf_we) buf_q <= wdata_i;
+    end
+
+    assign rdata_o = FEEDTHROUGH && !valid_q ? wdata_i : buf_q; // bypass only while empty
+    //  }}}
+
+    //  Setting of internal state
+    //  {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin
+        if (!rst_ni) begin
+            valid_q <= 1'b0;        // buffer is empty after reset
+        end else begin
+            valid_q <= valid_d;
+        end
+    end
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv
new file mode 100644
index 0000000..7288c73
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv
@@ -0,0 +1,60 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : March, 2020
+ *  Description   : SRAM behavioral model
+ *  History       :
+ */
+module hpdcache_sram_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,       // width of addr, in bits
+    parameter int unsigned DATA_SIZE = 0,       // width of one stored word, in bits
+    parameter int unsigned DEPTH = 2**ADDR_SIZE // number of words in the array
+)
+(
+    input  logic                  clk,          // mem and rdata update on the rising edge
+    input  logic                  rst_n,        // not referenced inside this module
+    input  logic                  cs,           // nothing is written or read unless this is 1
+    input  logic                  we,           // with cs: store wdata at mem[addr]
+    input  logic [ADDR_SIZE-1:0]  addr,         // word index shared by the write and the read
+    input  logic [DATA_SIZE-1:0]  wdata,        // write data
+    output logic [DATA_SIZE-1:0]  rdata         // registered; keeps its value while cs is 0
+);
+
+    /*
+     *  Storage: an unpacked array of DEPTH words, each DATA_SIZE bits wide (no reset)
+     */
+    typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
+    mem_t mem;
+
+    /*
+     *  Clocked process: optional full-word write, then registered read of mem[addr]
+     */
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs == 1'b1) begin
+            if (we == 1'b1) begin
+                mem[addr] <= wdata;
+            end
+            rdata <= mem[addr]; // non-blocking: on a write cycle this samples the pre-write word
+        end
+    end : mem_update_ff
+endmodule : hpdcache_sram_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv
new file mode 100644
index 0000000..0e5e225
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv
@@ -0,0 +1,63 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : March, 2020
+ *  Description   : Behavioral model of a 1RW SRAM with write byte enable
+ *  History       :
+ */
+module hpdcache_sram_wbyteenable_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,       // width of addr, in bits
+    parameter int unsigned DATA_SIZE = 0,       // width of one stored word, in bits
+    parameter int unsigned DEPTH = 2**ADDR_SIZE // number of words in the array
+)
+(
+    input  logic                   clk,         // mem and rdata update on the rising edge
+    input  logic                   rst_n,       // not referenced inside this module
+    input  logic                   cs,          // nothing is written or read unless this is 1
+    input  logic                   we,          // with cs: write the enabled bytes of wdata
+    input  logic [ADDR_SIZE-1:0]   addr,        // word index shared by the write and the read
+    input  logic [DATA_SIZE-1:0]   wdata,       // write data
+    input  logic [DATA_SIZE/8-1:0] wbyteenable, // bit i gates wdata[i*8 +: 8]
+    output logic [DATA_SIZE-1:0]   rdata        // registered; keeps its value while cs is 0
+);
+
+    /*
+     *  Storage: an unpacked array of DEPTH words, each DATA_SIZE bits wide (no reset)
+     */
+    typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
+    mem_t mem;
+
+    /*
+     *  Clocked process: optional byte-enabled write, then registered read of mem[addr]
+     */
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs == 1'b1) begin
+            if (we == 1'b1) begin
+                for (int i = 0; i < DATA_SIZE/8; i++) begin // whole bytes only (division truncates)
+                    if (wbyteenable[i]) mem[addr][i*8 +: 8] <= wdata[i*8 +: 8];
+                end
+            end
+            rdata <= mem[addr]; // non-blocking: on a write cycle this samples the pre-write word
+        end
+    end : mem_update_ff
+endmodule : hpdcache_sram_wbyteenable_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv
new file mode 100644
index 0000000..5058ba2
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv
@@ -0,0 +1,61 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : March, 2020
+ *  Description   : Behavioral model of a 1RW SRAM with write bit mask
+ *  History       :
+ */
+module hpdcache_sram_wmask_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,       // width of addr, in bits
+    parameter int unsigned DATA_SIZE = 0,       // width of one stored word, in bits
+    parameter int unsigned DEPTH = 2**ADDR_SIZE // number of words in the array
+)
+(
+    input  logic                  clk,          // mem and rdata update on the rising edge
+    input  logic                  rst_n,        // not referenced inside this module
+    input  logic                  cs,           // nothing is written or read unless this is 1
+    input  logic                  we,           // with cs: merge wdata into mem[addr] under wmask
+    input  logic [ADDR_SIZE-1:0]  addr,         // word index shared by the write and the read
+    input  logic [DATA_SIZE-1:0]  wdata,        // write data
+    input  logic [DATA_SIZE-1:0]  wmask,        // per bit: 1 takes wdata, 0 keeps the stored bit
+    output logic [DATA_SIZE-1:0]  rdata         // registered; keeps its value while cs is 0
+);
+
+    /*
+     *  Storage: an unpacked array of DEPTH words, each DATA_SIZE bits wide (no reset)
+     */
+    typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
+    mem_t mem;
+
+    /*
+     *  Clocked process: optional bit-masked write, then registered read of mem[addr]
+     */
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs == 1'b1) begin
+            if (we == 1'b1) begin
+                mem[addr] <= (mem[addr] & ~wmask) | (wdata & wmask); // read-modify-write merge
+            end
+            rdata <= mem[addr]; // non-blocking: on a write cycle this samples the pre-write word
+        end
+    end : mem_update_ff
+endmodule : hpdcache_sram_wmask_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv
new file mode 100644
index 0000000..861e60c
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv
@@ -0,0 +1,658 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache top
+ *  History       :
+ */
+module hpdcache
+import hpdcache_pkg::*;
+    //  Parameters
+    //  {{{
+#(
+    parameter int  NREQUESTERS           = 1,
+    parameter int  HPDcacheMemIdWidth    = 8,
+    parameter int  HPDcacheMemDataWidth  = 512,
+    parameter type hpdcache_mem_req_t    = logic,
+    parameter type hpdcache_mem_req_w_t  = logic,
+    parameter type hpdcache_mem_resp_r_t = logic,
+    parameter type hpdcache_mem_resp_w_t = logic
+)
+    //  }}}
+
+    //  Ports
+    //  {{{
+(
+    //      Clock and reset signals
+    input  logic                          clk_i,
+    input  logic                          rst_ni,
+
+    //      Force the write buffer to send all pending writes
+    input  logic                          wbuf_flush_i,
+
+    //      Core request interface
+    //         1st cycle
+    input  logic                          core_req_valid_i [NREQUESTERS-1:0],
+    output logic                          core_req_ready_o [NREQUESTERS-1:0],
+    input  hpdcache_req_t                 core_req_i       [NREQUESTERS-1:0],
+    //         2nd cycle
+    input  logic                          core_req_abort_i [NREQUESTERS-1:0],
+    input  hpdcache_tag_t                 core_req_tag_i   [NREQUESTERS-1:0],
+    input  hpdcache_pma_t                 core_req_pma_i   [NREQUESTERS-1:0],
+
+    //      Core response interface
+    output logic                          core_rsp_valid_o [NREQUESTERS-1:0],
+    output hpdcache_rsp_t                 core_rsp_o       [NREQUESTERS-1:0],
+
+    //      Miss read interface
+    input  logic                          mem_req_miss_read_ready_i,
+    output logic                          mem_req_miss_read_valid_o,
+    output hpdcache_mem_req_t             mem_req_miss_read_o,
+
+    output logic                          mem_resp_miss_read_ready_o,
+    input  logic                          mem_resp_miss_read_valid_i,
+    input  hpdcache_mem_resp_r_t          mem_resp_miss_read_i,
+
+    //      Write-buffer write interface
+    input  logic                          mem_req_wbuf_write_ready_i,
+    output logic                          mem_req_wbuf_write_valid_o,
+    output hpdcache_mem_req_t             mem_req_wbuf_write_o,
+
+    input  logic                          mem_req_wbuf_write_data_ready_i,
+    output logic                          mem_req_wbuf_write_data_valid_o,
+    output hpdcache_mem_req_w_t           mem_req_wbuf_write_data_o,
+
+    output logic                          mem_resp_wbuf_write_ready_o,
+    input  logic                          mem_resp_wbuf_write_valid_i,
+    input  hpdcache_mem_resp_w_t          mem_resp_wbuf_write_i,
+
+    //      Uncached read interface
+    input  logic                          mem_req_uc_read_ready_i,
+    output logic                          mem_req_uc_read_valid_o,
+    output hpdcache_mem_req_t             mem_req_uc_read_o,
+
+    output logic                          mem_resp_uc_read_ready_o,
+    input  logic                          mem_resp_uc_read_valid_i,
+    input  hpdcache_mem_resp_r_t          mem_resp_uc_read_i,
+
+    //      Uncached write interface
+    input  logic                          mem_req_uc_write_ready_i,
+    output logic                          mem_req_uc_write_valid_o,
+    output hpdcache_mem_req_t             mem_req_uc_write_o,
+
+    input  logic                          mem_req_uc_write_data_ready_i,
+    output logic                          mem_req_uc_write_data_valid_o,
+    output hpdcache_mem_req_w_t           mem_req_uc_write_data_o,
+
+    output logic                          mem_resp_uc_write_ready_o,
+    input  logic                          mem_resp_uc_write_valid_i,
+    input  hpdcache_mem_resp_w_t          mem_resp_uc_write_i,
+
+    //      Performance events
+    output logic                          evt_cache_write_miss_o,
+    output logic                          evt_cache_read_miss_o,
+    output logic                          evt_uncached_req_o,
+    output logic                          evt_cmo_req_o,
+    output logic                          evt_write_req_o,
+    output logic                          evt_read_req_o,
+    output logic                          evt_prefetch_req_o,
+    output logic                          evt_req_on_hold_o,
+    output logic                          evt_rtab_rollback_o,
+    output logic                          evt_stall_refill_o,
+    output logic                          evt_stall_o,
+
+    //      Status interface
+    output logic                          wbuf_empty_o,
+
+    //      Configuration interface
+    input  logic                          cfg_enable_i,
+    input  wbuf_timecnt_t                 cfg_wbuf_threshold_i,
+    input  logic                          cfg_wbuf_reset_timecnt_on_write_i,
+    input  logic                          cfg_wbuf_sequential_waw_i,
+    input  logic                          cfg_wbuf_inhibit_write_coalescing_i,
+    input  logic                          cfg_prefetch_updt_plru_i,
+    input  logic                          cfg_error_on_cacheable_amo_i,
+    input  logic                          cfg_rtab_single_entry_i
+);
+
+    //  }}}
+
+    //  Declaration of internal signals
+    //  {{{
+    logic                  refill_req_valid;
+    logic                  refill_req_ready;
+    logic                  refill_busy;
+    logic                  refill_updt_plru;
+    hpdcache_set_t         refill_set;
+    hpdcache_dir_entry_t   refill_dir_entry;
+    hpdcache_way_vector_t  refill_read_victim_way;
+    hpdcache_way_vector_t  refill_write_victim_way;
+    logic                  refill_write_dir;
+    logic                  refill_write_data;
+    hpdcache_word_t        refill_word;
+    hpdcache_refill_data_t refill_data;
+    logic                  refill_core_rsp_valid;
+    hpdcache_rsp_t         refill_core_rsp;
+    hpdcache_nline_t       refill_nline;
+    logic                  refill_updt_rtab;
+
+    logic                  miss_mshr_empty;
+    logic                  miss_mshr_check;
+    mshr_set_t             miss_mshr_check_set;
+    mshr_tag_t             miss_mshr_check_tag;
+    logic                  miss_mshr_hit;
+    logic                  miss_mshr_alloc_cs;
+    logic                  miss_mshr_alloc;
+    logic                  miss_mshr_alloc_ready;
+    logic                  miss_mshr_alloc_full;
+    hpdcache_nline_t       miss_mshr_alloc_nline;
+    hpdcache_req_tid_t     miss_mshr_alloc_tid;
+    hpdcache_req_sid_t     miss_mshr_alloc_sid;
+    hpdcache_word_t        miss_mshr_alloc_word;
+    logic                  miss_mshr_alloc_need_rsp;
+    logic                  miss_mshr_alloc_is_prefetch;
+
+    logic                  wbuf_flush_all;
+    logic                  wbuf_write;
+    logic                  wbuf_write_ready;
+    wbuf_addr_t            wbuf_write_addr;
+    wbuf_data_t            wbuf_write_data;
+    wbuf_be_t              wbuf_write_be;
+    logic                  wbuf_write_uncacheable;
+    logic                  wbuf_read_hit;
+    logic                  wbuf_read_flush_hit;
+    hpdcache_req_addr_t    wbuf_rtab_addr;
+    logic                  wbuf_rtab_is_read;
+    logic                  wbuf_rtab_hit_open;
+    logic                  wbuf_rtab_hit_pend;
+    logic                  wbuf_rtab_hit_sent;
+    logic                  wbuf_rtab_not_ready;
+
+    logic                  uc_ready;
+    logic                  uc_req_valid;
+    hpdcache_uc_op_t       uc_req_op;
+    hpdcache_req_addr_t    uc_req_addr;
+    hpdcache_req_size_t    uc_req_size;
+    hpdcache_req_data_t    uc_req_data;
+    hpdcache_req_be_t      uc_req_be;
+    logic                  uc_req_uncacheable;
+    hpdcache_req_sid_t     uc_req_sid;
+    hpdcache_req_tid_t     uc_req_tid;
+    logic                  uc_req_need_rsp;
+    logic                  uc_wbuf_flush_all;
+    logic                  uc_dir_amo_match;
+    hpdcache_set_t         uc_dir_amo_match_set;
+    hpdcache_tag_t         uc_dir_amo_match_tag;
+    logic                  uc_dir_amo_update_plru;
+    hpdcache_way_vector_t  uc_dir_amo_hit_way;
+    logic                  uc_data_amo_write;
+    logic                  uc_data_amo_write_enable;
+    hpdcache_set_t         uc_data_amo_write_set;
+    hpdcache_req_size_t    uc_data_amo_write_size;
+    hpdcache_word_t        uc_data_amo_write_word;
+    logic [63:0]           uc_data_amo_write_data;
+    logic  [7:0]           uc_data_amo_write_be;
+    logic                  uc_lrsc_snoop;
+    hpdcache_req_addr_t    uc_lrsc_snoop_addr;
+    hpdcache_req_size_t    uc_lrsc_snoop_size;
+    logic                  uc_core_rsp_ready;
+    logic                  uc_core_rsp_valid;
+    hpdcache_rsp_t         uc_core_rsp;
+
+    logic                  cmo_req_valid;
+    logic                  cmo_ready;
+    hpdcache_cmoh_op_t     cmo_req_op;
+    hpdcache_req_addr_t    cmo_req_addr;
+    hpdcache_req_data_t    cmo_req_wdata;
+    logic                  cmo_wbuf_flush_all;
+    logic                  cmo_dir_check;
+    hpdcache_set_t         cmo_dir_check_set;
+    hpdcache_tag_t         cmo_dir_check_tag;
+    hpdcache_way_vector_t  cmo_dir_check_hit_way;
+    logic                  cmo_dir_inval;
+    hpdcache_set_t         cmo_dir_inval_set;
+    hpdcache_way_vector_t  cmo_dir_inval_way;
+
+    logic                  rtab_empty;
+    logic                  ctrl_empty;
+
+    logic                  core_rsp_valid;
+    hpdcache_rsp_t         core_rsp;
+
+    logic                  arb_req_valid;
+    logic                  arb_req_ready;
+    hpdcache_req_t         arb_req;
+    logic                  arb_abort;
+    hpdcache_tag_t         arb_tag;
+    hpdcache_pma_t         arb_pma;
+
+    localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_READ_ID  = {HPDcacheMemIdWidth{1'b1}}; // all-ones ID, wired to hpdcache_uc_i.mem_read_id_i
+    localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_WRITE_ID = {HPDcacheMemIdWidth{1'b1}}; // same all-ones value, wired to hpdcache_uc_i.mem_write_id_i
+    //  }}}
+
+    //  Requesters arbiter: NREQUESTERS core request ports in, one arb_* request stream out
+    //  {{{
+    hpdcache_core_arbiter #(
+        .NREQUESTERS                        (NREQUESTERS)
+    ) core_req_arbiter_i (
+        .clk_i,   // implicit named connection: binds to the same-named signal of this module
+        .rst_ni,
+
+        .core_req_valid_i,
+        .core_req_ready_o,
+        .core_req_i,
+        .core_req_abort_i,
+        .core_req_tag_i,
+        .core_req_pma_i,
+
+        .core_rsp_valid_i                   (core_rsp_valid), // response produced by hpdcache_ctrl_i
+        .core_rsp_i                         (core_rsp),
+        .core_rsp_valid_o,
+        .core_rsp_o,
+
+        .arb_req_valid_o                    (arb_req_valid),  // arb_* signals feed the core_req_* ports of hpdcache_ctrl_i
+        .arb_req_ready_i                    (arb_req_ready),
+        .arb_req_o                          (arb_req),
+        .arb_abort_o                        (arb_abort),
+        .arb_tag_o                          (arb_tag),
+        .arb_pma_o                          (arb_pma)
+    );
+    //  }}}
+
+    //  HPDcache controller: connected to the arbiter, miss handler, write buffer, UC and CMO handlers
+    //  {{{
+    hpdcache_ctrl hpdcache_ctrl_i(
+        .clk_i,
+        .rst_ni,
+
+        .core_req_valid_i                   (arb_req_valid), // request selected by core_req_arbiter_i
+        .core_req_ready_o                   (arb_req_ready),
+        .core_req_i                         (arb_req),
+        .core_req_abort_i                   (arb_abort),
+        .core_req_tag_i                     (arb_tag),
+        .core_req_pma_i                     (arb_pma),
+
+        .core_rsp_valid_o                   (core_rsp_valid), // routed back through core_req_arbiter_i
+        .core_rsp_o                         (core_rsp),
+
+        .wbuf_flush_i,
+
+        .cachedir_hit_o                     (/* unused */),
+
+        .miss_mshr_check_o                  (miss_mshr_check), // miss_mshr_* signals connect to hpdcache_miss_handler_i
+        .miss_mshr_check_set_o              (miss_mshr_check_set),
+        .miss_mshr_check_tag_o              (miss_mshr_check_tag),
+        .miss_mshr_alloc_o                  (miss_mshr_alloc),
+        .miss_mshr_alloc_cs_o               (miss_mshr_alloc_cs),
+        .miss_mshr_alloc_ready_i            (miss_mshr_alloc_ready),
+        .miss_mshr_alloc_full_i             (miss_mshr_alloc_full),
+        .miss_mshr_alloc_nline_o            (miss_mshr_alloc_nline),
+        .miss_mshr_alloc_tid_o              (miss_mshr_alloc_tid),
+        .miss_mshr_alloc_sid_o              (miss_mshr_alloc_sid),
+        .miss_mshr_alloc_word_o             (miss_mshr_alloc_word),
+        .miss_mshr_alloc_need_rsp_o         (miss_mshr_alloc_need_rsp),
+        .miss_mshr_alloc_is_prefetch_o      (miss_mshr_alloc_is_prefetch),
+        .miss_mshr_hit_i                    (miss_mshr_hit),
+
+        .refill_req_valid_i                 (refill_req_valid), // refill_* signals connect to hpdcache_miss_handler_i
+        .refill_req_ready_o                 (refill_req_ready),
+        .refill_busy_i                      (refill_busy),
+        .refill_updt_plru_i                 (refill_updt_plru),
+        .refill_set_i                       (refill_set),
+        .refill_dir_entry_i                 (refill_dir_entry),
+        .refill_victim_way_o                (refill_read_victim_way),  // goes to the miss handler's refill_victim_way_i
+        .refill_victim_way_i                (refill_write_victim_way), // comes from the miss handler's refill_victim_way_o
+        .refill_write_dir_i                 (refill_write_dir),
+        .refill_write_data_i                (refill_write_data),
+        .refill_word_i                      (refill_word),
+        .refill_data_i                      (refill_data),
+        .refill_core_rsp_valid_i            (refill_core_rsp_valid),
+        .refill_core_rsp_i                  (refill_core_rsp),
+        .refill_nline_i                     (refill_nline),
+        .refill_updt_rtab_i                 (refill_updt_rtab),
+
+        .wbuf_empty_i                       (wbuf_empty_o), // wbuf_* signals connect to hpdcache_wbuf_i
+        .wbuf_flush_all_o                   (wbuf_flush_all),
+        .wbuf_write_o                       (wbuf_write),
+        .wbuf_write_ready_i                 (wbuf_write_ready),
+        .wbuf_write_addr_o                  (wbuf_write_addr),
+        .wbuf_write_data_o                  (wbuf_write_data),
+        .wbuf_write_be_o                    (wbuf_write_be),
+        .wbuf_write_uncacheable_o           (wbuf_write_uncacheable),
+        .wbuf_read_hit_i                    (wbuf_read_hit),
+        .wbuf_read_flush_hit_o              (wbuf_read_flush_hit),
+        .wbuf_rtab_addr_o                   (wbuf_rtab_addr),
+        .wbuf_rtab_is_read_o                (wbuf_rtab_is_read),
+        .wbuf_rtab_hit_open_i               (wbuf_rtab_hit_open),
+        .wbuf_rtab_hit_pend_i               (wbuf_rtab_hit_pend),
+        .wbuf_rtab_hit_sent_i               (wbuf_rtab_hit_sent),
+        .wbuf_rtab_not_ready_i              (wbuf_rtab_not_ready),
+
+        .uc_busy_i                          (~uc_ready), // busy is the inverse of hpdcache_uc_i.req_ready_o
+        .uc_lrsc_snoop_o                    (uc_lrsc_snoop),
+        .uc_lrsc_snoop_addr_o               (uc_lrsc_snoop_addr),
+        .uc_lrsc_snoop_size_o               (uc_lrsc_snoop_size),
+        .uc_req_valid_o                     (uc_req_valid),
+        .uc_req_op_o                        (uc_req_op),
+        .uc_req_addr_o                      (uc_req_addr),
+        .uc_req_size_o                      (uc_req_size),
+        .uc_req_data_o                      (uc_req_data),
+        .uc_req_be_o                        (uc_req_be),
+        .uc_req_uc_o                        (uc_req_uncacheable),
+        .uc_req_sid_o                       (uc_req_sid),
+        .uc_req_tid_o                       (uc_req_tid),
+        .uc_req_need_rsp_o                  (uc_req_need_rsp),
+        .uc_wbuf_flush_all_i                (uc_wbuf_flush_all),
+        .uc_dir_amo_match_i                 (uc_dir_amo_match),
+        .uc_dir_amo_match_set_i             (uc_dir_amo_match_set),
+        .uc_dir_amo_match_tag_i             (uc_dir_amo_match_tag),
+        .uc_dir_amo_update_plru_i           (uc_dir_amo_update_plru),
+        .uc_dir_amo_hit_way_o               (uc_dir_amo_hit_way),
+        .uc_data_amo_write_i                (uc_data_amo_write),
+        .uc_data_amo_write_enable_i         (uc_data_amo_write_enable),
+        .uc_data_amo_write_set_i            (uc_data_amo_write_set),
+        .uc_data_amo_write_size_i           (uc_data_amo_write_size),
+        .uc_data_amo_write_word_i           (uc_data_amo_write_word),
+        .uc_data_amo_write_data_i           (uc_data_amo_write_data),
+        .uc_data_amo_write_be_i             (uc_data_amo_write_be),
+        .uc_core_rsp_ready_o                (uc_core_rsp_ready),
+        .uc_core_rsp_valid_i                (uc_core_rsp_valid),
+        .uc_core_rsp_i                      (uc_core_rsp),
+
+        .cmo_busy_i                         (~cmo_ready), // busy is the inverse of hpdcache_cmo_i.req_ready_o
+        .cmo_req_valid_o                    (cmo_req_valid),
+        .cmo_req_op_o                       (cmo_req_op),
+        .cmo_req_addr_o                     (cmo_req_addr),
+        .cmo_req_wdata_o                    (cmo_req_wdata),
+        .cmo_wbuf_flush_all_i               (cmo_wbuf_flush_all),
+        .cmo_dir_check_i                    (cmo_dir_check),
+        .cmo_dir_check_set_i                (cmo_dir_check_set),
+        .cmo_dir_check_tag_i                (cmo_dir_check_tag),
+        .cmo_dir_check_hit_way_o            (cmo_dir_check_hit_way),
+        .cmo_dir_inval_i                    (cmo_dir_inval),
+        .cmo_dir_inval_set_i                (cmo_dir_inval_set),
+        .cmo_dir_inval_way_i                (cmo_dir_inval_way),
+
+        .rtab_empty_o                       (rtab_empty), // consumed by hpdcache_uc_i and hpdcache_cmo_i
+        .ctrl_empty_o                       (ctrl_empty), // consumed by hpdcache_uc_i and hpdcache_cmo_i
+
+        .cfg_enable_i,
+        .cfg_rtab_single_entry_i,
+
+        .evt_cache_write_miss_o,
+        .evt_cache_read_miss_o,
+        .evt_uncached_req_o,
+        .evt_cmo_req_o,
+        .evt_write_req_o,
+        .evt_read_req_o,
+        .evt_prefetch_req_o,
+        .evt_req_on_hold_o,
+        .evt_rtab_rollback_o,
+        .evt_stall_refill_o,
+        .evt_stall_o
+    );
+    //  }}}
+
+    //  HPDcache write-buffer: driven by hpdcache_ctrl_i, emits writes on the mem_*_wbuf_write_* ports
+    //  {{{
+    hpdcache_wbuf_wrapper #(
+        .HPDcacheMemIdWidth                 (HPDcacheMemIdWidth),
+        .HPDcacheMemDataWidth               (HPDcacheMemDataWidth),
+        .hpdcache_mem_req_t                 (hpdcache_mem_req_t),
+        .hpdcache_mem_req_w_t               (hpdcache_mem_req_w_t),
+        .hpdcache_mem_resp_w_t              (hpdcache_mem_resp_w_t)
+    ) hpdcache_wbuf_i(
+        .clk_i,
+        .rst_ni,
+
+        .empty_o                            (wbuf_empty_o), // also read by the controller, UC and CMO handlers
+        .full_o                             (/* unused */),
+        .flush_all_i                        (wbuf_flush_all),
+
+        .cfg_threshold_i                    (cfg_wbuf_threshold_i),
+        .cfg_reset_timecnt_on_write_i       (cfg_wbuf_reset_timecnt_on_write_i),
+        .cfg_sequential_waw_i               (cfg_wbuf_sequential_waw_i),
+        .cfg_inhibit_write_coalescing_i     (cfg_wbuf_inhibit_write_coalescing_i),
+
+        .write_i                            (wbuf_write),
+        .write_ready_o                      (wbuf_write_ready),
+        .write_addr_i                       (wbuf_write_addr),
+        .write_data_i                       (wbuf_write_data),
+        .write_be_i                         (wbuf_write_be),
+        .write_uc_i                         (wbuf_write_uncacheable),
+
+        .read_addr_i                        (wbuf_write_addr), // read lookup is driven by the same signal as write_addr_i
+        .read_hit_o                         (wbuf_read_hit),
+        .read_flush_hit_i                   (wbuf_read_flush_hit),
+
+        .replay_addr_i                      (wbuf_rtab_addr), // replay_* ports carry the controller's wbuf_rtab_* signals
+        .replay_is_read_i                   (wbuf_rtab_is_read),
+        .replay_open_hit_o                  (wbuf_rtab_hit_open),
+        .replay_pend_hit_o                  (wbuf_rtab_hit_pend),
+        .replay_sent_hit_o                  (wbuf_rtab_hit_sent),
+        .replay_not_ready_o                 (wbuf_rtab_not_ready),
+
+        .mem_req_write_ready_i              (mem_req_wbuf_write_ready_i),
+        .mem_req_write_valid_o              (mem_req_wbuf_write_valid_o),
+        .mem_req_write_o                    (mem_req_wbuf_write_o),
+
+        .mem_req_write_data_ready_i         (mem_req_wbuf_write_data_ready_i),
+        .mem_req_write_data_valid_o         (mem_req_wbuf_write_data_valid_o),
+        .mem_req_write_data_o               (mem_req_wbuf_write_data_o),
+
+        .mem_resp_write_ready_o             (mem_resp_wbuf_write_ready_o),
+        .mem_resp_write_valid_i             (mem_resp_wbuf_write_valid_i),
+        .mem_resp_write_i                   (mem_resp_wbuf_write_i)
+    );
+    //  }}}
+
+    //  Miss handler: MSHR check/alloc and refill interface towards hpdcache_ctrl_i, reads via mem_*_miss_read_*
+    //  {{{
+    hpdcache_miss_handler #(
+        .HPDcacheMemIdWidth                 (HPDcacheMemIdWidth),
+        .HPDcacheMemDataWidth               (HPDcacheMemDataWidth),
+        .hpdcache_mem_req_t                 (hpdcache_mem_req_t),
+        .hpdcache_mem_resp_r_t              (hpdcache_mem_resp_r_t)
+    ) hpdcache_miss_handler_i(
+        .clk_i,
+        .rst_ni,
+
+        .mshr_empty_o                       (miss_mshr_empty), // read by hpdcache_uc_i and hpdcache_cmo_i
+        .mshr_full_o                        (/* unused */),
+
+        .cfg_prefetch_updt_plru_i,
+
+        .mshr_check_i                       (miss_mshr_check),
+        .mshr_check_set_i                   (miss_mshr_check_set),
+        .mshr_check_tag_i                   (miss_mshr_check_tag),
+        .mshr_check_hit_o                   (miss_mshr_hit),
+
+        .mshr_alloc_ready_o                 (miss_mshr_alloc_ready),
+        .mshr_alloc_i                       (miss_mshr_alloc),
+        .mshr_alloc_cs_i                    (miss_mshr_alloc_cs),
+        .mshr_alloc_full_o                  (miss_mshr_alloc_full),
+        .mshr_alloc_nline_i                 (miss_mshr_alloc_nline),
+        .mshr_alloc_tid_i                   (miss_mshr_alloc_tid),
+        .mshr_alloc_sid_i                   (miss_mshr_alloc_sid),
+        .mshr_alloc_word_i                  (miss_mshr_alloc_word),
+        .mshr_alloc_need_rsp_i              (miss_mshr_alloc_need_rsp),
+        .mshr_alloc_is_prefetch_i           (miss_mshr_alloc_is_prefetch),
+
+        .refill_req_ready_i                 (refill_req_ready),
+        .refill_req_valid_o                 (refill_req_valid),
+        .refill_busy_o                      (refill_busy),
+        .refill_updt_plru_o                 (refill_updt_plru),
+        .refill_set_o                       (refill_set),
+        .refill_dir_entry_o                 (refill_dir_entry),
+        .refill_victim_way_i                (refill_read_victim_way),  // driven by the controller's refill_victim_way_o
+        .refill_write_dir_o                 (refill_write_dir),
+        .refill_write_data_o                (refill_write_data),
+        .refill_victim_way_o                (refill_write_victim_way), // returned to the controller's refill_victim_way_i
+        .refill_data_o                      (refill_data),
+        .refill_word_o                      (refill_word),
+        .refill_nline_o                     (refill_nline),
+        .refill_updt_rtab_o                 (refill_updt_rtab),
+
+        .refill_core_rsp_valid_o            (refill_core_rsp_valid),
+        .refill_core_rsp_o                  (refill_core_rsp),
+
+        .mem_req_ready_i                    (mem_req_miss_read_ready_i),
+        .mem_req_valid_o                    (mem_req_miss_read_valid_o),
+        .mem_req_o                          (mem_req_miss_read_o),
+
+        .mem_resp_ready_o                   (mem_resp_miss_read_ready_o),
+        .mem_resp_valid_i                   (mem_resp_miss_read_valid_i),
+        .mem_resp_i                         (mem_resp_miss_read_i)
+    );
+    //  }}}
+
+    //  Uncacheable request handler: takes uc_req_* from hpdcache_ctrl_i, uses the mem_*_uc_* ports
+    //  {{{
+    hpdcache_uncached #(
+        .HPDcacheMemIdWidth            (HPDcacheMemIdWidth),
+        .HPDcacheMemDataWidth          (HPDcacheMemDataWidth),
+        .hpdcache_mem_req_t            (hpdcache_mem_req_t),
+        .hpdcache_mem_req_w_t          (hpdcache_mem_req_w_t),
+        .hpdcache_mem_resp_r_t         (hpdcache_mem_resp_r_t),
+        .hpdcache_mem_resp_w_t         (hpdcache_mem_resp_w_t)
+    ) hpdcache_uc_i(
+        .clk_i,
+        .rst_ni,
+
+        .wbuf_empty_i                  (wbuf_empty_o),    // from hpdcache_wbuf_i
+        .mshr_empty_i                  (miss_mshr_empty), // from hpdcache_miss_handler_i
+        .rtab_empty_i                  (rtab_empty),      // from hpdcache_ctrl_i
+        .ctrl_empty_i                  (ctrl_empty),      // from hpdcache_ctrl_i
+
+        .req_valid_i                   (uc_req_valid),
+        .req_ready_o                   (uc_ready), // inverted to form the controller's uc_busy_i
+        .req_op_i                      (uc_req_op),
+        .req_addr_i                    (uc_req_addr),
+        .req_size_i                    (uc_req_size),
+        .req_data_i                    (uc_req_data),
+        .req_be_i                      (uc_req_be),
+        .req_uc_i                      (uc_req_uncacheable),
+        .req_sid_i                     (uc_req_sid),
+        .req_tid_i                     (uc_req_tid),
+        .req_need_rsp_i                (uc_req_need_rsp),
+
+        .wbuf_flush_all_o              (uc_wbuf_flush_all),
+
+        .dir_amo_match_o               (uc_dir_amo_match),
+        .dir_amo_match_set_o           (uc_dir_amo_match_set),
+        .dir_amo_match_tag_o           (uc_dir_amo_match_tag),
+        .dir_amo_update_plru_o         (uc_dir_amo_update_plru),
+        .dir_amo_hit_way_i             (uc_dir_amo_hit_way),
+
+        .data_amo_write_o              (uc_data_amo_write),
+        .data_amo_write_enable_o       (uc_data_amo_write_enable),
+        .data_amo_write_set_o          (uc_data_amo_write_set),
+        .data_amo_write_size_o         (uc_data_amo_write_size),
+        .data_amo_write_word_o         (uc_data_amo_write_word),
+        .data_amo_write_data_o         (uc_data_amo_write_data), // declared as logic [63:0] in this module
+        .data_amo_write_be_o           (uc_data_amo_write_be),   // declared as logic [7:0] in this module
+
+        .lrsc_snoop_i                  (uc_lrsc_snoop),
+        .lrsc_snoop_addr_i             (uc_lrsc_snoop_addr),
+        .lrsc_snoop_size_i             (uc_lrsc_snoop_size),
+
+        .core_rsp_ready_i              (uc_core_rsp_ready),
+        .core_rsp_valid_o              (uc_core_rsp_valid),
+        .core_rsp_o                    (uc_core_rsp),
+
+        .mem_read_id_i                 (HPDCACHE_UC_READ_ID),  // all-ones constant
+        .mem_write_id_i                (HPDCACHE_UC_WRITE_ID), // all-ones constant (same value as the read ID)
+
+        .mem_req_read_ready_i          (mem_req_uc_read_ready_i),
+        .mem_req_read_valid_o          (mem_req_uc_read_valid_o),
+        .mem_req_read_o                (mem_req_uc_read_o),
+
+        .mem_resp_read_ready_o         (mem_resp_uc_read_ready_o),
+        .mem_resp_read_valid_i         (mem_resp_uc_read_valid_i),
+        .mem_resp_read_i               (mem_resp_uc_read_i),
+
+        .mem_req_write_ready_i         (mem_req_uc_write_ready_i),
+        .mem_req_write_valid_o         (mem_req_uc_write_valid_o),
+        .mem_req_write_o               (mem_req_uc_write_o),
+
+        .mem_req_write_data_ready_i    (mem_req_uc_write_data_ready_i),
+        .mem_req_write_data_valid_o    (mem_req_uc_write_data_valid_o),
+        .mem_req_write_data_o          (mem_req_uc_write_data_o),
+
+        .mem_resp_write_ready_o        (mem_resp_uc_write_ready_o),
+        .mem_resp_write_valid_i        (mem_resp_uc_write_valid_i),
+        .mem_resp_write_i              (mem_resp_uc_write_i),
+
+        .cfg_error_on_cacheable_amo_i
+    );
+    //  }}}
+    //  CMO Request Handler: takes cmo_req_* from hpdcache_ctrl_i, returns directory check/invalidate commands
+    //  {{{
+    hpdcache_cmo hpdcache_cmo_i(
+        .clk_i,
+        .rst_ni,
+
+        .wbuf_empty_i           (wbuf_empty_o),    // from hpdcache_wbuf_i
+        .mshr_empty_i           (miss_mshr_empty), // from hpdcache_miss_handler_i
+        .rtab_empty_i           (rtab_empty),      // from hpdcache_ctrl_i
+        .ctrl_empty_i           (ctrl_empty),      // from hpdcache_ctrl_i
+
+        .req_valid_i            (cmo_req_valid),
+        .req_ready_o            (cmo_ready), // inverted to form the controller's cmo_busy_i
+        .req_op_i               (cmo_req_op),
+        .req_addr_i             (cmo_req_addr),
+        .req_wdata_i            (cmo_req_wdata),
+
+        .wbuf_flush_all_o       (cmo_wbuf_flush_all),
+
+        .dir_check_o            (cmo_dir_check),
+        .dir_check_set_o        (cmo_dir_check_set),
+        .dir_check_tag_o        (cmo_dir_check_tag),
+        .dir_check_hit_way_i    (cmo_dir_check_hit_way),
+
+        .dir_inval_o            (cmo_dir_inval),
+        .dir_inval_set_o        (cmo_dir_inval_set),
+        .dir_inval_way_o        (cmo_dir_inval_way)
+    );
+    //  }}}
+
+    //  Assertions: parameter sanity checks, evaluated once in an initial block (simulation only)
+    //  {{{
+    // pragma translate_off
+    initial begin
+        req_access_width_assert:  // request width must not exceed the cache access width
+            assert (HPDCACHE_REQ_WORDS <= HPDCACHE_ACCESS_WORDS) else
+                $error("req data width shall be l.e. to cache access width");
+        refill_access_width_assert:  // cache access width must not exceed the cache-line width
+            assert (HPDCACHE_CL_WORDS >= HPDCACHE_ACCESS_WORDS) else
+                $error("cache access width shall be l.e. to cache-line width");
+        miss_mem_id_width_assert:  // memory ID must hold MSHR way width + MSHR set width bits
+            assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_WAY_WIDTH + HPDCACHE_MSHR_SET_WIDTH)) else
+                $error("insufficient ID bits on the mem interface to transport misses");
+        wbuf_mem_id_width_assert:  // memory ID must hold a write-buffer directory pointer
+            assert (HPDcacheMemIdWidth >= HPDCACHE_WBUF_DIR_PTR_WIDTH) else
+                $error("insufficient ID bits on the mem interface to transport writes");
+
+    end
+    // pragma translate_on
+    // }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv
new file mode 100644
index 0000000..d233af1
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv
@@ -0,0 +1,67 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : May, 2021
+ *  Description   : HPDcache AMO computing unit
+ *  History       :
+ */
+module hpdcache_amo
+import hpdcache_pkg::*;
+//  Ports (purely combinational unit: no clock, no reset)
+//  {{{
+(
+    input  logic [63:0]           ld_data_i,
+    input  logic [63:0]           st_data_i,
+    input  hpdcache_uc_op_t       op_i,
+    output logic [63:0]           result_o
+);
+//  }}}
+
+    //  Signed aliases of both operands (for the signed compare and the adder)
+    logic signed [63:0] ld_signed, st_signed;
+    logic signed [63:0] add_result;
+    logic               ld_gt_st_unsigned, ld_gt_st_signed;
+
+    assign ld_signed         = ld_data_i;
+    assign st_signed         = st_data_i;
+    assign ld_gt_st_unsigned = (ld_data_i > st_data_i);
+    assign ld_gt_st_signed   = (ld_signed > st_signed);
+    assign add_result        = ld_signed + st_signed;
+
+    //  The flags of op_i select the result; if no flag is set the output is zero
+    always_comb
+    begin : amo_compute_comb
+        unique case (1'b1)
+            op_i.is_amo_lr   : result_o = ld_data_i;  // LR: first operand unchanged
+            op_i.is_amo_sc   : result_o = st_data_i;  // SC: second operand unchanged
+            op_i.is_amo_swap : result_o = st_data_i;  // SWAP: second operand unchanged
+            op_i.is_amo_add  : result_o = add_result;
+            op_i.is_amo_and  : result_o = st_data_i & ld_data_i;
+            op_i.is_amo_or   : result_o = st_data_i | ld_data_i;
+            op_i.is_amo_xor  : result_o = st_data_i ^ ld_data_i;
+            op_i.is_amo_max  : result_o = ld_gt_st_signed   ? ld_data_i : st_data_i;
+            op_i.is_amo_maxu : result_o = ld_gt_st_unsigned ? ld_data_i : st_data_i;
+            op_i.is_amo_min  : result_o = ld_gt_st_signed   ? st_data_i : ld_data_i;
+            op_i.is_amo_minu : result_o = ld_gt_st_unsigned ? st_data_i : ld_data_i;
+            default          : result_o = '0;
+        endcase
+    end
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv
new file mode 100644
index 0000000..de09cd7
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv
@@ -0,0 +1,250 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : July, 2021
+ *  Description   : HPDcache Cache-Management-Operation Handler
+ *  History       :
+ */
+module hpdcache_cmo
+import hpdcache_pkg::*;
+//  Ports of the CMO handler (fence and invalidate operations, one request at a time)
+//  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,  // active-low, asynchronous reset of the FSM state only
+
+    //  Global control signals
+    //  {{{
+    input  logic                  wbuf_empty_i,
+    input  logic                  mshr_empty_i,
+    input  logic                  rtab_empty_i,
+    input  logic                  ctrl_empty_i,
+    //  }}}
+
+    //  Request interface
+    //  {{{
+    input  logic                  req_valid_i,
+    output logic                  req_ready_o,  // high only while the FSM is in CMOH_IDLE
+    input  hpdcache_cmoh_op_t     req_op_i,     // asserted (simulation) to be one-hot when req_valid_i
+    input  hpdcache_req_addr_t    req_addr_i,
+    input  hpdcache_req_data_t    req_wdata_i,  // only word 0 is used (way mask for invalidations)
+    //  }}}
+
+    //  Write Buffer Interface
+    //  {{{
+    output logic                  wbuf_flush_all_o,
+    //  }}}
+
+    //  Cache Directory Interface
+    //  {{{
+    output logic                  dir_check_o,
+    output hpdcache_set_t         dir_check_set_o,
+    output hpdcache_tag_t         dir_check_tag_o,
+    input  hpdcache_way_vector_t  dir_check_hit_way_i,
+
+    output logic                  dir_inval_o,
+    output hpdcache_set_t         dir_inval_set_o,
+    output hpdcache_way_vector_t  dir_inval_way_o
+    // }}}
+);
+//  }}}
+
+//  Definition of constants and types
+//  {{{
+    typedef enum {
+        CMOH_IDLE,
+        CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY,
+        CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY,
+        CMOH_INVAL_CHECK_NLINE,
+        CMOH_INVAL_SET
+    } hpdcache_cmoh_fsm_t;
+//  }}}
+
+//  Internal signals and registers
+//  {{{
+    hpdcache_cmoh_fsm_t   cmoh_fsm_q, cmoh_fsm_d;
+    hpdcache_cmoh_op_t    cmoh_op_q, cmoh_op_d;
+    hpdcache_req_addr_t   cmoh_addr_q, cmoh_addr_d;
+    hpdcache_way_vector_t cmoh_way_q, cmoh_way_d;
+    hpdcache_set_t        cmoh_set_cnt_q, cmoh_set_cnt_d;
+    hpdcache_nline_t      cmoh_nline_q;
+    hpdcache_tag_t        cmoh_tag_q;
+    hpdcache_set_t        cmoh_set_q;
+    hpdcache_data_word_t  cmoh_wdata;
+//  }}}
+
+//  CMO request handler FSM
+//  {{{
+    assign cmoh_nline_q =  cmoh_addr_q[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH],  // latched address without offset bits
+           cmoh_set_q   = cmoh_nline_q[0                     +: HPDCACHE_SET_WIDTH],    // low bits of the line number
+           cmoh_tag_q   = cmoh_nline_q[HPDCACHE_SET_WIDTH    +: HPDCACHE_TAG_WIDTH];    // bits above the set index
+
+    assign dir_check_set_o = cmoh_set_q,
+           dir_check_tag_o = cmoh_tag_q;
+
+    assign req_ready_o  = (cmoh_fsm_q == CMOH_IDLE);
+
+    //  Only the least significant word of the write data contains parameters
+    //  for the CMO handler
+    assign cmoh_wdata   = req_wdata_i[0];
+
+    always_comb
+    begin : cmoh_fsm_comb
+        cmoh_op_d             = cmoh_op_q;  // defaults: hold registers, deassert all outputs
+        cmoh_addr_d           = cmoh_addr_q;
+        cmoh_way_d            = cmoh_way_q;
+        cmoh_set_cnt_d        = cmoh_set_cnt_q;
+
+        dir_check_o           = 1'b0;
+
+        dir_inval_o           = 1'b0;
+        dir_inval_set_o       = cmoh_set_q;  // overridden only by the inval_all set counter
+        dir_inval_way_o       = '0;
+
+        wbuf_flush_all_o      = 1'b0;
+
+        cmoh_fsm_d            = cmoh_fsm_q;
+
+        case (cmoh_fsm_q)
+            CMOH_IDLE: begin
+                cmoh_fsm_d  = CMOH_IDLE;
+
+                if (req_valid_i) begin
+                    unique case (1'b1)
+                        req_op_i.is_fence: begin
+                            //  request to the write buffer to send all open entries
+                            wbuf_flush_all_o = rtab_empty_i;  // flush is requested only once the RTAB is empty
+
+                            //  then wait for the write buffer to be empty
+                            if (!rtab_empty_i || !wbuf_empty_i) begin
+                                cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY;
+                            end
+                        end
+                        req_op_i.is_inval_by_nline,
+                        req_op_i.is_inval_by_set,
+                        req_op_i.is_inval_all: begin
+                            cmoh_op_d      = req_op_i;  // latch the request: later states no longer read req_*_i
+                            cmoh_addr_d    = req_addr_i;
+                            cmoh_way_d     = cmoh_wdata[0 +: HPDCACHE_WAYS];  // way mask from the low bits of word 0
+                            cmoh_set_cnt_d = 0;
+                            if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
+                                if (req_op_i.is_inval_by_nline) begin
+                                    cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE;
+                                end else begin
+                                    cmoh_fsm_d = CMOH_INVAL_SET;
+                                end
+                            end else begin
+                                cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY;
+                            end
+                        end
+                        default: begin
+                            // pragma translate_off
+                            $error("cmo handler: unexpected operation");
+                            // pragma translate_on
+                        end
+                    endcase
+                end
+            end
+            CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY: begin
+                wbuf_flush_all_o = rtab_empty_i;  // keep requesting the flush while the RTAB is empty
+
+                if (wbuf_empty_i && rtab_empty_i) begin
+                    cmoh_fsm_d = CMOH_IDLE;
+                end else begin
+                    cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY;
+                end
+            end
+            CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY: begin
+                cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY;
+                if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin  // same condition as the IDLE fast path
+                    if (cmoh_op_q.is_inval_by_nline) begin
+                        cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE;
+                    end else begin
+                        cmoh_fsm_d = CMOH_INVAL_SET;
+                    end
+                end
+            end
+            CMOH_INVAL_CHECK_NLINE: begin
+                dir_check_o = 1'b1;  // single-cycle lookup; dir_check_hit_way_i is read in CMOH_INVAL_SET
+                cmoh_fsm_d  = CMOH_INVAL_SET;
+            end
+            CMOH_INVAL_SET: begin
+                cmoh_fsm_d = CMOH_INVAL_SET;  // stays here unless one of the branches below leaves
+                case (1'b1)
+                    cmoh_op_q.is_inval_by_nline: begin
+                        dir_inval_o     = |dir_check_hit_way_i;  // invalidate only if some way hit
+                        dir_inval_way_o =  dir_check_hit_way_i;
+                        cmoh_fsm_d      = CMOH_IDLE;
+                    end
+                    cmoh_op_q.is_inval_all: begin
+                        dir_inval_o     = 1'b1;
+                        dir_inval_way_o = {HPDCACHE_WAYS{1'b1}};  // all ways of the current set
+                        dir_inval_set_o = cmoh_set_cnt_q;
+                        cmoh_set_cnt_d  = cmoh_set_cnt_q + 1;  // one set per cycle, starting from 0
+                        if (cmoh_set_cnt_q == hpdcache_set_t'(HPDCACHE_SETS - 1)) begin
+                            cmoh_fsm_d = CMOH_IDLE;
+                        end
+                    end
+                    cmoh_op_q.is_inval_by_set: begin
+                        dir_inval_o     = 1'b1;
+                        dir_inval_way_o = cmoh_way_q;  // ways chosen by the latched mask; set comes from the latched address
+                        cmoh_fsm_d      = CMOH_IDLE;
+                    end
+                endcase
+            end
+        endcase
+    end
+//  }}}
+
+//  CMO request handler set state
+//  {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin
+        if (!rst_ni) begin
+            cmoh_fsm_q <= CMOH_IDLE;
+        end else begin
+            cmoh_fsm_q <= cmoh_fsm_d;
+        end
+    end
+
+    always_ff @(posedge clk_i)  // these registers have no reset; they are written when a request is accepted
+    begin
+        cmoh_op_q      <= cmoh_op_d;
+        cmoh_addr_q    <= cmoh_addr_d;
+        cmoh_way_q     <= cmoh_way_d;
+        cmoh_set_cnt_q <= cmoh_set_cnt_d;
+    end
+//  }}}
+
+//  Assertions (simulation only: excluded from synthesis by the translate_off pragma)
+//  {{{
+//  pragma translate_off
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            req_valid_i -> $onehot(req_op_i)) else
+                    $error("cmo_handler: more than one operation type requested");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            req_valid_i -> (cmoh_fsm_q == CMOH_IDLE)) else
+                    $error("cmo_handler: new request received while busy");
+//  pragma translate_on
+//  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv
new file mode 100644
index 0000000..1f8f5a4
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv
@@ -0,0 +1,171 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : September, 2023
+ *  Description   : HPDcache request arbiter
+ *  History       :
+ */
+module hpdcache_core_arbiter  // picks one of NREQUESTERS core requests and steers responses back by sid
+import hpdcache_pkg::*;
+    //  Parameters
+    //  {{{
+#(
+    parameter int NREQUESTERS = 1  // number of requester ports; sizes every unpacked port array below
+)
+    //  }}}
+
+    //  Ports
+    //  {{{
+(
+    //      Clock and reset signals
+    input  logic                          clk_i,
+    input  logic                          rst_ni,  // asynchronous, active-low (see arb_req_gnt_ff)
+
+    //      Core request interface
+    //         1st cycle (request payload, multiplexed with the combinational grant)
+    input  logic                          core_req_valid_i [NREQUESTERS-1:0],
+    output logic                          core_req_ready_o [NREQUESTERS-1:0],
+    input  hpdcache_req_t                 core_req_i       [NREQUESTERS-1:0],
+    //         2nd cycle (abort/tag/PMA, multiplexed with the registered grant)
+    input  logic                          core_req_abort_i [NREQUESTERS-1:0],
+    input  hpdcache_tag_t                 core_req_tag_i   [NREQUESTERS-1:0],
+    input  hpdcache_pma_t                 core_req_pma_i   [NREQUESTERS-1:0],
+
+    //      Core response interface (single response in, one copy per requester out)
+    input  logic                          core_rsp_valid_i,
+    input  hpdcache_rsp_t                 core_rsp_i,
+    output logic                          core_rsp_valid_o [NREQUESTERS-1:0],
+    output hpdcache_rsp_t                 core_rsp_o       [NREQUESTERS-1:0],
+
+    //      Granted request
+    output logic                          arb_req_valid_o,
+    input  logic                          arb_req_ready_i,
+    output hpdcache_req_t                 arb_req_o,
+    output logic                          arb_abort_o,
+    output hpdcache_tag_t                 arb_tag_o,
+    output hpdcache_pma_t                 arb_pma_o
+);
+
+    //  }}}
+
+    //  Declaration of internal signals (packed copies of the unpacked request ports)
+    //  {{{
+    logic          [NREQUESTERS-1:0] core_req_valid;
+    hpdcache_req_t [NREQUESTERS-1:0] core_req;
+    logic          [NREQUESTERS-1:0] core_req_abort;
+    hpdcache_tag_t [NREQUESTERS-1:0] core_req_tag;
+    hpdcache_pma_t [NREQUESTERS-1:0] core_req_pma;
+
+    logic [NREQUESTERS-1:0] arb_req_gnt_q, arb_req_gnt_d;  // _d: grant from the arbiter, _q: same grant one clock later
+    //  }}}
+
+    //  Requesters arbiter
+    //  {{{
+    //      Pack request ports
+    genvar gen_i;
+
+    generate
+        for (gen_i = 0; gen_i < int'(NREQUESTERS); gen_i++) begin : gen_core_req
+            assign core_req_ready_o[gen_i] = arb_req_gnt_d[gen_i] & arb_req_ready_i,  // ready only for the granted port
+                   core_req_valid[gen_i]   = core_req_valid_i[gen_i],
+                   core_req[gen_i]         = core_req_i[gen_i];
+
+            assign core_req_abort[gen_i]   = core_req_abort_i[gen_i],
+                   core_req_tag[gen_i]     = core_req_tag_i[gen_i],
+                   core_req_pma[gen_i]     = core_req_pma_i[gen_i];
+        end
+    endgenerate
+
+    //      Arbiter (policy lives in hpdcache_fxarb, not shown here; presumably fixed priority - confirm)
+    hpdcache_fxarb #(.N(NREQUESTERS)) req_arbiter_i
+    (
+        .clk_i,
+        .rst_ni,
+        .req_i          (core_req_valid),
+        .gnt_o          (arb_req_gnt_d),
+        .ready_i        (arb_req_ready_i)
+    );
+
+    //      Request multiplexor (selected by the current-cycle grant)
+    hpdcache_mux #(
+        .NINPUT         (NREQUESTERS),
+        .DATA_WIDTH     ($bits(hpdcache_req_t)),
+        .ONE_HOT_SEL    (1'b1)
+    ) core_req_mux_i (
+        .data_i         (core_req),
+        .sel_i          (arb_req_gnt_d),
+        .data_o         (arb_req_o)
+    );
+
+    //      Request abort multiplexor (selected by the registered grant)
+    hpdcache_mux #(
+        .NINPUT         (NREQUESTERS),
+        .DATA_WIDTH     (1),
+        .ONE_HOT_SEL    (1'b1)
+    ) core_req_abort_mux_i (
+        .data_i         (core_req_abort),
+        .sel_i          (arb_req_gnt_q),
+        .data_o         (arb_abort_o)
+    );
+
+    //      Tag Multiplexor (selected by the registered grant)
+    hpdcache_mux #(
+        .NINPUT         (NREQUESTERS),
+        .DATA_WIDTH     ($bits(hpdcache_tag_t)),
+        .ONE_HOT_SEL    (1'b1)
+    ) core_req_tag_mux_i (
+        .data_i         (core_req_tag),
+        .sel_i          (arb_req_gnt_q),
+        .data_o         (arb_tag_o)
+    );
+
+    //      PMA Multiplexor (selected by the registered grant)
+    hpdcache_mux #(
+        .NINPUT         (NREQUESTERS),
+        .DATA_WIDTH     ($bits(hpdcache_pma_t)),
+        .ONE_HOT_SEL    (1'b1)
+    ) core_req_pma_mux_i (
+        .data_i         (core_req_pma),
+        .sel_i          (arb_req_gnt_q),
+        .data_o         (arb_pma_o)
+    );
+
+    //      Save the grant signal for the tag in the next cycle
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : arb_req_gnt_ff
+        if (!rst_ni) arb_req_gnt_q <= '0;
+        else         arb_req_gnt_q <= arb_req_gnt_d;  // captured every clock, independent of arb_req_ready_i
+    end
+
+    assign arb_req_valid_o = |arb_req_gnt_d;  // valid as soon as any requester holds a grant
+    //  }}}
+
+    //  Response demultiplexor
+    //  {{{
+    always_comb
+    begin : resp_demux
+        for (int unsigned i = 0; i < NREQUESTERS; i++) begin
+            core_rsp_valid_o[i]  = core_rsp_valid_i && (i == int'(core_rsp_i.sid));  // valid only on the port whose index equals sid
+            core_rsp_o[i]        = core_rsp_i;  // payload is broadcast to every port
+        end
+    end
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv
new file mode 100755
index 0000000..19369c5
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv
@@ -0,0 +1,760 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache controller
+ *  History       :
+ */
+module hpdcache_ctrl
+    // Package imports
+    // {{{
+import hpdcache_pkg::*;
+    // }}}
+
+    // Ports
+    // {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    //      Core request interface
+    input  logic                  core_req_valid_i,
+    output logic                  core_req_ready_o,
+    input  hpdcache_req_t         core_req_i,
+    input  logic                  core_req_abort_i,
+    input  hpdcache_tag_t         core_req_tag_i,
+    input  hpdcache_pma_t         core_req_pma_i,
+
+    //      Core response interface
+    output logic                  core_rsp_valid_o,
+    output hpdcache_rsp_t         core_rsp_o,
+
+    //      Force the write buffer to send all pending writes
+    input  logic                  wbuf_flush_i,
+
+    //      Global control signals
+    output logic                  cachedir_hit_o,
+
+    //      Miss handler interface
+    output logic                  miss_mshr_check_o,
+    output mshr_set_t             miss_mshr_check_set_o,
+    output mshr_tag_t             miss_mshr_check_tag_o,
+    output logic                  miss_mshr_alloc_o,
+    output logic                  miss_mshr_alloc_cs_o,
+    input  logic                  miss_mshr_alloc_ready_i,
+    input  logic                  miss_mshr_alloc_full_i,
+    output hpdcache_nline_t       miss_mshr_alloc_nline_o,
+    output hpdcache_req_tid_t     miss_mshr_alloc_tid_o,
+    output hpdcache_req_sid_t     miss_mshr_alloc_sid_o,
+    output hpdcache_word_t        miss_mshr_alloc_word_o,
+    output logic                  miss_mshr_alloc_need_rsp_o,
+    output logic                  miss_mshr_alloc_is_prefetch_o,
+    input  logic                  miss_mshr_hit_i,
+
+    //      Refill interface
+    input  logic                  refill_req_valid_i,
+    output logic                  refill_req_ready_o,
+    input  logic                  refill_busy_i,
+    input  logic                  refill_updt_plru_i,
+    input  hpdcache_set_t         refill_set_i,
+    input  hpdcache_dir_entry_t   refill_dir_entry_i,
+    output hpdcache_way_vector_t  refill_victim_way_o,
+    input  hpdcache_way_vector_t  refill_victim_way_i,
+    input  logic                  refill_write_dir_i,
+    input  logic                  refill_write_data_i,
+    input  hpdcache_word_t        refill_word_i,
+    input  hpdcache_refill_data_t refill_data_i,
+    input  logic                  refill_core_rsp_valid_i,
+    input  hpdcache_rsp_t         refill_core_rsp_i,
+    input  hpdcache_nline_t       refill_nline_i,
+    input  logic                  refill_updt_rtab_i,
+
+    //      Write buffer interface
+    input  logic                  wbuf_empty_i,
+    output logic                  wbuf_flush_all_o,
+    output logic                  wbuf_write_o,
+    input  logic                  wbuf_write_ready_i,
+    output wbuf_addr_t            wbuf_write_addr_o,
+    output wbuf_data_t            wbuf_write_data_o,
+    output wbuf_be_t              wbuf_write_be_o,
+    output logic                  wbuf_write_uncacheable_o,
+    input  logic                  wbuf_read_hit_i,
+    output logic                  wbuf_read_flush_hit_o,
+    output hpdcache_req_addr_t    wbuf_rtab_addr_o,
+    output logic                  wbuf_rtab_is_read_o,
+    input  logic                  wbuf_rtab_hit_open_i,
+    input  logic                  wbuf_rtab_hit_pend_i,
+    input  logic                  wbuf_rtab_hit_sent_i,
+    input  logic                  wbuf_rtab_not_ready_i,
+
+    //      Uncacheable request handler
+    input  logic                  uc_busy_i,
+    output logic                  uc_lrsc_snoop_o,
+    output hpdcache_req_addr_t    uc_lrsc_snoop_addr_o,
+    output hpdcache_req_size_t    uc_lrsc_snoop_size_o,
+    output logic                  uc_req_valid_o,
+    output hpdcache_uc_op_t       uc_req_op_o,
+    output hpdcache_req_addr_t    uc_req_addr_o,
+    output hpdcache_req_size_t    uc_req_size_o,
+    output hpdcache_req_data_t    uc_req_data_o,
+    output hpdcache_req_be_t      uc_req_be_o,
+    output logic                  uc_req_uc_o,
+    output hpdcache_req_sid_t     uc_req_sid_o,
+    output hpdcache_req_tid_t     uc_req_tid_o,
+    output logic                  uc_req_need_rsp_o,
+    input  logic                  uc_wbuf_flush_all_i,
+    input  logic                  uc_dir_amo_match_i,
+    input  hpdcache_set_t         uc_dir_amo_match_set_i,
+    input  hpdcache_tag_t         uc_dir_amo_match_tag_i,
+    input  logic                  uc_dir_amo_update_plru_i,
+    output hpdcache_way_vector_t  uc_dir_amo_hit_way_o,
+    input  logic                  uc_data_amo_write_i,
+    input  logic                  uc_data_amo_write_enable_i,
+    input  hpdcache_set_t         uc_data_amo_write_set_i,
+    input  hpdcache_req_size_t    uc_data_amo_write_size_i,
+    input  hpdcache_word_t        uc_data_amo_write_word_i,
+    input  logic [63:0]           uc_data_amo_write_data_i,
+    input  logic  [7:0]           uc_data_amo_write_be_i,
+    output logic                  uc_core_rsp_ready_o,
+    input  logic                  uc_core_rsp_valid_i,
+    input  hpdcache_rsp_t         uc_core_rsp_i,
+
+    //      Cache Management Operation (CMO)
+    input  logic                  cmo_busy_i,
+    output logic                  cmo_req_valid_o,
+    output hpdcache_cmoh_op_t     cmo_req_op_o,
+    output hpdcache_req_addr_t    cmo_req_addr_o,
+    output hpdcache_req_data_t    cmo_req_wdata_o,
+    input  logic                  cmo_wbuf_flush_all_i,
+    input  logic                  cmo_dir_check_i,
+    input  hpdcache_set_t         cmo_dir_check_set_i,
+    input  hpdcache_tag_t         cmo_dir_check_tag_i,
+    output hpdcache_way_vector_t  cmo_dir_check_hit_way_o,
+    input  logic                  cmo_dir_inval_i,
+    input  hpdcache_set_t         cmo_dir_inval_set_i,
+    input  hpdcache_way_vector_t  cmo_dir_inval_way_i,
+
+    output logic                  rtab_empty_o,
+    output logic                  ctrl_empty_o,
+
+    //   Configuration signals
+    input  logic                  cfg_enable_i,
+    input  logic                  cfg_rtab_single_entry_i,
+
+    //   Performance events
+    output logic                  evt_cache_write_miss_o,
+    output logic                  evt_cache_read_miss_o,
+    output logic                  evt_uncached_req_o,
+    output logic                  evt_cmo_req_o,
+    output logic                  evt_write_req_o,
+    output logic                  evt_read_req_o,
+    output logic                  evt_prefetch_req_o,
+    output logic                  evt_req_on_hold_o,
+    output logic                  evt_rtab_rollback_o,
+    output logic                  evt_stall_refill_o,
+    output logic                  evt_stall_o
+);
+    // }}}
+
+    //  Definition of internal registers
+    //  {{{
+    logic                    st1_req_valid_q, st1_req_valid_d;
+    hpdcache_req_t           st1_req_q;
+    logic                    st1_req_rtab_q;
+    rtab_ptr_t               st1_rtab_pop_try_ptr_q;
+
+    logic                    st2_req_valid_q, st2_req_valid_d;
+    logic                    st2_req_is_prefetch_q, st2_req_is_prefetch_d;
+    logic                    st2_req_need_rsp_q;
+    hpdcache_req_addr_t      st2_req_addr_q;
+    hpdcache_req_sid_t       st2_req_sid_q;
+    hpdcache_req_tid_t       st2_req_tid_q;
+    //  }}}
+
+    //  Definition of internal signals
+    //  {{{
+    logic [1:0]              st0_arb_req;
+    logic [1:0]              st0_arb_req_grant;
+    logic                    st0_arb_ready;
+
+    logic                    st0_req_ready;
+
+    logic                    st0_req_valid;
+    hpdcache_req_t           st0_req;
+    logic                    st0_req_is_uncacheable;
+    logic                    st0_req_is_load;
+    logic                    st0_req_is_store;
+    logic                    st0_req_is_amo;
+    logic                    st0_req_is_cmo_fence;
+    logic                    st0_req_is_cmo_inval;
+    logic                    st0_req_is_cmo_prefetch;
+    logic                    st0_req_cachedir_read;
+    logic                    st0_req_cachedata_read;
+    hpdcache_set_t           st0_req_set;
+    hpdcache_word_t          st0_req_word;
+    logic                    st0_rtab_pop_try_valid;
+    logic                    st0_rtab_pop_try_ready;
+    hpdcache_req_t           st0_rtab_pop_try_req;
+    logic                    st0_rtab_pop_try_sel;
+    rtab_ptr_t               st0_rtab_pop_try_ptr;
+
+    logic                    st1_rsp_valid;
+    logic                    st1_rsp_aborted;
+    hpdcache_req_t           st1_req;
+    logic                    st1_req_abort;
+    logic                    st1_req_cachedata_write;
+    logic                    st1_req_cachedata_write_enable;
+    hpdcache_pma_t           st1_req_pma;
+    hpdcache_tag_t           st1_req_tag;
+    hpdcache_set_t           st1_req_set;
+    hpdcache_word_t          st1_req_word;
+    hpdcache_nline_t         st1_req_nline;
+    hpdcache_req_addr_t      st1_req_addr;
+    logic                    st1_req_updt_lru;
+    logic                    st1_req_is_uncacheable;
+    logic                    st1_req_is_load;
+    logic                    st1_req_is_store;
+    logic                    st1_req_is_amo;
+    logic                    st1_req_is_amo_lr;
+    logic                    st1_req_is_amo_sc;
+    logic                    st1_req_is_amo_swap;
+    logic                    st1_req_is_amo_add;
+    logic                    st1_req_is_amo_and;
+    logic                    st1_req_is_amo_or;
+    logic                    st1_req_is_amo_xor;
+    logic                    st1_req_is_amo_max;
+    logic                    st1_req_is_amo_maxu;
+    logic                    st1_req_is_amo_min;
+    logic                    st1_req_is_amo_minu;
+    logic                    st1_req_is_cmo_inval;
+    logic                    st1_req_is_cmo_fence;
+    logic                    st1_req_is_cmo_prefetch;
+    hpdcache_way_vector_t    st1_dir_hit;
+    hpdcache_req_data_t      st1_read_data;
+    logic                    st1_rtab_alloc;
+    logic                    st1_rtab_alloc_and_link;
+    logic                    st1_rtab_pop_try_commit;
+    logic                    st1_rtab_pop_try_rback;
+    logic                    st1_rtab_mshr_hit;
+    logic                    st1_rtab_mshr_full;
+    logic                    st1_rtab_mshr_ready;
+    logic                    st1_rtab_wbuf_hit;
+    logic                    st1_rtab_wbuf_not_ready;
+    logic                    st1_rtab_check;
+    logic                    st1_rtab_check_hit;
+
+    logic                    st2_req_we;
+    hpdcache_word_t          st2_req_word;
+
+    logic                    rtab_full;
+
+    logic                    hpdcache_init_ready;
+    //  }}}
+
+    //  Decoding of the request
+    //  {{{
+    //     Select between a request in the replay table and a new core request
+    assign st0_req_valid           = st0_rtab_pop_try_sel ? st0_rtab_pop_try_valid  // sel=1: replay-table entry, sel=0: core port
+                                                          : core_req_valid_i,
+           st0_req.addr_offset     = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_offset
+                                                          : core_req_i.addr_offset,
+           st0_req.addr_tag        = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_tag
+                                                          : core_req_i.addr_tag,
+           st0_req.wdata           = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.wdata
+                                                          : core_req_i.wdata,
+           st0_req.op              = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.op
+                                                          : core_req_i.op,
+           st0_req.be              = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.be
+                                                          : core_req_i.be,
+           st0_req.size            = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.size
+                                                          : core_req_i.size,
+           st0_req.sid             = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.sid
+                                                          : core_req_i.sid,
+           st0_req.tid             = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.tid
+                                                          : core_req_i.tid,
+           st0_req.need_rsp        = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.need_rsp
+                                                          : core_req_i.need_rsp,
+           st0_req.phys_indexed    = st0_rtab_pop_try_sel ? 1'b1  // a replayed request is always marked physically indexed
+                                                          : core_req_i.phys_indexed,
+           st0_req.pma             = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.pma
+                                                          : core_req_i.pma;
+
+    //     Decode operation in stage 0
+    assign st0_req_is_uncacheable  = ~cfg_enable_i | ( st0_req.phys_indexed  // cache disabled => every request is uncacheable
+                                                     & st0_req.pma.uncacheable),  // the PMA is only consulted when phys_indexed is set
+           st0_req_is_load         =         is_load(st0_req.op),
+           st0_req_is_store        =        is_store(st0_req.op),
+           st0_req_is_amo          =          is_amo(st0_req.op),
+           st0_req_is_cmo_fence    =    is_cmo_fence(st0_req.op, st0_req.size),  // the CMO helpers take op and size
+           st0_req_is_cmo_inval    =    is_cmo_inval(st0_req.op, st0_req.size),
+           st0_req_is_cmo_prefetch = is_cmo_prefetch(st0_req.op, st0_req.size);
+
+    //     Decode operation in stage 1
+
+    //         In case of replay or physically-indexed cache, the tag and PMA come
+    //         from stage 0. Otherwise, this information comes directly from the
+    //         requester in stage 1
+    assign st1_req_tag             = st1_req_q.phys_indexed ? st1_req_q.addr_tag : core_req_tag_i,  // registered tag vs. tag supplied in this cycle
+           st1_req_pma             = st1_req_q.phys_indexed ? st1_req_q.pma      : core_req_pma_i;
+
+    assign st1_req.addr_offset     = st1_req_q.addr_offset,
+           st1_req.addr_tag        = st1_req_rtab_q ? st1_req_q.addr_tag : st1_req_tag,  // replayed request keeps its registered tag
+           st1_req.wdata           = st1_req_q.wdata,
+           st1_req.op              = st1_req_q.op,
+           st1_req.be              = st1_req_q.be,
+           st1_req.size            = st1_req_q.size,
+           st1_req.sid             = st1_req_q.sid,
+           st1_req.tid             = st1_req_q.tid,
+           st1_req.need_rsp        = st1_req_q.need_rsp,
+           st1_req.phys_indexed    = st1_req_q.phys_indexed,
+           st1_req.pma             = st1_req_rtab_q ? st1_req_q.pma : st1_req_pma;  // replayed request keeps its registered PMA
+
+    //         A requester can ask to abort a request it initiated on the
+    //         previous cycle (stage 0). Useful in case of TLB miss for example
+    assign st1_req_abort           = core_req_abort_i & ~st1_req.phys_indexed;  // abort is masked for physically-indexed requests
+
+    assign st1_req_is_uncacheable  = ~cfg_enable_i | st1_req.pma.uncacheable,  // unlike stage 0, not gated by phys_indexed
+           st1_req_is_load         =         is_load(st1_req.op),
+           st1_req_is_store        =        is_store(st1_req.op),
+           st1_req_is_amo          =          is_amo(st1_req.op),
+           st1_req_is_amo_lr       =       is_amo_lr(st1_req.op),
+           st1_req_is_amo_sc       =       is_amo_sc(st1_req.op),
+           st1_req_is_amo_swap     =     is_amo_swap(st1_req.op),
+           st1_req_is_amo_add      =      is_amo_add(st1_req.op),
+           st1_req_is_amo_and      =      is_amo_and(st1_req.op),
+           st1_req_is_amo_or       =       is_amo_or(st1_req.op),
+           st1_req_is_amo_xor      =      is_amo_xor(st1_req.op),
+           st1_req_is_amo_max      =      is_amo_max(st1_req.op),
+           st1_req_is_amo_maxu     =     is_amo_maxu(st1_req.op),
+           st1_req_is_amo_min      =      is_amo_min(st1_req.op),
+           st1_req_is_amo_minu     =     is_amo_minu(st1_req.op),
+           st1_req_is_cmo_inval    =    is_cmo_inval(st1_req.op, st1_req.size),
+           st1_req_is_cmo_fence    =    is_cmo_fence(st1_req.op, st1_req.size),
+           st1_req_is_cmo_prefetch = is_cmo_prefetch(st1_req.op, st1_req.size);
+    //  }}}
+
+    //  Refill arbiter: it arbitrates between normal requests (from the core,
+    //  coprocessor, prefetch) and refill requests (from the miss handler).
+    //
+    //  TODO This arbiter could be replaced by a weighted-round-robin arbiter.
+    //  This way we could distribute the bandwidth asymmetrically between the core
+    //  and the refill interfaces.
+    //  {{{
+    hpdcache_rrarb #(.N(2)) st0_arb_i  // two inputs: [0] core/replay request, [1] refill request
+    (
+        .clk_i,
+        .rst_ni,
+        .req_i                              (st0_arb_req),
+        .gnt_o                              (st0_arb_req_grant),
+        .ready_i                            (st0_arb_ready)
+    );
+
+    //      The arbiter can cycle the priority token when:
+    //      - The granted request is consumed (req_grant &  req_valid & req_ready)
+    //      - The granted request is aborted  (req_grant & ~req_valid)
+    assign st0_arb_ready  = ((st0_arb_req_grant[0] &     st0_req_valid   &    st0_req_ready  ) |  // core/replay consumed
+                             (st0_arb_req_grant[1] &  refill_req_valid_i & refill_req_ready_o) |  // refill consumed
+                             (st0_arb_req_grant[0] &    ~st0_req_valid  ) |  // core/replay granted but no longer valid
+                             (st0_arb_req_grant[1] & ~refill_req_valid_i));  // refill granted but no longer valid
+
+    assign st0_arb_req[0] = st0_req_valid,
+           st0_arb_req[1] = refill_req_valid_i;
+
+    assign core_req_ready_o       = st0_req_ready & ~st0_rtab_pop_try_sel,  // core port is ready only when no replay is selected
+           st0_rtab_pop_try_ready = st0_req_ready &  st0_rtab_pop_try_sel;  // replay table is popped only when it is selected
+
+    //      Trigger an event signal when the pipeline is stalled (new request is not consumed)
+    assign evt_stall_o        = core_req_valid_i & ~core_req_ready_o;  // counts core-port requests only, not replays
+    //  }}}
+
+    //  Cache controller protocol engine
+    //  {{{
+    hpdcache_ctrl_pe hpdcache_ctrl_pe_i(  // ports written as .name connect to the identically named signal of this module
+        .arb_st0_req_valid_i                (st0_req_valid & st0_arb_req_grant[0]),  // request is seen only while arbiter input 0 is granted
+        .arb_st0_req_ready_o                (st0_req_ready),
+        .arb_refill_valid_i                 (refill_req_valid_i & st0_arb_req_grant[1]),  // refill is seen only while arbiter input 1 is granted
+        .arb_refill_ready_o                 (refill_req_ready_o),
+        .st0_req_is_uncacheable_i           (st0_req_is_uncacheable),
+        .st0_req_need_rsp_i                 (st0_req.need_rsp),
+        .st0_req_is_load_i                  (st0_req_is_load),
+        .st0_req_is_store_i                 (st0_req_is_store),
+        .st0_req_is_amo_i                   (st0_req_is_amo),
+        .st0_req_is_cmo_fence_i             (st0_req_is_cmo_fence),
+        .st0_req_is_cmo_inval_i             (st0_req_is_cmo_inval),
+        .st0_req_is_cmo_prefetch_i          (st0_req_is_cmo_prefetch),
+        .st0_req_mshr_check_o               (miss_mshr_check_o),
+        .st0_req_cachedir_read_o            (st0_req_cachedir_read),
+        .st0_req_cachedata_read_o           (st0_req_cachedata_read),
+
+        .st1_req_valid_i                    (st1_req_valid_q),
+        .st1_req_abort_i                    (st1_req_abort),
+        .st1_req_rtab_i                     (st1_req_rtab_q),
+        .st1_req_is_uncacheable_i           (st1_req_is_uncacheable),
+        .st1_req_need_rsp_i                 (st1_req.need_rsp),
+        .st1_req_is_load_i                  (st1_req_is_load),
+        .st1_req_is_store_i                 (st1_req_is_store),
+        .st1_req_is_amo_i                   (st1_req_is_amo),
+        .st1_req_is_cmo_inval_i             (st1_req_is_cmo_inval),
+        .st1_req_is_cmo_fence_i             (st1_req_is_cmo_fence),
+        .st1_req_is_cmo_prefetch_i          (st1_req_is_cmo_prefetch),
+        .st1_req_valid_o                    (st1_req_valid_d),  // next value of the stage-1 valid register
+        .st1_rsp_valid_o                    (st1_rsp_valid),
+        .st1_rsp_aborted_o                  (st1_rsp_aborted),
+        .st1_req_cachedir_updt_lru_o        (st1_req_updt_lru),
+        .st1_req_cachedata_write_o          (st1_req_cachedata_write),
+        .st1_req_cachedata_write_enable_o   (st1_req_cachedata_write_enable),
+
+        .st2_req_valid_i                    (st2_req_valid_q),
+        .st2_req_is_prefetch_i              (st2_req_is_prefetch_q),
+        .st2_req_valid_o                    (st2_req_valid_d),  // next value of the stage-2 valid register
+        .st2_req_we_o                       (st2_req_we),  // write enable of the stage-2 payload registers
+        .st2_req_is_prefetch_o              (st2_req_is_prefetch_d),
+        .st2_req_mshr_alloc_o               (miss_mshr_alloc_o),
+        .st2_req_mshr_alloc_cs_o            (miss_mshr_alloc_cs_o),
+
+        .rtab_full_i                        (rtab_full),
+        .rtab_req_valid_i                   (st0_rtab_pop_try_valid),
+        .rtab_sel_o                         (st0_rtab_pop_try_sel),  // drives the replay/core selection of the stage-0 request
+        .rtab_check_o                       (st1_rtab_check),
+        .rtab_check_hit_i                   (st1_rtab_check_hit),
+        .st1_rtab_alloc_o                   (st1_rtab_alloc),
+        .st1_rtab_alloc_and_link_o          (st1_rtab_alloc_and_link),
+        .st1_rtab_commit_o                  (st1_rtab_pop_try_commit),
+        .st1_rtab_rback_o                   (st1_rtab_pop_try_rback),
+        .st1_rtab_mshr_hit_o                (st1_rtab_mshr_hit),
+        .st1_rtab_mshr_full_o               (st1_rtab_mshr_full),
+        .st1_rtab_mshr_ready_o              (st1_rtab_mshr_ready),
+        .st1_rtab_wbuf_hit_o                (st1_rtab_wbuf_hit),
+        .st1_rtab_wbuf_not_ready_o          (st1_rtab_wbuf_not_ready),
+
+        .cachedir_hit_i                     (cachedir_hit_o),  // this module's own output port is fed back as an input
+        .cachedir_init_ready_i              (hpdcache_init_ready),
+
+        .mshr_alloc_ready_i                 (miss_mshr_alloc_ready_i),
+        .mshr_hit_i                         (miss_mshr_hit_i),
+        .mshr_full_i                        (miss_mshr_alloc_full_i),
+
+        .refill_busy_i,
+        .refill_core_rsp_valid_i,
+
+        .wbuf_write_valid_o                 (wbuf_write_o),
+        .wbuf_write_ready_i,
+        .wbuf_read_hit_i,
+        .wbuf_write_uncacheable_o,
+        .wbuf_read_flush_hit_o,
+
+        .uc_busy_i,
+        .uc_req_valid_o,
+        .uc_core_rsp_ready_o,
+
+        .cmo_busy_i,
+        .cmo_req_valid_o,
+
+        .evt_cache_write_miss_o,
+        .evt_cache_read_miss_o,
+        .evt_uncached_req_o,
+        .evt_cmo_req_o,
+        .evt_write_req_o,
+        .evt_read_req_o,
+        .evt_prefetch_req_o,
+        .evt_req_on_hold_o,
+        .evt_rtab_rollback_o,
+        .evt_stall_refill_o
+    );
+
+    assign ctrl_empty_o = ~(st1_req_valid_q | st2_req_valid_q);  // high when neither stage 1 nor stage 2 holds a valid request
+    //  }}}
+
+    //  Replay table
+    //  {{{
+    hpdcache_rtab #(
+        .rtab_entry_t                       (hpdcache_req_t)  // type parameter: the entry type is the full request struct
+    ) hpdcache_rtab_i(
+        .clk_i,
+        .rst_ni,
+
+        .empty_o                            (rtab_empty_o),
+        .full_o                             (rtab_full),
+
+        .check_i                            (st1_rtab_check),
+        .check_nline_i                      (st1_req_nline),
+        .check_hit_o                        (st1_rtab_check_hit),
+
+        .alloc_i                            (st1_rtab_alloc),
+        .alloc_and_link_i                   (st1_rtab_alloc_and_link),
+        .alloc_req_i                        (st1_req),  // the stage-1 request is what gets stored
+        .alloc_mshr_hit_i                   (st1_rtab_mshr_hit),
+        .alloc_mshr_full_i                  (st1_rtab_mshr_full),
+        .alloc_mshr_ready_i                 (st1_rtab_mshr_ready),
+        .alloc_wbuf_hit_i                   (st1_rtab_wbuf_hit),
+        .alloc_wbuf_not_ready_i             (st1_rtab_wbuf_not_ready),
+
+        .pop_try_valid_o                    (st0_rtab_pop_try_valid),
+        .pop_try_i                          (st0_rtab_pop_try_ready),
+        .pop_try_req_o                      (st0_rtab_pop_try_req),
+        .pop_try_ptr_o                      (st0_rtab_pop_try_ptr),
+
+        .pop_commit_i                       (st1_rtab_pop_try_commit),
+        .pop_commit_ptr_i                   (st1_rtab_pop_try_ptr_q),  // commit and rollback share the pointer registered in stage 1
+
+        .pop_rback_i                        (st1_rtab_pop_try_rback),
+        .pop_rback_ptr_i                    (st1_rtab_pop_try_ptr_q),
+        .pop_rback_mshr_hit_i               (st1_rtab_mshr_hit),  // rollback reuses the same st1_rtab_* flags as alloc_*
+        .pop_rback_mshr_full_i              (st1_rtab_mshr_full),
+        .pop_rback_mshr_ready_i             (st1_rtab_mshr_ready),
+        .pop_rback_wbuf_hit_i               (st1_rtab_wbuf_hit),
+        .pop_rback_wbuf_not_ready_i         (st1_rtab_wbuf_not_ready),
+
+        .wbuf_addr_o                        (wbuf_rtab_addr_o),
+        .wbuf_is_read_o                     (wbuf_rtab_is_read_o),
+        .wbuf_hit_open_i                    (wbuf_rtab_hit_open_i),
+        .wbuf_hit_pend_i                    (wbuf_rtab_hit_pend_i),
+        .wbuf_hit_sent_i                    (wbuf_rtab_hit_sent_i),
+        .wbuf_not_ready_i                   (wbuf_rtab_not_ready_i),
+
+        .miss_ready_i                       (miss_mshr_alloc_ready_i),
+
+        .refill_i                           (refill_updt_rtab_i),
+        .refill_nline_i,
+
+        .cfg_single_entry_i                 (cfg_rtab_single_entry_i)
+    );
+    //  }}}
+
+    //  Pipeline stage 1 registers
+    //  {{{
+    always_ff @(posedge clk_i)  // payload register: no reset term in the sensitivity list
+    begin : st1_req_payload_ff
+        if (st0_req_ready) begin  // st1_req_q holds its value unless st0_req_ready is set
+            st1_req_q <= st0_req;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)  // asynchronous, active-low reset
+    begin : st1_req_valid_ff
+        if (!rst_ni) begin
+            st1_req_valid_q        <= 1'b0;
+            st1_req_rtab_q         <= 1'b0;
+            st1_rtab_pop_try_ptr_q <= '0;
+        end else begin
+            st1_req_valid_q <= st1_req_valid_d;  // valid bit is updated on every clock edge
+            if (st0_req_ready) begin  // remaining fields only change when st0_req_ready is set
+                st1_req_rtab_q <= st0_rtab_pop_try_sel;
+                if (st0_rtab_pop_try_sel) begin  // pointer captured only when pop_try_sel is set
+                    st1_rtab_pop_try_ptr_q <= st0_rtab_pop_try_ptr;
+                end
+            end
+        end
+    end
+    //  }}}
+
+    //  Pipeline stage 2 registers
+    //  {{{
+    always_ff @(posedge clk_i)  // payload register: no reset term in the sensitivity list
+    begin : st2_req_payload_ff
+        if (st2_req_we) begin  // stage-1 request fields are copied only under st2_req_we
+            st2_req_need_rsp_q <= st1_req.need_rsp;
+            st2_req_addr_q     <= st1_req_addr;
+            st2_req_sid_q      <= st1_req.sid;
+            st2_req_tid_q      <= st1_req.tid;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)  // asynchronous, active-low reset
+    begin : st2_req_valid_ff
+        if (!rst_ni) begin
+            st2_req_valid_q       <= 1'b0;
+            st2_req_is_prefetch_q <= 1'b0;
+        end else begin  // both flags follow their *_d next-state values every cycle
+            st2_req_valid_q       <= st2_req_valid_d;
+            st2_req_is_prefetch_q <= st2_req_is_prefetch_d;
+        end
+    end
+    //  }}}
+
+    //  Controller for the HPDcache directory and data memory arrays
+    //  {{{
+    assign st0_req_set   = hpdcache_get_req_offset_set(st0_req.addr_offset),  // stage 0: from offset
+           st0_req_word  = hpdcache_get_req_offset_word(st0_req.addr_offset),
+           st1_req_set   = hpdcache_get_req_offset_set(st1_req.addr_offset),  // stage 1: from offset
+           st1_req_word  = hpdcache_get_req_offset_word(st1_req.addr_offset),
+           st1_req_addr  = {st1_req.addr_tag, st1_req.addr_offset},  // tag in MSBs, offset in LSBs
+           st1_req_nline = hpdcache_get_req_addr_nline(st1_req_addr),
+           st2_req_word  = hpdcache_get_req_addr_word(st2_req_addr_q);  // stage 2: from full address
+
+    hpdcache_memctrl hpdcache_memctrl_i (  // ports grouped below: dir_* then data_*
+        .clk_i,
+        .rst_ni,
+
+        .ready_o                       (hpdcache_init_ready),
+
+        .dir_match_i                   (st0_req_cachedir_read),
+        .dir_match_set_i               (st0_req_set),  // set index comes from stage 0 ...
+        .dir_match_tag_i               (st1_req.addr_tag),  // ... while the tag comes from stage 1
+        .dir_update_lru_i              (st1_req_updt_lru),
+        .dir_hit_way_o                 (st1_dir_hit),
+
+        .dir_amo_match_i               (uc_dir_amo_match_i),
+        .dir_amo_match_set_i           (uc_dir_amo_match_set_i),
+        .dir_amo_match_tag_i           (uc_dir_amo_match_tag_i),
+        .dir_amo_update_plru_i         (uc_dir_amo_update_plru_i),
+        .dir_amo_hit_way_o             (uc_dir_amo_hit_way_o),
+
+        .dir_refill_i                  (refill_write_dir_i),
+        .dir_refill_set_i              (refill_set_i),
+        .dir_refill_entry_i            (refill_dir_entry_i),
+        .dir_refill_updt_plru_i        (refill_updt_plru_i),
+        .dir_victim_way_o              (refill_victim_way_o),
+
+        .dir_cmo_check_i               (cmo_dir_check_i),
+        .dir_cmo_check_set_i           (cmo_dir_check_set_i),
+        .dir_cmo_check_tag_i           (cmo_dir_check_tag_i),
+        .dir_cmo_check_hit_way_o       (cmo_dir_check_hit_way_o),
+
+        .dir_cmo_inval_i               (cmo_dir_inval_i),
+        .dir_cmo_inval_set_i           (cmo_dir_inval_set_i),
+        .dir_cmo_inval_way_i           (cmo_dir_inval_way_i),
+
+        .data_req_read_i               (st0_req_cachedata_read),  // reads are driven by stage 0
+        .data_req_read_set_i           (st0_req_set),
+        .data_req_read_size_i          (st0_req.size),
+        .data_req_read_word_i          (st0_req_word),
+        .data_req_read_data_o          (st1_read_data),
+
+        .data_req_write_i              (st1_req_cachedata_write),  // writes are driven by stage 1
+        .data_req_write_enable_i       (st1_req_cachedata_write_enable),
+        .data_req_write_set_i          (st1_req_set),
+        .data_req_write_size_i         (st1_req.size),
+        .data_req_write_word_i         (st1_req_word),
+        .data_req_write_data_i         (st1_req.wdata),
+        .data_req_write_be_i           (st1_req.be),
+
+        .data_amo_write_i              (uc_data_amo_write_i),
+        .data_amo_write_enable_i       (uc_data_amo_write_enable_i),
+        .data_amo_write_set_i          (uc_data_amo_write_set_i),
+        .data_amo_write_size_i         (uc_data_amo_write_size_i),
+        .data_amo_write_word_i         (uc_data_amo_write_word_i),
+        .data_amo_write_data_i         (uc_data_amo_write_data_i),
+        .data_amo_write_be_i           (uc_data_amo_write_be_i),
+
+        .data_refill_i                 (refill_write_data_i),
+        .data_refill_way_i             (refill_victim_way_i),
+        .data_refill_set_i             (refill_set_i),
+        .data_refill_word_i            (refill_word_i),
+        .data_refill_data_i            (refill_data_i)
+    );
+
+    assign cachedir_hit_o = |st1_dir_hit;  // hit when any bit of the hit-way vector is set
+    //  }}}
+
+    //  Write buffer outputs
+    //  {{{
+    assign wbuf_write_addr_o = st1_req_addr,  // write buffer payload comes from stage 1
+           wbuf_write_data_o = st1_req.wdata,
+           wbuf_write_be_o   = st1_req.be,
+           wbuf_flush_all_o  = cmo_wbuf_flush_all_i | uc_wbuf_flush_all_i | wbuf_flush_i;  // any source
+    //  }}}
+
+    //  Miss handler outputs
+    //  {{{
+    assign miss_mshr_check_set_o =  // MSHR set is sliced from the stage-0 address offset
+            st0_req.addr_offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_MSHR_SET_WIDTH];
+    assign miss_mshr_check_tag_o =  // MSHR tag is sliced from the stage-1 nline, above the set bits
+            st1_req_nline[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH];
+
+    assign miss_mshr_alloc_nline_o       = hpdcache_get_req_addr_nline(st2_req_addr_q),  // stage 2
+           miss_mshr_alloc_tid_o         = st2_req_tid_q,
+           miss_mshr_alloc_sid_o         = st2_req_sid_q,
+           miss_mshr_alloc_word_o        = st2_req_word,
+           miss_mshr_alloc_need_rsp_o    = st2_req_need_rsp_q,
+           miss_mshr_alloc_is_prefetch_o = st2_req_is_prefetch_q;
+    //  }}}
+
+    //  Uncacheable request handler outputs
+    //  {{{
+    assign uc_lrsc_snoop_o           = st1_req_valid_q & st1_req_is_store,  // valid stage-1 store
+           uc_lrsc_snoop_addr_o      = st1_req_addr,
+           uc_lrsc_snoop_size_o      = st1_req.size,
+           uc_req_addr_o             = st1_req_addr,  // request payload is forwarded from stage 1
+           uc_req_size_o             = st1_req.size,
+           uc_req_data_o             = st1_req.wdata,
+           uc_req_be_o               = st1_req.be,
+           uc_req_uc_o               = st1_req_is_uncacheable,
+           uc_req_sid_o              = st1_req.sid,
+           uc_req_tid_o              = st1_req.tid,
+           uc_req_need_rsp_o         = st1_req.need_rsp,
+           uc_req_op_o.is_ld         = st1_req_is_load,  // one flag per decoded stage-1 operation
+           uc_req_op_o.is_st         = st1_req_is_store,
+           uc_req_op_o.is_amo_lr     = st1_req_is_amo_lr,
+           uc_req_op_o.is_amo_sc     = st1_req_is_amo_sc,
+           uc_req_op_o.is_amo_swap   = st1_req_is_amo_swap,
+           uc_req_op_o.is_amo_add    = st1_req_is_amo_add,
+           uc_req_op_o.is_amo_and    = st1_req_is_amo_and,
+           uc_req_op_o.is_amo_or     = st1_req_is_amo_or,
+           uc_req_op_o.is_amo_xor    = st1_req_is_amo_xor,
+           uc_req_op_o.is_amo_max    = st1_req_is_amo_max,
+           uc_req_op_o.is_amo_maxu   = st1_req_is_amo_maxu,
+           uc_req_op_o.is_amo_min    = st1_req_is_amo_min,
+           uc_req_op_o.is_amo_minu   = st1_req_is_amo_minu;
+    //  }}}
+
+    //  CMO request handler outputs
+    //  {{{
+    assign cmo_req_addr_o                 = st1_req_addr,  // CMO payload comes from stage 1
+           cmo_req_wdata_o                = st1_req.wdata,
+           cmo_req_op_o.is_fence          = st1_req_is_cmo_fence,
+           cmo_req_op_o.is_inval_by_nline = st1_req_is_cmo_inval &  // sub-type is decoded from size
+                                            is_cmo_inval_by_nline(st1_req.size),
+           cmo_req_op_o.is_inval_by_set   = st1_req_is_cmo_inval &
+                                            is_cmo_inval_by_set(st1_req.size),
+           cmo_req_op_o.is_inval_all      = st1_req_is_cmo_inval &
+                                            is_cmo_inval_all(st1_req.size);
+    //  }}}
+
+    //  Control of the response to the core
+    //  {{{
+    assign core_rsp_valid_o   = refill_core_rsp_valid_i                     |
+                                (uc_core_rsp_valid_i & uc_core_rsp_ready_o) |  // UC needs ready too
+                                st1_rsp_valid,
+           core_rsp_o.rdata   = (refill_core_rsp_valid_i ? refill_core_rsp_i.rdata :  // 1st: refill
+                                (uc_core_rsp_valid_i     ? uc_core_rsp_i.rdata     :  // 2nd: UC
+                                st1_read_data)),  // otherwise: data read by the pipeline
+           core_rsp_o.sid     = (refill_core_rsp_valid_i ? refill_core_rsp_i.sid   :
+                                (uc_core_rsp_valid_i     ? uc_core_rsp_i.sid       :
+                                st1_req.sid)),
+           core_rsp_o.tid     = (refill_core_rsp_valid_i ? refill_core_rsp_i.tid   :
+                                (uc_core_rsp_valid_i     ? uc_core_rsp_i.tid       :
+                                st1_req.tid)),
+           core_rsp_o.error   = (refill_core_rsp_valid_i ? refill_core_rsp_i.error :
+                                (uc_core_rsp_valid_i     ? uc_core_rsp_i.error     :
+                                /* FIXME */1'b0)),  // pipeline responses never report an error
+           core_rsp_o.aborted = st1_rsp_aborted;  // driven by stage 1 whatever the response source
+    //  }}}
+
+    //  Assertions
+    //  pragma translate_off
+    //  {{{
+    assert property (@(posedge clk_i) disable iff (!rst_ni)  // not checked while in reset
+            $onehot0({core_req_ready_o, st0_rtab_pop_try_ready, refill_req_ready_o})) else  // <= 1 set
+                    $error("ctrl: only one request can be served per cycle");
+    //  }}}
+    //  pragma translate_on
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv
new file mode 100755
index 0000000..13b4f58
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv
@@ -0,0 +1,620 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Control Protocol Engine
+ *  History       :
+ */
+module hpdcache_ctrl_pe
+    // Ports
+    // {{{
+(
+    //   Refill arbiter
+    //   {{{
+    input  logic                   arb_st0_req_valid_i,
+    output logic                   arb_st0_req_ready_o,
+    input  logic                   arb_refill_valid_i,
+    output logic                   arb_refill_ready_o,
+    //   }}}
+
+    //   Pipeline stage 0
+    //   {{{
+    input  logic                   st0_req_is_uncacheable_i,
+    input  logic                   st0_req_need_rsp_i,
+    input  logic                   st0_req_is_load_i,
+    input  logic                   st0_req_is_store_i,
+    input  logic                   st0_req_is_amo_i,
+    input  logic                   st0_req_is_cmo_fence_i,
+    input  logic                   st0_req_is_cmo_inval_i,
+    input  logic                   st0_req_is_cmo_prefetch_i,
+    output logic                   st0_req_mshr_check_o,
+    output logic                   st0_req_cachedir_read_o,
+    output logic                   st0_req_cachedata_read_o,
+    //   }}}
+
+    //   Pipeline stage 1
+    //   {{{
+    input  logic                   st1_req_valid_i,
+    input  logic                   st1_req_abort_i,
+    input  logic                   st1_req_rtab_i,
+    input  logic                   st1_req_is_uncacheable_i,
+    input  logic                   st1_req_need_rsp_i,
+    input  logic                   st1_req_is_load_i,
+    input  logic                   st1_req_is_store_i,
+    input  logic                   st1_req_is_amo_i,
+    input  logic                   st1_req_is_cmo_inval_i,
+    input  logic                   st1_req_is_cmo_fence_i,
+    input  logic                   st1_req_is_cmo_prefetch_i,
+    output logic                   st1_req_valid_o,
+    output logic                   st1_rsp_valid_o,
+    output logic                   st1_rsp_aborted_o,
+    output logic                   st1_req_cachedir_updt_lru_o,
+    output logic                   st1_req_cachedata_write_o,
+    output logic                   st1_req_cachedata_write_enable_o,
+    //   }}}
+
+    //   Pipeline stage 2
+    //   {{{
+    input  logic                   st2_req_valid_i,
+    input  logic                   st2_req_is_prefetch_i,
+    output logic                   st2_req_valid_o,
+    output logic                   st2_req_we_o,
+    output logic                   st2_req_is_prefetch_o,
+    output logic                   st2_req_mshr_alloc_o,
+    output logic                   st2_req_mshr_alloc_cs_o,
+    //   }}}
+
+    //   Replay
+    //   {{{
+    input  logic                   rtab_full_i,
+    input  logic                   rtab_req_valid_i,
+    output logic                   rtab_sel_o,
+    output logic                   rtab_check_o,
+    input  logic                   rtab_check_hit_i,
+    output logic                   st1_rtab_alloc_o,
+    output logic                   st1_rtab_alloc_and_link_o,
+    output logic                   st1_rtab_commit_o,
+    output logic                   st1_rtab_rback_o,
+    output logic                   st1_rtab_mshr_hit_o,
+    output logic                   st1_rtab_mshr_full_o,
+    output logic                   st1_rtab_mshr_ready_o,
+    output logic                   st1_rtab_wbuf_hit_o,
+    output logic                   st1_rtab_wbuf_not_ready_o,
+    //   }}}
+
+    //   Cache directory
+    //   {{{
+    input  logic                   cachedir_hit_i,
+    input  logic                   cachedir_init_ready_i,
+    //   }}}
+
+    //   Miss Status Holding Register (MSHR)
+    //   {{{
+    input  logic                   mshr_alloc_ready_i,
+    input  logic                   mshr_hit_i,
+    input  logic                   mshr_full_i,
+    //   }}}
+
+    //   Refill interface
+    //   {{{
+    input  logic                   refill_busy_i,
+    input  logic                   refill_core_rsp_valid_i,
+    //   }}}
+
+    //   Write buffer
+    //   {{{
+    input  logic                   wbuf_write_ready_i,
+    input  logic                   wbuf_read_hit_i,
+    output logic                   wbuf_write_valid_o,
+    output logic                   wbuf_write_uncacheable_o,
+    output logic                   wbuf_read_flush_hit_o,
+    //   }}}
+
+    //   Uncacheable request handler
+    //   {{{
+    input  logic                   uc_busy_i,
+    output logic                   uc_req_valid_o,
+    output logic                   uc_core_rsp_ready_o,
+    //   }}}
+
+    //   Cache Management Operation (CMO)
+    //   {{{
+    input  logic                   cmo_busy_i,
+    output logic                   cmo_req_valid_o,
+    //   }}}
+
+    //   Performance events
+    //   {{{
+    output logic                   evt_cache_write_miss_o,
+    output logic                   evt_cache_read_miss_o,
+    output logic                   evt_uncached_req_o,
+    output logic                   evt_cmo_req_o,
+    output logic                   evt_write_req_o,
+    output logic                   evt_read_req_o,
+    output logic                   evt_prefetch_req_o,
+    output logic                   evt_req_on_hold_o,
+    output logic                   evt_rtab_rollback_o,
+    output logic                   evt_stall_refill_o
+    //   }}}
+);
+    // }}}
+
+    //  Definition of internal signals
+    //  {{{
+    logic  st1_fence;
+    logic  st1_rtab_alloc, st1_rtab_alloc_and_link;
+    //  }}}
+
+    //  Global control signals
+    //  {{{
+
+    //  Determine if the new request is a "fence". Here, fence instructions are
+    //  considered those that need to be executed in program order
+    //  (irrespective of addresses). This means that all memory operations
+    //  that arrived before the "fence" instruction need to be finished, and
+    //  only then is the "fence" instruction executed. In the same manner, all
+    //  instructions following the "fence" need to wait for its completion
+    //  before being executed.
+    assign st1_fence = st1_req_is_uncacheable_i |
+                       st1_req_is_cmo_fence_i   |
+                       st1_req_is_cmo_inval_i   |
+                       st1_req_is_amo_i;
+    //  }}}
+
+    //  Arbitration of responses to the core
+    //  {{{
+    assign uc_core_rsp_ready_o = ~refill_core_rsp_valid_i;  // UC waits while a refill rsp is valid
+    //  }}}
+
+    //  Arbiter between core or replay request.
+    //  {{{
+    //      Take the replay request when:
+    //      - The replay table is full.
+    //      - The replay table has a ready request (request with all dependencies solved)
+    //      - There is a fence-like request in stage 1, or the CMO or uncached/AMO handler is busy
+    //
+    //      IMPORTANT: When the replay table is full, the cache cannot accept new core
+    //      requests because this can introduce a dead-lock: if the core request needs to
+    //      be put on hold, as there is no place in the replay table, the pipeline needs to
+    //      stall. If the pipeline is stalled, dependencies of on-hold requests cannot be
+    //      solved, and the system is locked.
+    assign rtab_sel_o = rtab_full_i                   |
+                        rtab_req_valid_i              |
+                        (st1_req_valid_i & st1_fence) |
+                        cmo_busy_i                    |
+                        uc_busy_i;
+    //  }}}
+
+    //  Replay logic
+    //  {{{
+    //      Replay table allocation
+    assign st1_rtab_alloc_o          = st1_rtab_alloc          & ~st1_req_rtab_i,  // new entry
+           st1_rtab_alloc_and_link_o = st1_rtab_alloc_and_link,
+           st1_rtab_rback_o          = st1_rtab_alloc          &  st1_req_rtab_i;  // replayed request
+
+    //      Performance event
+    assign evt_req_on_hold_o   = st1_rtab_alloc | st1_rtab_alloc_and_link,  // includes rollbacks
+           evt_rtab_rollback_o = st1_rtab_rback_o;
+    //  }}}
+
+    //  Data-cache control lines
+    //  {{{
+    always_comb
+    begin : hpdcache_ctrl_comb
+        automatic logic nop, st1_nop, st2_nop;
+
+        uc_req_valid_o                      = 1'b0;
+
+        cmo_req_valid_o                     = 1'b0;
+
+        wbuf_write_valid_o                  = 1'b0;
+        wbuf_read_flush_hit_o               = 1'b0;
+        wbuf_write_uncacheable_o            = 1'b0; // unused
+
+        arb_st0_req_ready_o                 = 1'b0;
+        arb_refill_ready_o                  = 1'b0;
+
+        st0_req_mshr_check_o                = 1'b0;
+        st0_req_cachedir_read_o             = 1'b0;
+        st0_req_cachedata_read_o            = 1'b0;
+
+        st1_req_valid_o                     = st1_req_valid_i;
+        st1_nop                             = 1'b0;
+        st1_req_cachedata_write_o           = 1'b0;
+        st1_req_cachedata_write_enable_o    = 1'b0;
+        st1_req_cachedir_updt_lru_o         = 1'b0;
+        st1_rsp_valid_o                     = 1'b0;
+        st1_rsp_aborted_o                   = 1'b0;
+
+        st2_req_valid_o                     = st2_req_valid_i;
+        st2_req_we_o                        = 1'b0;
+        st2_req_is_prefetch_o               = 1'b0;
+        st2_req_mshr_alloc_cs_o             = 1'b0;
+        st2_req_mshr_alloc_o                = 1'b0;
+        st2_nop                             = 1'b0;
+
+        nop                                 = 1'b0;
+
+        rtab_check_o                        = 1'b0;
+        st1_rtab_alloc                      = 1'b0;
+        st1_rtab_alloc_and_link             = 1'b0;
+        st1_rtab_commit_o                   = 1'b0;
+        st1_rtab_mshr_hit_o                 = 1'b0;
+        st1_rtab_mshr_full_o                = 1'b0;
+        st1_rtab_mshr_ready_o               = 1'b0;
+        st1_rtab_wbuf_hit_o                 = 1'b0;
+        st1_rtab_wbuf_not_ready_o           = 1'b0;
+
+        evt_cache_write_miss_o              = 1'b0;
+        evt_cache_read_miss_o               = 1'b0;
+        evt_uncached_req_o                  = 1'b0;
+        evt_cmo_req_o                       = 1'b0;
+        evt_write_req_o                     = 1'b0;
+        evt_read_req_o                      = 1'b0;
+        evt_prefetch_req_o                  = 1'b0;
+        evt_stall_refill_o                  = 1'b0;
+
+        //  Wait for the cache to be initialized
+        //  {{{
+        if (!cachedir_init_ready_i) begin
+            //  initialization of the cache RAMs
+        end
+        //  }}}
+
+        //  Refilling the cache
+        //  {{{
+        else if (refill_busy_i) begin
+            //  miss handler has the control of the cache
+            evt_stall_refill_o = arb_st0_req_valid_i;
+        end
+        //  }}}
+
+        //  Normal pipeline operation
+        //  {{{
+        else begin
+            //  Stage 2 request pending
+            //  {{{
+            if (st2_req_valid_i) begin
+                st2_req_valid_o         = 1'b0;
+
+                //  Allocate an entry in the MSHR
+                st2_req_mshr_alloc_cs_o = 1'b1;
+                st2_req_mshr_alloc_o    = 1'b1;
+
+                //  Introduce a NOP in the next cycle to prevent a hazard on the MSHR
+                st2_nop                 = 1'b1;
+
+                //  Performance event
+                evt_cache_read_miss_o   = ~st2_req_is_prefetch_i;
+                evt_read_req_o          = ~st2_req_is_prefetch_i;
+                evt_prefetch_req_o      =  st2_req_is_prefetch_i;
+            end
+            //  }}}
+
+            //  Stage 1 request pending
+            //  {{{
+            if (st1_req_valid_i) begin
+                //  Check if the request in stage 1 has a conflict with one of the
+                //  request in the replay table.
+                rtab_check_o = ~st1_req_rtab_i & ~st1_fence;
+
+                //  Check if the current request is aborted. If so, respond to the
+                //  core (when need_rsp is set) and set the aborted flag
+                if (st1_req_abort_i && !st1_req_rtab_i) begin
+                    st1_rsp_valid_o = st1_req_need_rsp_i;
+                    st1_rsp_aborted_o = 1'b1;
+                end
+
+                //  Allocate a new entry in the replay table in case of conflict with
+                //  an on-hold request
+                else if (rtab_check_o && rtab_check_hit_i) begin
+                    st1_rtab_alloc_and_link = 1'b1;
+
+                    //  Do not consume a request in this cycle in stage 0
+                    st1_nop = 1'b1;
+                end
+
+                //  CMO fence or invalidate
+                //  {{{
+                else if (st1_req_is_cmo_fence_i || st1_req_is_cmo_inval_i) begin
+                    cmo_req_valid_o = 1'b1;
+                    st1_nop         = 1'b1;
+
+                    //  Performance event
+                    evt_cmo_req_o = 1'b1;
+                end
+                //  }}}
+
+                //  Uncacheable load, store or AMO request
+                //  {{{
+                else if (st1_req_is_uncacheable_i) begin
+                    uc_req_valid_o = 1'b1;
+                    st1_nop        = 1'b1;
+
+                    //  Performance event
+                    evt_uncached_req_o = 1'b1;
+                end
+                //  }}}
+
+                //  Cacheable request
+                //  {{{
+                else begin
+                    //  AMO cacheable request
+                    //  {{{
+                    if (st1_req_is_amo_i) begin
+                        uc_req_valid_o = 1'b1;
+                        st1_nop        = 1'b1;
+
+                        //  Performance event
+                        evt_uncached_req_o = 1'b1;
+                    end
+                    //  }}}
+
+                    //  Load cacheable request
+                    //  {{{
+                    if (|{st1_req_is_load_i,
+                          st1_req_is_cmo_prefetch_i})
+                    begin
+                        //  Cache miss
+                        //  {{{
+                        if (!cachedir_hit_i) begin
+                            //  If there is a match in the write buffer, let's send the
+                            //  entry right away
+                            wbuf_read_flush_hit_o = 1'b1;
+
+                            //  Do not consume a request in this cycle in stage 0
+                            st1_nop = 1'b1;
+
+                            //  Pending miss on the same line
+                            if (mshr_hit_i) begin
+                                //  Put the request in the replay table
+                                st1_rtab_alloc = 1'b1;
+
+                                st1_rtab_mshr_hit_o = 1'b1;
+                            end
+
+                            //  No available slot in the MSHR
+                            else if (mshr_full_i) begin
+                                //  Put the request in the replay table
+                                st1_rtab_alloc = 1'b1;
+
+                                st1_rtab_mshr_full_o = 1'b1;
+                            end
+
+                            //  Hit on an open entry of the write buffer:
+                            //    wait for the entry to be acknowledged
+                            else if (wbuf_read_hit_i) begin
+                                //  Put the request in the replay table
+                                st1_rtab_alloc = 1'b1;
+
+                                st1_rtab_wbuf_hit_o = 1'b1;
+                            end
+
+                            //  Miss Handler is not ready to send
+                            else if (!mshr_alloc_ready_i) begin
+                                //  Put the request on hold if the MISS HANDLER is not
+                                //  ready to send a new miss request. This is to prevent
+                                //  a deadlock between the read request channel and the
+                                //  read response channel.
+                                //
+                                //  The request channel may be stalled by targets if they
+                                //  are not able to send a response (response is
+                                //  prioritary). Therefore, we need to put the request on
+                                //  hold to allow a possible refill read response to be
+                                //  accomplished.
+                                st1_rtab_alloc = 1'b1;
+
+                                st1_rtab_mshr_ready_o = 1'b1;
+                            end
+
+                            //  Forward the request to the next stage to allocate the
+                            //  entry in the MSHR and send the refill request
+                            else begin
+                                //  If the request comes from the replay table, free the
+                                //  corresponding RTAB entry
+                                st1_rtab_commit_o = st1_req_rtab_i;
+
+                                st2_req_valid_o       = 1'b1;
+                                st2_req_we_o          = 1'b1;
+                                st2_req_is_prefetch_o = st1_req_is_cmo_prefetch_i;
+                            end
+                        end
+                        //  }}}
+
+                        //  Cache hit
+                        //  {{{
+                        else begin
+                            //  If the request comes from the replay table, free the
+                            //  corresponding RTAB entry
+                            st1_rtab_commit_o = st1_req_rtab_i;
+
+                            //  Add a NOP when replaying a request, and there is no available
+                            //  request from the replay table.
+                            st1_nop = st1_req_rtab_i & ~rtab_sel_o;
+
+                            //  Update the PLRU bit for the accessed set
+                            st1_req_cachedir_updt_lru_o = st1_req_is_load_i;
+
+                            //  Respond to the core (if needed)
+                            st1_rsp_valid_o = st1_req_need_rsp_i;
+
+                            //  Performance event
+                            evt_read_req_o     = ~st1_req_is_cmo_prefetch_i;
+                            evt_prefetch_req_o =  st1_req_is_cmo_prefetch_i;
+                        end
+                        //  }}}
+                    end
+                    //  }}}
+
+                    //  Store cacheable request
+                    //  {{{
+                    if (st1_req_is_store_i) begin
+                        //  Write in the write buffer if there is no pending miss in the same line.
+                        //
+                        //  We assume here that the NoC that transports read and write transactions does
+                        //  not guarantee the order between transactions on those channels.
+                        //  Therefore, the cache must hold a write if there is a pending read on the
+                        //  same address.
+                        wbuf_write_valid_o = ~mshr_hit_i;
+
+                        //  Add a NOP in the pipeline when:
+                        //  - Structural hazard on the cache data if the st0 request is a load
+                        //    operation.
+                        //  - Replaying a request, the cache cannot accept a request from the
+                        //    core the next cycle. It can however accept a new request from the
+                        //    replay table
+                        //
+                        //  IMPORTANT: we could remove the NOP in the first scenario if the
+                        //  controller checks for the hit of this write. However, this adds
+                        //  a DIR_RAM -> DATA_RAM timing path.
+                        st1_nop = (arb_st0_req_valid_i &  st0_req_is_load_i) |
+                                  (st1_req_rtab_i      & ~rtab_sel_o);
+
+                        //  Enable the data RAM in case of write. However, the actual write
+                        //  depends on the hit signal from the cache directory.
+                        //
+                        //  IMPORTANT: this produces unnecessary power consumption in case of
+                        //  write misses, but removes timing paths between the cache directory
+                        //  RAM and the data RAM chip-select.
+                        st1_req_cachedata_write_o = 1'b1;
+
+                        //  Cache miss
+                        if (!cachedir_hit_i) begin
+                            //  Pending miss on the same line
+                            if (mshr_hit_i) begin
+                                //  Put the request in the replay table
+                                st1_rtab_alloc = 1'b1;
+
+                                st1_rtab_mshr_hit_o = 1'b1;
+
+                                //  Do not consume a request in this cycle in stage 0
+                                st1_nop = 1'b1;
+                            end
+
+                            //  No available entry in the write buffer (or conflict on pending entry)
+                            else if (!wbuf_write_ready_i) begin
+                                //  Put the request in the replay table
+                                st1_rtab_alloc = 1'b1;
+
+                                st1_rtab_wbuf_not_ready_o = 1'b1;
+
+                                //  Do not consume a request in this cycle in stage 0
+                                st1_nop = 1'b1;
+                            end
+
+                            else begin
+                                //  If the request comes from the replay table, free the
+                                //  corresponding RTAB entry
+                                st1_rtab_commit_o = st1_req_rtab_i;
+
+                                //  Respond to the core (if needed)
+                                st1_rsp_valid_o = st1_req_need_rsp_i;
+
+                                //  Performance event
+                                evt_cache_write_miss_o = 1'b1;
+                                evt_write_req_o        = 1'b1;
+                            end
+                        end
+
+                        //  Cache hit
+                        else begin
+                            //  No available entry in the write buffer (or conflict on pending entry)
+                            if (!wbuf_write_ready_i) begin
+                                //  Put the request in the replay table
+                                st1_rtab_alloc = 1'b1;
+
+                                st1_rtab_wbuf_not_ready_o = 1'b1;
+
+                                //  Do not consume a request in this cycle in stage 0
+                                st1_nop = 1'b1;
+                            end
+
+                            //  The store can be performed in the write buffer and in the cache
+                            else begin
+                                //  If the request comes from the replay table, free the
+                                //  corresponding RTAB entry
+                                st1_rtab_commit_o = st1_req_rtab_i;
+
+                                //  Respond to the core
+                                st1_rsp_valid_o = st1_req_need_rsp_i;
+
+                                //  Update the PLRU bit for the accessed set
+                                st1_req_cachedir_updt_lru_o = 1'b1;
+
+                                //  Write in the data RAM
+                                st1_req_cachedata_write_enable_o = 1'b1;
+
+                                //  Performance event
+                                evt_write_req_o = 1'b1;
+                            end
+                        end
+                    end
+                    //  }}}
+                end
+                // }}}
+            end
+            //  }}}
+
+            //  New request
+            //  {{{
+            nop = st1_nop | st2_nop;
+
+            //      The cache controller accepts a core request when:
+            //      -  The req-refill arbiter grants the request
+            //      -  The pipeline is not being flushed
+            arb_st0_req_ready_o = arb_st0_req_valid_i & ~nop;
+
+            //      The cache controller accepts a refill when:
+            //      -  The req-refill arbiter grants the refill
+            //      -  The pipeline is empty
+            arb_refill_ready_o = arb_refill_valid_i & ~(st1_req_valid_i | st2_req_valid_i);
+
+            //      Forward the request to stage 1
+            //      - There is a valid request in stage 0
+            st1_req_valid_o = arb_st0_req_ready_o;
+
+            //      New cacheable stage 0 request granted
+            //      {{{
+            //          IMPORTANT: here the RAM is enabled regardless of whether the
+            //          request needs to be put on-hold.
+            //          This increases the power consumption in those cases, but
+            //          removes the timing paths RAM-to-RAM between the cache
+            //          directory and the data array.
+            if (arb_st0_req_valid_i && !st0_req_is_uncacheable_i) begin
+                st0_req_cachedata_read_o =
+                          st0_req_is_load_i &
+                        ~(st1_req_valid_i   & st1_req_is_store_i & ~st1_req_is_uncacheable_i);
+                if (st0_req_is_load_i         |
+                    st0_req_is_cmo_prefetch_i |
+                    st0_req_is_store_i        |
+                    st0_req_is_amo_i          )
+                begin
+                    st0_req_mshr_check_o    = 1'b1;
+                    st0_req_cachedir_read_o = ~st0_req_is_amo_i;
+                end
+            end
+            //      }}}
+            //  }}}
+        end
+        //  }}} end of normal pipeline operation
+    end
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv
new file mode 100644
index 0000000..7c7ee65
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv
@@ -0,0 +1,120 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Directory and Data Memory Arrays
+ *  History       :
+ */
+module hpdcache_memarray
+import hpdcache_pkg::*;
+    //  Ports
+    //  {{{
+(
+    input  logic                                    clk_i,
+    input  logic                                    rst_ni,
+    //  Directory RAM ports: one address for all ways; cs/we/entries are per way
+    input  hpdcache_dir_addr_t                      dir_addr_i,
+    input  hpdcache_way_vector_t                    dir_cs_i,
+    input  hpdcache_way_vector_t                    dir_we_i,
+    input  hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_wentry_i,
+    output hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_rentry_o,
+    //  Data RAM ports: every signal below is indexed [y][x], one slice per RAM cut
+    input  hpdcache_data_addr_t                     data_addr_i,
+    input  hpdcache_data_enable_t                   data_cs_i,
+    input  hpdcache_data_enable_t                   data_we_i,
+    input  hpdcache_data_be_entry_t                 data_wbyteenable_i,
+    input  hpdcache_data_entry_t                    data_wentry_i,
+    output hpdcache_data_entry_t                    data_rentry_o
+);
+    //  }}}
+
+    //  Memory arrays
+    //  {{{
+    generate
+        genvar x, y, dir_w;
+
+        //  Directory: one hpdcache_sram instance per way. All instances share
+        //  dir_addr_i; chip-select, write-enable and entries are taken per way.
+        for (dir_w = 0; dir_w < int'(HPDCACHE_WAYS); dir_w++) begin : dir_sram_gen
+            hpdcache_sram #(
+                .DATA_SIZE (HPDCACHE_DIR_RAM_WIDTH),
+                .ADDR_SIZE (HPDCACHE_DIR_RAM_ADDR_WIDTH)
+            ) dir_sram (
+                .clk       (clk_i),
+                .rst_n     (rst_ni),
+                .cs        (dir_cs_i[dir_w]),
+                .we        (dir_we_i[dir_w]),
+                .addr      (dir_addr_i),
+                .wdata     (dir_wentry_i[dir_w]),
+                .rdata     (dir_rentry_o[dir_w])
+            );
+        end
+
+        //  Data: a grid of Y_CUTS x X_CUTS RAM instances. The RAM flavour (byte-enable
+        //  or bit-mask write port) is selected by HPDCACHE_DATA_RAM_WBYTEENABLE.
+        for (y = 0; y < int'(HPDCACHE_DATA_RAM_Y_CUTS); y++) begin : data_sram_row_gen
+            for (x = 0; x < int'(HPDCACHE_DATA_RAM_X_CUTS); x++) begin : data_sram_col_gen
+                if (HPDCACHE_DATA_RAM_WBYTEENABLE) begin : data_sram_wbyteenable_gen
+                    hpdcache_sram_wbyteenable #(
+                        .DATA_SIZE   (HPDCACHE_DATA_RAM_WIDTH),
+                        .ADDR_SIZE   (HPDCACHE_DATA_RAM_ADDR_WIDTH)
+                    ) data_sram (
+                        .clk         (clk_i),
+                        .rst_n       (rst_ni),
+                        .cs          (data_cs_i[y][x]),
+                        .we          (data_we_i[y][x]),
+                        .addr        (data_addr_i[y][x]),
+                        .wdata       (data_wentry_i[y][x]),
+                        .wbyteenable (data_wbyteenable_i[y][x]),
+                        .rdata       (data_rentry_o[y][x])
+                    );
+                end else begin : data_sram_wmask_gen
+                    hpdcache_data_ram_data_t data_wmask;
+
+                    //  Expand every write byte-enable bit into the 8 mask bits of its byte
+                    always_comb
+                    begin : data_wmask_comb
+                        for (int w = 0; w < HPDCACHE_DATA_WAYS_PER_RAM_WORD; w++) begin
+                            for (int b = 0; b < HPDCACHE_WORD_WIDTH/8; b++) begin
+                                data_wmask[w][8*b +: 8] = {8{data_wbyteenable_i[y][x][w][b]}};
+                            end
+                        end
+                    end
+
+                    hpdcache_sram_wmask #(
+                        .DATA_SIZE   (HPDCACHE_DATA_RAM_WIDTH),
+                        .ADDR_SIZE   (HPDCACHE_DATA_RAM_ADDR_WIDTH)
+                    ) data_sram (
+                        .clk         (clk_i),
+                        .rst_n       (rst_ni),
+                        .cs          (data_cs_i[y][x]),
+                        .we          (data_we_i[y][x]),
+                        .addr        (data_addr_i[y][x]),
+                        .wdata       (data_wentry_i[y][x]),
+                        .wmask       (data_wmask),
+                        .rdata       (data_rentry_o[y][x])
+                    );
+                end
+            end
+        end
+    endgenerate
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv
new file mode 100644
index 0000000..c87cc3f
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv
@@ -0,0 +1,656 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Directory and Data Memory RAMs Controller
+ *  History       :
+ */
+module hpdcache_memctrl
+import hpdcache_pkg::*;
+    //  Ports
+    //  {{{
+(
+    //      Global clock and reset signals
+    //      {{{
+    input  logic                                clk_i,
+    input  logic                                rst_ni,
+    //      }}}
+
+    //      Global control signals
+    //      {{{
+    output logic                                ready_o,
+    //      }}}
+
+    //      DIR array access interface
+    //      {{{
+    input  logic                                dir_match_i,
+    input  hpdcache_set_t                       dir_match_set_i,
+    input  hpdcache_tag_t                       dir_match_tag_i,
+    input  logic                                dir_update_lru_i,
+    output hpdcache_way_vector_t                dir_hit_way_o,
+
+    input  logic                                dir_amo_match_i,
+    input  hpdcache_set_t                       dir_amo_match_set_i,
+    input  hpdcache_tag_t                       dir_amo_match_tag_i,
+    input  logic                                dir_amo_update_plru_i,
+    output hpdcache_way_vector_t                dir_amo_hit_way_o,
+
+    input  logic                                dir_refill_i,
+    input  hpdcache_set_t                       dir_refill_set_i,
+    input  hpdcache_dir_entry_t                 dir_refill_entry_i,
+    input  logic                                dir_refill_updt_plru_i,
+    output hpdcache_way_vector_t                dir_victim_way_o,
+
+    input  logic                                dir_cmo_check_i,
+    input  hpdcache_set_t                       dir_cmo_check_set_i,
+    input  hpdcache_tag_t                       dir_cmo_check_tag_i,
+    output hpdcache_way_vector_t                dir_cmo_check_hit_way_o,
+
+    input  logic                                dir_cmo_inval_i,
+    input  hpdcache_set_t                       dir_cmo_inval_set_i,
+    input  hpdcache_way_vector_t                dir_cmo_inval_way_i,
+
+    //      }}}
+
+    //      DATA array access interface
+    //      {{{
+    input  logic                                data_req_read_i,
+    input  hpdcache_set_t                       data_req_read_set_i,
+    input  hpdcache_req_size_t                  data_req_read_size_i,
+    input  hpdcache_word_t                      data_req_read_word_i,
+    output hpdcache_req_data_t                  data_req_read_data_o,
+
+    input  logic                                data_req_write_i,
+    input  logic                                data_req_write_enable_i,
+    input  hpdcache_set_t                       data_req_write_set_i,
+    input  hpdcache_req_size_t                  data_req_write_size_i,
+    input  hpdcache_word_t                      data_req_write_word_i,
+    input  hpdcache_req_data_t                  data_req_write_data_i,
+    input  hpdcache_req_be_t                    data_req_write_be_i,
+
+    input  logic                                data_amo_write_i,
+    input  logic                                data_amo_write_enable_i,
+    input  hpdcache_set_t                       data_amo_write_set_i,
+    input  hpdcache_req_size_t                  data_amo_write_size_i,
+    input  hpdcache_word_t                      data_amo_write_word_i,
+    input  logic [63:0]                         data_amo_write_data_i,
+    input  logic  [7:0]                         data_amo_write_be_i,
+
+    input  logic                                data_refill_i,
+    input  hpdcache_way_vector_t                data_refill_way_i,
+    input  hpdcache_set_t                       data_refill_set_i,
+    input  hpdcache_word_t                      data_refill_word_i,
+    input  hpdcache_refill_data_t               data_refill_data_i
+    //      }}}
+);
+    //  }}}
+
+    //  Definition of constants
+    //  {{{
+    localparam int unsigned HPDCACHE_ALL_CUTS = HPDCACHE_DATA_RAM_X_CUTS*HPDCACHE_DATA_RAM_Y_CUTS;
+    localparam int unsigned HPDCACHE_DATA_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS;
+    //  }}}
+
+    //  Definition of functions
+    //  {{{
+
+    //      hpdcache_compute_data_ram_cs
+    //
+    //      description: Computes the chip-select vector of one data RAM row: a run of
+    //                   ones sized from size_i, shifted left by the low bits of word_i
+    function automatic hpdcache_data_row_enable_t hpdcache_compute_data_ram_cs(
+            input hpdcache_req_size_t size_i,
+            input hpdcache_word_t     word_i);
+        //  Number of word_i bits that select a word within one RAM access (at least 1)
+        localparam hpdcache_uint32 off_width =
+                HPDCACHE_ACCESS_WORDS > 1 ? $clog2(HPDCACHE_ACCESS_WORDS) : 1;
+
+        hpdcache_data_row_enable_t ret;
+        hpdcache_uint32 off;
+        //  Sizes 0 to 3 all enable 64 bits' worth of words; each larger size doubles it
+        case (size_i)
+            3'h0,
+            3'h1,
+            3'h2,
+            3'h3:    ret = hpdcache_data_row_enable_t'({ 64/HPDCACHE_WORD_WIDTH{1'b1}});
+            3'h4:    ret = hpdcache_data_row_enable_t'({128/HPDCACHE_WORD_WIDTH{1'b1}});
+            3'h5:    ret = hpdcache_data_row_enable_t'({256/HPDCACHE_WORD_WIDTH{1'b1}});
+            default: ret = hpdcache_data_row_enable_t'({512/HPDCACHE_WORD_WIDTH{1'b1}});
+        endcase
+        //  No offset when a RAM access holds a single word; otherwise use the low word bits
+        off = HPDCACHE_ACCESS_WORDS > 1 ? hpdcache_uint'(word_i[0 +: off_width]) : 0;
+        return hpdcache_data_row_enable_t'(ret << off);
+    endfunction
+
+    function automatic hpdcache_data_ram_row_idx_t hpdcache_way_to_data_ram_row(
+            input hpdcache_way_vector_t way);  //  way selection vector, bit i = way i
+        for (hpdcache_uint i = 0; i < HPDCACHE_WAYS; i++) begin
+            if (way[i]) return hpdcache_data_ram_row_idx_t'(i / HPDCACHE_DATA_WAYS_PER_RAM_WORD);
+        end  //  the lowest set bit decides: row = way index / HPDCACHE_DATA_WAYS_PER_RAM_WORD
+        return 0;  //  no bit set in way: fall back to row 0
+    endfunction
+
+    function automatic hpdcache_data_ram_way_idx_t hpdcache_way_to_data_ram_word(
+            input hpdcache_way_vector_t way);  //  way selection vector, bit i = way i
+        for (hpdcache_uint i = 0; i < HPDCACHE_WAYS; i++) begin
+            if (way[i]) return hpdcache_data_ram_way_idx_t'(i % HPDCACHE_DATA_WAYS_PER_RAM_WORD);
+        end  //  the lowest set bit decides: slot = way index % HPDCACHE_DATA_WAYS_PER_RAM_WORD
+        return 0;  //  no bit set in way: fall back to slot 0
+    endfunction
+
+    function automatic hpdcache_data_ram_addr_t hpdcache_set_to_data_ram_addr(
+            input hpdcache_set_t set,
+            input hpdcache_word_t word);
+        hpdcache_uint ret;
+        //  A set spans CL_WORDS/ACCESS_WORDS RAM entries; word picks one entry in the set
+        ret = (hpdcache_uint'(set)*(HPDCACHE_CL_WORDS / HPDCACHE_ACCESS_WORDS)) +
+              (hpdcache_uint'(word) / HPDCACHE_ACCESS_WORDS);
+        //  Cast the hpdcache_uint result down to the RAM address type
+        return hpdcache_data_ram_addr_t'(ret);
+    endfunction
+    //  }}}
+
+    //  Definition of internal signals and registers
+    //  {{{
+    genvar gen_i, gen_j, gen_k;
+
+    //      Directory initialization signals and registers
+    logic                                      init_q,     init_d;
+    hpdcache_dir_addr_t                        init_set_q, init_set_d;
+    hpdcache_way_vector_t                      init_dir_cs;
+    hpdcache_way_vector_t                      init_dir_we;
+    hpdcache_dir_entry_t                       init_dir_wentry;
+
+    //      Directory valid bit vector (one bit per set and way)
+    hpdcache_way_vector_t [HPDCACHE_SETS-1:0]  dir_valid_q, dir_valid_d;
+    hpdcache_set_t                             dir_req_set_q, dir_req_set_d;
+    hpdcache_dir_addr_t                        dir_addr;
+    hpdcache_way_vector_t                      dir_cs;
+    hpdcache_way_vector_t                      dir_we;
+    hpdcache_dir_entry_t  [HPDCACHE_WAYS-1:0]  dir_wentry;
+    hpdcache_dir_entry_t  [HPDCACHE_WAYS-1:0]  dir_rentry;
+
+    hpdcache_data_addr_t                       data_addr;
+    hpdcache_data_enable_t                     data_cs;
+    hpdcache_data_enable_t                     data_we;
+    hpdcache_data_be_entry_t                   data_wbyteenable;
+    hpdcache_data_entry_t                      data_wentry;
+    hpdcache_data_entry_t                      data_rentry;
+
+    logic                                      data_write;
+    logic                                      data_write_enable;
+    hpdcache_set_t                             data_write_set;
+    hpdcache_req_size_t                        data_write_size;
+    hpdcache_word_t                            data_write_word;
+    hpdcache_refill_data_t                     data_write_data;
+    hpdcache_refill_be_t                       data_write_be;
+
+    hpdcache_refill_data_t                     data_req_write_data;
+    hpdcache_refill_be_t                       data_req_write_be;
+
+    hpdcache_refill_data_t                     data_amo_write_data;
+    hpdcache_refill_be_t                       data_amo_write_be;
+
+    hpdcache_way_vector_t                      data_way;
+
+    hpdcache_data_ram_row_idx_t                data_ram_row;
+    hpdcache_data_ram_way_idx_t                data_ram_word;
+
+    //  }}}
+
+    //  Init FSM
+    //  {{{
+    always_comb
+    begin : init_comb
+        init_dir_wentry.tag      = '0;  //  the entry written during init is all zeros
+        init_dir_wentry.reserved = '0;
+        init_dir_cs              = '0;  //  defaults: no directory access from the init FSM
+        init_dir_we              = '0;
+        init_d                   = init_q;
+        init_set_d               = init_set_q;
+
+        case (init_q)
+            1'b0: begin  //  initializing: select and write all ways, one set per cycle
+                init_d      = (hpdcache_uint'(init_set_q) == (HPDCACHE_SETS - 1));
+                init_set_d  = init_set_q + 1;
+                init_dir_cs = '1;
+                init_dir_we = '1;
+            end
+
+            1'b1: begin  //  done: hold the state; cs/we keep their '0 defaults
+                init_d      = 1'b1;
+                init_set_d  = init_set_q;
+            end
+        endcase
+    end
+
+    assign ready_o = init_q;
+
+    always_ff @(posedge clk_i or negedge rst_ni)  //  asynchronous, active-low reset
+    begin : init_ff
+        if (!rst_ni) begin
+            init_q      <= 1'b0;  //  ready_o is low until the init FSM completes again
+            init_set_q  <= 0;     //  initialization restarts from set 0
+            dir_valid_q <= '0;    //  clear every valid bit (all sets, all ways)
+        end else begin
+            init_q      <= init_d;
+            init_set_q  <= init_set_d;
+            dir_valid_q <= dir_valid_d;
+        end
+    end
+    //  }}}
+
+    //  Memory arrays
+    //  {{{
+    hpdcache_memarray hpdcache_memarray_i(
+        .clk_i,
+        .rst_ni,
+
+        .dir_addr_i         (dir_addr),
+        .dir_cs_i           (dir_cs),
+        .dir_we_i           (dir_we),
+        .dir_wentry_i       (dir_wentry),
+        .dir_rentry_o       (dir_rentry),
+
+        .data_addr_i        (data_addr),
+        .data_cs_i          (data_cs),
+        .data_we_i          (data_we),
+        .data_wbyteenable_i (data_wbyteenable),
+        .data_wentry_i      (data_wentry),
+        .data_rentry_o      (data_rentry)
+    );
+    //  }}}
+
+    //  Directory RAM request mux
+    //  {{{
+    always_comb
+    begin : dir_ctrl_comb
+        case (1'b1)  //  priority mux: the first asserted condition below wins
+            //  Cache directory initialization (while init_q is low): write all ways
+            ~init_q: begin
+                dir_addr    = init_set_q;
+                dir_cs      = init_dir_cs;
+                dir_we      = init_dir_we;
+                dir_wentry  = {HPDCACHE_WAYS{init_dir_wentry}};
+            end
+
+            //  Cache directory match tag -> hit (read all ways of the requested set)
+            dir_match_i: begin
+                dir_addr    = dir_match_set_i;
+                dir_cs      = '1;
+                dir_we      = '0;
+                dir_wentry  = '0;
+            end
+
+            //  Cache directory AMO match tag -> hit (read all ways of the AMO set)
+            dir_amo_match_i: begin
+                dir_addr    = dir_amo_match_set_i;
+                dir_cs      = '1;
+                dir_we      = '0;
+                dir_wentry  = '0;
+            end
+
+            //  Cache directory update (refill): write the refill entry into the victim way
+            dir_refill_i: begin
+                dir_addr    = dir_refill_set_i;
+                dir_cs      = dir_victim_way_o;
+                dir_we      = dir_victim_way_o;
+                dir_wentry  = {HPDCACHE_WAYS{dir_refill_entry_i}};
+            end
+
+            //  Cache directory CMO match tag (read all ways of the checked set)
+            dir_cmo_check_i: begin
+                dir_addr    = dir_cmo_check_set_i;
+                dir_cs      = '1;
+                dir_we      = '0;
+                dir_wentry  = '0;
+            end
+
+            //  Do nothing (no RAM selected)
+            default: begin
+                dir_addr    = '0;
+                dir_cs      = '0;
+                dir_we      = '0;
+                dir_wentry  = '0;
+            end
+        endcase
+    end
+    //  }}}
+
+    //  Directory valid logic
+    //  {{{
+    always_comb
+    begin : dir_valid_comb
+        dir_valid_d = dir_valid_q;  //  default: keep all valid bits
+
+        unique case (1'b1)  //  NOTE(review): refill and CMO inval assumed exclusive -- confirm
+            //  Refill the cache after a miss: set the valid bit of the victim way
+            dir_refill_i: begin
+                dir_valid_d[dir_refill_set_i]    = dir_valid_q[dir_refill_set_i]    |  dir_victim_way_o;
+            end
+            //  CMO invalidate: clear the valid bits of the ways in dir_cmo_inval_way_i
+            dir_cmo_inval_i: begin
+                dir_valid_d[dir_cmo_inval_set_i] = dir_valid_q[dir_cmo_inval_set_i] & ~dir_cmo_inval_way_i;
+            end
+            default: begin
+                // do nothing
+            end
+        endcase
+    end
+    //  }}}
+
+    //  Directory hit logic
+    //  {{{
+    assign dir_req_set_d = dir_match_i     ? dir_match_set_i     :
+                           dir_amo_match_i ? dir_amo_match_set_i :
+                           dir_cmo_check_i ? dir_cmo_check_set_i :
+                                             dir_req_set_q       ;  //  no lookup: keep the set
+    //  hit[way] = valid bit of set dir_req_set_q AND tag equality on that way's read entry
+    generate
+        hpdcache_way_vector_t req_hit;
+        hpdcache_way_vector_t amo_hit;
+        hpdcache_way_vector_t cmo_hit;
+
+        for (gen_i = 0; gen_i < int'(HPDCACHE_WAYS); gen_i++)
+        begin : dir_match_tag_gen
+            assign req_hit[gen_i] = (dir_rentry[gen_i].tag == dir_match_tag_i),
+                   amo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_amo_match_tag_i),
+                   cmo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_cmo_check_tag_i);
+            //  All three hit vectors are qualified with the same dir_req_set_q valid bits
+            assign dir_hit_way_o          [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & req_hit[gen_i],
+                   dir_amo_hit_way_o      [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & amo_hit[gen_i],
+                   dir_cmo_check_hit_way_o[gen_i] = dir_valid_q[dir_req_set_q][gen_i] & cmo_hit[gen_i];
+        end
+    endgenerate
+    //  }}}
+
+    //  Directory victim select logic
+    //  {{{
+    logic               plru_updt;
+    hpdcache_way_vector_t plru_updt_way;
+    //  Update on a regular or an AMO request; the regular request's hit way has precedence
+    assign plru_updt     = dir_update_lru_i | dir_amo_update_plru_i,
+           plru_updt_way = dir_update_lru_i ? dir_hit_way_o : dir_amo_hit_way_o;
+    //  Replacement unit: it drives dir_victim_way_o, used by the refill paths
+    hpdcache_plru #(
+        .SETS                (HPDCACHE_SETS),
+        .WAYS                (HPDCACHE_WAYS)
+    ) plru_i (
+        .clk_i,
+        .rst_ni,
+
+        .updt_i              (plru_updt),
+        .updt_set_i          (dir_req_set_q),
+        .updt_way_i          (plru_updt_way),
+
+        .repl_i              (dir_refill_i),
+        .repl_set_i          (dir_refill_set_i),
+        .repl_dir_valid_i    (dir_valid_q[dir_refill_set_i]),
+        .repl_updt_plru_i    (dir_refill_updt_plru_i),
+
+        .victim_way_o        (dir_victim_way_o)
+    );
+    //  }}}
+
+    //  Data RAM request multiplexor
+    //  {{{
+
+    //  Upsize the request interface to match the maximum access width of the data RAM
+    generate
+        if (HPDCACHE_DATA_REQ_RATIO > 1) begin : upsize_data_req_write_gen
+            //  replicate the request DATA HPDCACHE_DATA_REQ_RATIO times
+            assign data_req_write_data = {HPDCACHE_DATA_REQ_RATIO{data_req_write_data_i}};
+
+            //  demux request BE: the output slot is selected by bits of the word index
+            hpdcache_demux #(
+                .NOUTPUT     (HPDCACHE_DATA_REQ_RATIO),
+                .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH/8),
+                .ONE_HOT_SEL (1'b0)
+            ) data_req_write_be_demux_i (
+                .data_i      (data_req_write_be_i),
+                .sel_i       (data_req_write_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +:
+                                                    $clog2(HPDCACHE_DATA_REQ_RATIO)]),
+                .data_o      (data_req_write_be)
+            );
+        end else begin  //  ratio of 1 or less: pass data and byte enables straight through
+            assign data_req_write_data = data_req_write_data_i,
+                   data_req_write_be   = data_req_write_be_i;
+        end
+    endgenerate
+
+    //  Upsize the AMO data interface to match the maximum access width of the data RAM
+    generate
+        localparam hpdcache_uint AMO_DATA_RATIO       = HPDCACHE_DATA_RAM_ACCESS_WIDTH/64;
+        localparam hpdcache_uint AMO_DATA_INDEX_WIDTH = $clog2(AMO_DATA_RATIO);
+        //  AMO data is 64 bits wide: replicate it over every 64-bit slot of a RAM access
+        if (AMO_DATA_RATIO > 1) begin
+            assign data_amo_write_data = {AMO_DATA_RATIO{data_amo_write_data_i}};
+            //  demux AMO BE: the output slot is selected by the low bits of the word index
+            hpdcache_demux #(
+                .NOUTPUT          (AMO_DATA_RATIO),
+                .DATA_WIDTH       (8),
+                .ONE_HOT_SEL      (1'b0)
+            ) amo_be_demux_i (
+                .data_i           (data_amo_write_be_i),
+                .sel_i            (data_amo_write_word_i[0 +: AMO_DATA_INDEX_WIDTH]),
+                .data_o           (data_amo_write_be)
+            );
+        end else begin  //  ratio of 1 or less: pass data and byte enables straight through
+            assign data_amo_write_data = data_amo_write_data_i,
+                   data_amo_write_be   = data_amo_write_be_i;
+        end
+    endgenerate
+
+    //  Multiplex between data write requests
+    always_comb
+    begin : data_write_comb
+        case (1'b1)  //  priority: refill, then request write, then AMO write
+            data_refill_i: begin
+                data_write        = 1'b1;
+                data_write_enable = 1'b1;  //  a refill always writes
+                data_write_set    = data_refill_set_i;
+                data_write_size   = hpdcache_req_size_t'($clog2(HPDCACHE_DATA_RAM_ACCESS_WIDTH/8));
+                data_write_word   = data_refill_word_i;
+                data_write_data   = data_refill_data_i;
+                data_write_be     = '1;    //  all byte enables set
+            end
+
+            data_req_write_i: begin
+                data_write        = 1'b1;
+                data_write_enable = data_req_write_enable_i;  //  write gated by the requester
+                data_write_set    = data_req_write_set_i;
+                data_write_size   = data_req_write_size_i;
+                data_write_word   = data_req_write_word_i;
+                data_write_data   = data_req_write_data;      //  upsized request data
+                data_write_be     = data_req_write_be;        //  upsized request byte enables
+            end
+
+            data_amo_write_i: begin
+                data_write        = 1'b1;
+                data_write_enable = data_amo_write_enable_i;  //  write gated by the AMO path
+                data_write_set    = data_amo_write_set_i;
+                data_write_size   = data_amo_write_size_i;
+                data_write_word   = data_amo_write_word_i;
+                data_write_data   = data_amo_write_data;      //  upsized AMO data
+                data_write_be     = data_amo_write_be;        //  upsized AMO byte enables
+            end
+
+            default: begin  //  no write source active
+                data_write        = 1'b0;
+                data_write_enable = 1'b0;
+                data_write_set    = '0;
+                data_write_size   = '0;
+                data_write_word   = '0;
+                data_write_data   = '0;
+                data_write_be     = '0;
+            end
+        endcase
+    end
+
+    //  Multiplex between read and write access on the data RAM
+    assign  data_way = data_refill_i    ? data_refill_way_i :  //  refill: way given by caller
+                       data_amo_write_i ? dir_amo_hit_way_o :  //  AMO: way that hit for the AMO
+                                          dir_hit_way_o;       //  otherwise: regular hit way
+
+    //  Decode way index into the data RAM row and the slot inside the RAM word
+    assign  data_ram_word = hpdcache_way_to_data_ram_word(data_way),
+            data_ram_row  = hpdcache_way_to_data_ram_row(data_way);
+
+    always_comb
+    begin : data_ctrl_comb
+        case (1'b1)  //  priority: a read is served before a write
+            //  Select data read inputs
+            data_req_read_i: begin
+                data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_req_read_set_i,
+                                                                         data_req_read_word_i)}};
+                //  A read drives no write controls; every row gets the same chip-select
+                data_we          = '0;
+                data_wbyteenable = '0;
+                data_wentry      = '0;
+                for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
+                    data_cs[i] = hpdcache_compute_data_ram_cs(data_req_read_size_i,
+                                                              data_req_read_word_i);
+                end
+            end
+
+            //  Select data write inputs
+            data_write: begin
+                data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_write_set,
+                                                                           data_write_word)}};
+                //  Replicate each write word over all the way slots of a RAM word
+                for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
+                    for (int unsigned j = 0; j < HPDCACHE_DATA_RAM_X_CUTS; j++) begin
+                        data_wentry[i][j] = {HPDCACHE_DATA_WAYS_PER_RAM_WORD{data_write_data[j]}};
+                    end
+                end
+
+                for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
+                    data_cs[i] = hpdcache_compute_data_ram_cs(data_write_size, data_write_word);
+                    //  Only the row holding the target way can have write enables asserted
+                    if (i == hpdcache_uint'(data_ram_row)) begin
+                        data_we[i] = data_write_enable ? data_cs[i] : '0;
+                    end else begin
+                        data_we[i] = '0;
+                    end
+                    //  NOTE(review): j bound is ACCESS_WORDS here but X_CUTS above -- confirm
+                    //  Build the write mask: byte enables only on the target way's slot
+                    for (int unsigned j = 0; j < HPDCACHE_ACCESS_WORDS; j++) begin
+                        for (int unsigned k = 0; k < HPDCACHE_DATA_WAYS_PER_RAM_WORD; k++) begin
+                            data_wbyteenable[i][j][k] = (k == hpdcache_uint'(data_ram_word)) ?
+                                                        data_write_be[j] : '0;
+                        end
+                    end
+                end
+            end
+
+            //  Do nothing (no RAM selected)
+            default: begin
+                data_addr        = '0;
+                data_cs          = '0;
+                data_we          = '0;
+                data_wbyteenable = '0;
+                data_wentry      = '0;
+            end
+        endcase
+    end
+    //  }}}
+
+    //  Data RAM read data multiplexor
+    //  {{{
+    generate
+        hpdcache_req_data_t [HPDCACHE_DATA_REQ_RATIO-1:0][HPDCACHE_WAYS-1:0] data_read_words;
+        hpdcache_req_data_t                              [HPDCACHE_WAYS-1:0] data_read_req_word;
+
+        //  Organize the read data by words (all ways for the same word are contiguous)
+        for (gen_i = 0; gen_i < int'(HPDCACHE_DATA_REQ_RATIO); gen_i++) begin
+            for (gen_j = 0; gen_j < int'(HPDCACHE_WAYS); gen_j++) begin
+                for (gen_k = 0; gen_k < int'(HPDCACHE_REQ_WORDS); gen_k++) begin
+                    assign data_read_words[gen_i][gen_j][gen_k] =
+                            data_rentry[(gen_j / HPDCACHE_DATA_WAYS_PER_RAM_WORD)]
+                                       [(gen_i * HPDCACHE_REQ_WORDS     ) + gen_k]
+                                       [(gen_j % HPDCACHE_DATA_WAYS_PER_RAM_WORD)];
+                end
+            end
+        end
+
+        //  Mux the data according to the access word
+        if (HPDCACHE_DATA_REQ_RATIO > 1) begin : req_width_lt_ram_width
+            typedef logic [$clog2(HPDCACHE_DATA_REQ_RATIO)-1:0] data_req_word_t;
+            data_req_word_t data_read_req_word_index_q;
+
+            hpdcache_mux #(
+                .NINPUT      (HPDCACHE_DATA_REQ_RATIO),
+                .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH*HPDCACHE_WAYS)
+            ) data_read_req_word_mux_i(
+                .data_i      (data_read_words),
+                .sel_i       (data_read_req_word_index_q),
+                .data_o      (data_read_req_word)
+            );
+
+            always_ff @(posedge clk_i)  //  word index registered (no reset) for the mux select
+            begin : data_req_read_word_ff
+                data_read_req_word_index_q <=
+                        data_req_read_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +:
+                                             $clog2(HPDCACHE_DATA_REQ_RATIO)];
+            end
+        end
+
+        //  Request data interface width is equal to the data RAM width
+        else begin : req_width_eq_ram_width
+            assign data_read_req_word = data_read_words;
+        end
+
+        //  Mux the data according to the hit way
+        hpdcache_mux #(
+            .NINPUT      (HPDCACHE_WAYS),
+            .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH),
+            .ONE_HOT_SEL (1'b1)
+        ) data_read_req_word_way_mux_i(
+            .data_i      (data_read_req_word),
+            .sel_i       (dir_hit_way_o),
+            .data_o      (data_req_read_data_o)
+        );
+    endgenerate
+
+
+    //  Delay the accessed set for checking the tag from the directory in the
+    //  next cycle (hit logic)
+    always_ff @(posedge clk_i)  //  no reset term
+    begin : req_read_ff
+        if (dir_match_i || dir_amo_match_i || dir_cmo_check_i) begin  //  else hold the value
+            dir_req_set_q <= dir_req_set_d;
+        end
+    end
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    concurrent_dir_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot0({dir_match_i, dir_amo_match_i, dir_cmo_check_i, dir_refill_i})) else
+            $error("hpdcache_memctrl: more than one process is accessing the cache directory");
+
+    concurrent_data_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot0({data_req_read_i, data_req_write_i, data_amo_write_i, data_refill_i})) else
+            $error("hpdcache_memctrl: more than one process is accessing the cache data");
+    //  pragma translate_on
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv
new file mode 100644
index 0000000..97ecf46
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv
@@ -0,0 +1,659 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Miss Handler
+ *  History       :
+ */
+module hpdcache_miss_handler
+//  {{{
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter  int  HPDcacheMemIdWidth    = 8,
+    parameter  int  HPDcacheMemDataWidth  = 512,
+    parameter  type hpdcache_mem_req_t    = logic,
+    parameter  type hpdcache_mem_resp_r_t = logic,
+    localparam type hpdcache_mem_id_t     = logic [HPDcacheMemIdWidth-1:0]
+)
+//  }}}
+//  Ports
+//  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    //      Global control signals
+    //      {{{
+    output logic                  mshr_empty_o,
+    output logic                  mshr_full_o,
+    //      }}}
+
+    //      Configuration signals
+    //      {{{
+    input  logic                  cfg_prefetch_updt_plru_i,
+    //      }}}
+
+    //      CHECK interface
+    //      {{{
+    input  logic                  mshr_check_i,
+    input  mshr_set_t             mshr_check_set_i,
+    input  mshr_tag_t             mshr_check_tag_i,
+    output logic                  mshr_check_hit_o,
+    //      }}}
+
+    //      MISS interface
+    //      {{{
+    //          MISS request interface
+    output logic                  mshr_alloc_ready_o,
+    input  logic                  mshr_alloc_i,
+    input  logic                  mshr_alloc_cs_i,
+    input  hpdcache_nline_t       mshr_alloc_nline_i,
+    output logic                  mshr_alloc_full_o,
+    input  hpdcache_req_tid_t     mshr_alloc_tid_i,
+    input  hpdcache_req_sid_t     mshr_alloc_sid_i,
+    input  hpdcache_word_t        mshr_alloc_word_i,
+    input  logic                  mshr_alloc_need_rsp_i,
+    input  logic                  mshr_alloc_is_prefetch_i,
+
+    //          REFILL MISS interface
+    input  logic                  refill_req_ready_i,
+    output logic                  refill_req_valid_o,
+    output logic                  refill_busy_o,
+    output logic                  refill_updt_plru_o,
+    output hpdcache_set_t         refill_set_o,
+    output hpdcache_dir_entry_t   refill_dir_entry_o,
+    input  hpdcache_way_vector_t  refill_victim_way_i,
+    output logic                  refill_write_dir_o,
+    output logic                  refill_write_data_o,
+    output hpdcache_way_vector_t  refill_victim_way_o,
+    output hpdcache_refill_data_t refill_data_o,
+    output hpdcache_word_t        refill_word_o,
+    output hpdcache_nline_t       refill_nline_o,
+    output logic                  refill_updt_rtab_o,
+
+    //          REFILL core response interface
+    output logic                  refill_core_rsp_valid_o,
+    output hpdcache_rsp_t         refill_core_rsp_o,
+    //      }}}
+
+    //      MEMORY interface
+    //      {{{
+    input  logic                  mem_req_ready_i,
+    output logic                  mem_req_valid_o,
+    output hpdcache_mem_req_t     mem_req_o,
+
+    output logic                  mem_resp_ready_o,
+    input  logic                  mem_resp_valid_i,
+    input  hpdcache_mem_resp_r_t  mem_resp_i
+    //      }}}
+);
+//  }}}
+
+    //  Declaration of constants and types
+    //  {{{
+    localparam int unsigned REFILL_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS;
+
+    typedef enum logic {
+        MISS_REQ_IDLE = 1'b0,
+        MISS_REQ_SEND = 1'b1
+    } miss_req_fsm_e;
+
+    typedef enum {
+        REFILL_IDLE,
+        REFILL_WRITE,
+        REFILL_WRITE_DIR
+    } refill_fsm_e;
+
+    typedef struct packed {
+        hpdcache_mem_error_e r_error;
+        hpdcache_mem_id_t    r_id;
+    } mem_resp_metadata_t;
+
+    function automatic mshr_set_t get_ack_mshr_set(hpdcache_mem_id_t id);
+        return id[0 +: HPDCACHE_MSHR_SET_WIDTH];  //  low ID bits; mem_req_id = {way, set}
+    endfunction
+
+    function automatic mshr_way_t get_ack_mshr_way(hpdcache_mem_id_t id);
+        return id[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_WAY_WIDTH];  //  bits above the set
+    endfunction
+    //  }}}
+
+    //  Declaration of internal signals and registers
+    //  {{{
+    miss_req_fsm_e           miss_req_fsm_q, miss_req_fsm_d;
+    mshr_way_t               mshr_alloc_way_q, mshr_alloc_way_d;
+    mshr_set_t               mshr_alloc_set_q, mshr_alloc_set_d;
+    mshr_tag_t               mshr_alloc_tag_q, mshr_alloc_tag_d;
+
+    refill_fsm_e             refill_fsm_q, refill_fsm_d;
+    hpdcache_set_t           refill_set_q;
+    hpdcache_tag_t           refill_tag_q;
+    hpdcache_way_vector_t    refill_way_q;
+    hpdcache_req_sid_t       refill_sid_q;
+    hpdcache_req_tid_t       refill_tid_q;
+    hpdcache_word_t          refill_cnt_q, refill_cnt_d;
+    logic                    refill_need_rsp_q;
+    logic                    refill_is_prefetch_q;
+    hpdcache_word_t          refill_core_rsp_word_q;
+    logic                    refill_way_bypass;
+
+    mem_resp_metadata_t      refill_fifo_resp_meta_wdata, refill_fifo_resp_meta_rdata;
+    logic                    refill_fifo_resp_meta_w, refill_fifo_resp_meta_wok;
+    logic                    refill_fifo_resp_meta_r, refill_fifo_resp_meta_rok;
+
+    logic                    refill_fifo_resp_data_w, refill_fifo_resp_data_wok;
+    hpdcache_refill_data_t   refill_fifo_resp_data_rdata;
+    logic                    refill_fifo_resp_data_r;
+
+    logic                    refill_core_rsp_valid;
+    hpdcache_req_data_t      refill_core_rsp_rdata;
+    hpdcache_req_sid_t       refill_core_rsp_sid;
+    hpdcache_req_tid_t       refill_core_rsp_tid;
+    logic                    refill_core_rsp_error;
+    hpdcache_word_t          refill_core_rsp_word;
+    hpdcache_rsp_t           refill_core_rsp;
+
+    logic                    refill_is_error;
+
+    logic                    mshr_alloc;
+    logic                    mshr_alloc_cs;
+    logic                    mshr_ack;
+    logic                    mshr_ack_cs;
+    mshr_set_t               mshr_ack_set;
+    mshr_way_t               mshr_ack_way;
+    hpdcache_nline_t         mshr_ack_nline;
+    hpdcache_req_sid_t       mshr_ack_src_id;
+    hpdcache_req_tid_t       mshr_ack_req_id;
+    hpdcache_word_t          mshr_ack_word;
+    logic                    mshr_ack_need_rsp;
+    logic                    mshr_ack_is_prefetch;
+    logic                    mshr_empty;
+    //  }}}
+
+    //  Miss Request FSM
+    //  {{{
+    always_comb
+    begin : miss_req_fsm_comb
+        mshr_alloc_ready_o = 1'b0;  //  defaults, overridden per state below
+        mshr_alloc         = 1'b0;
+        mshr_alloc_cs      = 1'b0;
+        mem_req_valid_o    = 1'b0;
+
+        miss_req_fsm_d     = miss_req_fsm_q;
+
+        case (miss_req_fsm_q)
+            MISS_REQ_IDLE: begin  //  ready for an allocation; forward it to the MSHR
+                mshr_alloc_ready_o = 1'b1;
+                mshr_alloc         = mshr_alloc_i;
+                mshr_alloc_cs      = mshr_alloc_cs_i;
+                if (mshr_alloc_i) begin
+                    miss_req_fsm_d = MISS_REQ_SEND;
+                end else begin
+                    miss_req_fsm_d = MISS_REQ_IDLE;
+                end
+            end
+            MISS_REQ_SEND: begin  //  keep the memory request valid until it is accepted
+                mem_req_valid_o = 1'b1;
+                if (mem_req_ready_i) begin
+                    miss_req_fsm_d = MISS_REQ_IDLE;
+                end else begin
+                    miss_req_fsm_d = MISS_REQ_SEND;
+                end
+            end
+        endcase
+    end
+
+    localparam hpdcache_uint REFILL_REQ_SIZE = $clog2(HPDcacheMemDataWidth/8);  //  log2(bytes)
+    localparam hpdcache_uint REFILL_REQ_LEN = HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth;
+
+    assign mem_req_o.mem_req_addr = {mshr_alloc_tag_q, mshr_alloc_set_q, {HPDCACHE_OFFSET_WIDTH{1'b0}} },
+           mem_req_o.mem_req_len = hpdcache_mem_len_t'(REFILL_REQ_LEN-1),  //  sent as count-1
+           mem_req_o.mem_req_size = hpdcache_mem_size_t'(REFILL_REQ_SIZE),
+           mem_req_o.mem_req_id = hpdcache_mem_id_t'({mshr_alloc_way_q, mshr_alloc_set_q}),
+           mem_req_o.mem_req_command = HPDCACHE_MEM_READ,
+           mem_req_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD,  //  NOTE(review): unused? confirm
+           mem_req_o.mem_req_cacheable = 1'b1;
+
+    always_ff @(posedge clk_i)  //  no reset term
+    begin : miss_req_fsm_internal_ff
+        if (mshr_alloc) begin  //  keep the allocated slot and tag to build mem_req_o
+            mshr_alloc_way_q <= mshr_alloc_way_d;
+            mshr_alloc_set_q <= mshr_alloc_set_d;
+            mshr_alloc_tag_q <= mshr_alloc_tag_d;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)  //  asynchronous, active-low reset
+    begin : miss_req_fsm_ff
+        if (!rst_ni) begin
+            miss_req_fsm_q <= MISS_REQ_IDLE;
+        end else begin
+            miss_req_fsm_q <= miss_req_fsm_d;
+        end
+    end
+    //  }}}
+
+    //  Refill FSM
+    //  {{{
+
+    //      ask permission to the refill arbiter if there is a pending refill
+    assign refill_req_valid_o  = refill_fsm_q == REFILL_IDLE ? refill_fifo_resp_meta_rok : 1'b0;
+
+    //      forward the victim way directly from the victim selection logic or
+    //      from the internal register
+    assign refill_victim_way_o = refill_way_bypass ? refill_victim_way_i : refill_way_q;
+
+    always_comb
+    begin : miss_resp_fsm_comb
+        automatic hpdcache_uint REFILL_LAST_CHUNK_WORD;  //  word offset of the last chunk
+        REFILL_LAST_CHUNK_WORD = HPDCACHE_CL_WORDS - HPDCACHE_ACCESS_WORDS;
+
+        refill_updt_plru_o      = 1'b0;  //  defaults, overridden per state below
+        refill_set_o            = '0;
+        refill_write_dir_o      = 1'b0;
+        refill_write_data_o     = 1'b0;
+        refill_updt_rtab_o      = 1'b0;
+        refill_cnt_d            = refill_cnt_q;
+        refill_way_bypass       = 1'b0;
+
+        refill_core_rsp_valid   = 1'b0;
+        refill_core_rsp_sid     = '0;
+        refill_core_rsp_tid     = '0;
+        refill_core_rsp_error   = 1'b0;
+        refill_core_rsp_word    = 0;
+
+        refill_fifo_resp_meta_r = 1'b0;
+        refill_fifo_resp_data_r = 1'b0;
+
+        mshr_ack_cs             = 1'b0;
+        mshr_ack                = 1'b0;
+
+        refill_fsm_d            = refill_fsm_q;
+
+        case (refill_fsm_q)
+            //  Wait for refill responses
+            //  {{{
+            REFILL_IDLE: begin
+                if (refill_fifo_resp_meta_rok) begin
+                    //  anticipate the activation of the MSHR independently of the grant signal from
+                    //  the refill arbiter. This is to avoid the introduction of unnecessary timing
+                    //  paths (however there could be a minor augmentation of the power
+                    //  consumption).
+                    mshr_ack_cs = 1'b1;
+
+                    //  if the permission is granted, start refilling
+                    if (refill_req_ready_i) begin
+                        refill_fsm_d = REFILL_WRITE;
+
+                        //  read the MSHR and reset the valid bit for the
+                        //  corresponding entry
+                        mshr_ack = 1'b1;
+
+                        //  initialize the counter for refill words
+                        refill_cnt_d = 0;
+                    end
+                end
+            end
+            //  }}}
+
+            //  Write refill data into the cache
+            //  {{{
+            REFILL_WRITE: begin
+                automatic logic is_prefetch;
+
+                //  Respond to the core (when needed)
+                if (refill_cnt_q == 0) begin  //  first chunk: use the MSHR outputs directly
+                    automatic hpdcache_uint _core_rsp_word;
+                    _core_rsp_word = hpdcache_uint'(mshr_ack_word)/HPDCACHE_ACCESS_WORDS;
+
+                    if (mshr_ack_need_rsp) begin
+                        refill_core_rsp_valid = (hpdcache_uint'(_core_rsp_word) == 0);
+                    end
+
+                    refill_core_rsp_sid = mshr_ack_src_id;
+                    refill_core_rsp_tid = mshr_ack_req_id;
+                    refill_core_rsp_error = refill_is_error;
+                    refill_core_rsp_word = hpdcache_word_t'(
+                        hpdcache_uint'(mshr_ack_word)/HPDCACHE_REQ_WORDS);
+                end else begin  //  later chunks: use the registered copies
+                    automatic hpdcache_uint _core_rsp_word;
+                    _core_rsp_word = hpdcache_uint'(refill_core_rsp_word_q)/
+                                     HPDCACHE_ACCESS_WORDS;
+
+                    if (refill_need_rsp_q) begin
+                        automatic hpdcache_uint _refill_cnt;
+                        _refill_cnt = hpdcache_uint'(refill_cnt_q)/HPDCACHE_ACCESS_WORDS;
+                        refill_core_rsp_valid = (_core_rsp_word == _refill_cnt);
+                    end
+
+                    refill_core_rsp_sid = refill_sid_q;
+                    refill_core_rsp_tid = refill_tid_q;
+                    refill_core_rsp_error = refill_is_error;
+                    refill_core_rsp_word = hpdcache_word_t'(
+                        hpdcache_uint'(refill_core_rsp_word_q)/HPDCACHE_REQ_WORDS);
+                end
+
+                //  Write the data in the cache data array
+                if (refill_cnt_q == 0) begin
+                    refill_set_o = mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH];
+                    refill_way_bypass = 1'b1;
+                    is_prefetch = mshr_ack_is_prefetch;
+                end else begin
+                    refill_set_o = refill_set_q;
+                    refill_way_bypass = 1'b0;
+                    is_prefetch = refill_is_prefetch_q;
+                end
+                refill_write_data_o = ~refill_is_error;
+
+                //  Consume chunk of data from the FIFO buffer in the memory interface
+                refill_fifo_resp_data_r = 1'b1;
+
+                //  Update directory on the last chunk of data
+                refill_cnt_d = refill_cnt_q + hpdcache_word_t'(HPDCACHE_ACCESS_WORDS);
+
+                if (hpdcache_uint'(refill_cnt_q) == REFILL_LAST_CHUNK_WORD) begin
+                    if (REFILL_LAST_CHUNK_WORD == 0) begin
+                        //  Special case: if the cache-line data can be written in a single cycle,
+                        //  wait an additional cycle to write the directory. This allows to prevent
+                        //  a RAM-to-RAM timing path between the MSHR and the DIR.
+                        refill_fsm_d = REFILL_WRITE_DIR;
+                    end else begin
+                        //  Write the new entry in the cache directory
+                        refill_write_dir_o  = ~refill_is_error;
+
+                        //  Update the PLRU bits. Only in the following cases:
+                        //  - There is no error in response AND
+                        //  - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR
+                        //  - It is a read miss.
+                        refill_updt_plru_o  =  ~refill_is_error &
+                                              (~is_prefetch | cfg_prefetch_updt_plru_i);
+
+                        //  Update dependency flags in the retry table
+                        refill_updt_rtab_o  = 1'b1;
+
+                        //  consume the response from the network
+                        refill_fifo_resp_meta_r = 1'b1;
+
+                        refill_fsm_d = REFILL_IDLE;
+                    end
+                end
+            end
+            //  }}}
+
+            //  Write cache directory (this state is only visited when ACCESS_WORDS == CL_WORDS,
+            //  this is when the entire cache-line can be written in a single cycle)
+            //  {{{
+            REFILL_WRITE_DIR: begin
+                automatic logic is_prefetch;
+                is_prefetch = refill_is_prefetch_q;
+
+                //  Select the target set and way
+                refill_set_o = refill_set_q;
+                refill_way_bypass = 1'b0;
+
+                //  Write the new entry in the cache directory
+                refill_write_dir_o  = ~refill_is_error;
+
+                //  Update the PLRU bits. Only in the following cases:
+                //  - There is no error in response AND
+                //  - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR
+                //  - It is a read miss.
+                refill_updt_plru_o  =  ~refill_is_error &
+                                      (~is_prefetch | cfg_prefetch_updt_plru_i);
+
+                //  Update dependency flags in the retry table
+                refill_updt_rtab_o  = 1'b1;
+
+                //  consume the response from the network
+                refill_fifo_resp_meta_r = 1'b1;
+
+                refill_fsm_d = REFILL_IDLE;
+            end
+            //  }}}
+
+            default: begin
+                // pragma translate_off
+                $error("Illegal state");
+                // pragma translate_on
+            end
+        endcase
+    end
+
+    assign  refill_is_error = (refill_fifo_resp_meta_rdata.r_error == HPDCACHE_MEM_RESP_NOK);
+
+    assign  refill_busy_o  = (refill_fsm_q != REFILL_IDLE),
+            refill_nline_o = {refill_tag_q, refill_set_q},
+            refill_word_o  = refill_cnt_q;
+
+    assign  mshr_ack_set = get_ack_mshr_set(refill_fifo_resp_meta_rdata.r_id),
+            mshr_ack_way = get_ack_mshr_way(refill_fifo_resp_meta_rdata.r_id);
+
+    assign  refill_dir_entry_o.tag      = refill_tag_q,
+            refill_dir_entry_o.reserved = '0;
+
+    assign  refill_core_rsp.rdata   = refill_core_rsp_rdata,
+            refill_core_rsp.sid     = refill_core_rsp_sid,
+            refill_core_rsp.tid     = refill_core_rsp_tid,
+            refill_core_rsp.error   = refill_core_rsp_error,
+            refill_core_rsp.aborted = 1'b0;
+
+    hpdcache_fifo_reg #(
+        .FIFO_DEPTH  (1),
+        .FEEDTHROUGH (HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH),
+        .fifo_data_t (hpdcache_rsp_t)
+    ) i_refill_core_rsp_buf(
+        .clk_i,
+        .rst_ni,
+        .w_i         (refill_core_rsp_valid),
+        .wok_o       (/*unused*/),
+        .wdata_i     (refill_core_rsp),
+        .r_i         (1'b1),  //  core shall always be ready to consume a response
+        .rok_o       (refill_core_rsp_valid_o),
+        .rdata_o     (refill_core_rsp_o)
+    );
+
+    generate
+        //  refill's width is bigger than the width of the core's interface
+        if (REFILL_REQ_RATIO > 1) begin : core_rsp_data_mux_gen
+            hpdcache_mux #(
+                .NINPUT      (REFILL_REQ_RATIO),
+                .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH)
+            ) data_read_rsp_mux_i(
+                .data_i      (refill_data_o),
+                .sel_i       (refill_core_rsp_word[0 +: $clog2(REFILL_REQ_RATIO)]),
+                .data_o      (refill_core_rsp_rdata)
+            );
+        end
+
+        //  refill's width is equal to the width of the core's interface
+        else begin
+            assign refill_core_rsp_rdata = refill_data_o;
+        end
+    endgenerate
+
+    /* FIXME: when multiple chunks, in case of error, the error bit is not
+     *        necessarily set on all chunks */
+    assign refill_fifo_resp_meta_wdata = '{
+        r_error: mem_resp_i.mem_resp_r_error,
+        r_id   : mem_resp_i.mem_resp_r_id
+    };
+
+    hpdcache_fifo_reg #(
+        .FIFO_DEPTH  (2),
+        .fifo_data_t (mem_resp_metadata_t)
+    ) i_r_metadata_fifo (
+        .clk_i,
+        .rst_ni,
+
+        .w_i    (refill_fifo_resp_meta_w),
+        .wok_o  (refill_fifo_resp_meta_wok),
+        .wdata_i(refill_fifo_resp_meta_wdata),
+
+        .r_i    (refill_fifo_resp_meta_r),
+        .rok_o  (refill_fifo_resp_meta_rok),
+        .rdata_o(refill_fifo_resp_meta_rdata)
+    );
+
+    generate
+        if (HPDcacheMemDataWidth < HPDCACHE_REFILL_DATA_WIDTH) begin  //  memory narrower
+            hpdcache_data_upsize #(
+                .WR_WIDTH(HPDcacheMemDataWidth),
+                .RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH),
+                .DEPTH(2*(HPDCACHE_CL_WIDTH/HPDCACHE_REFILL_DATA_WIDTH))
+            ) i_rdata_upsize (
+                .clk_i,
+                .rst_ni,
+
+                .w_i      (refill_fifo_resp_data_w),
+                .wlast_i  (mem_resp_i.mem_resp_r_last),
+                .wok_o    (refill_fifo_resp_data_wok),
+                .wdata_i  (mem_resp_i.mem_resp_r_data),
+
+                .r_i      (refill_fifo_resp_data_r),
+                .rok_o    (/* unused */),
+                .rdata_o  (refill_fifo_resp_data_rdata)
+            );
+        end else if (HPDcacheMemDataWidth > HPDCACHE_REFILL_DATA_WIDTH) begin  //  memory wider
+            hpdcache_data_downsize #(
+                .WR_WIDTH(HPDcacheMemDataWidth),
+                .RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH),
+                .DEPTH(2*(HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth))
+            ) i_rdata_downsize (
+                .clk_i,
+                .rst_ni,
+
+                .w_i      (refill_fifo_resp_data_w),
+                .wok_o    (refill_fifo_resp_data_wok),
+                .wdata_i  (mem_resp_i.mem_resp_r_data),
+
+                .r_i      (refill_fifo_resp_data_r),
+                .rok_o    (/* unused */),
+                .rdata_o  (refill_fifo_resp_data_rdata)
+            );
+        end else begin  //  equal widths: plain two-entry FIFO
+            hpdcache_fifo_reg #(
+                .FIFO_DEPTH  (2),
+                .fifo_data_t (hpdcache_refill_data_t)
+            ) i_rdata_fifo (
+                .clk_i,
+                .rst_ni,
+
+                .w_i      (refill_fifo_resp_data_w),
+                .wok_o    (refill_fifo_resp_data_wok),
+                .wdata_i  (mem_resp_i.mem_resp_r_data),
+
+                .r_i      (refill_fifo_resp_data_r),
+                .rok_o    (/* unused */),
+                .rdata_o  (refill_fifo_resp_data_rdata)
+            );
+        end
+    endgenerate
+
+    assign           refill_data_o = refill_fifo_resp_data_rdata;
+
+    assign refill_fifo_resp_data_w = mem_resp_valid_i &  //  data push
+                                     (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last),
+           refill_fifo_resp_meta_w = mem_resp_valid_i &  //  metadata push: last beat only
+                                     (refill_fifo_resp_data_wok &  mem_resp_i.mem_resp_r_last),
+                  mem_resp_ready_o = refill_fifo_resp_data_wok &  //  last beat needs both ok
+                                     (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last);
+
+    always_ff @(posedge clk_i or negedge rst_ni)  //  asynchronous, active-low reset
+    begin : miss_resp_fsm_ff
+        if (!rst_ni) begin
+            refill_fsm_q <= REFILL_IDLE;
+        end else begin
+            refill_fsm_q <= refill_fsm_d;
+        end
+    end
+
+    always_ff @(posedge clk_i)  //  no reset term
+    begin : miss_resp_fsm_internal_ff
+        if ((refill_fsm_q == REFILL_WRITE) && (refill_cnt_q == 0)) begin  //  first chunk
+            refill_set_q <= mshr_ack_nline[0                  +: HPDCACHE_SET_WIDTH];
+            refill_tag_q <= mshr_ack_nline[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];
+            refill_way_q <= refill_victim_way_i;
+            refill_sid_q <= mshr_ack_src_id;
+            refill_tid_q <= mshr_ack_req_id;
+            refill_need_rsp_q <= mshr_ack_need_rsp;
+            refill_is_prefetch_q <= mshr_ack_is_prefetch;
+            refill_core_rsp_word_q <= mshr_ack_word;
+        end
+        refill_cnt_q <= refill_cnt_d;  //  loaded every cycle; the FSM defaults refill_cnt_d to it
+    end
+    //  }}}
+
+    //  Miss Status Holding Register component
+    //  {{{
+    hpdcache_mshr hpdcache_mshr_i (
+        .clk_i,
+        .rst_ni,
+
+        .empty_o                  (mshr_empty),
+        .full_o                   (mshr_full_o),
+
+        .check_i                  (mshr_check_i),
+        .check_set_i              (mshr_check_set_i),
+        .check_tag_i              (mshr_check_tag_i),
+        .hit_o                    (mshr_check_hit_o),
+        .alloc_i                  (mshr_alloc),
+        .alloc_cs_i               (mshr_alloc_cs),
+        .alloc_nline_i            (mshr_alloc_nline_i),
+        .alloc_req_id_i           (mshr_alloc_tid_i),
+        .alloc_src_id_i           (mshr_alloc_sid_i),
+        .alloc_word_i             (mshr_alloc_word_i),
+        .alloc_need_rsp_i         (mshr_alloc_need_rsp_i),
+        .alloc_is_prefetch_i      (mshr_alloc_is_prefetch_i),
+        .alloc_full_o             (mshr_alloc_full_o),
+        .alloc_set_o              (mshr_alloc_set_d),
+        .alloc_tag_o              (mshr_alloc_tag_d),
+        .alloc_way_o              (mshr_alloc_way_d),
+
+        .ack_i                    (mshr_ack),
+        .ack_cs_i                 (mshr_ack_cs),
+        .ack_set_i                (mshr_ack_set),
+        .ack_way_i                (mshr_ack_way),
+        .ack_req_id_o             (mshr_ack_req_id),
+        .ack_src_id_o             (mshr_ack_src_id),
+        .ack_nline_o              (mshr_ack_nline),
+        .ack_word_o               (mshr_ack_word),
+        .ack_need_rsp_o           (mshr_ack_need_rsp),
+        .ack_is_prefetch_o        (mshr_ack_is_prefetch)
+    );
+
+    //    Indicate to the cache controller that there is no pending miss. That
+    //    is, when the MSHR is empty and the miss handler has finished
+    //    processing the last miss response.
+    assign mshr_empty_o = mshr_empty & ~refill_busy_o;
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    initial assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_SET_WIDTH + HPDCACHE_MSHR_WAY_WIDTH)) else
+            $error("miss_handler: not enough ID bits in the memory interface");
+    //  pragma translate_on
+    //  }}}
+
+endmodule
+//  }}}
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv
new file mode 100644
index 0000000..f63e408
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv
@@ -0,0 +1,385 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Miss Status Holding Register (MSHR)
+ *  History       :
+ */
+module hpdcache_mshr
+import hpdcache_pkg::*;
+    //  Ports
+    //  {{{
+(
+    //  Clock and reset signals
+    input  logic              clk_i,
+    input  logic              rst_ni,              //  asynchronous, active-low reset
+
+    //  Global control signals
+    output logic              empty_o,             //  no MSHR slot is valid
+    output logic              full_o,              //  every MSHR slot is valid
+
+    //  Check and allocation interface
+    input  logic              check_i,
+    input  mshr_set_t         check_set_i,
+    input  mshr_tag_t         check_tag_i,
+    output logic              hit_o,               //  a valid way of the registered check set holds check_tag_i
+    input  logic              alloc_i,
+    input  logic              alloc_cs_i,
+    input  hpdcache_nline_t   alloc_nline_i,
+    input  hpdcache_req_tid_t alloc_req_id_i,
+    input  hpdcache_req_sid_t alloc_src_id_i,
+    input  hpdcache_word_t    alloc_word_i,
+    input  logic              alloc_need_rsp_i,
+    input  logic              alloc_is_prefetch_i,
+    output logic              alloc_full_o,        //  no free way in the registered check set
+    output mshr_set_t         alloc_set_o,
+    output mshr_tag_t         alloc_tag_o,
+    output mshr_way_t         alloc_way_o,         //  lowest free way of the allocation set (0 if none)
+
+    //  Acknowledge interface
+    input  logic              ack_i,
+    input  logic              ack_cs_i,
+    input  mshr_set_t         ack_set_i,
+    input  mshr_way_t         ack_way_i,
+    output hpdcache_req_tid_t ack_req_id_o,
+    output hpdcache_req_sid_t ack_src_id_o,
+    output hpdcache_nline_t   ack_nline_o,
+    output hpdcache_word_t    ack_word_o,
+    output logic              ack_need_rsp_o,
+    output logic              ack_is_prefetch_o
+);
+    //  }}}
+
+    //  Definition of constants and types
+    //  {{{
+    typedef struct packed {
+        mshr_tag_t         tag;
+        hpdcache_req_tid_t req_id;
+        hpdcache_req_sid_t src_id;
+        hpdcache_word_t    word_idx;
+        logic              need_rsp;
+        logic              is_prefetch;
+    } mshr_entry_t;
+
+
+    //  Width of the MSHR RAM entries: rounded up to a multiple of 8 bits when
+    //  the RAM uses write byte enables, exact entry width otherwise (write bitmask)
+    localparam int unsigned HPDCACHE_MSHR_ENTRY_BITS = $bits(mshr_entry_t);
+
+    localparam int unsigned HPDCACHE_MSHR_RAM_ENTRY_BITS =
+            HPDCACHE_MSHR_RAM_WBYTEENABLE ?
+                    ((HPDCACHE_MSHR_ENTRY_BITS + 7)/8) * 8 : // align to 8 bits
+                      HPDCACHE_MSHR_ENTRY_BITS;              // or use the exact number of bits
+
+    typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_data_t;
+    //  }}}
+
+    //  Definition of internal wires and registers
+    //  {{{
+    logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_q, mshr_valid_d;
+    mshr_set_t     check_set_q;
+    mshr_set_t     alloc_set;
+    mshr_tag_t     alloc_tag;
+    hpdcache_set_t alloc_dcache_set;
+    mshr_way_t     ack_way_q;
+    mshr_set_t     ack_set_q;
+    hpdcache_set_t ack_dcache_set;
+    hpdcache_tag_t ack_dcache_tag;
+
+    logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_set, mshr_valid_rst;
+    mshr_entry_t             [HPDCACHE_MSHR_WAYS-1:0] mshr_wentry;
+    mshr_sram_data_t         [HPDCACHE_MSHR_WAYS-1:0] mshr_wdata;
+    mshr_entry_t             [HPDCACHE_MSHR_WAYS-1:0] mshr_rentry;
+    mshr_sram_data_t         [HPDCACHE_MSHR_WAYS-1:0] mshr_rdata;
+
+    logic mshr_we;
+    logic mshr_cs;
+    mshr_set_t  mshr_addr;
+    logic check;
+    //  }}}
+
+    //  Control part for the allocation and check operations
+    //  {{{
+
+    //    The allocation operation has priority over the check operation
+    assign check            = check_i & ~alloc_i;
+
+    assign alloc_set        = alloc_nline_i[0                       +: HPDCACHE_MSHR_SET_WIDTH],
+           alloc_tag        = alloc_nline_i[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH],
+           alloc_dcache_set = alloc_nline_i[0                       +: HPDCACHE_SET_WIDTH];
+
+    //  Select the lowest-numbered free way of the allocation set (way 0 if none is free)
+    always_comb
+    begin
+        automatic mshr_way_t found_available_way;
+
+        found_available_way = 0;
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+            if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(alloc_set)]) begin
+                found_available_way = mshr_way_t'(i);
+                break;
+            end
+        end
+        alloc_way_o = found_available_way;
+    end
+
+    //  Report whether the set registered by the last check (check_set_q) has no free way left
+    always_comb
+    begin
+        automatic bit found_available;
+
+        found_available = 1'b0;
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+            if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(check_set_q)]) begin
+                found_available = 1'b1;
+                break;
+            end
+        end
+        alloc_full_o = ~found_available;
+    end
+
+    assign alloc_set_o = alloc_set,
+           alloc_tag_o = alloc_tag;
+
+    //  Write when there is an allocation operation
+    assign mshr_we = alloc_i;
+
+    //  HPDcache SET to MSHR SET translation table
+    hpdcache_mshr_to_cache_set trlt_i (
+        .clk_i,
+        .write_i              (mshr_we),
+        .write_dcache_set_i   (alloc_dcache_set),
+        .write_mshr_way_i     (alloc_way_o),
+        .read_mshr_set_i      (ack_set_q),
+        .read_mshr_way_i      (ack_way_q),
+        .read_dcache_set_o    (ack_dcache_set)
+    );
+
+    //  Generate the write data: the same entry is presented to every way, the
+    //  way actually written being selected by the write mask (or byte enable)
+    always_comb
+    begin
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+            mshr_wentry[i].tag = alloc_tag;
+            mshr_wentry[i].req_id = alloc_req_id_i;
+            mshr_wentry[i].src_id = alloc_src_id_i;
+            mshr_wentry[i].word_idx = alloc_word_i;
+            mshr_wentry[i].need_rsp = alloc_need_rsp_i;
+            mshr_wentry[i].is_prefetch = alloc_is_prefetch_i;
+        end
+    end
+    //  }}}
+
+    //  Shared control signals
+    //  {{{
+    assign mshr_cs   = check_i | alloc_cs_i | ack_cs_i;
+    assign mshr_addr =  ack_i   ? ack_set_i :                     //  ack first, then alloc,
+                       (alloc_i ? alloc_set : check_set_i);       //  check by default
+
+    always_comb
+    begin : mshr_valid_comb
+        automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_alloc_slot;
+        automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_ack_slot;
+
+        mshr_alloc_slot = {alloc_way_o, alloc_set}; //  flat slot index, way in the MSBs
+        mshr_ack_slot   = {  ack_way_i, ack_set_i}; //  flat slot index, way in the MSBs
+        //  NOTE(review): equals way*HPDCACHE_MSHR_SETS + set only for power-of-two MSHR sets - confirm
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS; i++) begin
+            mshr_valid_rst[i] = (i ==   hpdcache_uint'(mshr_ack_slot)) ? ack_i   : 1'b0;
+            mshr_valid_set[i] = (i == hpdcache_uint'(mshr_alloc_slot)) ? alloc_i : 1'b0;
+        end
+    end
+    assign mshr_valid_d   = (~mshr_valid_q & mshr_valid_set) | (mshr_valid_q & ~mshr_valid_rst);
+    //  }}}
+
+    //  Read interface (ack)
+    //  {{{
+    generate
+        //  extract HPDcache tag from the MSb of the MSHR TAG
+        if (HPDCACHE_SETS >= HPDCACHE_MSHR_SETS) begin : ack_dcache_set_ge_mshr_set_gen
+            assign ack_dcache_tag = mshr_rentry[ack_way_q].tag[
+                    HPDCACHE_MSHR_TAG_WIDTH - 1               :
+                    HPDCACHE_MSHR_TAG_WIDTH - HPDCACHE_TAG_WIDTH];
+        end
+
+        //  extract HPDcache tag from MSb of the MSHR set concatenated with the MSHR tag
+        else begin : ack_dcache_set_lt_mshr_set_gen
+            assign ack_dcache_tag = {
+                    mshr_rentry[ack_way_q].tag                           ,
+                    ack_set_q[HPDCACHE_MSHR_SET_WIDTH - 1:HPDCACHE_SET_WIDTH]};
+        end
+    endgenerate
+
+    assign ack_req_id_o      = mshr_rentry[ack_way_q].req_id,
+           ack_src_id_o      = mshr_rentry[ack_way_q].src_id,
+           ack_nline_o       = {ack_dcache_tag, ack_dcache_set},
+           ack_word_o        = mshr_rentry[ack_way_q].word_idx,
+           ack_need_rsp_o    = mshr_rentry[ack_way_q].need_rsp,
+           ack_is_prefetch_o = mshr_rentry[ack_way_q].is_prefetch;
+    //  }}}
+
+    //  Global control signals
+    //  {{{
+    assign empty_o  = ~|mshr_valid_q;
+    assign full_o   =  &mshr_valid_q;
+
+    always_comb
+    begin : hit_comb
+        automatic bit [HPDCACHE_MSHR_WAYS-1:0] __hit_way;
+
+        for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
+            automatic bit __valid;
+            automatic bit __match;
+            __valid = mshr_valid_q[w*HPDCACHE_MSHR_SETS + int'(check_set_q)]; //  slot (w, check_set_q) is in use
+            __match = (mshr_rentry[w].tag == check_tag_i);                    //  tag read from the RAM equals the checked tag
+            __hit_way[w] = (__valid && __match);
+        end
+
+        hit_o = |__hit_way;
+    end
+    //  }}}
+
+    //  Internal state assignment
+    //  {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : mshr_ff_set
+        if (!rst_ni) begin
+            mshr_valid_q <= '0;
+            ack_way_q    <= '0;
+            ack_set_q    <= '0;
+            check_set_q  <= '0;
+        end else begin
+            mshr_valid_q <= mshr_valid_d;
+            if (ack_i) begin //  remember the acknowledged slot for the ack_*_o read-out
+                ack_way_q   <= ack_way_i;
+                ack_set_q   <= ack_set_i;
+            end
+            if (check) begin //  remember the checked set for hit_o and alloc_full_o
+                check_set_q <= check_set_i;
+            end
+        end
+    end
+    //  }}}
+
+    //  Internal components
+    //  {{{
+    generate
+        if (HPDCACHE_MSHR_RAM_WBYTEENABLE) begin : mshr_wbyteenable_gen
+            typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS/8-1:0] mshr_sram_wbyteenable_t;
+            mshr_sram_wbyteenable_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wbyteenable;
+
+            always_comb
+            begin : mshr_wbyteenable_comb
+                for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+                    mshr_wbyteenable[i] = (int'(alloc_way_o) == i) ? '1 : '0; //  enable every byte of the allocated way only
+                end
+            end
+
+            if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen
+                hpdcache_regbank_wbyteenable_1rw #(
+                    .DATA_SIZE     (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
+                    .ADDR_SIZE     (HPDCACHE_MSHR_SET_WIDTH)
+                ) mshr_mem(
+                    .clk           (clk_i),
+                    .rst_n         (rst_ni),
+                    .cs            (mshr_cs),
+                    .we            (mshr_we),
+                    .addr          (mshr_addr),
+                    .wbyteenable   (mshr_wbyteenable),
+                    .wdata         (mshr_wdata),
+                    .rdata         (mshr_rdata)
+                );
+            end else begin : mshr_sram_gen
+                hpdcache_sram_wbyteenable #(
+                    .DATA_SIZE     (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
+                    .ADDR_SIZE     (HPDCACHE_MSHR_SET_WIDTH)
+                ) mshr_mem(
+                    .clk           (clk_i),
+                    .rst_n         (rst_ni),
+                    .cs            (mshr_cs),
+                    .we            (mshr_we),
+                    .addr          (mshr_addr),
+                    .wbyteenable   (mshr_wbyteenable),
+                    .wdata         (mshr_wdata),
+                    .rdata         (mshr_rdata)
+                );
+            end
+        end else begin : mshr_wmask_gen
+            typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_wmask_t;
+            mshr_sram_wmask_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wmask;
+
+            always_comb
+            begin : mshr_wmask_comb
+                for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+                    mshr_wmask[i] = (int'(alloc_way_o) == i) ? '1 : '0; //  enable every bit of the allocated way only
+                end
+            end
+
+            if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen
+                hpdcache_regbank_wmask_1rw #(
+                    .DATA_SIZE     (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
+                    .ADDR_SIZE     (HPDCACHE_MSHR_SET_WIDTH)
+                ) mshr_mem(
+                    .clk           (clk_i),
+                    .rst_n         (rst_ni),
+                    .cs            (mshr_cs),
+                    .we            (mshr_we),
+                    .addr          (mshr_addr),
+                    .wmask         (mshr_wmask),
+                    .wdata         (mshr_wdata),
+                    .rdata         (mshr_rdata)
+                );
+            end else begin : mshr_sram_gen
+                hpdcache_sram_wmask #(
+                    .DATA_SIZE     (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
+                    .ADDR_SIZE     (HPDCACHE_MSHR_SET_WIDTH)
+                ) mshr_mem(
+                    .clk           (clk_i),
+                    .rst_n         (rst_ni),
+                    .cs            (mshr_cs),
+                    .we            (mshr_we),
+                    .addr          (mshr_addr),
+                    .wmask         (mshr_wmask),
+                    .wdata         (mshr_wdata),
+                    .rdata         (mshr_rdata)
+                );
+            end
+        end
+    endgenerate
+
+    always_comb
+    begin : ram_word_fitting_comb
+        for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
+            mshr_wdata[i]  = mshr_sram_data_t'(mshr_wentry[i]);                           //  resize the entry to the RAM word width
+            mshr_rentry[i] = mshr_entry_t'(mshr_rdata[i][0 +: HPDCACHE_MSHR_ENTRY_BITS]); //  keep only the entry bits of the RAM word
+        end
+    end
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    one_command_assert: assert property (@(posedge clk_i)
+            (ack_i -> !(alloc_i || check_i))) else
+            $error("MSHR: ack with concurrent alloc or check");
+    //  pragma translate_on
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv
new file mode 100644
index 0000000..3dc8b73
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv
@@ -0,0 +1,105 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache MSHR set translation table
+ *  History       :
+ */
+module hpdcache_mshr_to_cache_set
+import hpdcache_pkg::*;
+//  Ports
+//  {{{
+(
+    //  Clock (the table has no reset)
+    input  logic          clk_i,
+
+    //  Write port: records the HPDcache set of a MSHR slot
+    input  logic          write_i,
+    input  hpdcache_set_t write_dcache_set_i,
+    input  mshr_way_t     write_mshr_way_i,
+
+    //  Read port: combinational lookup of the HPDcache set of a MSHR slot
+    input  mshr_way_t     read_mshr_way_i,
+    input  mshr_set_t     read_mshr_set_i,
+    output hpdcache_set_t read_dcache_set_o
+);
+//  }}}
+    //  Rebuilds the HPDcache set index from a MSHR (set, way) pair
+
+    generate
+        //  Case 1: more HPDcache sets than MSHR sets
+        //  The MSHR set only carries the low-order bits of the HPDcache set, so
+        //  the remaining high-order bits are kept in a flip-flop table
+        //  {{{
+        if (HPDCACHE_SETS > HPDCACHE_MSHR_SETS) begin : hpdcache_sets_gt_mshr_sets_gen
+            //  Number of HPDcache set bits that are not part of the MSHR set
+            localparam hpdcache_uint TRLT_TAB_ENTRY_WIDTH =
+                    HPDCACHE_SET_WIDTH - HPDCACHE_MSHR_SET_WIDTH;
+            typedef logic [TRLT_TAB_ENTRY_WIDTH-1:0] trlt_entry_t;
+
+            //  Translation table: one entry per MSHR slot, indexed [set][way]
+            trlt_entry_t [HPDCACHE_MSHR_SETS-1:0][HPDCACHE_MSHR_WAYS-1:0] tab;
+
+            //  Write-side and read-side views of a table entry
+            mshr_set_t   wr_mshr_set;
+            trlt_entry_t wr_entry;
+            trlt_entry_t rd_entry;
+
+            //  Write operation
+            //  {{{
+            //      Split the written HPDcache set: the low-order bits select
+            //      the table row, the high-order bits are the stored payload
+            assign wr_mshr_set = write_dcache_set_i[0 +: HPDCACHE_MSHR_SET_WIDTH];
+            assign wr_entry    = write_dcache_set_i[HPDCACHE_MSHR_SET_WIDTH +:
+                                                    TRLT_TAB_ENTRY_WIDTH];
+
+            //      Update the addressed entry only when a write is requested
+            //      (the table is not reset)
+            always_ff @(posedge clk_i)
+                if (write_i) tab[wr_mshr_set][write_mshr_way_i] <= wr_entry;
+            //  }}}
+
+            //  Read operation
+            //  {{{
+            //      Append the MSHR set below the stored high-order bits to
+            //      rebuild the complete HPDcache set
+            assign rd_entry          = tab[read_mshr_set_i][read_mshr_way_i];
+            assign read_dcache_set_o = {rd_entry, read_mshr_set_i};
+            //  }}}
+        end
+        //  }}}
+
+        //  Case 2: at most as many HPDcache sets as MSHR sets
+        //  No table is needed: the read MSHR set is simply cast to the
+        //  HPDcache set type
+        //  {{{
+        else begin : hpdcache_sets_le_mshr_sets_gen
+           assign read_dcache_set_o = hpdcache_set_t'(read_mshr_set_i);
+        end
+        //  }}}
+    endgenerate
+
+//  Assertions
+//  {{{
+//  pragma translate_off
+//  pragma translate_on
+//  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv
new file mode 100755
index 0000000..8a96a16
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv
@@ -0,0 +1,623 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Write-Through (WT), High-Throughput (HTPUT) HPDcache Package
+ *  History       :
+ */
+package hpdcache_pkg;
+    //  Definition of global constants for the HPDcache data and directory
+    //  {{{
+
+    //  HPDcache physical address width (bits)
+    localparam int unsigned HPDCACHE_PA_WIDTH = hpdcache_params_pkg::PARAM_PA_WIDTH;
+
+    //  HPDcache number of sets
+    localparam int unsigned HPDCACHE_SETS = hpdcache_params_pkg::PARAM_SETS;
+
+    //  HPDcache number of ways
+    localparam int unsigned HPDCACHE_WAYS = hpdcache_params_pkg::PARAM_WAYS;
+
+    //  HPDcache word width (bits)
+    localparam int unsigned HPDCACHE_WORD_WIDTH = hpdcache_params_pkg::PARAM_WORD_WIDTH;
+
+    //  HPDcache number of words per cache-line
+    localparam int unsigned HPDCACHE_CL_WORDS = hpdcache_params_pkg::PARAM_CL_WORDS;
+
+    //  HPDcache number of words in the request data channels (request and response)
+    localparam int unsigned HPDCACHE_REQ_WORDS = hpdcache_params_pkg::PARAM_REQ_WORDS;
+
+    //  HPDcache request transaction ID width (bits)
+    localparam int unsigned HPDCACHE_REQ_TRANS_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_TRANS_ID_WIDTH;
+
+    //  HPDcache request source ID width (bits)
+    localparam int unsigned HPDCACHE_REQ_SRC_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_SRC_ID_WIDTH;
+    //  }}}
+
+    //  Utility definitions
+    //  {{{
+    typedef logic unsigned [31:0] hpdcache_uint;
+    typedef logic signed   [31:0] hpdcache_int;
+    typedef logic unsigned [31:0] hpdcache_uint32;
+    typedef logic signed   [31:0] hpdcache_int32;
+    typedef logic unsigned [63:0] hpdcache_uint64;
+    typedef logic signed   [63:0] hpdcache_int64;
+    //  }}}
+
+    //  Definition of constants and types for HPDcache directory memory
+    //  {{{
+    localparam int unsigned HPDCACHE_CL_WIDTH       = HPDCACHE_CL_WORDS*HPDCACHE_WORD_WIDTH;
+    localparam int unsigned HPDCACHE_OFFSET_WIDTH   = $clog2(HPDCACHE_CL_WIDTH/8);
+    localparam int unsigned HPDCACHE_NLINE_WIDTH    = HPDCACHE_PA_WIDTH - HPDCACHE_OFFSET_WIDTH;
+    localparam int unsigned HPDCACHE_SET_WIDTH      = $clog2(HPDCACHE_SETS);
+    localparam int unsigned HPDCACHE_TAG_WIDTH      = HPDCACHE_NLINE_WIDTH - HPDCACHE_SET_WIDTH;
+    localparam int unsigned HPDCACHE_WORD_IDX_WIDTH = $clog2(HPDCACHE_CL_WORDS);
+
+    typedef logic unsigned [  HPDCACHE_OFFSET_WIDTH-1:0] hpdcache_offset_t;
+    typedef logic unsigned [   HPDCACHE_NLINE_WIDTH-1:0] hpdcache_nline_t;
+    typedef logic unsigned [     HPDCACHE_SET_WIDTH-1:0] hpdcache_set_t;
+    typedef logic unsigned [     HPDCACHE_TAG_WIDTH-1:0] hpdcache_tag_t;
+    typedef logic unsigned [  $clog2(HPDCACHE_WAYS)-1:0] hpdcache_way_t;
+    typedef logic unsigned [          HPDCACHE_WAYS-1:0] hpdcache_way_vector_t;
+    typedef logic unsigned [HPDCACHE_WORD_IDX_WIDTH-1:0] hpdcache_word_t;
+
+    typedef struct packed {
+        hpdcache_tag_t  tag;
+        logic [1:0]     reserved;
+    } hpdcache_dir_entry_t;
+
+    localparam int unsigned HPDCACHE_DIR_RAM_WIDTH       = $bits(hpdcache_dir_entry_t);
+    localparam int unsigned HPDCACHE_DIR_RAM_DEPTH       = HPDCACHE_SETS;
+    localparam int unsigned HPDCACHE_DIR_RAM_ADDR_WIDTH  = $clog2(HPDCACHE_DIR_RAM_DEPTH);
+
+    typedef logic [HPDCACHE_DIR_RAM_ADDR_WIDTH-1:0] hpdcache_dir_addr_t;
+
+    function automatic hpdcache_way_t hpdcache_way_vector_to_index(input hpdcache_way_vector_t way);
+        //  Index of the lowest set bit of the way vector (0 when no bit is set)
+        hpdcache_way_vector_to_index = '0;
+        for (int i = HPDCACHE_WAYS - 1; i >= 0; i--)
+            if (way[i]) hpdcache_way_vector_to_index = hpdcache_way_t'(i);
+    endfunction
+
+    //  }}}
+
+    //  Definition of constants and types for HPDcache data memory
+    //  {{{
+    localparam int unsigned HPDCACHE_DATA_WAYS_PER_RAM_WORD =
+        hpdcache_params_pkg::PARAM_DATA_WAYS_PER_RAM_WORD;
+
+    localparam int unsigned HPDCACHE_DATA_SETS_PER_RAM = /* FIXME this parameter is currently ignored */
+        hpdcache_params_pkg::PARAM_DATA_SETS_PER_RAM;
+
+    //  HPDcache DATA RAM implements write byte enable
+    localparam bit HPDCACHE_DATA_RAM_WBYTEENABLE =
+        hpdcache_params_pkg::PARAM_DATA_RAM_WBYTEENABLE;
+
+    //  Define the number of memory contiguous words that can be accessed
+    //  simultaneously from the cache.
+    //  -  This limits the maximum width for the data channel from requesters
+    //  -  This impacts the refill latency
+    localparam int unsigned HPDCACHE_ACCESS_WORDS = hpdcache_params_pkg::PARAM_ACCESS_WORDS;
+
+
+    localparam int unsigned HPDCACHE_DATA_RAM_WIDTH        =
+            HPDCACHE_DATA_WAYS_PER_RAM_WORD*HPDCACHE_WORD_WIDTH;
+    localparam int unsigned HPDCACHE_DATA_RAM_Y_CUTS       = HPDCACHE_WAYS/HPDCACHE_DATA_WAYS_PER_RAM_WORD;
+    localparam int unsigned HPDCACHE_DATA_RAM_X_CUTS       = HPDCACHE_ACCESS_WORDS;
+    localparam int unsigned HPDCACHE_DATA_RAM_ACCESS_WIDTH = HPDCACHE_ACCESS_WORDS*HPDCACHE_WORD_WIDTH;
+    localparam int unsigned HPDCACHE_DATA_RAM_ENTR_PER_SET = HPDCACHE_CL_WORDS/HPDCACHE_ACCESS_WORDS;
+    localparam int unsigned HPDCACHE_DATA_RAM_DEPTH        = HPDCACHE_SETS*HPDCACHE_DATA_RAM_ENTR_PER_SET;
+    localparam int unsigned HPDCACHE_DATA_RAM_ADDR_WIDTH   = $clog2(HPDCACHE_DATA_RAM_DEPTH);
+
+    typedef logic [                     HPDCACHE_WORD_WIDTH-1:0]      hpdcache_data_word_t;
+    typedef logic [                   HPDCACHE_WORD_WIDTH/8-1:0]      hpdcache_data_be_t;
+    typedef logic [        $clog2(HPDCACHE_DATA_RAM_Y_CUTS)-1:0]      hpdcache_data_ram_row_idx_t;
+    typedef logic [ $clog2(HPDCACHE_DATA_WAYS_PER_RAM_WORD)-1:0]      hpdcache_data_ram_way_idx_t;
+
+    typedef logic [HPDCACHE_DATA_RAM_ADDR_WIDTH-1:0]                  hpdcache_data_ram_addr_t;
+    typedef hpdcache_data_word_t[HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_data_t;
+    typedef hpdcache_data_be_t  [HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_be_t;
+
+    typedef hpdcache_data_ram_data_t
+        [HPDCACHE_DATA_RAM_Y_CUTS-1:0]
+        [HPDCACHE_DATA_RAM_X_CUTS-1:0]
+        hpdcache_data_entry_t;
+
+    typedef hpdcache_data_ram_be_t
+        [HPDCACHE_DATA_RAM_Y_CUTS-1:0]
+        [HPDCACHE_DATA_RAM_X_CUTS-1:0]
+        hpdcache_data_be_entry_t;
+
+    typedef logic
+        [HPDCACHE_DATA_RAM_X_CUTS-1:0]
+        hpdcache_data_row_enable_t;
+
+    typedef hpdcache_data_row_enable_t
+        [HPDCACHE_DATA_RAM_Y_CUTS-1:0]
+        hpdcache_data_enable_t;
+
+    typedef hpdcache_data_ram_addr_t
+        [HPDCACHE_DATA_RAM_Y_CUTS-1:0]
+        [HPDCACHE_DATA_RAM_X_CUTS-1:0]
+        hpdcache_data_addr_t;
+    //  }}}
+
+    //  Definition of interface with miss handler
+    //  {{{
+    localparam int unsigned HPDCACHE_REFILL_DATA_WIDTH = HPDCACHE_DATA_RAM_ACCESS_WIDTH;
+
+    //    Use feedthrough FIFOs from the refill handler to the core. This
+    //    reduces the latency (by one cycle) but adds an additional timing path
+    localparam bit HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH =
+        hpdcache_params_pkg::PARAM_REFILL_CORE_RSP_FEEDTHROUGH;
+
+    typedef hpdcache_data_word_t[HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_data_t;
+    typedef hpdcache_data_be_t  [HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_be_t;
+    //  }}}
+
+    //  Definition of interface with requesters
+    //  {{{
+    localparam int unsigned HPDCACHE_REQ_DATA_WIDTH = HPDCACHE_REQ_WORDS*HPDCACHE_WORD_WIDTH;
+    localparam int unsigned HPDCACHE_REQ_DATA_BYTES = HPDCACHE_REQ_DATA_WIDTH/8;
+    localparam int unsigned HPDCACHE_REQ_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_WORDS);
+    localparam int unsigned HPDCACHE_REQ_BYTE_OFFSET_WIDTH = $clog2(HPDCACHE_REQ_DATA_BYTES);
+    localparam int unsigned HPDCACHE_REQ_OFFSET_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_TAG_WIDTH;
+
+    typedef logic                       [HPDCACHE_PA_WIDTH-1:0] hpdcache_req_addr_t;
+    typedef logic               [HPDCACHE_REQ_OFFSET_WIDTH-1:0] hpdcache_req_offset_t;
+    typedef hpdcache_data_word_t       [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_data_t;
+    typedef hpdcache_data_be_t         [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_be_t;
+    typedef logic                                         [2:0] hpdcache_req_size_t;
+    typedef logic               [HPDCACHE_REQ_SRC_ID_WIDTH-1:0] hpdcache_req_sid_t;
+    typedef logic             [HPDCACHE_REQ_TRANS_ID_WIDTH-1:0] hpdcache_req_tid_t;
+
+    //      Definition of operation codes
+    //      {{{
+    typedef enum logic [3:0] {
+        HPDCACHE_REQ_LOAD                 = 4'h0,
+        HPDCACHE_REQ_STORE                = 4'h1,
+        // RESERVED                     = 4'h2,
+        // RESERVED                     = 4'h3,
+        HPDCACHE_REQ_AMO_LR               = 4'h4,
+        HPDCACHE_REQ_AMO_SC               = 4'h5,
+        HPDCACHE_REQ_AMO_SWAP             = 4'h6,
+        HPDCACHE_REQ_AMO_ADD              = 4'h7,
+        HPDCACHE_REQ_AMO_AND              = 4'h8,
+        HPDCACHE_REQ_AMO_OR               = 4'h9,
+        HPDCACHE_REQ_AMO_XOR              = 4'ha,
+        HPDCACHE_REQ_AMO_MAX              = 4'hb,
+        HPDCACHE_REQ_AMO_MAXU             = 4'hc,
+        HPDCACHE_REQ_AMO_MIN              = 4'hd,
+        HPDCACHE_REQ_AMO_MINU             = 4'he,
+        HPDCACHE_REQ_CMO                  = 4'hf
+    } hpdcache_req_op_t;
+    //      }}}
+
+    //      Definition of CMO codes
+    //      {{{
+    typedef enum hpdcache_req_size_t {
+        HPDCACHE_REQ_CMO_FENCE            = 3'h0,
+        // RESERVED                     = 3'h1,
+        HPDCACHE_REQ_CMO_INVAL_NLINE      = 3'h2,
+        HPDCACHE_REQ_CMO_INVAL_SET_WAY    = 3'h3,
+        HPDCACHE_REQ_CMO_INVAL_ALL        = 3'h4,
+        HPDCACHE_REQ_CMO_PREFETCH         = 3'h5
+    } hpdcache_req_cmo_t;
+    //      }}}
+
+    //      Definition of PMA flags
+    //      {{{
+    typedef struct packed
+    {
+        logic uncacheable;
+        logic io; //  FIXME: for future use
+    } hpdcache_pma_t;
+    //      }}}
+
+    //      Definition of interfaces
+    //      {{{
+    //          Request Interface
+    typedef struct packed
+    {
+        hpdcache_req_offset_t addr_offset;
+        hpdcache_req_data_t   wdata;
+        hpdcache_req_op_t     op;
+        hpdcache_req_be_t     be;
+        hpdcache_req_size_t   size;
+        hpdcache_req_sid_t    sid;
+        hpdcache_req_tid_t    tid;
+        logic                 need_rsp;
+
+        //  only valid in case of physically indexed requests
+        logic                 phys_indexed;
+        hpdcache_tag_t        addr_tag;
+        hpdcache_pma_t        pma;
+    } hpdcache_req_t;
+
+    //          Response Interface
+    typedef struct packed
+    {
+        hpdcache_req_data_t   rdata;
+        hpdcache_req_sid_t    sid;
+        hpdcache_req_tid_t    tid;
+        logic                 error;
+        logic                 aborted;
+    } hpdcache_rsp_t;
+    //      }}}
+
+    //      Definition of functions
+    //      {{{
+    function automatic logic is_load(input hpdcache_req_op_t op);
+        //  1 when the operation is a load,
+        //  0 for every other operation
+        if (op == HPDCACHE_REQ_LOAD) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_store(input hpdcache_req_op_t op);
+        //  1 when the operation is a store,
+        //  0 for every other operation
+        if (op == HPDCACHE_REQ_STORE) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo(input hpdcache_req_op_t op);
+        //  1 when the operation is any of the HPDCACHE_REQ_AMO_* opcodes
+        if ((op == HPDCACHE_REQ_AMO_LR)   ||
+            (op == HPDCACHE_REQ_AMO_SC)   ||
+            (op == HPDCACHE_REQ_AMO_SWAP) ||
+            (op == HPDCACHE_REQ_AMO_ADD)  ||
+            (op == HPDCACHE_REQ_AMO_AND)  ||
+            (op == HPDCACHE_REQ_AMO_OR)   ||
+            (op == HPDCACHE_REQ_AMO_XOR)  ||
+            (op == HPDCACHE_REQ_AMO_MAX)  ||
+            (op == HPDCACHE_REQ_AMO_MAXU) ||
+            (op == HPDCACHE_REQ_AMO_MIN)  ||
+            (op == HPDCACHE_REQ_AMO_MINU)) begin
+            return 1'b1;
+        end
+        //  0 for every other value (an opcode with X/Z bits also ends up here)
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_lr(input hpdcache_req_op_t op);
+        //  1 when the operation is HPDCACHE_REQ_AMO_LR,
+        //  0 for every other operation
+        if (op == HPDCACHE_REQ_AMO_LR) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_sc(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_SC,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_SC) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_swap(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_SWAP,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_SWAP) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_add(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_ADD,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_ADD) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_and(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_AND,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_AND) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_or(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_OR,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_OR) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_xor(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_XOR,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_XOR) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_max(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_MAX,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_MAX) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_maxu(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_MAXU,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_MAXU) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_min(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_MIN,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_MIN) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_amo_minu(input hpdcache_req_op_t op);
+        //  Single-opcode predicate: 1'b1 only when op is HPDCACHE_REQ_AMO_MINU,
+        //  1'b0 otherwise (an unknown comparison also selects 1'b0).
+        if (op == HPDCACHE_REQ_AMO_MINU) return 1'b1;
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_cmo_inval(
+            input hpdcache_req_op_t op,
+            input hpdcache_req_size_t sz);
+        //  A request is a CMO invalidation when its opcode is HPDCACHE_REQ_CMO
+        //  and its size field carries one of the three INVAL sub-operation
+        //  codes. Other opcodes, other sub-operations and comparisons that
+        //  resolve to unknown all give 1'b0.
+        logic inval_code;
+
+        inval_code = 1'b0;
+        if (op == HPDCACHE_REQ_CMO) begin
+            if ((sz == HPDCACHE_REQ_CMO_INVAL_NLINE)   ||
+                (sz == HPDCACHE_REQ_CMO_INVAL_SET_WAY) ||
+                (sz == HPDCACHE_REQ_CMO_INVAL_ALL)) begin
+                inval_code = 1'b1;
+            end
+        end
+
+        return inval_code;
+    endfunction
+
+    function automatic logic is_cmo_inval_by_nline(input hpdcache_req_size_t sz);  //  sub-op check only
+        is_cmo_inval_by_nline = (HPDCACHE_REQ_CMO_INVAL_NLINE == sz);
+    endfunction
+
+    function automatic logic is_cmo_inval_by_set(input hpdcache_req_size_t sz);  //  sub-op check only
+        is_cmo_inval_by_set = (HPDCACHE_REQ_CMO_INVAL_SET_WAY == sz);
+    endfunction
+
+    function automatic logic is_cmo_inval_all(input hpdcache_req_size_t sz);  //  sub-op check only
+        is_cmo_inval_all = (HPDCACHE_REQ_CMO_INVAL_ALL == sz);
+    endfunction
+
+    function automatic logic is_cmo_fence(
+            input hpdcache_req_op_t op,
+            input hpdcache_req_size_t sz);
+        //  For the CMO opcode, the result is the comparison of the size
+        //  field against the FENCE sub-operation code; for every other
+        //  opcode the size field is ignored and the result is 1'b0.
+        if (op == HPDCACHE_REQ_CMO) begin
+            return (sz == HPDCACHE_REQ_CMO_FENCE);
+        end
+
+        return 1'b0;
+    endfunction
+
+    function automatic logic is_cmo_prefetch(
+            input hpdcache_req_op_t op,
+            input hpdcache_req_size_t sz);
+        //  For the CMO opcode, the result is the comparison of the size
+        //  field against the PREFETCH sub-operation code; for every other
+        //  opcode the size field is ignored and the result is 1'b0.
+        if (op == HPDCACHE_REQ_CMO) begin
+            return (sz == HPDCACHE_REQ_CMO_PREFETCH);
+        end
+
+        return 1'b0;
+    endfunction
+
+    function automatic hpdcache_tag_t hpdcache_get_req_addr_tag(input hpdcache_req_addr_t addr);  //  bits above set+offset
+        hpdcache_get_req_addr_tag = addr[(HPDCACHE_OFFSET_WIDTH + HPDCACHE_SET_WIDTH) +: HPDCACHE_TAG_WIDTH];
+    endfunction
+
+    function automatic hpdcache_set_t hpdcache_get_req_addr_set(input hpdcache_req_addr_t addr);  //  bits right above the offset
+        hpdcache_get_req_addr_set = addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH];
+    endfunction
+
+    function automatic hpdcache_word_t hpdcache_get_req_addr_word(input hpdcache_req_addr_t addr);  //  bits above the byte-in-word bits
+        hpdcache_get_req_addr_word = addr[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH];
+    endfunction
+
+    function automatic hpdcache_offset_t hpdcache_get_req_addr_offset(input hpdcache_req_addr_t addr);  //  lowest bits
+        hpdcache_get_req_addr_offset = addr[0 +: HPDCACHE_OFFSET_WIDTH];
+    endfunction
+
+    function automatic hpdcache_nline_t hpdcache_get_req_addr_nline(input hpdcache_req_addr_t addr);  //  bits above the offset
+        hpdcache_get_req_addr_nline = addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH];
+    endfunction
+
+    function automatic hpdcache_set_t hpdcache_get_req_offset_set(input hpdcache_req_offset_t offset);  //  same slice as the addr variant
+        hpdcache_get_req_offset_set = offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH];
+    endfunction
+
+    function automatic hpdcache_word_t hpdcache_get_req_offset_word(input hpdcache_req_offset_t offset);  //  same slice as the addr variant
+        hpdcache_get_req_offset_word = offset[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH];
+    endfunction
+
+    //      }}}
+    //  }}}
+
+    //  Definition of constants and types for the Miss Status Holding Register (MSHR)
+    //  {{{
+
+    //  HPDcache MSHR number of sets (value taken from hpdcache_params_pkg)
+    localparam int unsigned HPDCACHE_MSHR_SETS =
+        hpdcache_params_pkg::PARAM_MSHR_SETS;
+
+    //  HPDcache MSHR number of ways (value taken from hpdcache_params_pkg)
+    localparam int unsigned HPDCACHE_MSHR_WAYS =
+        hpdcache_params_pkg::PARAM_MSHR_WAYS;
+
+    //  HPDcache MSHR number of ways in the same SRAM word
+    localparam int unsigned HPDCACHE_MSHR_WAYS_PER_RAM_WORD =
+        hpdcache_params_pkg::PARAM_MSHR_WAYS_PER_RAM_WORD; /* FIXME this parameter is currently ignored */
+
+    //  HPDcache MSHR number of sets in the same SRAM
+    localparam int unsigned HPDCACHE_MSHR_SETS_PER_RAM =
+        hpdcache_params_pkg::PARAM_MSHR_SETS_PER_RAM; /* FIXME this parameter is currently ignored */
+
+    //  HPDcache MSHR implements write byte enable
+    localparam bit HPDCACHE_MSHR_RAM_WBYTEENABLE =
+        hpdcache_params_pkg::PARAM_MSHR_RAM_WBYTEENABLE;
+    localparam bit HPDCACHE_MSHR_USE_REGBANK =  //  NOTE(review): presumably selects a register bank instead of an SRAM - confirm
+        hpdcache_params_pkg::PARAM_MSHR_USE_REGBANK;
+
+    localparam int unsigned HPDCACHE_MSHR_SET_WIDTH = $clog2(HPDCACHE_MSHR_SETS);  //  bits needed to index a set
+    localparam int unsigned HPDCACHE_MSHR_WAY_WIDTH = $clog2(HPDCACHE_MSHR_WAYS);  //  bits needed to index a way
+    localparam int unsigned HPDCACHE_MSHR_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_MSHR_SET_WIDTH;  //  line-number bits left after the set index
+
+    typedef logic unsigned [HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_set_t;  //  MSHR set index
+    typedef logic unsigned [HPDCACHE_MSHR_TAG_WIDTH-1:0] mshr_tag_t;  //  MSHR tag
+    typedef logic unsigned [HPDCACHE_MSHR_WAY_WIDTH-1:0] mshr_way_t;  //  MSHR way index
+    //  }}}
+
+    //  Definition of interface with memory
+    //  {{{
+    typedef logic [7:0]                           hpdcache_mem_len_t;   //  8-bit length field
+    typedef logic [2:0]                           hpdcache_mem_size_t;  //  3-bit size code (see get_hpdcache_mem_size)
+
+    typedef enum logic [1:0] {
+        HPDCACHE_MEM_RESP_OK  = 2'b00,
+        HPDCACHE_MEM_RESP_NOK = 2'b01
+    } hpdcache_mem_error_e;  //  2-bit response status; 2'b10 and 2'b11 are not defined here
+
+    typedef enum logic [1:0] {
+        HPDCACHE_MEM_READ     = 2'b00,
+        HPDCACHE_MEM_WRITE    = 2'b01,
+        HPDCACHE_MEM_ATOMIC   = 2'b10
+        //  Reserved        = 2'b11 - TODO: CMO ?
+    } hpdcache_mem_command_e;  //  2-bit memory command
+
+    typedef enum logic [3:0] {
+        HPDCACHE_MEM_ATOMIC_ADD  = 4'b0000,
+        HPDCACHE_MEM_ATOMIC_CLR  = 4'b0001,
+        HPDCACHE_MEM_ATOMIC_SET  = 4'b0010,
+        HPDCACHE_MEM_ATOMIC_EOR  = 4'b0011,
+        HPDCACHE_MEM_ATOMIC_SMAX = 4'b0100,
+        HPDCACHE_MEM_ATOMIC_SMIN = 4'b0101,
+        HPDCACHE_MEM_ATOMIC_UMAX = 4'b0110,
+        HPDCACHE_MEM_ATOMIC_UMIN = 4'b0111,
+        HPDCACHE_MEM_ATOMIC_SWAP = 4'b1000,
+        //  Reserved           = 4'b1001,
+        //  Reserved           = 4'b1010,
+        //  Reserved           = 4'b1011,
+        HPDCACHE_MEM_ATOMIC_LDEX = 4'b1100,
+        HPDCACHE_MEM_ATOMIC_STEX = 4'b1101
+        //  Reserved           = 4'b1110,
+        //  Reserved           = 4'b1111
+    } hpdcache_mem_atomic_e;  //  4-bit atomic sub-command; NOTE(review): mnemonics resemble AMBA atomic names - confirm
+
+    function automatic hpdcache_mem_size_t get_hpdcache_mem_size(int unsigned bytes);
+        if      (bytes <=   1) return 0;  //  code s is the smallest value with 2**s >= bytes (0 or 1 byte -> 0)
+        else if (bytes <=   2) return 1;
+        else if (bytes <=   4) return 2;
+        else if (bytes <=   8) return 3;
+        else if (bytes <=  16) return 4;
+        else if (bytes <=  32) return 5;
+        else if (bytes <=  64) return 6;
+        // pragma translate_off
+        else if (bytes >  128) $error("hpdcache: unsupported number of bytes");
+        // pragma translate_on
+        return 7;  //  65..128 bytes; unsupported larger values are reported above and saturate at the largest code
+    endfunction
+    //  }}}
+
+    //  Definition of constants and types for the Write Buffer (WBUF)
+    //  {{{
+    localparam int unsigned HPDCACHE_WBUF_DIR_ENTRIES =  //  write-buffer directory entry count
+        hpdcache_params_pkg::PARAM_WBUF_DIR_ENTRIES;
+
+    localparam int unsigned HPDCACHE_WBUF_DATA_ENTRIES =  //  write-buffer data entry count
+        hpdcache_params_pkg::PARAM_WBUF_DATA_ENTRIES;
+
+    localparam int unsigned HPDCACHE_WBUF_WORDS =  //  request-data words per write-buffer data entry
+        hpdcache_params_pkg::PARAM_WBUF_WORDS;
+
+    localparam int unsigned HPDCACHE_WBUF_TIMECNT_WIDTH =  //  width of wbuf_timecnt_t
+        hpdcache_params_pkg::PARAM_WBUF_TIMECNT_WIDTH;
+
+    //    Use feedthrough FIFOs from the write-buffer to the NoC. This reduces
+    //    the latency (by one cycle) but adds an additional timing path
+    localparam bit HPDCACHE_WBUF_SEND_FEEDTHROUGH =
+        hpdcache_params_pkg::PARAM_WBUF_SEND_FEEDTHROUGH;
+
+    localparam int unsigned HPDCACHE_WBUF_DATA_WIDTH     = HPDCACHE_REQ_DATA_WIDTH*
+                                                           HPDCACHE_WBUF_WORDS;  //  bits per data entry
+    localparam int unsigned HPDCACHE_WBUF_DATA_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DATA_ENTRIES);  //  bits to index a data entry
+    localparam int unsigned HPDCACHE_WBUF_DIR_PTR_WIDTH  = $clog2(HPDCACHE_WBUF_DIR_ENTRIES);  //  bits to index a directory entry
+
+    typedef hpdcache_req_addr_t                                 wbuf_addr_t;      //  alias of the request address type
+    typedef hpdcache_nline_t                                    wbuf_match_t;     //  alias of the cache-line number type
+    typedef hpdcache_req_data_t                                 wbuf_data_t;      //  alias of the request data type
+    typedef hpdcache_req_be_t                                   wbuf_be_t;        //  alias of the request byte-enable type
+    typedef wbuf_data_t[HPDCACHE_WBUF_WORDS-1:0]                wbuf_data_buf_t;  //  HPDCACHE_WBUF_WORDS data words
+    typedef wbuf_be_t  [HPDCACHE_WBUF_WORDS-1:0]                wbuf_be_buf_t;    //  matching byte-enables
+    typedef logic unsigned   [ HPDCACHE_WBUF_TIMECNT_WIDTH-1:0] wbuf_timecnt_t;   //  time counter value
+    typedef logic unsigned   [ HPDCACHE_WBUF_DIR_PTR_WIDTH-1:0] wbuf_dir_ptr_t;   //  directory entry index
+    typedef logic unsigned   [HPDCACHE_WBUF_DATA_PTR_WIDTH-1:0] wbuf_data_ptr_t;  //  data entry index
+    //  }}}
+
+    //  Definition of constants and types for the Replay Table (RTAB)
+    //  {{{
+    localparam int HPDCACHE_RTAB_ENTRIES = hpdcache_params_pkg::PARAM_RTAB_ENTRIES;  //  replay table entry count
+
+    typedef logic [$clog2(HPDCACHE_RTAB_ENTRIES)-1:0] rtab_ptr_t;  //  replay table entry index
+    //  }}}
+
+    //  Definition of constants and types for the uncacheable request handler (UC)
+    //  {{{
+    typedef struct packed {  //  13 one-bit operation flags; is_ld is the most significant bit
+        logic is_ld;         //  flag names mirror the is_load / is_store / is_amo_* predicates of this package
+        logic is_st;
+        logic is_amo_lr;
+        logic is_amo_sc;
+        logic is_amo_swap;
+        logic is_amo_add;
+        logic is_amo_and;
+        logic is_amo_or;
+        logic is_amo_xor;
+        logic is_amo_max;
+        logic is_amo_maxu;
+        logic is_amo_min;
+        logic is_amo_minu;
+    } hpdcache_uc_op_t;  //  NOTE(review): presumably at most one flag is set per request - confirm with the UC handler
+    //  }}}
+
+    //  Definition of constants and types for the CMO request handler (CMOH)
+    //  {{{
+    typedef struct packed {  //  4 one-bit operation flags; is_inval_by_nline is the most significant bit
+        logic is_inval_by_nline;  //  flag names mirror the is_cmo_inval_by_* / is_cmo_fence predicates of this package
+        logic is_inval_by_set;
+        logic is_inval_all;
+        logic is_fence;
+    } hpdcache_cmoh_op_t;
+    //  }}}
+endpackage
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv
new file mode 100644
index 0000000..7697737
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv
@@ -0,0 +1,138 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : May, 2021
+ *  Description   : HPDcache Pseudo-LRU replacement policy
+ *  History       :
+ */
+module hpdcache_plru
+    //  Parameters
+    //  {{{
+#(
+    parameter int unsigned SETS = 0,  //  number of sets whose PLRU bits are stored
+    parameter int unsigned WAYS = 0,  //  number of PLRU bits (ways) per set
+
+    localparam type set_t        = logic [$clog2(SETS)-1:0],
+    localparam type way_vector_t = logic [WAYS-1:0]
+)
+    //  }}}
+
+    //  Ports
+    //  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,            //  asynchronous, active-low reset
+
+    //      PLRU update interface
+    input  logic                  updt_i,            //  an access marks a way as used
+    input  set_t                  updt_set_i,        //  set touched by the access
+    input  way_vector_t           updt_way_i,        //  way bit(s) to mark in that set
+
+    //      Victim replacement interface
+    input  logic                  repl_i,            //  a victim is being taken
+    input  set_t                  repl_set_i,        //  set in which the victim is chosen
+    input  way_vector_t           repl_dir_valid_i,  //  valid bits of the ways of that set
+    input  logic                  repl_updt_plru_i,  //  also mark the victim way as used
+
+    output way_vector_t           victim_way_o
+);
+    //  }}}
+
+    //  Internal signals and registers
+    //  {{{
+    way_vector_t [SETS-1:0] plru_q, plru_d;            //  per-set PLRU bits (state, next state)
+    way_vector_t            updt_merged, repl_merged;  //  PLRU bits with the touched way added
+    way_vector_t            lru_victim;                //  candidate among ways with a clear PLRU bit
+    way_vector_t            free_victim;               //  candidate among ways that are not valid
+    //  }}}
+
+    //  Victim way selection
+    //  {{{
+    //  Candidate taken among the ways that are not valid in the directory
+    hpdcache_prio_1hot_encoder #(.N(WAYS)) unused_victim_select_i (
+        .val_i     (~repl_dir_valid_i),
+        .val_o     (free_victim)
+    );
+
+    //  Candidate taken among the ways whose PLRU bit is clear in the target set
+    hpdcache_prio_1hot_encoder #(.N(WAYS)) used_victim_select_i (
+        .val_i     (~plru_q[repl_set_i]),
+        .val_o     (lru_victim)
+    );
+
+    //  A free directory entry (valid == 0), when there is one, is the victim
+    assign victim_way_o = (|free_victim) ? free_victim : lru_victim;
+    //  }}}
+
+    //  Pseudo-LRU update process
+    //  {{{
+    //  PLRU bits of the addressed set with the touched way additionally
+    //  marked as used (victim way for a replacement, accessed way otherwise)
+    assign repl_merged = plru_q[repl_set_i] | victim_way_o;
+    assign updt_merged = plru_q[updt_set_i] | updt_way_i;
+
+    always_comb
+    begin : plru_update_comb
+        //  By default every set keeps its current PLRU bits
+        plru_d = plru_q;
+
+        //  A replacement takes precedence over a plain access update: when
+        //  repl_i is set, updt_i is not looked at (even if repl_updt_plru_i
+        //  is low and the replacement leaves the state untouched)
+        if (repl_i) begin
+            //  When replacing a cache-line, set the PLRU bit of the new line
+            if (repl_updt_plru_i) begin
+                //  If all PLRU bits of the set would end up set, clear them
+                //  all but the one of the victim way
+                if (&repl_merged) begin
+                    plru_d[repl_set_i] = victim_way_o;
+                end else begin
+                    plru_d[repl_set_i] = repl_merged;
+                end
+            end
+        end else if (updt_i) begin
+            //  When accessing a cache-line, set the corresponding PLRU bit.
+            //  If all PLRU bits of the set would end up set, clear them all
+            //  but the one of the accessed way
+            if (&updt_merged) begin
+                plru_d[updt_set_i] = updt_way_i;
+            end else begin
+                plru_d[updt_set_i] = updt_merged;
+            end
+        end
+    end
+    //  }}}
+
+    //  Set state process
+    //  {{{
+    //  The reset clears every PLRU bit; afterwards the state is written
+    //  only in cycles where an update or a replacement is signalled
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : lru_ff
+        if (!rst_ni) begin
+            plru_q <= '0;
+        end else if (repl_i || updt_i) begin
+            plru_q <= plru_d;
+        end
+    end
+    //  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv
new file mode 100755
index 0000000..d7d9d64
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv
@@ -0,0 +1,666 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : September, 2021
+ *  Description   : HPDcache Replay Table
+ *  History       :
+ */
+module hpdcache_rtab
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter type rtab_entry_t = logic
+)
+//  }}}
+//  Ports
+//  {{{
+(
+    //  Clock and reset signals
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    //  Global control signals
+    output logic                  empty_o,          // RTAB is empty
+    output logic                  full_o,           // RTAB is full
+
+    //  Check RTAB signals
+    //     This interface allows to check if there is an address-overlapping
+    //     request in the RTAB with respect to the given nline.
+    input  logic                  check_i,          // Check for hit (nline) in the RTAB
+    input  hpdcache_nline_t       check_nline_i,
+    output logic                  check_hit_o,
+
+    //  Allocate signals
+    //     This interface allows to allocate a new request in a new linked list
+    input  logic                  alloc_i,
+    input  logic                  alloc_and_link_i,
+    input  rtab_entry_t           alloc_req_i,
+    input  logic                  alloc_mshr_hit_i,
+    input  logic                  alloc_mshr_full_i,
+    input  logic                  alloc_mshr_ready_i,
+    input  logic                  alloc_wbuf_hit_i,
+    input  logic                  alloc_wbuf_not_ready_i,
+
+    //  Pop signals
+    //     This interface allows to read (and remove) a request from the RTAB
+    output logic                  pop_try_valid_o,  // Request ready to be replayed
+    input  logic                  pop_try_i,
+    output rtab_entry_t           pop_try_req_o,
+    output rtab_ptr_t             pop_try_ptr_o,
+
+    //  Pop Commit signals
+    //     This interface allows to actually remove a popped request
+    input  logic                  pop_commit_i,
+    input  rtab_ptr_t             pop_commit_ptr_i,
+
+    //  Pop Rollback signals
+    //     This interface allows to put back a popped request
+    input  logic                  pop_rback_i,
+    input  rtab_ptr_t             pop_rback_ptr_i,
+    input  logic                  pop_rback_mshr_hit_i,
+    input  logic                  pop_rback_mshr_full_i,
+    input  logic                  pop_rback_mshr_ready_i,
+    input  logic                  pop_rback_wbuf_hit_i,
+    input  logic                  pop_rback_wbuf_not_ready_i,
+
+
+    //  Control signals from/to WBUF
+    output hpdcache_req_addr_t    wbuf_addr_o,      // address to check against ongoing writes
+    output logic                  wbuf_is_read_o,   // monitored request is read
+    input  logic                  wbuf_hit_open_i,  // Hit on open entry in the write buf
+    input  logic                  wbuf_hit_pend_i,  // Hit on pend entry in the write buf
+    input  logic                  wbuf_hit_sent_i,  // Hit on sent entry in the write buf
+    input  logic                  wbuf_not_ready_i, // Write buffer cannot accept the write
+
+    //  Control signals from the Miss Handler
+    input  logic                  miss_ready_i,     // Miss Handler is ready
+
+    //  Control signals from the Refill Handler
+    input  logic                  refill_i,         // Active refill
+    input  hpdcache_nline_t       refill_nline_i,   // Cache-line index being refilled
+
+    //  Configuration parameters
+    input  logic                  cfg_single_entry_i // Enable only one entry of the table
+);
+//  }}}
+
+//  Definition of constants, types and functions
+//  {{{
+    localparam int N = HPDCACHE_RTAB_ENTRIES;  //  shorthand for the number of replay table entries
+
+    function automatic rtab_ptr_t rtab_bv_to_index(
+            input logic [N-1:0] bv);
+        //  Index of the least-significant set bit of bv (0 when no bit is set)
+        rtab_bv_to_index = 0;
+        for (int k = N - 1; k >= 0; k--)
+            if (bv[k]) rtab_bv_to_index = rtab_ptr_t'(k);
+    endfunction
+
+    function automatic logic [N-1:0] rtab_index_to_bv(
+            input rtab_ptr_t index);
+        //  Bit k of the result is the comparison of index against k (cast
+        //  to rtab_ptr_t), so a known index yields at most one set bit.
+        logic [N-1:0] onehot;
+        for (int k = 0; k < N; k++)
+            onehot[k] = (index == rtab_ptr_t'(k));
+        return onehot;
+    endfunction
+
+    function automatic bit rtab_mshr_set_equal(input hpdcache_nline_t x, input hpdcache_nline_t y);
+        //  Compares the HPDCACHE_MSHR_SET_WIDTH least-significant bits of the
+        //  two line numbers (the slice this function treats as the MSHR set)
+        rtab_mshr_set_equal = (y[0 +: HPDCACHE_MSHR_SET_WIDTH] == x[0 +: HPDCACHE_MSHR_SET_WIDTH]);
+    endfunction
+
+    function automatic logic [N-1:0] rtab_next(rtab_ptr_t [N-1:0] next, rtab_ptr_t x);  //  bit-vector form of next[x]
+        rtab_next = rtab_index_to_bv(next[x]);
+    endfunction
+
+    typedef enum {
+        POP_TRY_HEAD,       //  offer the ready list head granted by the pop arbiter
+        POP_TRY_NEXT,       //  offer the entry that follows the one just popped
+        POP_TRY_NEXT_WAIT   //  NOTE(review): transitions not visible in this excerpt - confirm
+    } rtab_pop_try_state_e;
+//  }}}
+
+//  Internal signals and registers
+//  {{{
+    rtab_entry_t        [N-1:0]  req_q;
+    rtab_ptr_t          [N-1:0]  next_q;
+
+    rtab_pop_try_state_e         pop_try_state_q, pop_try_state_d;
+    logic               [N-1:0]  pop_try_next_q, pop_try_next_d;
+
+    logic               [N-1:0]  valid_q;
+    logic               [N-1:0]  valid_set, valid_rst;
+    logic               [N-1:0]  alloc_valid_set;
+    logic               [N-1:0]  pop_commit_valid_rst;
+
+    //  Bits indicating if the corresponding entry is the head of a linked list
+    logic               [N-1:0]  head_q;
+    logic               [N-1:0]  head_set, head_rst;
+    logic               [N-1:0]  alloc_head_set, alloc_head_rst;
+    logic               [N-1:0]  pop_try_head_rst;
+    logic               [N-1:0]  pop_commit_head_set;
+    logic               [N-1:0]  pop_rback_head_set;
+
+    //  Bits indicating if the corresponding entry is the tail of a linked list
+    logic               [N-1:0]  tail_q;
+    logic               [N-1:0]  tail_set, tail_rst;
+    logic               [N-1:0]  alloc_tail_set, alloc_tail_rst;
+
+    //  There is a pending miss on the target nline
+    logic               [N-1:0]  deps_mshr_hit_q;
+    logic               [N-1:0]  deps_mshr_hit_set, deps_mshr_hit_rst;
+    logic               [N-1:0]  alloc_deps_mshr_hit_set;
+    logic               [N-1:0]  pop_rback_deps_mshr_hit_set;
+
+    //  The MSHR has no available slot for the new miss
+    logic               [N-1:0]  deps_mshr_full_q;
+    logic               [N-1:0]  deps_mshr_full_set, deps_mshr_full_rst;
+    logic               [N-1:0]  alloc_deps_mshr_full_set;
+    logic               [N-1:0]  pop_rback_deps_mshr_full_set;
+
+    //  The MSHR is not ready to send a new miss request
+    logic               [N-1:0]  deps_mshr_ready_q;
+    logic               [N-1:0]  deps_mshr_ready_set, deps_mshr_ready_rst;
+    logic               [N-1:0]  alloc_deps_mshr_ready_set;
+    logic               [N-1:0]  pop_rback_deps_mshr_ready_set;
+
+    //  Hit on a non-empty entry of the write buffer
+    logic               [N-1:0]  deps_wbuf_hit_q;
+    logic               [N-1:0]  deps_wbuf_hit_set, deps_wbuf_hit_rst;
+    logic               [N-1:0]  alloc_deps_wbuf_hit_set;
+    logic               [N-1:0]  pop_rback_deps_wbuf_hit_set;
+
+    //  Hit on a pend entry of the write buffer
+    logic               [N-1:0]  deps_wbuf_not_ready_q;
+    logic               [N-1:0]  deps_wbuf_not_ready_set, deps_wbuf_not_ready_rst;
+    logic               [N-1:0]  alloc_deps_wbuf_not_ready_set;
+    logic               [N-1:0]  pop_rback_deps_wbuf_not_ready_set;
+
+    logic               [N-1:0]  nodeps;
+    hpdcache_nline_t    [N-1:0]  nline;
+    hpdcache_req_addr_t [N-1:0]  addr;
+    logic               [N-1:0]  is_read;
+    logic               [N-1:0]  check_hit;
+    logic               [N-1:0]  match_check_nline;
+    logic               [N-1:0]  match_check_tail;
+    logic               [N-1:0]  match_refill_nline;
+    logic               [N-1:0]  match_refill_mshr_set;
+
+    logic               [N-1:0]  free;
+    logic               [N-1:0]  free_alloc;
+    logic                        alloc;
+
+    logic               [N-1:0]  pop_match_next;
+    logic               [N-1:0]  pop_rback_ptr_bv;
+    logic               [N-1:0]  pop_try_bv;
+    logic               [N-1:0]  ready;
+
+    genvar                       gen_i;
+//  }}}
+
+//  Compute global control signals
+//  {{{
+    //  compute if entries are ready to be replayed
+    assign nodeps       = ~(deps_mshr_hit_q |
+                            deps_mshr_full_q |
+                            deps_mshr_ready_q |
+                            deps_wbuf_hit_q |
+                            deps_wbuf_not_ready_q);
+
+    assign ready        = valid_q & head_q & nodeps;
+
+    assign free         = ~valid_q;
+
+    //  compute the free vector (one-hot signal)
+    hpdcache_prio_1hot_encoder #(
+        .N         (N)
+    ) free_encoder_i (
+        .val_i     (free),
+        .val_o     (free_alloc)
+    );
+
+    //  full and empty signals
+    assign empty_o = &(~valid_q);
+    assign  full_o = &( valid_q) | (|valid_q & cfg_single_entry_i);
+//  }}}
+
+//  Check interface
+//  {{{
+    generate
+        for (gen_i = 0; gen_i < N; gen_i++) begin : check_gen
+            assign              addr[gen_i] = {req_q[gen_i].addr_tag, req_q[gen_i].addr_offset},
+                               nline[gen_i] = hpdcache_get_req_addr_nline(addr[gen_i]),
+                   match_check_nline[gen_i] = (check_nline_i == nline[gen_i]);
+
+            assign is_read[gen_i] =         is_load(req_q[gen_i].op) |
+                                    is_cmo_prefetch(req_q[gen_i].op, req_q[gen_i].size);
+        end
+    endgenerate
+
+    assign check_hit        =  valid_q   & match_check_nline,
+           check_hit_o      = |check_hit,
+           match_check_tail =  check_hit & tail_q;
+//  }}}
+
+//  Allocation process
+//  {{{
+    assign alloc = alloc_i | alloc_and_link_i;
+
+    //  Set the valid bit-vector of the replay table
+    assign alloc_valid_set = free_alloc       & {N{alloc}};
+
+    //  Set of head and tail bit-vectors during an allocation
+    //    - The head bit is only set when creating a new linked-list
+    //    - The tail bit is always set because new requests are added on the tail.
+    assign alloc_head_set  = free_alloc       & {N{alloc_i}},
+           alloc_tail_set  = alloc_valid_set;
+
+    //  Reset of head and tail bit-vectors during an allocation
+    //    - When doing an allocation and link, head bit shall be reset
+    //    - when doing an allocation and link, the "prev" tail shall be reset
+    assign alloc_head_rst  = free_alloc       & {N{alloc_and_link_i}},
+           alloc_tail_rst  = match_check_tail & {N{alloc_and_link_i}};
+
+    //  Set the dependency bits for the allocated entry
+    assign alloc_deps_mshr_hit_set       = alloc_valid_set & {N{      alloc_mshr_hit_i}},
+           alloc_deps_mshr_full_set      = alloc_valid_set & {N{     alloc_mshr_full_i}},
+           alloc_deps_mshr_ready_set     = alloc_valid_set & {N{    alloc_mshr_ready_i}},
+           alloc_deps_wbuf_hit_set       = alloc_valid_set & {N{      alloc_wbuf_hit_i}},
+           alloc_deps_wbuf_not_ready_set = alloc_valid_set & {N{alloc_wbuf_not_ready_i}};
+//  }}}
+
+//  Update replay table dependencies
+//  {{{
+    //  Update write buffer hit dependencies
+    //  {{{
+    //  Build a bit-vector with HEAD requests waiting for a conflict in the wbuf
+    logic [N-1:0]  wbuf_rd_pending, wbuf_wr_pending;
+    logic [N-1:0]  wbuf_rd_gnt, wbuf_wr_gnt;
+    logic [  1:0]  wbuf_pending;
+    logic [  1:0]  wbuf_gnt;
+    logic          wbuf_ready;
+    logic [N-1:0]  wbuf_sel;
+
+    assign wbuf_rd_pending = valid_q & head_q & deps_wbuf_hit_q,
+           wbuf_wr_pending = valid_q & head_q & deps_wbuf_not_ready_q;
+
+    //  Choose in a round-robin manner a ready transaction waiting for a conflict in the wbuf
+    hpdcache_rrarb #(
+        .N              (N)
+    ) wbuf_rd_pending_arb_i (
+        .clk_i,
+        .rst_ni,
+        .req_i          (wbuf_rd_pending),
+        .gnt_o          (wbuf_rd_gnt),
+        .ready_i        (wbuf_gnt[0] & wbuf_ready)
+    );
+
+    hpdcache_rrarb #(
+        .N              (N)
+    ) wbuf_wr_pending_arb_i (
+        .clk_i,
+        .rst_ni,
+        .req_i          (wbuf_wr_pending),
+        .gnt_o          (wbuf_wr_gnt),
+        .ready_i        (wbuf_gnt[1] & wbuf_ready)
+    );
+
+    assign wbuf_pending = {|wbuf_wr_gnt, |wbuf_rd_gnt},
+           wbuf_ready   = |(pop_try_bv & (wbuf_rd_gnt | wbuf_wr_gnt));
+
+    hpdcache_fxarb #(
+        .N              (2)
+    ) wbuf_pending_arb_i (
+        .clk_i,
+        .rst_ni,
+        .req_i          (wbuf_pending),
+        .gnt_o          (wbuf_gnt),
+        .ready_i        (wbuf_ready)
+    );
+
+    assign wbuf_sel = wbuf_gnt[0] ? wbuf_rd_gnt :
+                      wbuf_gnt[1] ? wbuf_wr_gnt : '0;
+
+    hpdcache_mux #(
+        .NINPUT         (N),
+        .DATA_WIDTH     ($bits(hpdcache_req_addr_t)),
+        .ONE_HOT_SEL    (1'b1)
+    ) wbuf_pending_addr_mux_i (
+        .data_i         (addr),
+        .sel_i          (wbuf_sel),
+        .data_o         (wbuf_addr_o)
+    );
+
+    hpdcache_mux #(
+        .NINPUT         (N),
+        .DATA_WIDTH     (1),
+        .ONE_HOT_SEL    (1'b1)
+    ) wbuf_pending_is_read_mux_i (
+        .data_i         (is_read),
+        .sel_i          (wbuf_sel),
+        .data_o         (wbuf_is_read_o)
+    );
+
+    //  reset write buffer dependency bits with the output from the write buffer
+    assign deps_wbuf_hit_rst =
+            wbuf_sel & ~{N{wbuf_hit_open_i | wbuf_hit_pend_i | wbuf_hit_sent_i}};
+    assign deps_wbuf_not_ready_rst =
+            wbuf_sel & ~{N{wbuf_not_ready_i}};
+    //  }}}
+
+    //  Update miss handler dependency
+    //  {{{
+    assign deps_mshr_ready_rst = {N{miss_ready_i}};
+    //  }}}
+
+    //  Update refill dependencies
+    //  {{{
+    generate
+        for (gen_i = 0; gen_i < N; gen_i++) begin : match_refill_gen
+            assign match_refill_mshr_set[gen_i] =
+                    rtab_mshr_set_equal(refill_nline_i, nline[gen_i]);
+            assign match_refill_nline[gen_i] =
+                    (refill_nline_i == nline[gen_i]);
+        end
+    endgenerate
+
+    assign deps_mshr_full_rst = {N{refill_i}} & match_refill_mshr_set;
+    assign deps_mshr_hit_rst  = {N{refill_i}} & match_refill_nline;
+    //  }}}
+//  }}}
+
+//  Pop interface
+//  {{{
+    logic [N-1:0]  pop_sel;
+    logic [N-1:0]  pop_commit_bv;
+
+    assign pop_commit_bv = rtab_index_to_bv(pop_commit_ptr_i);
+
+    //  Pop try process
+    //  {{{
+    logic [N-1:0]  pop_gnt;
+    logic          pop_head;
+
+    hpdcache_rrarb #(
+        .N              (N)
+    ) pop_arb_i (
+        .clk_i,
+        .rst_ni,
+        .req_i          (ready),
+        .gnt_o          (pop_gnt),
+        .ready_i        (pop_head)
+    );
+
+    always_comb  //  pop_try_valid_o depends only on the pop-try FSM state and on ready
+    begin : req_valid_comb
+        case(pop_try_state_q)
+            POP_TRY_HEAD     : pop_try_valid_o = |ready;  //  valid if any bit of ready is set
+            POP_TRY_NEXT     : pop_try_valid_o = 1'b1;    //  unconditionally valid
+            POP_TRY_NEXT_WAIT: pop_try_valid_o = 1'b1;    //  unconditionally valid
+            default          : pop_try_valid_o = 1'b0;    //  any other state value: not valid
+        endcase
+    end
+
+    always_comb  //  Next-state logic of the pop-try FSM and selection (pop_sel) of the popped entry
+    begin : pop_entry_sel_comb
+        pop_try_state_d = pop_try_state_q;  //  default: hold the current state
+        pop_try_next_d = pop_try_next_q;  //  default: hold the registered next-entry selection
+        pop_head = 1'b0;  //  default: ready_i of pop_arb_i is kept low
+        pop_sel = '0;  //  default: no entry selected
+
+        case (pop_try_state_q)
+            POP_TRY_HEAD: begin
+                // This FSM may be in this state after forwarding the tail of
+                // a list. In that case, a rollback may arrive in this cycle.
+                pop_sel = pop_gnt;  //  select the entry granted by pop_arb_i
+                if (!pop_rback_i && pop_try_valid_o) begin
+                    if (pop_try_i) begin
+                        //  If the request interface accepts the request, go to the next request
+                        //  in the list (if the current request is not the tail). Otherwise, stay in
+                        //  the same state to forward a request from a new list
+                        pop_head = 1'b1;
+                        if ((pop_gnt & ~tail_q) != 0) begin  //  granted entry has its tail_q bit clear
+                            pop_try_state_d = POP_TRY_NEXT;
+                            pop_try_next_d = rtab_next(next_q, pop_try_ptr_o);
+                        end
+                    end
+                end
+            end
+            POP_TRY_NEXT: begin
+                pop_sel     = pop_try_next_q;  //  select the entry registered in pop_try_next_q
+                if (pop_rback_i) begin
+                    pop_try_state_d = POP_TRY_HEAD;  //  a rollback sends the FSM back to POP_TRY_HEAD
+                end else begin
+                    if (pop_try_i) begin
+                        //  If the request interface accepts the new request, go to the next request
+                        //  in the list (if the current request is not the tail). Otherwise, return
+                        //  to the POP_TRY_HEAD state to forward a request from a new list
+                        if ((pop_try_next_q & ~tail_q) != 0) begin
+                            pop_try_state_d = POP_TRY_NEXT;
+                            pop_try_next_d  = rtab_next(next_q, pop_try_ptr_o);
+                        end else begin
+                            pop_try_state_d = POP_TRY_HEAD;
+                        end
+                    end else begin
+                        //  If the request interface is not ready to consume the new request, wait
+                        //  until it is
+                        pop_try_state_d = POP_TRY_NEXT_WAIT;
+                    end
+                end
+            end
+            POP_TRY_NEXT_WAIT: begin
+                //  Wait for the current request to be accepted. Then go to the next request in the
+                //  list or to a new list
+                pop_sel     = pop_try_next_q;  //  keep selecting the same registered entry
+                if (pop_try_i) begin
+                    if ((pop_try_next_q & ~tail_q) != 0) begin
+                        pop_try_state_d = POP_TRY_NEXT;
+                        pop_try_next_d  = rtab_next(next_q, pop_try_ptr_o);
+                    end else begin
+                        pop_try_state_d = POP_TRY_HEAD;
+                    end
+                end
+            end
+            default: begin  //  any other state value: keep the defaults assigned above
+            end
+        endcase
+    end
+
+    assign pop_commit_head_set  = '0;
+
+    hpdcache_mux #(
+        .NINPUT         (N),
+        .DATA_WIDTH     ($bits(rtab_entry_t)),
+        .ONE_HOT_SEL    (1'b1)
+    ) pop_mux_i (
+        .data_i         (req_q),
+        .sel_i          (pop_sel),
+        .data_o         (pop_try_req_o)
+    );
+
+    //  Temporarily unset the head bit of the popped request to prevent it from being rescheduled
+    assign pop_try_bv       = pop_sel & {N{pop_try_i}},
+           pop_try_head_rst = pop_try_bv;
+
+
+    //  Forward the index of the entry being popped. This is used later by the
+    //  commit or rollback operations
+    assign pop_try_ptr_o = rtab_bv_to_index(pop_sel);
+
+    //  }}}
+
+    //  Pop commit process
+    //  {{{
+    //  Invalidate the entry being popped (head of the linked list)
+    assign pop_commit_valid_rst = {N{pop_commit_i}} & rtab_index_to_bv(pop_commit_ptr_i);
+    //  }}}
+
+    //  Pop rollback process
+    //  {{{
+    //  Set again the head bit of the rolled-back request
+    assign pop_rback_ptr_bv                  = rtab_index_to_bv(pop_rback_ptr_i);
+
+    assign pop_rback_head_set                = {N{pop_rback_i}} & pop_rback_ptr_bv;
+
+    assign pop_rback_deps_mshr_hit_set       = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_hit_i}},
+           pop_rback_deps_mshr_full_set      = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_full_i}},
+           pop_rback_deps_mshr_ready_set     = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_ready_i}},
+           pop_rback_deps_wbuf_hit_set       = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_hit_i}},
+           pop_rback_deps_wbuf_not_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_not_ready_i}};
+    //  }}}
+//  }}}
+
+//  Internal state assignment
+//  {{{
+    assign head_set                = alloc_head_set | pop_commit_head_set | pop_rback_head_set,
+           head_rst                = alloc_head_rst | pop_try_head_rst;
+
+    assign tail_set                = alloc_tail_set,
+           tail_rst                = alloc_tail_rst;
+
+    assign valid_set               = alloc_valid_set,
+           valid_rst               = pop_commit_valid_rst;
+
+    assign deps_mshr_hit_set       = alloc_deps_mshr_hit_set       | pop_rback_deps_mshr_hit_set,
+           deps_mshr_full_set      = alloc_deps_mshr_full_set      | pop_rback_deps_mshr_full_set,
+           deps_mshr_ready_set     = alloc_deps_mshr_ready_set     | pop_rback_deps_mshr_ready_set,
+           deps_wbuf_hit_set       = alloc_deps_wbuf_hit_set       | pop_rback_deps_wbuf_hit_set,
+           deps_wbuf_not_ready_set = alloc_deps_wbuf_not_ready_set | pop_rback_deps_wbuf_not_ready_set;
+
+    always_ff @(posedge clk_i or negedge rst_ni)  //  flag and next-pointer registers
+    begin : rtab_valid_ff
+        if (!rst_ni) begin  //  asynchronous, active-low reset: clear every register of this process
+            valid_q               <= '0;
+            head_q                <= '0;
+            tail_q                <= '0;
+            deps_mshr_hit_q       <= '0;
+            deps_mshr_full_q      <= '0;
+            deps_mshr_ready_q     <= '0;
+            deps_wbuf_hit_q       <= '0;
+            deps_wbuf_not_ready_q <= '0;
+            next_q                <= '0;
+        end else begin
+            valid_q <= (~valid_q &  valid_set) |   //  a bit that is 0 becomes 1 when its set bit is 1
+                       ( valid_q & ~valid_rst);    //  a bit that is 1 stays 1 unless its rst bit is 1
+
+            //  update head and tail flags (same per-bit set/reset form as valid_q)
+            head_q <= (~head_q &  head_set) |
+                      ( head_q & ~head_rst);
+
+            tail_q <= (~tail_q &  tail_set) |
+                      ( tail_q & ~tail_rst);
+
+            //  update dependency flags (same per-bit set/reset form as valid_q)
+            deps_mshr_hit_q       <= (~deps_mshr_hit_q       &  deps_mshr_hit_set) |
+                                     ( deps_mshr_hit_q       & ~deps_mshr_hit_rst);
+            deps_mshr_full_q      <= (~deps_mshr_full_q      &  deps_mshr_full_set) |
+                                     ( deps_mshr_full_q      & ~deps_mshr_full_rst);
+            deps_mshr_ready_q     <= (~deps_mshr_ready_q     &  deps_mshr_ready_set) |
+                                     ( deps_mshr_ready_q     & ~deps_mshr_ready_rst);
+            deps_wbuf_hit_q       <= (~deps_wbuf_hit_q       &  deps_wbuf_hit_set) |
+                                     ( deps_wbuf_hit_q       & ~deps_wbuf_hit_rst);
+            deps_wbuf_not_ready_q <= (~deps_wbuf_not_ready_q &  deps_wbuf_not_ready_set) |
+                                     ( deps_wbuf_not_ready_q & ~deps_wbuf_not_ready_rst);
+
+            //  update the next pointers: only written when alloc_and_link_i is high
+            for (int i = 0; i < N; i++) begin
+                if (alloc_and_link_i && match_check_tail[i]) begin
+                    next_q[i] <= rtab_bv_to_index(free_alloc);  //  index computed from free_alloc
+                end
+            end
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)  //  state registers of the pop-try FSM
+    begin : pop_try_ff
+        if (!rst_ni) begin  //  asynchronous, active-low reset
+            pop_try_state_q <= POP_TRY_HEAD;  //  the FSM leaves reset in POP_TRY_HEAD
+            pop_try_next_q  <= '0;
+        end else begin
+            pop_try_state_q <= pop_try_state_d;  //  next values come from pop_entry_sel_comb
+            pop_try_next_q  <= pop_try_next_d;
+        end
+    end
+
+    always_ff @(posedge clk_i)  //  request storage; this process has no reset branch
+    begin : rtab_ff
+        for (int i = 0; i < N; i++) begin
+            //  update the request array: entry i captures alloc_req_i when valid_set[i] is high
+            if (valid_set[i]) begin
+                req_q[i] <= alloc_req_i;
+            end
+        end
+    end
+//  }}}
+
+//  Assertions
+//  {{{
+//  pragma translate_off
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            check_i |-> $onehot0(match_check_tail)) else
+                    $error("rtab: more than one entry matching");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            alloc_and_link_i |-> (check_i & check_hit_o)) else
+                    $error("rtab: alloc and link shall be performed in case of check hit");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            alloc_and_link_i |->
+                    ({alloc_req_i.addr_tag, hpdcache_get_req_offset_set(alloc_req_i.addr_offset)} ==
+                        check_nline_i)) else
+                    $error("rtab: nline for alloc and link shall match the one being checked");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            alloc_i |-> !alloc_and_link_i) else
+                    $error("rtab: only one allocation per cycle is allowed");
+
+`ifndef VERILATOR
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            pop_try_i |-> ##1 (pop_commit_i | pop_rback_i)) else
+                    $error("rtab: a pop try shall be followed by a commit or rollback");
+`endif
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            pop_commit_i |-> valid_q[pop_commit_ptr_i]) else
+                    $error("rtab: commiting an invalid entry");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            pop_rback_i |-> valid_q[pop_rback_ptr_i]) else
+                    $error("rtab: rolling-back an invalid entry");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            pop_rback_i |-> !pop_try_i) else
+                    $error("rtab: cache shall not accept a new request while rolling back");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            alloc |-> ~full_o) else
+                    $error("rtab: trying to allocate while the table is full");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)
+            alloc_and_link_i |-> ~cfg_single_entry_i) else
+                    $error("rtab: trying to link a request in single entry mode");
+//  pragma translate_on
+//  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv
new file mode 100644
index 0000000..17519e6
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv
@@ -0,0 +1,965 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : May, 2021
+ *  Description   : HPDcache uncached and AMO request handler
+ *  History       :
+ */
+module hpdcache_uncached
+import hpdcache_pkg::*;
+    //  Parameters
+    //  {{{
+#(
+    parameter int  HPDcacheMemIdWidth    = 8,
+    parameter int  HPDcacheMemDataWidth  = 512,
+    parameter type hpdcache_mem_req_t    = logic,
+    parameter type hpdcache_mem_req_w_t  = logic,
+    parameter type hpdcache_mem_resp_r_t = logic,
+    parameter type hpdcache_mem_resp_w_t = logic,
+
+    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
+)
+    //  }}}
+//  Ports
+//  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    //  Global control signals
+    //  {{{
+    input  logic                  wbuf_empty_i,
+    input  logic                  mshr_empty_i,
+    input  logic                  rtab_empty_i,
+    input  logic                  ctrl_empty_i,
+    //  }}}
+
+    //  Cache-side request interface
+    //  {{{
+    input  logic                  req_valid_i,
+    output logic                  req_ready_o,
+    input  hpdcache_uc_op_t       req_op_i,
+    input  hpdcache_req_addr_t    req_addr_i,
+    input  hpdcache_req_size_t    req_size_i,
+    input  hpdcache_req_data_t    req_data_i,
+    input  hpdcache_req_be_t      req_be_i,
+    input  logic                  req_uc_i,
+    input  hpdcache_req_sid_t     req_sid_i,
+    input  hpdcache_req_tid_t     req_tid_i,
+    input  logic                  req_need_rsp_i,
+    //  }}}
+
+    //  Write buffer interface
+    //  {{{
+    output logic                  wbuf_flush_all_o,
+    //  }}}
+
+    //  AMO Cache Interface
+    //  {{{
+    output logic                  dir_amo_match_o,
+    output hpdcache_set_t         dir_amo_match_set_o,
+    output hpdcache_tag_t         dir_amo_match_tag_o,
+    output logic                  dir_amo_update_plru_o,
+    input  hpdcache_way_vector_t  dir_amo_hit_way_i,
+
+    output logic                  data_amo_write_o,
+    output logic                  data_amo_write_enable_o,
+    output hpdcache_set_t         data_amo_write_set_o,
+    output hpdcache_req_size_t    data_amo_write_size_o,
+    output hpdcache_word_t        data_amo_write_word_o,
+    output logic [63:0]           data_amo_write_data_o,
+    output logic  [7:0]           data_amo_write_be_o,
+    // }}}
+
+    //  LR/SC reservation buffer
+    //  {{{
+    input  logic                  lrsc_snoop_i,
+    input  hpdcache_req_addr_t    lrsc_snoop_addr_i,
+    input  hpdcache_req_size_t    lrsc_snoop_size_i,
+    //  }}}
+
+    //  Core response interface
+    //  {{{
+    input  logic                  core_rsp_ready_i,
+    output logic                  core_rsp_valid_o,
+    output hpdcache_rsp_t         core_rsp_o,
+    //  }}}
+
+    //  MEMORY interfaces
+    //  {{{
+    //      Memory request unique identifier
+    input  hpdcache_mem_id_t      mem_read_id_i,
+    input  hpdcache_mem_id_t      mem_write_id_i,
+
+    //      Read interface
+    input  logic                  mem_req_read_ready_i,
+    output logic                  mem_req_read_valid_o,
+    output hpdcache_mem_req_t     mem_req_read_o,
+
+    output logic                  mem_resp_read_ready_o,
+    input  logic                  mem_resp_read_valid_i,
+    input  hpdcache_mem_resp_r_t  mem_resp_read_i,
+
+    //      Write interface
+    input  logic                  mem_req_write_ready_i,
+    output logic                  mem_req_write_valid_o,
+    output hpdcache_mem_req_t     mem_req_write_o,
+
+    input  logic                  mem_req_write_data_ready_i,
+    output logic                  mem_req_write_data_valid_o,
+    output hpdcache_mem_req_w_t   mem_req_write_data_o,
+
+    output logic                  mem_resp_write_ready_o,
+    input  logic                  mem_resp_write_valid_i,
+    input  hpdcache_mem_resp_w_t  mem_resp_write_i,
+    //  }}}
+
+    //  Configuration interface
+    //  {{{
+    input  logic                  cfg_error_on_cacheable_amo_i
+    //  }}}
+);
+//  }}}
+
+//  Definition of constants and types
+//  {{{
+    localparam hpdcache_uint MEM_REQ_RATIO = HPDcacheMemDataWidth/HPDCACHE_REQ_DATA_WIDTH;
+    localparam hpdcache_uint MEM_REQ_WORD_INDEX_WIDTH = $clog2(MEM_REQ_RATIO);
+
+    typedef enum {
+        UC_IDLE,
+        UC_WAIT_PENDING,
+        UC_MEM_REQ,
+        UC_MEM_W_REQ,
+        UC_MEM_WDATA_REQ,
+        UC_MEM_WAIT_RSP,
+        UC_CORE_RSP,
+        UC_AMO_READ_DIR,
+        UC_AMO_WRITE_DATA
+    } hpdcache_uc_fsm_t;
+
+    localparam logic AMO_SC_SUCCESS = 1'b0;
+    localparam logic AMO_SC_FAILURE = 1'b1;
+
+    function automatic logic [63:0] prepare_amo_data_operand(  //  returns a 64-bit AMO operand
+            input logic [63:0]        data_i,         //  64-bit data holding the operand
+            input hpdcache_req_size_t size_i,         //  3 returns data_i as is; other values: 32-bit path
+            input hpdcache_req_addr_t addr_i,         //  only bit 2 is read (selects the 32-bit half)
+            input logic               sign_extend_i   //  1: sign-extend the 32-bit half, 0: zero-extend
+    );
+        // 64-bit AMOs are already aligned, thus return the data unchanged
+        if (size_i == hpdcache_req_size_t'(3)) begin
+            return data_i;
+        end
+
+        // 32-bit AMOs: move the selected half to bits [31:0] and extend it to 64 bits
+        else begin
+            if (addr_i[2] == 1'b1) begin  //  upper half, data_i[63:32]
+                if (sign_extend_i) begin
+                    return {{32{data_i[63]}}, data_i[63:32]};
+                end else begin
+                    return {{32{      1'b0}}, data_i[63:32]};
+                end
+            end else begin  //  lower half, data_i[31:0]
+                if (sign_extend_i) begin
+                    return {{32{data_i[31]}}, data_i[31: 0]};
+                end else begin
+                    return {{32{      1'b0}}, data_i[31: 0]};
+                end
+            end
+        end
+    endfunction;
+
+    function automatic logic [63:0] prepare_amo_data_result(  //  returns a 64-bit AMO result word
+            input logic [63:0]      data_i,    //  result value; only bits [31:0] are used on the 32-bit path
+            input hpdcache_req_size_t size_i   //  3 returns data_i as is; other values: 32-bit path
+    );
+        // 64-bit AMOs are already aligned, thus return the data unchanged
+        if (size_i == hpdcache_req_size_t'(3)) begin
+            return data_i;
+        end
+
+        // 32-bit AMOs: replicate data_i[31:0] into both halves of the 64-bit result
+        else begin
+            return {2{data_i[31:0]}};
+        end
+    endfunction;
+
+    function automatic logic amo_need_sign_extend(hpdcache_uc_op_t op);  //  1 if op needs sign extension
+        unique case (1'b1)  //  matches the first listed flag of op that is 1
+            op.is_amo_add,
+            op.is_amo_max,
+            op.is_amo_min: return 1'b1;  //  add, max and min return 1
+            default      : return 1'b0;  //  none of the three flags is 1
+        endcase;
+    endfunction
+//  }}}
+
+//  Internal signals and registers
+//  {{{
+    hpdcache_uc_fsm_t   uc_fsm_q, uc_fsm_d;
+    hpdcache_uc_op_t    req_op_q;
+    hpdcache_req_addr_t req_addr_q;
+    hpdcache_req_size_t req_size_q;
+    hpdcache_req_data_t req_data_q;
+    hpdcache_req_be_t   req_be_q;
+    logic               req_uc_q;
+    hpdcache_req_sid_t  req_sid_q;
+    hpdcache_req_tid_t  req_tid_q;
+    logic               req_need_rsp_q;
+
+    logic               uc_sc_retcode_q, uc_sc_retcode_d;
+
+    hpdcache_req_data_t rsp_rdata_q, rsp_rdata_d;
+    logic               rsp_error_set, rsp_error_rst;
+    logic               rsp_error_q;
+    logic               mem_resp_write_valid_q, mem_resp_write_valid_d;
+    logic               mem_resp_read_valid_q, mem_resp_read_valid_d;
+
+    hpdcache_req_data_t mem_req_write_data;
+    logic [63:0]        amo_req_ld_data;
+    logic [63:0]        amo_ld_data;
+    logic [63:0]        amo_req_st_data;
+    logic [63:0]        amo_st_data;
+    logic [ 7:0]        amo_st_be;
+    logic [63:0]        amo_result;
+//  }}}
+
+//  LR/SC reservation buffer logic
+//  {{{
+    logic               lrsc_rsrv_valid_q;
+    hpdcache_req_addr_t lrsc_rsrv_addr_q, lrsc_rsrv_addr_d;
+    hpdcache_nline_t    lrsc_rsrv_nline;
+    hpdcache_offset_t   lrsc_rsrv_word;
+
+    hpdcache_offset_t   lrsc_snoop_words;
+    hpdcache_nline_t    lrsc_snoop_nline;
+    hpdcache_offset_t   lrsc_snoop_base, lrsc_snoop_end;
+    logic               lrsc_snoop_hit;
+    logic               lrsc_snoop_reset;
+
+    hpdcache_nline_t    lrsc_uc_nline;
+    hpdcache_offset_t   lrsc_uc_word;
+    logic               lrsc_uc_hit;
+    logic               lrsc_uc_set, lrsc_uc_reset;
+
+    //  NOTE: Reservation set for LR instruction is always 8-bytes in this
+    //  implementation.
+    assign lrsc_rsrv_nline  = hpdcache_get_req_addr_nline(lrsc_rsrv_addr_q),
+           lrsc_rsrv_word   = hpdcache_get_req_addr_offset(lrsc_rsrv_addr_q) >> 3;
+
+    //  Check hit on LR/SC reservation for snoop port (normal write accesses)
+    assign lrsc_snoop_words = (lrsc_snoop_size_i < 3) ? 1 : hpdcache_offset_t'((8'h1 << lrsc_snoop_size_i) >> 3),
+           lrsc_snoop_nline = hpdcache_get_req_addr_nline(lrsc_snoop_addr_i),
+           lrsc_snoop_base  = hpdcache_get_req_addr_offset(lrsc_snoop_addr_i) >> 3,
+           lrsc_snoop_end   = lrsc_snoop_base + lrsc_snoop_words;
+
+    assign lrsc_snoop_hit   = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_snoop_nline) &
+                                                  (lrsc_rsrv_word  >= lrsc_snoop_base) &
+                                                  (lrsc_rsrv_word  <  lrsc_snoop_end );
+
+    assign lrsc_snoop_reset = lrsc_snoop_i & lrsc_snoop_hit;
+
+    //  Check hit on LR/SC reservation for AMOs and SC
+    assign lrsc_uc_nline    = hpdcache_get_req_addr_nline(req_addr_i),
+           lrsc_uc_word     = hpdcache_get_req_addr_offset(req_addr_i) >> 3;
+
+    assign lrsc_uc_hit      = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_uc_nline) &
+                                                  (lrsc_rsrv_word  == lrsc_uc_word);
+//  }}}
+
+//  Uncacheable request FSM
+//  {{{
+    always_comb
+    begin : uc_fsm_comb
+        mem_resp_write_valid_d = mem_resp_write_valid_q;
+        mem_resp_read_valid_d  = mem_resp_read_valid_q;
+        rsp_error_set          = 1'b0;
+        rsp_error_rst          = 1'b0;
+        lrsc_rsrv_addr_d       = lrsc_rsrv_addr_q;
+        uc_sc_retcode_d        = uc_sc_retcode_q;
+        wbuf_flush_all_o       = 1'b0;
+        lrsc_uc_set            = 1'b0;
+        lrsc_uc_reset          = 1'b0;
+
+        uc_fsm_d               = uc_fsm_q;
+
+        case (uc_fsm_q)
+            //  Wait for a request
+            //  {{{
+            UC_IDLE: begin
+
+                if (req_valid_i) begin
+                    wbuf_flush_all_o = 1'b1;
+
+                    unique case (1'b1)
+                        req_op_i.is_ld,
+                        req_op_i.is_st: begin
+                            if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
+                                uc_fsm_d = UC_MEM_REQ;
+                            end else begin
+                                uc_fsm_d = UC_WAIT_PENDING;
+                            end
+                        end
+
+                        req_op_i.is_amo_swap,
+                        req_op_i.is_amo_add,
+                        req_op_i.is_amo_and,
+                        req_op_i.is_amo_or,
+                        req_op_i.is_amo_xor,
+                        req_op_i.is_amo_max,
+                        req_op_i.is_amo_maxu,
+                        req_op_i.is_amo_min,
+                        req_op_i.is_amo_minu,
+                        req_op_i.is_amo_lr: begin
+                            //  Reset LR/SC reservation if AMO matches its address
+                            lrsc_uc_reset = ~req_op_i.is_amo_lr & lrsc_uc_hit;
+
+                            if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin
+                                rsp_error_set = 1'b1;
+                                uc_fsm_d = UC_CORE_RSP;
+                            end else begin
+                                if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
+                                    uc_fsm_d = UC_MEM_REQ;
+                                end else begin
+                                    uc_fsm_d = UC_WAIT_PENDING;
+                                end
+                            end
+                        end
+
+                        req_op_i.is_amo_sc: begin
+                            if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin
+                                rsp_error_set = 1'b1;
+                                uc_fsm_d = UC_CORE_RSP;
+                            end else begin
+                                //  Reset previous reservation (if any)
+                                lrsc_uc_reset = 1'b1;
+
+                                //  SC with valid reservation
+                                if (lrsc_uc_hit) begin
+                                    if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
+                                        uc_fsm_d = UC_MEM_REQ;
+                                    end else begin
+                                        uc_fsm_d = UC_WAIT_PENDING;
+                                    end
+                                end
+                                //  SC with no valid reservation, thus respond with the failure code
+                                else begin
+                                    uc_sc_retcode_d = AMO_SC_FAILURE;
+                                    uc_fsm_d = UC_CORE_RSP;
+                                end
+                            end
+                        end
+
+                        default: begin
+                            if (req_need_rsp_i) begin
+                                rsp_error_set = 1'b1;
+                                uc_fsm_d = UC_CORE_RSP;
+                            end
+                        end
+                    endcase
+                end
+            end
+            //  }}}
+
+            //  Wait for the write buffer, MSHR, RTAB and ctrl to all report empty
+            //  {{{
+            UC_WAIT_PENDING: begin
+                if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
+                    uc_fsm_d = UC_MEM_REQ;
+                end else begin
+                    uc_fsm_d = UC_WAIT_PENDING;
+                end
+            end
+            //  }}}
+
+            //  Send request to memory
+            //  {{{
+            UC_MEM_REQ: begin
+                uc_fsm_d = UC_MEM_REQ;
+
+                mem_resp_write_valid_d = 1'b0;
+                mem_resp_read_valid_d  = 1'b0;
+
+                case (1'b1)
+                    req_op_q.is_ld,
+                    req_op_q.is_amo_lr: begin
+                        if (mem_req_read_ready_i) begin
+                            uc_fsm_d = UC_MEM_WAIT_RSP;
+                        end
+                    end
+
+                    req_op_q.is_st,
+                    req_op_q.is_amo_sc,
+                    req_op_q.is_amo_swap,
+                    req_op_q.is_amo_add,
+                    req_op_q.is_amo_and,
+                    req_op_q.is_amo_or,
+                    req_op_q.is_amo_xor,
+                    req_op_q.is_amo_max,
+                    req_op_q.is_amo_maxu,
+                    req_op_q.is_amo_min,
+                    req_op_q.is_amo_minu: begin
+                        if (mem_req_write_ready_i && mem_req_write_data_ready_i) begin
+                            uc_fsm_d = UC_MEM_WAIT_RSP;
+                        end else if (mem_req_write_ready_i) begin
+                            uc_fsm_d = UC_MEM_WDATA_REQ;
+                        end else if (mem_req_write_data_ready_i) begin
+                            uc_fsm_d = UC_MEM_W_REQ;
+                        end
+                    end
+                endcase
+            end
+            //  }}}
+
+            //  Send write address
+            //  {{{
+            UC_MEM_W_REQ: begin
+                mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
+                mem_resp_read_valid_d  =  mem_resp_read_valid_q |  mem_resp_read_valid_i;
+
+                if (mem_req_write_ready_i) begin
+                    uc_fsm_d = UC_MEM_WAIT_RSP;
+                end else begin
+                    uc_fsm_d = UC_MEM_W_REQ;
+                end
+            end
+            //  }}}
+
+            //  Send write data
+            //  {{{
+            UC_MEM_WDATA_REQ: begin
+                mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
+                mem_resp_read_valid_d  =  mem_resp_read_valid_q |  mem_resp_read_valid_i;
+
+                if (mem_req_write_data_ready_i) begin
+                    uc_fsm_d = UC_MEM_WAIT_RSP;
+                end else begin
+                    uc_fsm_d = UC_MEM_WDATA_REQ;
+                end
+            end
+            //  }}}
+
+            //  Wait for the response from the memory
+            //  {{{
+            UC_MEM_WAIT_RSP: begin
+                automatic bit rd_error;
+                automatic bit wr_error;
+
+                uc_fsm_d = UC_MEM_WAIT_RSP;
+                mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
+                mem_resp_read_valid_d  =  mem_resp_read_valid_q |  mem_resp_read_valid_i;
+
+                rd_error = mem_resp_read_valid_i  &&
+                           ( mem_resp_read_i.mem_resp_r_error == HPDCACHE_MEM_RESP_NOK);
+                wr_error = mem_resp_write_valid_i &&
+                           (mem_resp_write_i.mem_resp_w_error == HPDCACHE_MEM_RESP_NOK);
+                rsp_error_set = req_need_rsp_q & (rd_error | wr_error);
+
+                case (1'b1)
+                    req_op_q.is_ld: begin
+                        if (mem_resp_read_valid_i) begin
+                            if (req_need_rsp_q) begin
+                                uc_fsm_d = UC_CORE_RSP;
+                            end else begin
+                                uc_fsm_d = UC_IDLE;
+                            end
+                        end
+                    end
+                    req_op_q.is_st: begin
+                        if (mem_resp_write_valid_i) begin
+                            if (req_need_rsp_q) begin
+                                uc_fsm_d = UC_CORE_RSP;
+                            end else begin
+                                uc_fsm_d = UC_IDLE;
+                            end
+                        end
+                    end
+                    req_op_q.is_amo_lr: begin
+                        if (mem_resp_read_valid_i) begin
+                            //  set a new reservation
+                            if (!rd_error)
+                            begin
+                                lrsc_uc_set      = 1'b1;
+                                lrsc_rsrv_addr_d = req_addr_q;
+                            end
+                            //  in case of a memory error, do not make the reservation and
+                            //  invalidate an existing one (if valid)
+                            else begin
+                                lrsc_uc_reset = 1'b1;
+                            end
+
+                            if (req_uc_q || rd_error) begin
+                                uc_fsm_d = UC_CORE_RSP;
+                            end else begin
+                                uc_fsm_d = UC_AMO_READ_DIR;
+                            end
+                        end
+                    end
+                    req_op_q.is_amo_sc: begin
+                        if (mem_resp_write_valid_i) begin
+                            automatic bit is_atomic;
+
+                            is_atomic = mem_resp_write_i.mem_resp_w_is_atomic && !wr_error;
+                            uc_sc_retcode_d = is_atomic ? AMO_SC_SUCCESS : AMO_SC_FAILURE;
+
+                            if (req_uc_q || !is_atomic) begin
+                                uc_fsm_d = UC_CORE_RSP;
+                            end else begin
+                                uc_fsm_d = UC_AMO_READ_DIR;
+                            end
+                        end
+                    end
+                    req_op_q.is_amo_swap,
+                    req_op_q.is_amo_add,
+                    req_op_q.is_amo_and,
+                    req_op_q.is_amo_or,
+                    req_op_q.is_amo_xor,
+                    req_op_q.is_amo_max,
+                    req_op_q.is_amo_maxu,
+                    req_op_q.is_amo_min,
+                    req_op_q.is_amo_minu: begin
+                        //  wait until both the old data and the write acknowledgement are received
+                        if ((mem_resp_read_valid_i && mem_resp_write_valid_i) ||
+                            (mem_resp_read_valid_i && mem_resp_write_valid_q) ||
+                            (mem_resp_read_valid_q && mem_resp_write_valid_i))
+                        begin
+                            if (req_uc_q || rsp_error_q || rd_error || wr_error) begin
+                                uc_fsm_d = UC_CORE_RSP;
+                            end else begin
+                                uc_fsm_d = UC_AMO_READ_DIR;
+                            end
+                        end
+                    end
+                endcase
+            end
+            //  }}}
+
+            //  Send the response to the requester
+            //  {{{
+            UC_CORE_RSP: begin
+                if (core_rsp_ready_i) begin
+                    rsp_error_rst = 1'b1;
+                    uc_fsm_d = UC_IDLE;
+                end else begin
+                    uc_fsm_d = UC_CORE_RSP;
+                end
+            end
+            //  }}}
+
+            //  Check for a cache hit on the AMO target address
+            //  {{{
+            UC_AMO_READ_DIR: begin
+                uc_fsm_d = UC_AMO_WRITE_DATA;
+            end
+            //  }}}
+
+            //  Write the locally computed AMO result in the cache
+            //  {{{
+            UC_AMO_WRITE_DATA: begin
+                uc_fsm_d = UC_CORE_RSP;
+            end
+            //  }}}
+        endcase
+    end
+//  }}}
+
+//  AMO unit: select the addressed 64-bit operands and compute the AMO result
+//  {{{
+    localparam hpdcache_uint AMO_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_DATA_WIDTH/64);
+
+    generate
+        if (AMO_WORD_INDEX_WIDTH > 0) begin : amo_operand_mux_gen  // data wider than 64 bits
+            hpdcache_mux #(
+                .NINPUT         (HPDCACHE_REQ_DATA_WIDTH/64),
+                .DATA_WIDTH     (64),
+                .ONE_HOT_SEL    (1'b0)
+            ) amo_ld_data_mux_i (  // 64-bit word of the registered read response data
+                .data_i         (rsp_rdata_q),
+                .sel_i          (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),  // 8-byte word index
+                .data_o         (amo_req_ld_data)
+            );
+
+            hpdcache_mux #(
+                .NINPUT         (HPDCACHE_REQ_DATA_WIDTH/64),
+                .DATA_WIDTH     (64),
+                .ONE_HOT_SEL    (1'b0)
+            ) amo_st_data_mux_i (  // 64-bit word of the registered request data
+                .data_i         (req_data_q),
+                .sel_i          (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),
+                .data_o         (amo_req_st_data)
+            );
+
+            hpdcache_mux #(
+                .NINPUT         (HPDCACHE_REQ_DATA_WIDTH/64),
+                .DATA_WIDTH     (8),
+                .ONE_HOT_SEL    (1'b0)
+            ) amo_st_be_mux_i (  // 8-bit byte-enable of the same word
+                .data_i         (req_be_q),
+                .sel_i          (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),
+                .data_o         (amo_st_be)
+            );
+
+        end else begin  // no word index bits: operands are forwarded as-is
+            assign amo_req_ld_data = rsp_rdata_q;
+            assign amo_req_st_data = req_data_q;
+            assign amo_st_be   = req_be_q;
+        end
+    endgenerate
+
+    assign amo_ld_data = prepare_amo_data_operand(amo_req_ld_data, req_size_q,
+            req_addr_q, amo_need_sign_extend(req_op_q));
+    assign amo_st_data = prepare_amo_data_operand(amo_req_st_data, req_size_q,
+            req_addr_q, amo_need_sign_extend(req_op_q));
+
+    hpdcache_amo amo_unit_i (  // NOTE(review): module defined outside this chunk
+        .ld_data_i           (amo_ld_data),
+        .st_data_i           (amo_st_data),
+        .op_i                (req_op_q),
+        .result_o            (amo_result)
+    );
+
+    assign dir_amo_match_o       = (uc_fsm_q == UC_AMO_READ_DIR),  // lookup in READ_DIR state
+           dir_amo_match_set_o   = hpdcache_get_req_addr_set(req_addr_q),
+           dir_amo_match_tag_o   = hpdcache_get_req_addr_tag(req_addr_q),
+           dir_amo_update_plru_o = dir_amo_match_o;
+
+    assign data_amo_write_o        = (uc_fsm_q == UC_AMO_WRITE_DATA),  // write in WRITE_DATA state
+           data_amo_write_enable_o = |dir_amo_hit_way_i,  // enabled only if some way bit is set
+           data_amo_write_set_o    = hpdcache_get_req_addr_set(req_addr_q),
+           data_amo_write_size_o   = req_size_q,
+           data_amo_write_word_o   = hpdcache_get_req_addr_word(req_addr_q),
+           data_amo_write_data_o   = prepare_amo_data_result(amo_result, req_size_q),
+           data_amo_write_be_o     = amo_st_be;
+//  }}}
+
+//  Core handshake outputs: ready for a request in UC_IDLE, response valid in UC_CORE_RSP
+//  {{{
+    assign req_ready_o      = (uc_fsm_q ==     UC_IDLE),
+           core_rsp_valid_o = (uc_fsm_q == UC_CORE_RSP);
+//  }}}
+
+//  Memory read request outputs (only loads and LR issue a read request)
+//  {{{
+    always_comb
+    begin : mem_req_read_comb
+        mem_req_read_o.mem_req_addr      = req_addr_q;
+        mem_req_read_o.mem_req_len       = 0;  // NOTE(review): presumably single transfer - confirm
+        mem_req_read_o.mem_req_size      = req_size_q;
+        mem_req_read_o.mem_req_id        = mem_read_id_i;
+        mem_req_read_o.mem_req_cacheable = 1'b0;
+        mem_req_read_o.mem_req_command   = HPDCACHE_MEM_READ;  // default, overridden for LR
+        mem_req_read_o.mem_req_atomic    = HPDCACHE_MEM_ATOMIC_ADD;  // default, overridden for LR
+
+        unique case (1'b1)
+            req_op_q.is_ld: begin
+                mem_req_read_valid_o           = (uc_fsm_q == UC_MEM_REQ);  // valid in MEM_REQ only
+            end
+            req_op_q.is_amo_lr: begin
+                mem_req_read_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_read_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_LDEX;
+                mem_req_read_valid_o           = (uc_fsm_q == UC_MEM_REQ);
+            end
+            default: begin  // any other operation never issues a read request
+                mem_req_read_valid_o           = 1'b0;
+            end
+        endcase
+    end
+//  }}}
+
+//  Memory write request outputs (stores, SC and the read-modify-write AMOs)
+//  {{{
+    always_comb
+    begin : mem_req_write_comb
+        mem_req_write_data                = req_data_q;  // default, overridden for AMO AND
+        mem_req_write_o.mem_req_addr      = req_addr_q;
+        mem_req_write_o.mem_req_len       = 0;
+        mem_req_write_o.mem_req_size      = req_size_q;
+        mem_req_write_o.mem_req_id        = mem_write_id_i;
+        mem_req_write_o.mem_req_cacheable = 1'b0;
+        unique case (1'b1)  // operation flags are expected to be mutually exclusive
+            req_op_q.is_amo_sc: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_STEX;
+            end
+            req_op_q.is_amo_swap: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_SWAP;
+            end
+            req_op_q.is_amo_add: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_ADD;
+            end
+            req_op_q.is_amo_and: begin
+                mem_req_write_data              = ~req_data_q;  // AND is sent as CLR of ~operand
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_CLR;
+            end
+            req_op_q.is_amo_or: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_SET;
+            end
+            req_op_q.is_amo_xor: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_EOR;
+            end
+            req_op_q.is_amo_max: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_SMAX;
+            end
+            req_op_q.is_amo_maxu: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_UMAX;
+            end
+            req_op_q.is_amo_min: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_SMIN;
+            end
+            req_op_q.is_amo_minu: begin
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_UMIN;
+            end
+            default: begin  // none of the atomic flags above: plain write command
+                mem_req_write_o.mem_req_command = HPDCACHE_MEM_WRITE;
+                mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_ADD;
+            end
+        endcase
+
+        unique case (uc_fsm_q)  // request/data valid signals depend on the FSM state
+            UC_MEM_REQ: begin
+                unique case (1'b1)
+                    req_op_q.is_st,
+                    req_op_q.is_amo_sc,
+                    req_op_q.is_amo_swap,
+                    req_op_q.is_amo_add,
+                    req_op_q.is_amo_and,
+                    req_op_q.is_amo_or,
+                    req_op_q.is_amo_xor,
+                    req_op_q.is_amo_max,
+                    req_op_q.is_amo_maxu,
+                    req_op_q.is_amo_min,
+                    req_op_q.is_amo_minu: begin  // request and data are presented together
+                        mem_req_write_data_valid_o = 1'b1;
+                        mem_req_write_valid_o      = 1'b1;
+                    end
+
+                    default: begin  // remaining operations (e.g. loads, LR) do not write
+                        mem_req_write_data_valid_o = 1'b0;
+                        mem_req_write_valid_o      = 1'b0;
+                    end
+                endcase
+            end
+
+            UC_MEM_W_REQ: begin  // only the request valid is asserted
+                mem_req_write_valid_o      = 1'b1;
+                mem_req_write_data_valid_o = 1'b0;
+            end
+
+            UC_MEM_WDATA_REQ: begin  // only the data valid is asserted
+                mem_req_write_valid_o      = 1'b0;
+                mem_req_write_data_valid_o = 1'b1;
+            end
+
+            default: begin  // every other state: nothing is requested
+                mem_req_write_valid_o      = 1'b0;
+                mem_req_write_data_valid_o = 1'b0;
+            end
+        endcase
+    end
+
+    generate
+        //  memory data width is bigger than the width of the core's interface
+        if (MEM_REQ_RATIO > 1) begin : mem_req_data_gen
+            //  replicate the write data MEM_REQ_RATIO times to fill the memory data bus
+            assign mem_req_write_data_o.mem_req_w_data = {MEM_REQ_RATIO{mem_req_write_data}};
+
+            //  demultiplex the byte-enable onto the lane selected by the request address
+            hpdcache_demux #(
+                .NOUTPUT     (MEM_REQ_RATIO),
+                .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH/8)
+            ) mem_write_be_demux_i (
+                .data_i      (req_be_q),
+                .sel_i       (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]),
+                .data_o      (mem_req_write_data_o.mem_req_w_be)
+            );
+        end
+
+        //  memory data width is equal to the width of the core's interface
+        else begin
+            assign mem_req_write_data_o.mem_req_w_data = mem_req_write_data;
+            assign mem_req_write_data_o.mem_req_w_be   = req_be_q;
+        end
+
+        assign mem_req_write_data_o.mem_req_w_last = 1'b1;  // NOTE(review): presumably single-beat
+    endgenerate
+//  }}}
+
+//  Response handling: SC returns the replicated SC status, other requests the read data
+//  {{{
+    logic [63:0] sc_retcode;
+    logic [63:0] sc_rdata;
+
+    assign sc_retcode = {{63{1'b0}}, uc_sc_retcode_q},  // pad the SC return code with 63 zeros
+           sc_rdata   = prepare_amo_data_result(sc_retcode, req_size_q);
+
+    assign core_rsp_o.rdata   = req_op_q.is_amo_sc ? {HPDCACHE_REQ_WORDS{sc_rdata}} : rsp_rdata_q,
+           core_rsp_o.sid     = req_sid_q,
+           core_rsp_o.tid     = req_tid_q,
+           core_rsp_o.error   = rsp_error_q,
+           core_rsp_o.aborted = 1'b0;  // responses from this block are never flagged aborted
+
+    //  Resize the memory response data to the core response width
+    generate
+        //  memory data width is bigger than the width of the core's interface
+        if (MEM_REQ_RATIO > 1) begin : core_rsp_data_gen
+            hpdcache_mux #(
+                .NINPUT      (MEM_REQ_RATIO),
+                .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH)
+            ) data_read_rsp_mux_i(  // pick the core-width word addressed by the request
+                .data_i      (mem_resp_read_i.mem_resp_r_data),
+                .sel_i       (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]),
+                .data_o      (rsp_rdata_d)
+            );
+        end
+
+        //  memory data width is equal to the width of the core's interface
+        else begin
+            assign rsp_rdata_d = mem_resp_read_i.mem_resp_r_data;
+        end
+    endgenerate
+
+    //  This FSM is always ready to accept the response
+    assign mem_resp_read_ready_o  = 1'b1,
+           mem_resp_write_ready_o = 1'b1;
+//  }}}
+
+//  Capture the request fields when a request is accepted (valid/ready handshake)
+//  {{{
+    always_ff @(posedge clk_i)  // no reset on these registers
+    begin : req_ff
+        if (req_valid_i && req_ready_o) begin  // otherwise the previous request is held
+            req_op_q        <= req_op_i;
+            req_addr_q      <= req_addr_i;
+            req_size_q      <= req_size_i;
+            req_data_q      <= req_data_i;
+            req_be_q        <= req_be_i;
+            req_uc_q        <= req_uc_i;
+            req_sid_q       <= req_sid_i;
+            req_tid_q       <= req_tid_i;
+            req_need_rsp_q  <= req_need_rsp_i;
+        end
+    end
+//  }}}
+
+//  Uncacheable request FSM state and LR/SC reservation registers
+//  {{{
+    logic lrsc_rsrv_valid_set, lrsc_rsrv_valid_reset;
+
+    assign lrsc_rsrv_valid_set   = lrsc_uc_set,
+           lrsc_rsrv_valid_reset = lrsc_uc_reset | lrsc_snoop_reset;  // FSM or snoop reset
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : uc_fsm_ff
+        if (!rst_ni) begin
+            uc_fsm_q          <= UC_IDLE;
+            lrsc_rsrv_valid_q <= 1'b0;
+        end else begin
+            uc_fsm_q          <= uc_fsm_d;
+            lrsc_rsrv_valid_q <= (~lrsc_rsrv_valid_q &  lrsc_rsrv_valid_set  ) |  // clear: follow set
+                                 ( lrsc_rsrv_valid_q & ~lrsc_rsrv_valid_reset);   // set: hold until reset
+        end
+    end
+
+    always_ff @(posedge clk_i)  // no reset on these registers
+    begin : uc_amo_ff
+        lrsc_rsrv_addr_q <= lrsc_rsrv_addr_d;
+        uc_sc_retcode_q  <= uc_sc_retcode_d;
+    end
+//  }}}
+
+//  Response registers: read data, received-response flags and the error flag
+//  {{{
+    always_ff @(posedge clk_i)  // no reset on these registers
+    begin
+        if (mem_resp_read_valid_i) begin  // read data is captured only on a valid read response
+            rsp_rdata_q <= rsp_rdata_d;
+        end
+        mem_resp_write_valid_q <= mem_resp_write_valid_d;
+        mem_resp_read_valid_q  <= mem_resp_read_valid_d;
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin
+        if (!rst_ni) begin
+            rsp_error_q <= 1'b0;
+        end else begin
+            rsp_error_q <= (~rsp_error_q &  rsp_error_set) |  // when clear: follow rsp_error_set
+                           ( rsp_error_q & ~rsp_error_rst);   // when set: hold until rsp_error_rst
+        end
+    end
+//  }}}
+
+//  Assertions on the request and memory response interfaces (between translate_off/on pragmas)
+//  {{{
+//  pragma translate_off
+    assert property (@(posedge clk_i) disable iff (!rst_ni)  // loads must be uncacheable
+            (req_valid_i && req_op_i.is_ld) -> req_uc_i) else
+                    $error("uc_handler: unexpected load request on cacheable region");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)  // stores must be uncacheable
+            (req_valid_i && req_op_i.is_st) -> req_uc_i) else
+                    $error("uc_handler: unexpected store request on cacheable region");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)  // AMOs always need a response
+            (req_valid_i && (req_op_i.is_amo_lr   ||
+                             req_op_i.is_amo_sc   ||
+                             req_op_i.is_amo_swap ||
+                             req_op_i.is_amo_add  ||
+                             req_op_i.is_amo_and  ||
+                             req_op_i.is_amo_or   ||
+                             req_op_i.is_amo_xor  ||
+                             req_op_i.is_amo_max  ||
+                             req_op_i.is_amo_maxu ||
+                             req_op_i.is_amo_min  ||
+                             req_op_i.is_amo_minu )) -> req_need_rsp_i) else
+                    $error("uc_handler: amo requests shall need a response");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)  // AMO size code must be 2 or 3
+            (req_valid_i && (req_op_i.is_amo_lr   ||
+                             req_op_i.is_amo_sc   ||
+                             req_op_i.is_amo_swap ||
+                             req_op_i.is_amo_add  ||
+                             req_op_i.is_amo_and  ||
+                             req_op_i.is_amo_or   ||
+                             req_op_i.is_amo_xor  ||
+                             req_op_i.is_amo_max  ||
+                             req_op_i.is_amo_maxu ||
+                             req_op_i.is_amo_min  ||
+                             req_op_i.is_amo_minu )) -> (req_size_i inside {2,3})) else
+                    $error("uc_handler: amo requests shall be 4 or 8 bytes wide");
+
+    assert property (@(posedge clk_i) disable iff (!rst_ni)  // responses only in MEM_WAIT_RSP
+            (mem_resp_write_valid_i || mem_resp_read_valid_i) -> (uc_fsm_q == UC_MEM_WAIT_RSP)) else
+                    $error("uc_handler: unexpected response from memory");
+//  pragma translate_on
+//  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv
new file mode 100644
index 0000000..0607440
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv
@@ -0,0 +1,678 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Write Buffer
+ *  History       :
+ */
+module hpdcache_wbuf
+    //  Parameters
+    //  {{{
+#(
+    //  Number of entries in the directory part of the Write Buffer
+    parameter int unsigned WBUF_DIR_ENTRIES      = 0,
+    //  Number of entries in the data part of the Write Buffer
+    parameter int unsigned WBUF_DATA_ENTRIES     = 0,
+    //  Width in bits of the write words
+    parameter int unsigned WBUF_WORD_WIDTH       = 0,
+    //  Number of words per line in the write buffer
+    parameter int unsigned WBUF_WORDS            = 0,
+    //  Width in bits of the physical address
+    parameter int unsigned WBUF_PA_WIDTH         = 0,
+    //  Maximum value of the time counter
+    parameter int unsigned WBUF_TIMECNT_MAX      = 8,
+    //  Number of most significant bits to check for read conflicts
+    parameter int unsigned WBUF_READ_MATCH_WIDTH = 0,
+    //  Use a feedthrough FIFO on the send interface
+    parameter bit WBUF_SEND_FEEDTHROUGH = 0,
+
+    localparam int unsigned WBUF_OFFSET_WIDTH   = $clog2((WBUF_WORD_WIDTH*WBUF_WORDS)/8),
+    localparam int unsigned WBUF_TAG_WIDTH      = WBUF_PA_WIDTH - WBUF_OFFSET_WIDTH,
+    localparam int unsigned WBUF_WORD_OFFSET    = $clog2(WBUF_WORD_WIDTH/8),
+    localparam int unsigned WBUF_DATA_PTR_WIDTH = $clog2(WBUF_DATA_ENTRIES),
+    localparam int unsigned WBUF_DIR_PTR_WIDTH  = $clog2(WBUF_DIR_ENTRIES),
+    localparam int unsigned WBUF_TIMECNT_WIDTH  = $clog2(WBUF_TIMECNT_MAX),
+    localparam type wbuf_addr_t      = logic unsigned [        WBUF_PA_WIDTH-1:0],
+    localparam type wbuf_dir_ptr_t   = logic unsigned [   WBUF_DIR_PTR_WIDTH-1:0],
+    localparam type wbuf_data_ptr_t  = logic unsigned [  WBUF_DATA_PTR_WIDTH-1:0],
+    localparam type wbuf_data_t      = logic          [      WBUF_WORD_WIDTH-1:0],
+    localparam type wbuf_be_t        = logic          [    WBUF_WORD_WIDTH/8-1:0],
+    localparam type wbuf_data_buf_t  = wbuf_data_t    [           WBUF_WORDS-1:0],
+    localparam type wbuf_be_buf_t    = wbuf_be_t      [           WBUF_WORDS-1:0],
+    localparam type wbuf_tag_t       = logic unsigned [       WBUF_TAG_WIDTH-1:0],
+    localparam type wbuf_match_t     = logic unsigned [WBUF_READ_MATCH_WIDTH-1:0],
+    localparam type wbuf_timecnt_t   = logic unsigned [   WBUF_TIMECNT_WIDTH-1:0]
+)
+    //  }}}
+    //  Ports
+    //  {{{
+(
+    //  Clock and reset signals
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    //  Global control signals
+    output logic                  empty_o,
+    output logic                  full_o,
+    input  logic                  flush_all_i,
+
+    //  Configuration signals
+    //    Timer threshold
+    input  wbuf_timecnt_t         cfg_threshold_i,
+    //    Reset timer on write
+    input  logic                  cfg_reset_timecnt_on_write_i,
+    //    Sequentialize write-after-write hazards
+    input  logic                  cfg_sequential_waw_i,
+    //    Inhibit write coalescing
+    input  logic                  cfg_inhibit_write_coalescing_i,
+
+    //  Write interface
+    input  logic                  write_i,
+    output logic                  write_ready_o,
+    input  wbuf_addr_t            write_addr_i,
+    input  wbuf_data_t            write_data_i,
+    input  wbuf_be_t              write_be_i,  // byte-enable
+    input  logic                  write_uc_i,  // uncacheable write
+
+    //  Read hit interface
+    input  wbuf_addr_t            read_addr_i,
+    output logic                  read_hit_o,
+    input  logic                  read_flush_hit_i,
+
+    //  Replay hit interface
+    input  wbuf_addr_t            replay_addr_i,
+    input  logic                  replay_is_read_i,
+    output logic                  replay_open_hit_o,
+    output logic                  replay_pend_hit_o,
+    output logic                  replay_sent_hit_o,
+    output logic                  replay_not_ready_o,
+
+    //  Send interface
+    input  logic                  send_meta_ready_i,
+    output logic                  send_meta_valid_o,
+    output wbuf_addr_t            send_addr_o,
+    output wbuf_dir_ptr_t         send_id_o,
+    output logic                  send_uc_o,
+
+    input  logic                  send_data_ready_i,
+    output logic                  send_data_valid_o,
+    output wbuf_addr_t            send_data_tag_o,
+    output wbuf_data_buf_t        send_data_o,
+    output wbuf_be_buf_t          send_be_o,
+
+    //  Acknowledge interface
+    input  logic                  ack_i,
+    input  wbuf_dir_ptr_t         ack_id_i,
+    input  logic                  ack_error_i
+);
+    //  }}}
+
+    //  Definition of constants, types and functions
+    //  {{{
+    localparam int WBUF_SEND_FIFO_DEPTH = WBUF_DATA_ENTRIES;  // one FIFO slot per data entry
+
+    typedef logic unsigned [31:0]          wbuf_uint;
+
+    typedef enum logic [1:0] {  // state of a directory slot
+        WBUF_FREE = 2'b00, // unused/free slot
+        WBUF_OPEN = 2'b01, // there are pending writes in this slot
+        WBUF_PEND = 2'b10, // the slot is waiting to be sent
+        WBUF_SENT = 2'b11  // the slot is sent and waits for the memory acknowledge
+    } wbuf_state_e;
+
+    typedef struct packed {  // element type of the directory array (wbuf_dir_q)
+        wbuf_data_ptr_t ptr;  // NOTE(review): presumably the index of the linked data entry
+        wbuf_timecnt_t  cnt;  // NOTE(review): presumably the slot's time counter - confirm
+        wbuf_tag_t      tag;  // compared against the tag of incoming writes
+        logic           uc;   // NOTE(review): presumably "uncacheable", as in write_uc_i
+    } wbuf_dir_entry_t;
+
+    typedef struct packed {  // element type of the data array (wbuf_data_q)
+        wbuf_data_buf_t data;
+        wbuf_be_buf_t   be;
+    } wbuf_data_entry_t;
+
+    typedef struct packed {  // type of send_data_d / send_data_q
+        wbuf_data_ptr_t send_data_ptr;
+        wbuf_tag_t      send_data_tag;
+    } wbuf_send_data_t;
+
+    typedef struct packed {  // type of send_meta_wdata / send_meta_rdata
+        wbuf_tag_t      send_meta_tag;
+        wbuf_dir_ptr_t  send_meta_id;
+        logic           send_meta_uc;
+    } wbuf_send_meta_t;
+
+    function automatic wbuf_dir_ptr_t wbuf_dir_find_next(  // circular search in the directory
+            input wbuf_dir_ptr_t curr_ptr,  // search starts at curr_ptr + 1
+            input wbuf_state_e [WBUF_DIR_ENTRIES-1:0] dir_state,  // state of every entry
+            input wbuf_state_e state);  // state to look for
+        automatic wbuf_dir_ptr_t next_ptr;
+        for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin  // curr_ptr is checked last
+            next_ptr = wbuf_dir_ptr_t'((i + int'(curr_ptr) + 1) % WBUF_DIR_ENTRIES);
+            if (dir_state[next_ptr] == state) begin
+                return next_ptr;  // first match after curr_ptr, with wrap-around
+            end
+        end
+        return curr_ptr;  // no entry is in the requested state: pointer is unchanged
+    endfunction
+
+    function automatic wbuf_data_ptr_t wbuf_data_find_next(  // circular search in the data part
+            input wbuf_data_ptr_t curr_ptr,  // search starts at curr_ptr + 1
+            input logic [WBUF_DATA_ENTRIES-1:0] data_valid,  // valid bit of every data entry
+            input logic state);  // valid-bit value to look for
+        automatic wbuf_data_ptr_t next_ptr;
+        for (int unsigned i = 0; i < WBUF_DATA_ENTRIES; i++) begin  // curr_ptr is checked last
+            next_ptr = wbuf_data_ptr_t'((i + int'(curr_ptr) + 1) % WBUF_DATA_ENTRIES);
+            if (data_valid[next_ptr] == state) begin
+                return next_ptr;  // first match after curr_ptr, with wrap-around
+            end
+        end
+        return curr_ptr;  // no entry has the requested valid value: pointer is unchanged
+    endfunction
+
+    function automatic void wbuf_data_write(  // merge new bytes into a buffered line
+            output wbuf_data_buf_t wbuf_ret_data,  // merged data
+            output wbuf_be_buf_t   wbuf_ret_be,    // merged byte-enables
+            input  wbuf_data_buf_t wbuf_old_data,
+            input  wbuf_be_buf_t   wbuf_old_be,
+            input  wbuf_data_buf_t wbuf_new_data,
+            input  wbuf_be_buf_t   wbuf_new_be);
+        for (int unsigned w = 0; w < WBUF_WORDS; w++) begin
+            for (int unsigned b = 0; b < WBUF_WORD_WIDTH/8; b++) begin
+                wbuf_ret_data[w][b*8 +: 8] = wbuf_new_be[w][b] ?  // per byte: new if enabled
+                        wbuf_new_data[w][b*8 +: 8] :
+                        wbuf_old_data[w][b*8 +: 8];
+            end
+            wbuf_ret_be[w] = wbuf_old_be[w] | wbuf_new_be[w];  // union of old and new enables
+        end
+    endfunction
+
+    function automatic wbuf_match_t wbuf_tag_to_match_addr(wbuf_tag_t tag);  // tag MSBs only
+        return tag[WBUF_TAG_WIDTH - 1:WBUF_TAG_WIDTH - WBUF_READ_MATCH_WIDTH];
+    endfunction
+    //  }}}
+
+    //  Definition of internal wires and registers
+    //  {{{
+    wbuf_state_e      [ WBUF_DIR_ENTRIES-1:0]   wbuf_dir_state_q, wbuf_dir_state_d;
+    wbuf_dir_entry_t  [ WBUF_DIR_ENTRIES-1:0]   wbuf_dir_q, wbuf_dir_d;
+    logic             [WBUF_DATA_ENTRIES-1:0]   wbuf_data_valid_q, wbuf_data_valid_d;
+    wbuf_data_entry_t [WBUF_DATA_ENTRIES-1:0]   wbuf_data_q, wbuf_data_d;
+
+    wbuf_dir_ptr_t                              wbuf_dir_free_ptr_q, wbuf_dir_free_ptr_d;
+    logic                                       wbuf_dir_free;
+    wbuf_dir_ptr_t                              wbuf_dir_send_ptr_q, wbuf_dir_send_ptr_d;
+    wbuf_data_ptr_t                             wbuf_data_free_ptr_q, wbuf_data_free_ptr_d;
+    logic                                       wbuf_data_free;
+
+    logic                                       wbuf_write_free;
+    logic                                       wbuf_write_hit_open;
+    logic                                       wbuf_write_hit_pend;
+    logic                                       wbuf_write_hit_sent;
+    wbuf_dir_ptr_t                              wbuf_write_hit_open_dir_ptr;
+    wbuf_dir_ptr_t                              wbuf_write_hit_pend_dir_ptr;
+
+    logic                                       send_meta_valid;
+    logic                                       send_meta_ready;
+    wbuf_send_meta_t                            send_meta_wdata, send_meta_rdata;
+
+    logic                                       send_data_wok;
+    logic                                       send_data_w;
+    wbuf_send_data_t                            send_data_d;
+    wbuf_send_data_t                            send_data_q;
+
+    wbuf_tag_t                                  write_tag;
+    wbuf_data_buf_t                             write_data;
+    wbuf_be_buf_t                               write_be;
+
+    logic [WBUF_DIR_ENTRIES-1:0]                replay_match;
+    logic [WBUF_DIR_ENTRIES-1:0]                replay_open_hit;
+    logic [WBUF_DIR_ENTRIES-1:0]                replay_pend_hit;
+    logic [WBUF_DIR_ENTRIES-1:0]                replay_sent_hit;
+
+    genvar                                      gen_i;
+    //  }}}
+
+    //  Global control signals
+    //  {{{
+    always_comb  // empty_o: every directory entry is in the WBUF_FREE state
+    begin : empty_comb
+        empty_o = 1'b1;
+        for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
+            empty_o &= (wbuf_dir_state_q[i] == WBUF_FREE);  // AND-reduce over all entries
+        end
+    end
+
+    always_comb  // full_o: no directory entry is in the WBUF_FREE state
+    begin : full_comb
+        full_o = 1'b1;
+        for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
+            full_o &= (wbuf_dir_state_q[i] != WBUF_FREE);  // AND-reduce over all entries
+        end
+    end
+    //  }}}
+
+    //  Write control
+    //  {{{
+    assign write_tag = write_addr_i[WBUF_PA_WIDTH-1:WBUF_OFFSET_WIDTH];  // bits above the offset
+
+    always_comb
+    begin : wbuf_write_data_comb
+        for (int unsigned w = 0; w < WBUF_WORDS; w++) begin
+            write_data[w] = write_data_i;  // the write word is replicated on every word lane
+        end
+    end
+
+    generate  // expand the word byte-enable into a per-word byte-enable for the whole line
+        if (WBUF_OFFSET_WIDTH > WBUF_WORD_OFFSET) begin : wbuf_write_be_gt_gen
+            always_comb
+            begin : wbuf_write_be_comb
+                for (int unsigned w = 0; w < WBUF_WORDS; w++) begin
+                    if (w == int'(write_addr_i[WBUF_OFFSET_WIDTH-1:WBUF_WORD_OFFSET])) begin
+                        write_be[w] = write_be_i;  // word addressed by the write
+                    end else begin
+                        write_be[w] = '0;  // other words: no byte enabled
+                    end
+                end
+            end
+        end else begin : wbuf_write_be_le_gen  // no word index bits in the address
+            always_comb
+            begin : wbuf_write_be_comb
+                for (int unsigned w = 0; w < WBUF_WORDS; w++) begin
+                    write_be[w] = write_be_i;  // byte-enable applied as-is to every word
+                end
+            end
+        end
+    endgenerate
+
+    always_comb  // next value of the directory and data free pointers
+    begin : wbuf_free_comb
+        wbuf_dir_free_ptr_d = wbuf_dir_free_ptr_q;  // default: keep the pointer
+        if (ack_i) begin  // an acknowledge has priority: point at the acknowledged entry
+            wbuf_dir_free_ptr_d = ack_id_i;
+        end else if (write_i && wbuf_write_free) begin  // else search the next free entry
+            wbuf_dir_free_ptr_d = wbuf_dir_find_next(wbuf_dir_free_ptr_q, wbuf_dir_state_q, WBUF_FREE);
+        end
+
+        wbuf_data_free_ptr_d = wbuf_data_free_ptr_q;  // default: keep the pointer
+        if (send_data_valid_o && send_data_ready_i) begin  // data sent: point at its entry
+            wbuf_data_free_ptr_d = send_data_q.send_data_ptr;
+        end else if (write_i && wbuf_write_free) begin  // else search the next invalid entry
+            wbuf_data_free_ptr_d = wbuf_data_find_next(wbuf_data_free_ptr_q, wbuf_data_valid_q, 1'b0);
+        end
+    end
+
+    assign wbuf_dir_free  = (wbuf_dir_state_q[wbuf_dir_free_ptr_q] == WBUF_FREE);
+    assign wbuf_data_free = ~wbuf_data_valid_q[wbuf_data_free_ptr_q];  // pointed entry not valid
+
+    always_comb  // compare the write tag with every directory entry, per entry state
+    begin : wbuf_write_hit_comb
+        wbuf_write_hit_open = 1'b0;
+        wbuf_write_hit_pend = 1'b0;
+        wbuf_write_hit_sent = 1'b0;
+
+        wbuf_write_hit_open_dir_ptr = 0;
+        wbuf_write_hit_pend_dir_ptr = 0;
+        for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
+            if (wbuf_dir_q[i].tag == write_tag) begin
+                unique case (wbuf_dir_state_q[i])
+                    WBUF_OPEN: begin
+                        wbuf_write_hit_open = 1'b1;
+                        wbuf_write_hit_open_dir_ptr = wbuf_dir_ptr_t'(i);  // last match wins
+                    end
+                    WBUF_PEND: begin
+                        wbuf_write_hit_pend = 1'b1;
+                        wbuf_write_hit_pend_dir_ptr = wbuf_dir_ptr_t'(i);  // last match wins
+                    end
+                    WBUF_SENT: begin  // only the hit flag is reported, no pointer
+                        wbuf_write_hit_sent = 1'b1;
+                    end
+                    default: begin  // WBUF_FREE: a tag match on a free entry is ignored
+                        /* do nothing */
+                    end
+                endcase
+            end
+        end
+    end
+
+    //  Check if there is a match between the read address and the tag of one
+    //  of the used (OPEN, PEND or SENT) slots in the write buffer directory
+    always_comb
+    begin : read_hit_comb
+        automatic logic [WBUF_DIR_ENTRIES-1:0] read_hit;  //  one match bit per directory entry
+
+        for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
+            read_hit[i] = 1'b0;
+            unique case (wbuf_dir_state_q[i])
+                WBUF_OPEN, WBUF_PEND, WBUF_SENT: begin
+                    automatic wbuf_addr_t  wbuf_addr;
+                    automatic wbuf_match_t wbuf_tag;
+                    automatic wbuf_match_t read_tag;
+
+                    wbuf_addr   = wbuf_addr_t'(wbuf_dir_q[i].tag) << WBUF_OFFSET_WIDTH;  //  tag back to an address
+                    read_tag    = read_addr_i[WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH];  //  top match bits
+                    wbuf_tag    = wbuf_addr  [WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH];  //  same slice
+                    read_hit[i] = (read_tag == wbuf_tag) ? 1'b1 : 1'b0;
+                end
+                default: begin
+                    /* do nothing: entries in any other state never match */
+                end
+            endcase
+        end
+
+        read_hit_o = |read_hit;  //  hit if any used entry matches
+    end
+
+    //  Check if there is a match between the replay address and the tag of one
+    //  of the slots in the write buffer directory, then qualify it by slot state
+    generate
+        for (gen_i = 0; gen_i < WBUF_DIR_ENTRIES; gen_i++) begin : replay_match_gen
+            assign replay_match[gen_i] = replay_is_read_i ?
+                    /* replay is read: compare address block tag (e.g. cache line) */
+                    (wbuf_tag_to_match_addr(wbuf_dir_q[gen_i].tag) ==
+                        replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]) :
+                    /* replay is write: compare wbuf tag */
+                    (wbuf_dir_q[gen_i].tag ==
+                        replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_TAG_WIDTH]);
+
+            assign replay_open_hit[gen_i] =  //  match on an OPEN entry
+                    replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_OPEN);
+            assign replay_pend_hit[gen_i] =  //  match on a PEND entry
+                    replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_PEND);
+            assign replay_sent_hit[gen_i] =  //  match on a SENT entry
+                    replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_SENT);
+        end
+    endgenerate
+
+    assign replay_open_hit_o = |replay_open_hit;
+    assign replay_pend_hit_o = |replay_pend_hit;
+    assign replay_sent_hit_o = |replay_sent_hit;
+
+    //  Replay must wait on: a PEND hit, a SENT hit under WAW sequencing, or no OPEN hit and no room
+    always_comb
+    begin : replay_wbuf_not_ready_comb
+        if (replay_pend_hit_o ||
+            (replay_sent_hit_o && cfg_sequential_waw_i) ||
+            (!replay_open_hit_o && (!wbuf_dir_free || !wbuf_data_free))) begin
+            replay_not_ready_o = 1'b1;
+        end else begin
+            replay_not_ready_o = 1'b0;
+        end
+    end
+
+    //  A new entry may be allocated only when a directory slot and a data buffer are free
+    //  and the write tag hits no OPEN/PEND entry (nor a SENT one under WAW sequencing)
+    assign wbuf_write_free =
+               (wbuf_dir_free & wbuf_data_free)
+            & ~(wbuf_write_hit_open | wbuf_write_hit_pend)
+            & ~(cfg_sequential_waw_i & wbuf_write_hit_sent);
+
+    //  Ready either to allocate, or to coalesce into the OPEN/PEND entry that was hit
+    assign write_ready_o = wbuf_write_free
+                           | (~cfg_inhibit_write_coalescing_i & (wbuf_write_hit_open | wbuf_write_hit_pend));
+    //  }}}
+
+    //  Update control
+    //  {{{
+    always_comb  //  next-state logic of the directory entries and of the data buffers
+    begin : wbuf_update_comb
+        automatic bit timeout;
+        automatic bit write_hit;
+        automatic bit read_hit;
+        automatic bit match_open_ptr;
+        automatic bit match_pend_ptr;
+        automatic bit match_free;
+        automatic bit send;
+
+        timeout = 1'b0;
+        write_hit = 1'b0;
+        read_hit = 1'b0;
+        match_open_ptr = 1'b0;
+        match_pend_ptr = 1'b0;
+        match_free = 1'b0;
+        send = 1'b0;
+
+        wbuf_dir_state_d = wbuf_dir_state_q;  //  default: every entry keeps its state and content
+        wbuf_dir_d = wbuf_dir_q;
+        wbuf_data_d = wbuf_data_q;
+
+        send_data_w = 1'b0;
+        send_meta_valid = 1'b0;
+
+        for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
+            case (wbuf_dir_state_q[i])
+                WBUF_FREE: begin
+                    match_free = wbuf_write_free && (i == int'(wbuf_dir_free_ptr_q));  //  only the pointed slot allocates
+
+                    if (write_i && match_free) begin
+                        send = (cfg_threshold_i == 0)  //  any of these skips OPEN and goes straight to PEND
+                               | write_uc_i
+                               | flush_all_i
+                               | cfg_inhibit_write_coalescing_i;
+
+                        wbuf_dir_state_d[i] = send ? WBUF_PEND : WBUF_OPEN;
+                        wbuf_dir_d[i].tag = write_tag;
+                        wbuf_dir_d[i].cnt = 0;
+                        wbuf_dir_d[i].ptr = wbuf_data_free_ptr_q;  //  bind the entry to the free data buffer
+                        wbuf_dir_d[i].uc  = write_uc_i;
+
+                        wbuf_data_write(
+                            wbuf_data_d[wbuf_data_free_ptr_q].data,
+                            wbuf_data_d[wbuf_data_free_ptr_q].be,
+                            '0,  //  NOTE(review): presumably "no previous data" to merge - confirm in wbuf_data_write
+                            '0,  //  NOTE(review): presumably "no previous byte-enables" - confirm in wbuf_data_write
+                            write_data,
+                            write_be
+                        );
+                    end
+                end
+
+                WBUF_OPEN: begin
+                    match_open_ptr  = (i == int'(wbuf_write_hit_open_dir_ptr));
+                    timeout         = (wbuf_dir_q[i].cnt == (cfg_threshold_i - 1));
+                    read_hit        = read_flush_hit_i & wbuf_write_hit_open & match_open_ptr;
+                    write_hit       = write_i
+                                      & wbuf_write_hit_open
+                                      & match_open_ptr
+                                      & ~cfg_inhibit_write_coalescing_i;
+
+                    if (!flush_all_i) begin
+                        if (write_hit && cfg_reset_timecnt_on_write_i) begin
+                            timeout = 1'b0;  //  a coalesced write restarts the timer and cancels the timeout
+                            wbuf_dir_d[i].cnt = 0;
+                        end else if (!timeout) begin
+                            wbuf_dir_d[i].cnt = wbuf_dir_q[i].cnt + 1;
+                        end
+
+                        if (read_hit | timeout | cfg_inhibit_write_coalescing_i) begin
+                            wbuf_dir_state_d[i] = WBUF_PEND;  //  close the entry: it is now waiting to be sent
+                        end
+                    end else begin
+                        wbuf_dir_state_d[i] = WBUF_PEND;  //  flush closes every OPEN entry
+                    end
+
+                    if (write_hit) begin  //  data is merged even in the cycle where the entry closes
+                        wbuf_data_write(
+                            wbuf_data_d[wbuf_dir_q[i].ptr].data,
+                            wbuf_data_d[wbuf_dir_q[i].ptr].be,
+                            wbuf_data_q[wbuf_dir_q[i].ptr].data,
+                            wbuf_data_q[wbuf_dir_q[i].ptr].be,
+                            write_data,
+                            write_be
+                        );
+                    end
+                end
+
+                WBUF_PEND: begin
+                    match_pend_ptr = (i == int'(wbuf_write_hit_pend_dir_ptr));
+                    write_hit = write_i
+                                & wbuf_write_hit_pend
+                                & match_pend_ptr
+                                & ~cfg_inhibit_write_coalescing_i;
+
+                    if (write_hit) begin  //  a PEND entry still accepts coalesced writes
+                        wbuf_data_write(
+                            wbuf_data_d[wbuf_dir_q[i].ptr].data,
+                            wbuf_data_d[wbuf_dir_q[i].ptr].be,
+                            wbuf_data_q[wbuf_dir_q[i].ptr].data,
+                            wbuf_data_q[wbuf_dir_q[i].ptr].be,
+                            write_data,
+                            write_be
+                        );
+                    end
+
+                    if (i == int'(wbuf_dir_send_ptr_q)) begin
+                        send_data_w = send_meta_ready;  //  push data FIFO only if the meta FIFO accepts
+                        send_meta_valid  = send_data_wok;  //  push meta FIFO only if the data FIFO accepts
+                        if (send_meta_ready && send_data_wok) begin
+                            wbuf_dir_state_d[i] = WBUF_SENT;
+                        end
+                    end
+                end
+
+                WBUF_SENT: begin
+                    if (ack_i && (i == int'(ack_id_i))) begin
+                        wbuf_dir_state_d[i] = WBUF_FREE;  //  acknowledged: the slot can be reused
+                    end
+                end
+            endcase
+        end
+    end
+
+    always_comb
+    begin : wbuf_data_valid_comb
+        wbuf_data_valid_d = wbuf_data_valid_q;
+
+        //  allocate a free data buffer on a new write
+        if (write_i && wbuf_write_free) begin
+            wbuf_data_valid_d[wbuf_data_free_ptr_q] = 1'b1;
+        end
+
+        //  de-allocate a data buffer as soon as it is sent (written last, so it takes priority)
+        if (send_data_valid_o && send_data_ready_i) begin
+            wbuf_data_valid_d[send_data_q.send_data_ptr] = 1'b0;
+        end
+    end
+    //  }}}
+
+    //  Send control
+    //  {{{
+    //    Data channel: FIFO of {data buffer pointer, tag} for the entries being sent
+    hpdcache_fifo_reg #(
+        .FIFO_DEPTH          (WBUF_SEND_FIFO_DEPTH),
+        .FEEDTHROUGH         (WBUF_SEND_FEEDTHROUGH),
+        .fifo_data_t         (wbuf_send_data_t)
+    ) send_data_ptr_fifo_i (
+        .clk_i,
+        .rst_ni,
+        .w_i                 (send_data_w),
+        .wok_o               (send_data_wok),
+        .wdata_i             (send_data_d),
+        .r_i                 (send_data_ready_i),
+        .rok_o               (send_data_valid_o),
+        .rdata_o             (send_data_q)
+    );
+
+    assign send_data_d.send_data_ptr = wbuf_dir_q[wbuf_dir_send_ptr_q].ptr,  //  taken from the entry under the send pointer
+           send_data_d.send_data_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag;
+
+    assign send_data_tag_o   = wbuf_addr_t'(send_data_q.send_data_tag),
+           send_data_o       = wbuf_data_q[send_data_q.send_data_ptr].data,  //  data is read from the buffer at pop time
+           send_be_o         = wbuf_data_q[send_data_q.send_data_ptr].be;
+
+    //    Meta-data channel: FIFO of {tag, directory slot id, uncacheable flag}
+    hpdcache_fifo_reg #(
+        .FIFO_DEPTH          (WBUF_SEND_FIFO_DEPTH),
+        .FEEDTHROUGH         (WBUF_SEND_FEEDTHROUGH),
+        .fifo_data_t         (wbuf_send_meta_t)
+    ) send_meta_fifo_i (
+        .clk_i,
+        .rst_ni,
+        .w_i                 (send_meta_valid),
+        .wok_o               (send_meta_ready),
+        .wdata_i             (send_meta_wdata),
+        .r_i                 (send_meta_ready_i),
+        .rok_o               (send_meta_valid_o),
+        .rdata_o             (send_meta_rdata)
+    );
+
+    assign send_meta_wdata.send_meta_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag,
+           send_meta_wdata.send_meta_id  = wbuf_dir_send_ptr_q,  //  the directory index doubles as request id
+           send_meta_wdata.send_meta_uc  = wbuf_dir_q[wbuf_dir_send_ptr_q].uc;
+
+    assign send_addr_o = { send_meta_rdata.send_meta_tag, {WBUF_OFFSET_WIDTH{1'b0}} },  //  tag with zeroed offset bits
+           send_id_o   = send_meta_rdata.send_meta_id,
+           send_uc_o   = send_meta_rdata.send_meta_uc;
+
+    //    Send pointer: hold while the PEND slot under it has not been pushed, else move on
+    always_comb
+    begin : wbuf_send_comb
+        if ((wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND) &&
+            !(send_meta_valid && send_meta_ready)) begin
+            wbuf_dir_send_ptr_d = wbuf_dir_send_ptr_q;
+        end else begin
+            wbuf_dir_send_ptr_d = wbuf_dir_find_next(wbuf_dir_send_ptr_q, wbuf_dir_state_q, WBUF_PEND);
+        end
+    end
+    //  }}}
+
+    //  Internal state assignment
+    //  {{{
+    always_ff @(posedge clk_i) wbuf_data_q <= wbuf_data_d;  //  data buffers: clocked, no reset
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : wbuf_state_ff
+        if (!rst_ni) begin
+            wbuf_dir_free_ptr_q  <= 0;
+            wbuf_data_free_ptr_q <= 0;
+            wbuf_dir_send_ptr_q  <= 0;
+            wbuf_data_valid_q    <= '0;
+            wbuf_dir_state_q     <= {WBUF_DIR_ENTRIES{WBUF_FREE}};
+            wbuf_dir_q           <= '0;
+        end else begin
+            wbuf_dir_free_ptr_q  <= wbuf_dir_free_ptr_d;
+            wbuf_data_free_ptr_q <= wbuf_data_free_ptr_d;
+            wbuf_dir_send_ptr_q  <= wbuf_dir_send_ptr_d;
+            wbuf_data_valid_q    <= wbuf_data_valid_d;
+            wbuf_dir_state_q     <= wbuf_dir_state_d;
+            wbuf_dir_q           <= wbuf_dir_d;
+        end
+    end
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    initial assert(WBUF_WORDS inside {1, 2, 4, 8, 16}) else  //  checked once at simulation start; accepts 1..16 only
+            $error("WBUF: width of data buffers must be a power of 2");
+    ack_sent_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)  //  an ack must target a SENT slot
+            (ack_i -> (wbuf_dir_state_q[ack_id_i] == WBUF_SENT))) else
+            $error("WBUF: acknowledging a not SENT slot");
+    send_pend_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)  //  only PEND slots are pushed
+            (send_meta_valid -> (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND))) else
+            $error("WBUF: sending a not PEND slot");
+    send_valid_data_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)  //  popped buffer must be allocated
+            (send_data_valid_o -> (wbuf_data_valid_q[send_data_q.send_data_ptr] == 1'b1))) else
+            $error("WBUF: sending a not valid data");
+    //  pragma translate_on
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv
new file mode 100644
index 0000000..1792ff4
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv
@@ -0,0 +1,228 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Write Buffer Wrapper
+ *  History       :
+ */
+/*  This wrapper adapts the send interface of the write buffer to the memory
+ *  interface of the cache.
+ */
+module hpdcache_wbuf_wrapper  //  hpdcache_wbuf plus the adaptation of its send/ack ports to the memory interface
+import hpdcache_pkg::*;
+    //  Parameters
+    //  {{{
+#(
+    parameter  int  HPDcacheMemIdWidth    = 8,      //  width of the memory request ID field
+    parameter  int  HPDcacheMemDataWidth  = 512,    //  data width of the memory interface, in bits
+    parameter  type hpdcache_mem_req_t    = logic,  //  placeholder: must be overridden (mem_req_* fields used)
+    parameter  type hpdcache_mem_req_w_t  = logic,  //  placeholder: must be overridden (mem_req_w_* fields used)
+    parameter  type hpdcache_mem_resp_w_t = logic,  //  placeholder: must be overridden (mem_resp_w_* fields used)
+
+    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
+)
+    //  }}}
+    //  Ports
+    //  {{{
+(
+    //      Clock and reset signals
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    //      Global control signals
+    output logic                  empty_o,
+    output logic                  full_o,
+    input  logic                  flush_all_i,
+
+    //      Configuration signals
+    //          Timer threshold
+    input  wbuf_timecnt_t         cfg_threshold_i,
+    //          Reset timer on write
+    input  logic                  cfg_reset_timecnt_on_write_i,
+    //          Sequentialize write-after-write hazards
+    input  logic                  cfg_sequential_waw_i,
+    //          Inhibit write coalescing
+    input  logic                  cfg_inhibit_write_coalescing_i,
+
+    //      Write interface
+    input  logic                  write_i,
+    output logic                  write_ready_o,
+    input  wbuf_addr_t            write_addr_i,
+    input  wbuf_data_t            write_data_i,
+    input  wbuf_be_t              write_be_i,  // byte-enable
+    input  logic                  write_uc_i,  // uncacheable write
+
+    //      Read hit interface
+    input  wbuf_addr_t            read_addr_i,
+    output logic                  read_hit_o,
+    input  logic                  read_flush_hit_i,
+
+    //      Replay hit interface
+    input  wbuf_addr_t            replay_addr_i,
+    input  logic                  replay_is_read_i,
+    output logic                  replay_open_hit_o,
+    output logic                  replay_pend_hit_o,
+    output logic                  replay_sent_hit_o,
+    output logic                  replay_not_ready_o,
+
+    //      Memory interface
+    input  logic                  mem_req_write_ready_i,
+    output logic                  mem_req_write_valid_o,
+    output hpdcache_mem_req_t     mem_req_write_o,
+
+    input  logic                  mem_req_write_data_ready_i,
+    output logic                  mem_req_write_data_valid_o,
+    output hpdcache_mem_req_w_t   mem_req_write_data_o,
+
+    output logic                  mem_resp_write_ready_o,
+    input  logic                  mem_resp_write_valid_i,
+    input  hpdcache_mem_resp_w_t  mem_resp_write_i
+);
+    //  }}}
+
+    //  Internal signals
+    //  {{{
+    wbuf_addr_t     send_addr;      //  address of the request leaving the meta-data channel
+    wbuf_dir_ptr_t  send_id;        //  write buffer directory index, reused as memory request ID
+    logic           send_uc;        //  request is uncacheable
+    wbuf_addr_t     send_data_tag;  //  tag travelling with the data, selects the memory word
+    wbuf_data_buf_t send_data;
+    wbuf_be_buf_t   send_be;
+    wbuf_dir_ptr_t  ack_id;         //  directory index recovered from the response ID
+    logic           ack_error;
+    //  }}}
+
+    //  Wrapped write buffer
+    //  {{{
+    hpdcache_wbuf #(
+        .WBUF_DIR_ENTRIES              (HPDCACHE_WBUF_DIR_ENTRIES),
+        .WBUF_DATA_ENTRIES             (HPDCACHE_WBUF_DATA_ENTRIES),
+        .WBUF_WORD_WIDTH               (HPDCACHE_REQ_DATA_WIDTH),
+        .WBUF_WORDS                    (HPDCACHE_WBUF_WORDS),
+        .WBUF_PA_WIDTH                 (HPDCACHE_PA_WIDTH),
+        .WBUF_TIMECNT_MAX              ((2**HPDCACHE_WBUF_TIMECNT_WIDTH) - 1),
+        .WBUF_READ_MATCH_WIDTH         (HPDCACHE_NLINE_WIDTH),
+        .WBUF_SEND_FEEDTHROUGH         (HPDCACHE_WBUF_SEND_FEEDTHROUGH)
+    ) hpdcache_wbuf_i (
+        .clk_i,
+        .rst_ni,
+        .empty_o,
+        .full_o,
+        .flush_all_i,
+        .cfg_threshold_i,
+        .cfg_reset_timecnt_on_write_i,
+        .cfg_sequential_waw_i,
+        .cfg_inhibit_write_coalescing_i,
+        .write_i,
+        .write_ready_o,
+        .write_addr_i,
+        .write_data_i,
+        .write_be_i,
+        .write_uc_i,
+        .read_addr_i,
+        .read_hit_o,
+        .read_flush_hit_i,
+        .replay_addr_i,
+        .replay_is_read_i,
+        .replay_open_hit_o,
+        .replay_pend_hit_o,
+        .replay_sent_hit_o,
+        .replay_not_ready_o,
+        .send_meta_ready_i             (mem_req_write_ready_i),
+        .send_meta_valid_o             (mem_req_write_valid_o),
+        .send_addr_o                   (send_addr),
+        .send_id_o                     (send_id),
+        .send_uc_o                     (send_uc),
+        .send_data_ready_i             (mem_req_write_data_ready_i),
+        .send_data_valid_o             (mem_req_write_data_valid_o),
+        .send_data_tag_o               (send_data_tag),
+        .send_data_o                   (send_data),
+        .send_be_o                     (send_be),
+        .ack_i                         (mem_resp_write_valid_i),
+        .ack_id_i                      (ack_id),
+        .ack_error_i                   (ack_error)
+    );
+    //  }}}
+
+    //  Memory interface: every write buffer entry leaves as a single-beat write request
+    //  {{{
+    assign  mem_req_write_o.mem_req_addr        = send_addr,
+            mem_req_write_o.mem_req_len         = 0,
+            mem_req_write_o.mem_req_size        = get_hpdcache_mem_size(HPDCACHE_WBUF_DATA_WIDTH/8),
+            mem_req_write_o.mem_req_id          = hpdcache_mem_id_t'(send_id),
+            mem_req_write_o.mem_req_command     = HPDCACHE_MEM_WRITE,
+            mem_req_write_o.mem_req_atomic      = HPDCACHE_MEM_ATOMIC_ADD,  //  NOTE(review): presumably ignored for plain writes
+            mem_req_write_o.mem_req_cacheable   = ~send_uc;
+
+    generate
+        localparam int unsigned WBUF_MEM_DATA_RATIO = HPDcacheMemDataWidth/HPDCACHE_WBUF_DATA_WIDTH;
+        localparam int unsigned WBUF_MEM_DATA_WORD_INDEX_WIDTH = $clog2(WBUF_MEM_DATA_RATIO);
+
+        assign mem_req_write_data_o.mem_req_w_last = 1'b1;  //  single beat, so always the last one
+
+        if (WBUF_MEM_DATA_RATIO > 1)
+        begin : wbuf_data_upsizing_gen
+            logic [WBUF_MEM_DATA_RATIO-1:0][HPDCACHE_WBUF_DATA_WIDTH/8-1:0] mem_req_be;  //  one BE group per memory word
+
+            //  demux send BE: place it in the memory word selected by the low bits of the data tag
+            hpdcache_demux #(
+                .NOUTPUT     (WBUF_MEM_DATA_RATIO),
+                .DATA_WIDTH  (HPDCACHE_WBUF_DATA_WIDTH/8),
+                .ONE_HOT_SEL (1'b0)
+            ) mem_write_be_demux_i (
+                .data_i      (send_be),
+                .sel_i       (send_data_tag[0 +: WBUF_MEM_DATA_WORD_INDEX_WIDTH]),
+                .data_o      (mem_req_be)
+            );
+
+            assign mem_req_write_data_o.mem_req_w_data = {WBUF_MEM_DATA_RATIO{send_data}},  //  replicated; BE picks the word
+                   mem_req_write_data_o.mem_req_w_be   = mem_req_be;
+
+        end else if (WBUF_MEM_DATA_RATIO == 1)
+        begin : wbuf_data_forwarding_gen
+            assign mem_req_write_data_o.mem_req_w_data = send_data,
+                   mem_req_write_data_o.mem_req_w_be = send_be;
+        end
+
+        //  Assertions
+        //  {{{
+        //  pragma translate_off
+        initial assert(WBUF_MEM_DATA_RATIO > 0) else
+                $error($sformatf("WBUF: data width of mem interface (%d) shall be g.e. to wbuf data width(%d)",
+                                 HPDcacheMemDataWidth, HPDCACHE_WBUF_DATA_WIDTH));
+        //  pragma translate_on
+        //  }}}
+    endgenerate
+
+    assign  mem_resp_write_ready_o = 1'b1,  //  write responses are always accepted
+            ack_id                 =  mem_resp_write_i.mem_resp_w_id[0 +: HPDCACHE_WBUF_DIR_PTR_WIDTH],
+            ack_error              = (mem_resp_write_i.mem_resp_w_error != HPDCACHE_MEM_RESP_OK);
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    initial assert (HPDCACHE_WBUF_DIR_PTR_WIDTH <= HPDcacheMemIdWidth) else
+      $fatal(1, "HPDcacheMemIdWidth is not wide enough to fit all possible write buffer transactions");
+    //  pragma translate_on
+    //  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv
new file mode 100644
index 0000000..dfef92d
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv
@@ -0,0 +1,374 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Riccardo Alidori, Cesar Fuguet
+ *  Maintainer(s) : Cesar Fuguet
+ *  Creation Date : June, 2021
+ *  Description   : HPDcache Linear Hardware Memory Prefetcher.
+ *  History       :
+ */
+module hwpf_stride
+import hwpf_stride_pkg::*;
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter int CACHE_LINE_BYTES = 64
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input  logic                        clk_i,
+    input  logic                        rst_ni,
+
+    // CSR: each *_set_i strobe loads the matching register from its *_i value
+    input  logic                        csr_base_set_i,
+    input  hwpf_stride_base_t           csr_base_i,
+    input  logic                        csr_param_set_i,
+    input  hwpf_stride_param_t          csr_param_i,
+    input  logic                        csr_throttle_set_i,
+    input  hwpf_stride_throttle_t       csr_throttle_i,
+
+    output hwpf_stride_base_t           csr_base_o,
+    output hwpf_stride_param_t          csr_param_o,
+    output hwpf_stride_throttle_t       csr_throttle_o,
+
+    // If high, the prefetcher is enabled and active
+    output logic                        busy_o,
+
+    // Snooping
+    //   Address to snoop on requests ports
+    output hpdcache_nline_t             snoop_nline_o,
+    //   If set to one, the snoop address matched one of the requests
+    input  logic                        snoop_match_i,
+
+    // D-Cache interface
+    output logic                        hpdcache_req_valid_o,
+    input  logic                        hpdcache_req_ready_i,
+    output hpdcache_req_t               hpdcache_req_o,
+    input  logic                        hpdcache_rsp_valid_i,
+    input  hpdcache_rsp_t               hpdcache_rsp_i
+);
+//  }}}
+
+    import hpdcache_pkg::hpdcache_req_addr_t;
+
+    //  Definition of constants
+    //  {{{
+    localparam int STRIDE_WIDTH     = $bits(csr_param_i.stride);       //  widths are derived from the CSR struct fields
+    localparam int NBLOCKS_WIDTH    = $bits(csr_param_i.nblocks);
+    localparam int NLINES_WIDTH     = $bits(csr_param_i.nlines);
+    localparam int NWAIT_WIDTH      = $bits(csr_throttle_i.nwait);
+    localparam int INFLIGHT_WIDTH   = $bits(csr_throttle_i.ninflight);
+    localparam int NLINES_CNT_WIDTH = NLINES_WIDTH;                    //  line counter is as wide as the nlines field
+    //  }}}
+
+    //  Internal registers and signals
+    //  {{{
+    //      FSM
+    enum {
+        IDLE,      //  waits for csr_base_q.enable, then copies the CSRs into the shadow registers
+        SNOOP,     //  waits for snoop_match_i on the base cacheline
+        SEND_REQ,  //  presents a prefetch request to the dcache
+        WAIT,
+        DONE,
+        ABORT
+    } state_d, state_q;
+
+    logic [NBLOCKS_WIDTH-1:0] nblocks_cnt_d, nblocks_cnt_q;
+    logic [NLINES_CNT_WIDTH-1:0] nlines_cnt_d, nlines_cnt_q;
+    logic [NWAIT_WIDTH-1:0] nwait_cnt_d, nwait_cnt_q;
+    logic [INFLIGHT_WIDTH-1:0] inflight_cnt_d, inflight_cnt_q;
+    logic inflight_inc, inflight_dec;
+
+    hwpf_stride_base_t csr_base_q;                                //  software-visible copies (csr_*_o)
+    hwpf_stride_base_t shadow_base_q, shadow_base_d;              //  working copies used by the FSM
+    hwpf_stride_param_t csr_param_q;
+    hwpf_stride_param_t shadow_param_q, shadow_param_d;
+    hwpf_stride_throttle_t csr_throttle_q;
+    hwpf_stride_throttle_t shadow_throttle_q, shadow_throttle_d;
+    hpdcache_nline_t request_nline_q, request_nline_d;            //  cacheline number of the next request
+
+    hpdcache_set_t hpdcache_req_set;
+    hpdcache_tag_t hpdcache_req_tag;
+
+    logic csr_base_update;              //  FSM request to write shadow_base_d back into csr_base_q
+    hpdcache_nline_t increment_stride;
+    logic is_inflight_max;
+
+    //      Default assignment
+    assign increment_stride = hpdcache_nline_t'(shadow_param_q.stride) + 1'b1;  //  step between blocks is stride + 1 lines
+    assign inflight_dec     = hpdcache_rsp_valid_i;
+    assign snoop_nline_o    = shadow_base_q.base_cline;
+    assign is_inflight_max  = ( shadow_throttle_q.ninflight == '0 ) ?           //  ninflight == 0 disables the limit
+                              1'b0 : ( inflight_cnt_q >= shadow_throttle_q.ninflight );
+    assign csr_base_o       = csr_base_q;
+    assign csr_param_o      = csr_param_q;
+    assign csr_throttle_o   = csr_throttle_q;
+    //  }}}
+
+    //  Dcache outputs
+    //  {{{
+    assign hpdcache_req_tag = request_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];
+    assign hpdcache_req_set = request_nline_q[0 +: HPDCACHE_SET_WIDTH];
+
+    assign hpdcache_req_o.addr_tag        = hpdcache_req_tag;
+    assign hpdcache_req_o.addr_offset     = { hpdcache_req_set, {HPDCACHE_OFFSET_WIDTH{1'b0}} };
+    assign hpdcache_req_o.op              = HPDCACHE_REQ_CMO;
+    assign hpdcache_req_o.size            = HPDCACHE_REQ_CMO_PREFETCH;
+    assign hpdcache_req_o.wdata           = '0;
+    assign hpdcache_req_o.be              = '1;
+    assign hpdcache_req_o.sid             = '0; // this is set when connecting to the dcache
+    assign hpdcache_req_o.tid             = '0; // this is set by the wrapper of the prefetcher
+    assign hpdcache_req_o.need_rsp        = 1'b1;
+    assign hpdcache_req_o.phys_indexed    = 1'b1;
+    assign hpdcache_req_o.pma.uncacheable = 1'b0;
+    assign hpdcache_req_o.pma.io          = 1'b0;
+    //  }}}
+
+    //  Set state of internal registers
+    //  {{{
+    always_ff @(posedge clk_i or negedge rst_ni) begin  //  CSR copies, shadow copies, request line and FSM state
+        if (!rst_ni) begin
+            csr_base_q <= '0;
+            csr_param_q <= '0;
+            csr_throttle_q <= '0;  //  reset like the other CSRs: feeds csr_throttle_o and the shadow copy loaded in IDLE
+            shadow_base_q <= '0;
+            shadow_param_q <= '0;
+            shadow_throttle_q <= '0;
+            request_nline_q <= '0;
+            state_q <= IDLE;
+        end else begin
+            if      (csr_base_set_i) csr_base_q <= csr_base_i;          //  a software write wins over the FSM update
+            else if (csr_base_update) csr_base_q <= shadow_base_d;
+            if      (csr_param_set_i) csr_param_q <= csr_param_i;
+            if      (csr_throttle_set_i) csr_throttle_q <= csr_throttle_i;
+            shadow_base_q <= shadow_base_d;
+            shadow_param_q <= shadow_param_d;
+            shadow_throttle_q <= shadow_throttle_d;
+            request_nline_q <= request_nline_d;
+            state_q <= state_d;
+        end
+    end
+    //  }}}
+
+    //  Update internal counters
+    //  {{{
+    always_comb begin : inflight_cnt
+        inflight_cnt_d = inflight_cnt_q;
+
+        // A request accepted by the dcache adds one in-flight prefetch
+        if ( inflight_inc ) begin
+            inflight_cnt_d = inflight_cnt_d + 1'b1;
+        end
+
+        // A response retires one, unless the registered count is already zero
+        if ( inflight_dec && ( inflight_cnt_q > 0 )) begin
+            inflight_cnt_d = inflight_cnt_d - 1'b1;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            inflight_cnt_q <= '0;
+            nwait_cnt_q    <= '0;
+            nlines_cnt_q   <= '0;
+            nblocks_cnt_q  <= '0;
+        end else begin
+            inflight_cnt_q <= inflight_cnt_d;
+            nwait_cnt_q    <= nwait_cnt_d;
+            nlines_cnt_q   <= nlines_cnt_d;
+            nblocks_cnt_q  <= nblocks_cnt_d;
+        end
+    end
+    //  }}}
+
+    //  FSM
+    //  {{{
+    always_comb begin : fsm_control
+        // Default assignments: strobes deasserted, every *_d holds its registered value
+        hpdcache_req_valid_o = 1'b0;
+        nblocks_cnt_d = nblocks_cnt_q;
+        nlines_cnt_d = nlines_cnt_q;
+        nwait_cnt_d = nwait_cnt_q;
+        inflight_inc = 1'b0;
+        busy_o = 1'b0;  // stays low in IDLE and SNOOP, which never set it
+        csr_base_update = 1'b0;
+
+        shadow_base_d = shadow_base_q;
+        shadow_param_d = shadow_param_q;
+        shadow_throttle_d = shadow_throttle_q;
+        request_nline_d = request_nline_q;
+        state_d = state_q;
+
+        case ( state_q )
+
+            IDLE: begin
+                // If enabled, go snooping the dcache ports (working on shadow copies of the CSRs)
+                if ( csr_base_q.enable ) begin
+                    shadow_base_d = csr_base_q;
+                    if (( csr_param_q.nlines > 0 ) || ( csr_param_q.nblocks > 0 )) begin
+                        shadow_param_d = csr_param_q;
+                        shadow_throttle_d = csr_throttle_q;
+                        state_d = SNOOP;
+                    end else begin
+                        // nlines and nblocks are both zero: no prefetch needed, disarm immediately
+                        shadow_base_d.enable = 1'b0;
+                        csr_base_update = 1'b1;
+                    end
+                end
+            end
+
+
+            SNOOP: begin
+                if ( csr_base_q.enable ) begin
+                    // If a snooper matched an address, send the request
+                    if ( snoop_match_i ) begin
+                        state_d = SEND_REQ;
+
+                        if ( shadow_param_q.nlines == 0 ) begin
+                            //  nlines == 0: skip the first block, i.e. start at base + stride
+                            request_nline_d = shadow_base_q.base_cline +
+                                              hpdcache_nline_t'(increment_stride);
+                            nblocks_cnt_d = ( shadow_param_q.nblocks > 0 ) ?
+                                            shadow_param_q.nblocks - 1 : 0;
+                            nlines_cnt_d = 0;
+
+                            //  update the base cacheline to the first one of the next block
+                            shadow_base_d.base_cline = request_nline_d;
+                        end else begin
+                            //  skip the first cacheline (of the first block), i.e. start at base + 1
+                            request_nline_d = shadow_base_q.base_cline + 1'b1;
+                            nblocks_cnt_d = shadow_param_q.nblocks;
+                            nlines_cnt_d = shadow_param_q.nlines - 1;
+                        end
+                    end
+                end else begin
+                    state_d = IDLE;  // disabled while snooping: back to IDLE, not through ABORT
+                end
+            end
+
+
+            SEND_REQ: begin
+                busy_o = 1'b1;
+
+                // make the prefetch request to memory
+                hpdcache_req_valid_o = 1'b1;
+
+                // we've got a grant, so we can move to the next request
+                if ( hpdcache_req_ready_i ) begin
+                    inflight_inc = 1'b1;
+
+                    if ( nlines_cnt_q == 0 ) begin
+                        //  go to the first cacheline of the next block
+                        request_nline_d = shadow_base_q.base_cline +
+                                          hpdcache_nline_t'(increment_stride);
+                        nblocks_cnt_d = ( nblocks_cnt_q > 0 ) ? nblocks_cnt_q - 1 : 0;  // saturates at 0
+                        nlines_cnt_d = shadow_param_q.nlines;
+
+                        //  update the base cacheline to the first one of the next block
+                        shadow_base_d.base_cline = request_nline_d;
+                    end else begin
+                        //  go to the next cacheline (within the same block)
+                        request_nline_d = request_nline_q + 1'b1;
+                        nlines_cnt_d = nlines_cnt_q - 1;
+                    end
+
+                    // Both counters at 0: that was the last request. Else, if NWAIT is 0, issue every cycle
+                    if (( nblocks_cnt_q == 0 ) && ( nlines_cnt_q == 0 )) begin
+                        state_d = DONE;
+                    end else if ( shadow_throttle_q.nwait == 0 ) begin
+                        // Wait if the number of inflight requests is greater than
+                        // the maximum indicated. Otherwise, send the next request
+                        state_d = is_inflight_max ? WAIT : SEND_REQ;
+                    end else begin
+                        // Wait the indicated cycles before sending the next request
+                        nwait_cnt_d = shadow_throttle_q.nwait;
+                        state_d = WAIT;
+                    end
+
+                    if ( !csr_base_q.enable ) state_d = ABORT;  // overrides the choice made just above
+                end
+            end
+
+
+            WAIT: begin
+                //  Wait until:
+                //    - the indicated number of wait cycles between requests is reached (nwait)
+                //    - the number of inflight requests is below the indicated maximum (ninflight)
+                busy_o = 1'b1;
+                if ( csr_base_q.enable ) begin
+                    if ( !is_inflight_max && ( nwait_cnt_q == 0 )) begin
+                        state_d = SEND_REQ;
+                    end
+
+                    if ( nwait_cnt_q > 0 ) begin
+                        nwait_cnt_d = nwait_cnt_q - 1;  // count down, stopping at 0
+                    end
+                end else begin
+                    state_d = ABORT;
+                end
+            end
+
+
+            DONE: begin
+                busy_o = 1'b1;
+                if ( csr_base_q.enable ) begin
+                    // Finish only once the in-flight counter is zero and the wait counter has expired
+                    if (( inflight_cnt_q == 0 ) && !is_inflight_max && ( nwait_cnt_q == 0 )) begin
+                        // Copy back shadow base register into the user visible one
+                        csr_base_update = 1'b1;
+
+                        // Check the rearm bit
+                        if ( shadow_base_q.rearm ) begin
+                            state_d = SNOOP;  // rearm: wait for the next snoop match, enable stays set
+                        end else begin
+                            state_d = IDLE;
+
+                            // disarm the prefetcher
+                            shadow_base_d.enable = 1'b0;
+                        end
+
+                        // Check the cycle bit
+                        if ( shadow_base_q.cycle ) begin
+                            // restore the base address from the user visible register
+                            shadow_base_d.base_cline = csr_base_q.base_cline;
+                        end
+                    end
+
+                    if ( nwait_cnt_q > 0 ) begin
+                        nwait_cnt_d = nwait_cnt_q - 1;  // count down, stopping at 0
+                    end
+                end else begin
+                    state_d = ABORT;
+                end
+            end
+
+            ABORT: begin
+                busy_o = 1'b1;
+                if ( inflight_cnt_q == 0 ) begin  // stay here until the in-flight counter reads zero
+                    state_d = IDLE;
+                end
+            end
+        endcase
+    end
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv
new file mode 100644
index 0000000..1aa9df4
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv
@@ -0,0 +1,117 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Author(s)     : Riccardo Alidori, Cesar Fuguet
+ *  Creation Date : June, 2021
+ *  Description   : Hw prefetchers arbiter
+ *  History       :
+ */
+module hwpf_stride_arb
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter NUM_HW_PREFETCH = 4
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input  logic                                clk_i,
+    input  logic                                rst_ni,
+
+    // Engine side: one request/response port per hardware prefetcher
+    input  logic          [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid_i,
+    output logic          [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready_o,
+    input  hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req_i,
+    output logic          [NUM_HW_PREFETCH-1:0] hwpf_stride_rsp_valid_o,
+    output hpdcache_rsp_t [NUM_HW_PREFETCH-1:0] hwpf_stride_rsp_o,       // Not used
+
+    // Cache side: the single request/response port shared by all engines
+    output logic                                hpdcache_req_valid_o,
+    input  logic                                hpdcache_req_ready_i,
+    output hpdcache_req_t                       hpdcache_req_o,
+    input  logic                                hpdcache_rsp_valid_i,
+    input  hpdcache_rsp_t                       hpdcache_rsp_i           // Not used
+);
+//  }}}
+
+    //  Internal signals
+    //  {{{
+    logic          [NUM_HW_PREFETCH-1:0] req_valid_vec;
+    hpdcache_req_t [NUM_HW_PREFETCH-1:0] req_vec;
+    logic          [NUM_HW_PREFETCH-1:0] grant;
+    //  }}}
+
+    //  Requesters arbiter
+    //  {{{
+    //      Pack request ports
+    //      Whole-vector assignments: the valid flags and the payloads of all
+    //      engines are handed unchanged to the arbiter and the multiplexor.
+    assign req_valid_vec = hwpf_stride_req_valid_i;
+    assign req_vec       = hwpf_stride_req_i;
+
+    //      An engine sees ready only while it holds the grant and the cache
+    //      side reports ready.
+    assign hwpf_stride_req_ready_o = grant & {NUM_HW_PREFETCH{hpdcache_req_ready_i}};
+
+    //      Round-robin arbiter selecting among the requesting engines
+    hpdcache_rrarb #(
+        .N              (NUM_HW_PREFETCH)
+    ) hwpf_stride_req_arbiter_i (
+        .clk_i          (clk_i),
+        .rst_ni         (rst_ni),
+        .req_i          (req_valid_vec),
+        .gnt_o          (grant),
+        .ready_i        (hpdcache_req_ready_i)
+    );
+
+    //      Request multiplexor, selected by the grant vector
+    hpdcache_mux #(
+        .NINPUT         (NUM_HW_PREFETCH),
+        .DATA_WIDTH     ($bits(hpdcache_req_t)),
+        .ONE_HOT_SEL    (1'b1)
+    ) hwpf_stride_req_mux_i (
+        .data_i         (req_vec),
+        .sel_i          (grant),
+        .data_o         (hpdcache_req_o)
+    );
+
+    assign hpdcache_req_valid_o = |grant;
+    //  }}}
+
+    //  Response demultiplexor
+    //  {{{
+    //      The HW prefetchers have no use for the TID field of a request, so
+    //      it carries the index of the engine that issued it. The response
+    //      brings the TID back, which lets every engine share one SID:
+    //      distinct SIDs would require distinct ports to the cache, and the
+    //      point is to keep that number low.
+    //      The response payload is broadcast; only the valid flag is steered.
+    always_comb
+    begin : resp_demux
+        for (int unsigned k = 0; k < NUM_HW_PREFETCH; k++) begin
+            hwpf_stride_rsp_o[k]       = hpdcache_rsp_i;
+            hwpf_stride_rsp_valid_o[k] = (k == int'(hpdcache_rsp_i.tid)) ? hpdcache_rsp_valid_i : 1'b0;
+        end
+    end
+    //  }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv
new file mode 100644
index 0000000..3470b78
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv
@@ -0,0 +1,68 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : January, 2023
+ *  Description   : High-Performance, Data-cache (HPDcache) HW memory
+ *                  prefetcher package
+ *  History       :
+ */
+package hwpf_stride_pkg;
+    //  Base address configuration register of the hardware memory prefetcher (64 bits)
+    //  {{{
+    typedef struct packed {
+            logic [63:6] base_cline;  // base cacheline; the engine advances it block by block
+            logic [5:3]  unused;
+            logic        cycle;       // on completion, restore base_cline from the user register
+            logic        rearm;       // on completion, snoop again instead of disarming
+            logic        enable;      // arms the engine; the engine clears it when it disarms
+            } hwpf_stride_base_t;
+    //  }}}
+
+    //  Parameters configuration register of the hardware memory prefetcher (64 bits)
+    //  {{{
+    typedef struct packed {
+            logic [63:48] nblocks;    // block counter reload value
+            logic [47:32] nlines;     // per-block cacheline counter reload value
+            logic [31:0]  stride;     // NOTE(review): presumably the distance between blocks - confirm unit
+            } hwpf_stride_param_t;
+    //  }}}
+
+    //  Throttle configuration register of the hardware memory prefetcher (32 bits)
+    //  {{{
+    typedef struct packed {
+            logic [31:16] ninflight;  // maximum number of in-flight requests (per the engine FSM comments)
+            logic [15:0]  nwait;      // cycles to wait between two requests; 0 means no wait
+            } hwpf_stride_throttle_t;
+    //  }}}
+
+    //  Status register of the hardware memory prefetcher (64 bits, filled by hwpf_stride_wrapper)
+    //  {{{
+    typedef struct packed {
+            logic [63:48] unused1;
+            logic [47:32] busy;       // one busy flag per engine, zero-extended
+            logic         free;       // set when at least one engine is neither enabled nor busy
+            logic [30:20] unused0;
+            logic [19:16] free_index; // lowest index among the free engines (0 when none is free)
+            logic [15:0]  enabled;    // one enable flag per engine, zero-extended
+            } hwpf_stride_status_t;
+    //  }}}
+
+endpackage
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv
new file mode 100644
index 0000000..ba995b5
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv
@@ -0,0 +1,38 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Riccardo Alidori, Cesar Fuguet
+ *  Creation Date : June, 2021
+ *  Description   : Snooper used by the hardware memory prefetcher
+ *  History       :
+ */
+module hwpf_stride_snooper
+import hpdcache_pkg::*;
+(
+    input  logic            en_i,           // Snooper enable bit.
+    input  hpdcache_nline_t base_nline_i,   // Address to check
+    input  hpdcache_nline_t snoop_addr_i,   // Input address to snoop
+    output logic            snoop_match_o   // If high, the snooper matched the snoop address
+);
+    // Purely combinational comparator: no clock, no reset, no state.
+    // The snooper matches if it is enabled and the two addresses are equal
+    assign snoop_match_o = en_i && ( base_nline_i == snoop_addr_i );
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv
new file mode 100644
index 0000000..fa1cfa4
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv
@@ -0,0 +1,265 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Riccardo Alidori, Cesar Fuguet
+ *  Creation Date : June, 2021
+ *  Description   : Linear Hardware Memory Prefetcher wrapper.
+ *  History       :
+ */
+module hwpf_stride_wrapper
+import hwpf_stride_pkg::*;
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter NUM_HW_PREFETCH = 4,  // number of prefetcher engines (at most 16, see the assertion)
+    parameter NUM_SNOOP_PORTS = 1   // number of snooped request ports
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input  logic                                        clk_i,
+    input  logic                                        rst_ni,
+
+    //  CSR (one set of configuration registers per engine)
+    //  {{{
+    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_base_set_i,
+    input  hwpf_stride_base_t     [NUM_HW_PREFETCH-1:0] hwpf_stride_base_i,
+    output hwpf_stride_base_t     [NUM_HW_PREFETCH-1:0] hwpf_stride_base_o,
+
+    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_param_set_i,
+    input  hwpf_stride_param_t    [NUM_HW_PREFETCH-1:0] hwpf_stride_param_i,
+    output hwpf_stride_param_t    [NUM_HW_PREFETCH-1:0] hwpf_stride_param_o,
+
+    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_set_i,
+    input  hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_i,
+    output hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_o,
+
+    output hwpf_stride_status_t                         hwpf_stride_status_o,
+    //  }}}
+
+    // Snooping (one entry per snooped port)
+    //  {{{
+    input  logic                 [NUM_SNOOP_PORTS-1:0]  snoop_valid_i,
+    input  logic                 [NUM_SNOOP_PORTS-1:0]  snoop_abort_i,
+    input  hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0]  snoop_addr_offset_i,
+    input  hpdcache_tag_t        [NUM_SNOOP_PORTS-1:0]  snoop_addr_tag_i,
+    input  logic                 [NUM_SNOOP_PORTS-1:0]  snoop_phys_indexed_i,
+    //  }}}
+
+    //  Dcache interface (single port shared by all engines)
+    //  {{{
+    input  hpdcache_req_sid_t                           hpdcache_req_sid_i,
+    output logic                                        hpdcache_req_valid_o,
+    input  logic                                        hpdcache_req_ready_i,
+    output hpdcache_req_t                               hpdcache_req_o,
+    output logic                                        hpdcache_req_abort_o,
+    output hpdcache_tag_t                               hpdcache_req_tag_o,
+    output hpdcache_pma_t                               hpdcache_req_pma_o,
+    input  logic                                        hpdcache_rsp_valid_i,
+    input  hpdcache_rsp_t                               hpdcache_rsp_i
+    //  }}}
+);
+//  }}}
+
+    //  Internal registers (registered copy of the snooped valid and offset)
+    //  {{{
+    logic                 [NUM_SNOOP_PORTS-1:0] snoop_valid_q;
+    hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_q;
+    //  }}}
+
+    //  Internal signals
+    //  {{{
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_enable;
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_free;
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_status_busy;
+    logic            [3:0]                 hwpf_stride_status_free_idx;
+
+    hpdcache_nline_t [NUM_HW_PREFETCH-1:0] hwpf_snoop_nline;
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_snoop_match;
+
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid;
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready;
+    hpdcache_req_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_req;
+
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_valid;
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_ready;
+    hpdcache_req_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req;
+    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp_valid;
+    hpdcache_rsp_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp;
+    //  }}}
+
+    //  Assertions
+    //  {{{
+    //  pragma translate_off
+    initial
+    begin
+        max_hwpf_stride_assert: assert (NUM_HW_PREFETCH <= 16) else
+                $error("hwpf_stride: maximum number of HW prefetchers is 16");
+    end
+    //  pragma translate_on
+    //  }}}
+
+    //  Compute the status information
+    //  {{{
+    always_comb begin: hwpf_stride_priority_encoder
+        hwpf_stride_status_free_idx = '0;  // also the value reported when no engine is free
+        for (int unsigned i = 0; i < NUM_HW_PREFETCH; i++) begin
+            if (hwpf_stride_free[i]) begin
+                hwpf_stride_status_free_idx = 4'(i);  // sized cast: i < 16, so no bits are lost
+                break;  // lowest index wins
+            end
+        end
+    end
+
+    //     Free flag of engines (neither enabled nor busy)
+    assign hwpf_stride_free            = ~(hwpf_stride_enable | hwpf_stride_status_busy);
+    //     Busy flags (zero-extended over the unused1 and busy fields)
+    assign hwpf_stride_status_o[63:32] = {{32-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_status_busy};
+    //     Global free flag
+    assign hwpf_stride_status_o[31]    = |hwpf_stride_free;
+    //     Free Index (zero-extended over the unused0 and free_index fields)
+    assign hwpf_stride_status_o[30:16] = {11'b0, hwpf_stride_status_free_idx};
+    //     Enable flags
+    assign hwpf_stride_status_o[15:0]  = {{16-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_enable};
+    //  }}}
+
+    //  Hardware prefetcher engines
+    //  {{{
+    generate
+        for (genvar j = 0; j < NUM_SNOOP_PORTS; j++) begin
+            always_ff @(posedge clk_i or negedge rst_ni)
+            begin : snoop_ff
+                if (!rst_ni) begin
+                    snoop_valid_q[j]       <= 1'b0;
+                    snoop_addr_offset_q[j] <= '0;
+                end else begin
+                    if (snoop_phys_indexed_i[j]) begin  // NOTE(review): snoop_comb reads *_q when this is low - confirm
+                        snoop_valid_q[j]       <= snoop_valid_i[j];
+                        snoop_addr_offset_q[j] <= snoop_addr_offset_i[j];
+                    end
+                end
+            end
+        end
+
+        for (genvar i = 0; i < NUM_HW_PREFETCH; i++) begin
+            assign hwpf_stride_enable[i] = hwpf_stride_base_o[i].enable;
+
+            //  Compute snoop match signals (OR over all snoop ports)
+            //  {{{
+            always_comb
+            begin : snoop_comb
+                hwpf_snoop_match[i] = 1'b0;
+                for (int j = 0; j < NUM_SNOOP_PORTS; j++) begin
+                    automatic logic                 snoop_valid;
+                    automatic hpdcache_req_offset_t snoop_offset;
+                    automatic hpdcache_nline_t      snoop_nline;
+
+                    if (snoop_phys_indexed_i[j]) begin  // use the live valid/offset
+                        snoop_valid  = snoop_valid_i[j];
+                        snoop_offset = snoop_addr_offset_i[j];
+                    end else begin                      // use the registered valid/offset
+                        snoop_valid  = snoop_valid_q[j];
+                        snoop_offset = snoop_addr_offset_q[j];
+                    end
+                    snoop_nline = {snoop_addr_tag_i[j], snoop_offset};  // the tag is always the live one
+                    hwpf_snoop_match[i] |= (snoop_valid         && !snoop_abort_i[j] &&
+                                           (hwpf_snoop_nline[i] ==  snoop_nline));
+                end
+            end
+            //  }}}
+
+            hwpf_stride #(
+                .CACHE_LINE_BYTES     ( HPDCACHE_CL_WIDTH/8 )
+            ) hwpf_stride_i(
+                .clk_i,
+                .rst_ni,
+
+                .csr_base_set_i       ( hwpf_stride_base_set_i[i] ),
+                .csr_base_i           ( hwpf_stride_base_i[i] ),
+                .csr_param_set_i      ( hwpf_stride_param_set_i[i] ),
+                .csr_param_i          ( hwpf_stride_param_i[i] ),
+                .csr_throttle_set_i   ( hwpf_stride_throttle_set_i[i] ),
+                .csr_throttle_i       ( hwpf_stride_throttle_i[i] ),
+
+                .csr_base_o           ( hwpf_stride_base_o[i] ),
+                .csr_param_o          ( hwpf_stride_param_o[i] ),
+                .csr_throttle_o       ( hwpf_stride_throttle_o[i] ),
+
+                .busy_o               ( hwpf_stride_status_busy[i] ),
+
+                .snoop_nline_o        ( hwpf_snoop_nline[i] ),
+                .snoop_match_i        ( hwpf_snoop_match[i] ),
+
+                .hpdcache_req_valid_o ( hwpf_stride_req_valid[i] ),
+                .hpdcache_req_ready_i ( hwpf_stride_req_ready[i] ),
+                .hpdcache_req_o       ( hwpf_stride_req[i] ),
+                .hpdcache_rsp_valid_i ( hwpf_stride_arb_in_rsp_valid[i] ),
+                .hpdcache_rsp_i       ( hwpf_stride_arb_in_rsp[i] )
+            );
+
+            assign hwpf_stride_req_ready[i]               = hwpf_stride_arb_in_req_ready[i],
+                   hwpf_stride_arb_in_req_valid[i]        = hwpf_stride_req_valid[i],
+                   hwpf_stride_arb_in_req[i].addr_offset  = hwpf_stride_req[i].addr_offset,
+                   hwpf_stride_arb_in_req[i].wdata        = hwpf_stride_req[i].wdata,
+                   hwpf_stride_arb_in_req[i].op           = hwpf_stride_req[i].op,
+                   hwpf_stride_arb_in_req[i].be           = hwpf_stride_req[i].be,
+                   hwpf_stride_arb_in_req[i].size         = hwpf_stride_req[i].size,
+                   hwpf_stride_arb_in_req[i].sid          = hpdcache_req_sid_i,  // same SID for every engine
+                   hwpf_stride_arb_in_req[i].tid          = hpdcache_req_tid_t'(i),  // engine index, routes the response
+                   hwpf_stride_arb_in_req[i].need_rsp     = hwpf_stride_req[i].need_rsp,
+                   hwpf_stride_arb_in_req[i].phys_indexed = hwpf_stride_req[i].phys_indexed,
+                   hwpf_stride_arb_in_req[i].addr_tag     = '0,
+                   hwpf_stride_arb_in_req[i].pma          = '0;
+        end
+    endgenerate
+    //  }}}
+
+    //  Hardware prefetcher arbiter between engines
+    //  {{{
+    hwpf_stride_arb #(
+        .NUM_HW_PREFETCH          ( NUM_HW_PREFETCH )
+    ) hwpf_stride_arb_i (
+        .clk_i,
+        .rst_ni,
+
+        // DCache input interface
+        .hwpf_stride_req_valid_i  ( hwpf_stride_arb_in_req_valid ),
+        .hwpf_stride_req_ready_o  ( hwpf_stride_arb_in_req_ready ),
+        .hwpf_stride_req_i        ( hwpf_stride_arb_in_req ),
+        .hwpf_stride_rsp_valid_o  ( hwpf_stride_arb_in_rsp_valid ),
+        .hwpf_stride_rsp_o        ( hwpf_stride_arb_in_rsp ),
+
+        // DCache output interface
+        .hpdcache_req_valid_o,
+        .hpdcache_req_ready_i,
+        .hpdcache_req_o,
+        .hpdcache_rsp_valid_i,
+        .hpdcache_rsp_i
+    );
+
+    assign hpdcache_req_abort_o = 1'b0,  // unused on physically indexed requests
+           hpdcache_req_tag_o   = '0,    // unused on physically indexed requests
+           hpdcache_req_pma_o   = '0;    // unused on physically indexed requests
+    //  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv
new file mode 100644
index 0000000..cb32acf
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv
@@ -0,0 +1,103 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Dcache Memory Read Request Channel Arbiter
+ *  History       :
+ */
+module hpdcache_mem_req_read_arbiter
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter hpdcache_uint N = 0,                 // number of requesters (presumably always overridden)
+    parameter type hpdcache_mem_req_t = logic      // request payload type
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    output logic                  mem_req_read_ready_o [N-1:0],  // requester side, unpacked arrays
+    input  logic                  mem_req_read_valid_i [N-1:0],
+    input  hpdcache_mem_req_t     mem_req_read_i       [N-1:0],
+
+    input  logic                  mem_req_read_ready_i,          // memory side, single channel
+    output logic                  mem_req_read_valid_o,
+    output hpdcache_mem_req_t     mem_req_read_o
+);
+//  }}}
+
+    logic              [N-1:0] mem_read_arb_req_valid;
+    hpdcache_mem_req_t [N-1:0] mem_read_arb_req;
+    logic              [N-1:0] mem_read_arb_req_gnt;
+
+    logic                      req_valid;
+
+    genvar                     gen_i;
+
+
+    //  Pack inputs (unpacked port arrays -> packed vectors for the arbiter and the mux)
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen
+            assign mem_read_arb_req_valid[gen_i] = mem_req_read_valid_i[gen_i],
+                   mem_read_arb_req      [gen_i] = mem_req_read_i[gen_i];
+        end
+    endgenerate
+
+    assign req_valid = |(mem_read_arb_req_gnt & mem_read_arb_req_valid);  // a granted requester is valid
+
+    //  Fixed-priority arbiter (instance named after the read channel it serves)
+    hpdcache_fxarb #(
+        .N                   (N)
+    ) hpdcache_fxarb_mem_req_read_i (
+        .clk_i,
+        .rst_ni,
+        .req_i               (mem_read_arb_req_valid),
+        .gnt_o               (mem_read_arb_req_gnt),
+        .ready_i             (mem_req_read_ready_i)
+    );
+
+    //  Demultiplexor for the ready signal (only a granted, valid requester sees it)
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen
+            assign mem_req_read_ready_o[gen_i] = mem_req_read_ready_i &
+                        mem_read_arb_req_gnt[gen_i] & mem_read_arb_req_valid[gen_i];
+        end
+    endgenerate
+
+    assign mem_req_read_valid_o = req_valid;
+
+    //  Multiplexor for requests, selected by the grant vector
+    hpdcache_mux #(
+        .NINPUT              (N),
+        .DATA_WIDTH          ($bits(hpdcache_mem_req_t)),
+        .ONE_HOT_SEL         (1'b1)
+    ) mem_read_req_mux_i (
+        .data_i              (mem_read_arb_req),
+        .sel_i               (mem_read_arb_req_gnt),
+        .data_o              (mem_req_read_o)
+    );
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv
new file mode 100644
index 0000000..a7916ec
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv
@@ -0,0 +1,193 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Dcache Memory Write Channels Arbiter
+ *  History       :
+ */
+module hpdcache_mem_req_write_arbiter
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter hpdcache_uint N = 0,  //  number of requesters to arbitrate
+    parameter type hpdcache_mem_req_t = logic,  //  payload of the request (metadata) channel
+    parameter type hpdcache_mem_req_w_t = logic  //  payload of the write-data channel
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+    //  Requester side: request (metadata) channels, one entry per requester
+    output logic                 mem_req_write_ready_o      [N-1:0],
+    input  logic                 mem_req_write_valid_i      [N-1:0],
+    input  hpdcache_mem_req_t    mem_req_write_i            [N-1:0],
+    //  Requester side: write-data channels, one entry per requester
+    output logic                 mem_req_write_data_ready_o [N-1:0],
+    input  logic                 mem_req_write_data_valid_i [N-1:0],
+    input  hpdcache_mem_req_w_t  mem_req_write_data_i       [N-1:0],
+    //  Arbitrated request (metadata) channel
+    input  logic                 mem_req_write_ready_i,
+    output logic                 mem_req_write_valid_o,
+    output hpdcache_mem_req_t    mem_req_write_o,
+    //  Arbitrated write-data channel
+    input  logic                 mem_req_write_data_ready_i,
+    output logic                 mem_req_write_data_valid_o,
+    output hpdcache_mem_req_w_t  mem_req_write_data_o
+);
+//  }}}
+    //  N-to-1 arbiter: forwards the granted requester's request and write data as a pair
+    typedef enum {
+        REQ_IDLE,       //  neither half of the current transfer has been sent alone
+        REQ_META_SENT,  //  request accepted, still waiting for its data to be accepted
+        REQ_DATA_SENT   //  data accepted, still waiting for its request to be accepted
+    } req_send_fsm_t;
+
+    req_send_fsm_t               req_send_fsm_q, req_send_fsm_d;
+    logic                        req_valid;
+    logic                        req_data_valid;
+
+    logic                [N-1:0] mem_write_arb_req_valid;
+    hpdcache_mem_req_t   [N-1:0] mem_write_arb_req;
+    logic                [N-1:0] mem_write_arb_req_data_valid;
+    hpdcache_mem_req_w_t [N-1:0] mem_write_arb_req_data;
+    logic                [N-1:0] mem_write_arb_req_gnt;
+    logic                        mem_write_arb_req_ready;
+
+    genvar                       gen_i;
+
+    //  Pack inputs (unpacked port arrays -> packed vectors used by the arbiter and muxes)
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen
+            assign mem_write_arb_req_valid     [gen_i] = mem_req_write_valid_i[gen_i],
+                   mem_write_arb_req           [gen_i] = mem_req_write_i[gen_i],
+                   mem_write_arb_req_data_valid[gen_i] = mem_req_write_data_valid_i[gen_i],
+                   mem_write_arb_req_data      [gen_i] = mem_req_write_data_i[gen_i];
+        end
+    endgenerate
+
+    //      Fixed-priority arbiter (arbitrates on the request valids only)
+    hpdcache_fxarb #(
+        .N                   (N)  //  must match the [N-1:0] request/grant vectors below
+    ) hpdcache_fxarb_mem_req_write_i (
+        .clk_i,
+        .rst_ni,
+        .req_i               (mem_write_arb_req_valid),
+        .gnt_o               (mem_write_arb_req_gnt),
+        .ready_i             (mem_write_arb_req_ready)
+    );
+    //  Valid of the granted requester on each of the two channels
+    assign req_valid      = |(mem_write_arb_req_gnt & mem_write_arb_req_valid);
+    assign req_data_valid = |(mem_write_arb_req_gnt & mem_write_arb_req_data_valid);
+
+    //  Request sent FSM
+    //
+    //  This FSM makes sure that a request and its corresponding data are sent
+    //  as a pair. That is, once one half (request or data) of a transfer has
+    //  been accepted, the arbiter's ready input is held low until the other
+    //  half has been accepted as well.
+    //
+    //  {{{
+    always_comb
+    begin : req_send_fsm_comb
+        req_send_fsm_d = req_send_fsm_q;
+        mem_write_arb_req_ready = 1'b0;  //  only pulsed when a full pair has been transferred
+        case (req_send_fsm_q)
+            REQ_IDLE:
+                if (req_valid && mem_req_write_ready_i) begin
+                    if (req_data_valid) begin
+                        if (mem_req_write_data_ready_i) begin
+                            mem_write_arb_req_ready = 1'b1;  //  both halves accepted this cycle
+                            req_send_fsm_d = REQ_IDLE;
+                        end else begin
+                            req_send_fsm_d = REQ_META_SENT;
+                        end
+                    end  //  NOTE(review): request accepted without valid data stays in REQ_IDLE - confirm
+                end else if (req_data_valid && mem_req_write_data_ready_i) begin
+                    req_send_fsm_d = REQ_DATA_SENT;
+                end
+
+            REQ_META_SENT:
+                if (req_data_valid && mem_req_write_data_ready_i) begin
+                    mem_write_arb_req_ready = 1'b1;
+                    req_send_fsm_d = REQ_IDLE;
+                end
+
+            REQ_DATA_SENT:
+                if (req_valid && mem_req_write_ready_i) begin
+                    mem_write_arb_req_ready = 1'b1;
+                    req_send_fsm_d = REQ_IDLE;
+                end
+        endcase
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : req_send_fsm_ff
+        if (!rst_ni) begin
+            req_send_fsm_q <= REQ_IDLE;
+        end else begin
+            req_send_fsm_q <= req_send_fsm_d;
+        end
+    end
+    //  }}}
+    //  Demultiplexor for the ready signals (only the granted requester can see ready)
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen
+            assign mem_req_write_ready_o[gen_i] =
+                        (mem_write_arb_req_gnt[gen_i] & mem_req_write_ready_i) &
+                        (req_send_fsm_q != REQ_META_SENT);  //  request half already sent
+
+            assign mem_req_write_data_ready_o[gen_i] =
+                        (mem_write_arb_req_gnt[gen_i] & mem_req_write_data_ready_i) &
+                        (req_send_fsm_q != REQ_DATA_SENT);  //  data half already sent
+        end
+    endgenerate
+
+    //  Output assignments
+    //  {{{
+    assign mem_req_write_valid_o      = req_valid      & (req_send_fsm_q != REQ_META_SENT);
+    assign mem_req_write_data_valid_o = req_data_valid & (req_send_fsm_q != REQ_DATA_SENT);
+    //  Multiplexors selecting the granted requester's request and data (grant used as select)
+    hpdcache_mux #(
+        .NINPUT              (N),
+        .DATA_WIDTH          ($bits(hpdcache_mem_req_t)),
+        .ONE_HOT_SEL         (1'b1)
+    ) mem_write_req_mux_i (
+        .data_i              (mem_write_arb_req),
+        .sel_i               (mem_write_arb_req_gnt),
+        .data_o              (mem_req_write_o)
+    );
+
+    hpdcache_mux #(
+        .NINPUT              (N),
+        .DATA_WIDTH          ($bits(hpdcache_mem_req_w_t)),
+        .ONE_HOT_SEL         (1'b1)
+    ) mem_write_data_req_mux_i (
+        .data_i              (mem_write_arb_req_data),
+        .sel_i               (mem_write_arb_req_gnt),
+        .data_o              (mem_req_write_data_o)
+    );
+    //  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv
new file mode 100644
index 0000000..c1502a9
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv
@@ -0,0 +1,108 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : June, 2022
+ *  Description   : Dcache Memory Response Demultiplexer
+ *  History       :
+ */
+module hpdcache_mem_resp_demux
+//  Parameters
+//  {{{
+#(
+  parameter int         N  = 0,
+  parameter type resp_t    = logic,
+  parameter type resp_id_t = logic,
+
+  localparam int RT_DEPTH  = (1 << $bits(resp_id_t)),
+  localparam type rt_t     = resp_id_t [RT_DEPTH-1:0]
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+  input  logic           clk_i,
+  input  logic           rst_ni,
+  //  Incoming response (clk_i/rst_ni above are not referenced by this module's body)
+  output logic           mem_resp_ready_o,
+  input  logic           mem_resp_valid_i,
+  input  resp_id_t       mem_resp_id_i,
+  input  resp_t          mem_resp_i,
+  //  Outgoing responses, one entry per output port
+  input  logic           mem_resp_ready_i [N-1:0],
+  output logic           mem_resp_valid_o [N-1:0],
+  output resp_t          mem_resp_o       [N-1:0],
+  //  Routing table: entry [id] is used to select the output port for responses with that ID
+  input  rt_t            mem_resp_rt_i
+);
+//  }}}
+
+  typedef logic [$clog2(N)-1:0] port_sel_t;
+
+  port_sel_t       route_sel;
+  logic    [N-1:0] ready_vec;
+  logic    [N-1:0] valid_vec;
+  resp_t   [N-1:0] resp_vec;
+
+  //  Look up the destination port of the current response from its ID
+  assign route_sel = mem_resp_rt_i[int'(mem_resp_id_i)];
+
+  //  Bridge the unpacked port arrays and the packed vectors used internally
+  generate
+      for (genvar port_i = 0; port_i < int'(N); port_i++) begin : pack_unpack_resp_gen
+        assign ready_vec        [port_i] = mem_resp_ready_i [port_i],
+               mem_resp_valid_o [port_i] = valid_vec        [port_i],
+               mem_resp_o       [port_i] = resp_vec         [port_i];
+      end
+  endgenerate
+
+  //  Selected port's ready goes back to the sender; valid and payload go to that port
+  hpdcache_mux #(
+      .NINPUT         (N),
+      .DATA_WIDTH     (1),
+      .ONE_HOT_SEL    (0)
+  ) i_resp_ready_mux (
+      .data_i         (ready_vec),
+      .sel_i          (route_sel),
+      .data_o         (mem_resp_ready_o)
+  );
+
+  hpdcache_demux #(
+      .NOUTPUT        (N),
+      .DATA_WIDTH     (1),
+      .ONE_HOT_SEL    (0)
+  ) i_resp_valid_demux (
+      .data_i         (mem_resp_valid_i),
+      .sel_i          (route_sel),
+      .data_o         (valid_vec)
+  );
+
+  hpdcache_demux #(
+      .NOUTPUT        (N),
+      .DATA_WIDTH     ($bits(resp_t)),
+      .ONE_HOT_SEL    (0)
+  ) i_resp_demux (
+      .data_i         (mem_resp_i),
+      .sel_i          (route_sel),
+      .data_o         (resp_vec)
+  );
+
+endmodule : hpdcache_mem_resp_demux
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv
new file mode 100644
index 0000000..ec3fad7
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv
@@ -0,0 +1,95 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Dcache memory request to axi read channels
+ *  History       :
+ */
+module hpdcache_mem_to_axi_read
+import hpdcache_pkg::*;
+#(
+    parameter type hpdcache_mem_req_t    = logic,
+    parameter type hpdcache_mem_resp_r_t = logic,
+    parameter type ar_chan_t = logic,
+    parameter type r_chan_t  = logic
+)
+(
+    output logic                          req_ready_o,
+    input  logic                          req_valid_i,
+    input  hpdcache_mem_req_t             req_i,
+    //  Memory read response produced by this adapter
+    input  logic                          resp_ready_i,
+    output logic                          resp_valid_o,
+    output hpdcache_mem_resp_r_t          resp_o,
+    //  AXI read address (AR) channel
+    output logic                          axi_ar_valid_o,
+    output ar_chan_t                      axi_ar_o,
+    input  logic                          axi_ar_ready_i,
+    //  AXI read data (R) channel
+    input  logic                          axi_r_valid_i,
+    input  r_chan_t                       axi_r_i,
+    output logic                          axi_r_ready_o
+);
+
+    logic                ar_lock;
+    axi_pkg::cache_t     ar_cache;
+    hpdcache_mem_error_e r_error;
+    //  Lock is requested only for atomic commands whose operation is HPDCACHE_MEM_ATOMIC_LDEX
+    assign ar_lock = (req_i.mem_req_atomic  == HPDCACHE_MEM_ATOMIC_LDEX) &&
+                     (req_i.mem_req_command == HPDCACHE_MEM_ATOMIC);
+    //  Cacheable requests set the bufferable, modifiable and both allocate
+    //  attributes; all other requests drive an all-zero cache field
+    assign ar_cache = req_i.mem_req_cacheable
+                    ? (axi_pkg::CACHE_BUFFERABLE | axi_pkg::CACHE_MODIFIABLE |
+                       axi_pkg::CACHE_RD_ALLOC   | axi_pkg::CACHE_WR_ALLOC)
+                    : '0;
+    //  Request -> AR channel: handshake is passed through, bursts are always INCR
+    assign req_ready_o     = axi_ar_ready_i;
+    assign axi_ar_valid_o  = req_valid_i;
+    assign axi_ar_o.id     = req_i.mem_req_id;
+    assign axi_ar_o.addr   = req_i.mem_req_addr;
+    assign axi_ar_o.len    = req_i.mem_req_len;
+    assign axi_ar_o.size   = req_i.mem_req_size;
+    assign axi_ar_o.burst  = axi_pkg::BURST_INCR;
+    assign axi_ar_o.lock   = ar_lock;
+    assign axi_ar_o.cache  = ar_cache;
+    assign axi_ar_o.prot   = '0;
+    assign axi_ar_o.qos    = '0;
+    assign axi_ar_o.region = '0;
+    assign axi_ar_o.user   = '0;
+    //  SLVERR and DECERR responses are reported as NOK, any other code as OK
+    always_comb
+    begin : resp_decode_comb
+        r_error = HPDCACHE_MEM_RESP_OK;
+        case (axi_r_i.resp)
+            axi_pkg::RESP_SLVERR,
+            axi_pkg::RESP_DECERR: r_error = HPDCACHE_MEM_RESP_NOK;
+        endcase
+    end
+    //  R channel -> response: handshake is passed through, fields are copied
+    assign axi_r_ready_o           = resp_ready_i;
+    assign resp_valid_o            = axi_r_valid_i;
+    assign resp_o.mem_resp_r_error = r_error;
+    assign resp_o.mem_resp_r_id    = axi_r_i.id;
+    assign resp_o.mem_resp_r_data  = axi_r_i.data;
+    assign resp_o.mem_resp_r_last  = axi_r_i.last;
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv
new file mode 100644
index 0000000..8d8eb9f
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv
@@ -0,0 +1,148 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Dcache memory request to axi write channels
+ *  History       :
+ */
+module hpdcache_mem_to_axi_write
+import hpdcache_pkg::*;
+#(
+    parameter type hpdcache_mem_req_t = logic,
+    parameter type hpdcache_mem_req_w_t = logic,
+    parameter type hpdcache_mem_resp_w_t = logic,
+    parameter type aw_chan_t = logic,
+    parameter type w_chan_t = logic,
+    parameter type b_chan_t = logic
+)
+(
+    output logic                          req_ready_o,
+    input  logic                          req_valid_i,
+    input  hpdcache_mem_req_t             req_i,
+    //  Write data accompanying the request
+    output logic                          req_data_ready_o,
+    input  logic                          req_data_valid_i,
+    input  hpdcache_mem_req_w_t           req_data_i,
+    //  Memory write response produced by this adapter
+    input  logic                          resp_ready_i,
+    output logic                          resp_valid_o,
+    output hpdcache_mem_resp_w_t          resp_o,
+    //  AXI write address (AW) channel
+    output logic                          axi_aw_valid_o,
+    output aw_chan_t                      axi_aw_o,
+    input  logic                          axi_aw_ready_i,
+    //  AXI write data (W) channel
+    output logic                          axi_w_valid_o,
+    output w_chan_t                       axi_w_o,
+    input  logic                          axi_w_ready_i,
+    //  AXI write response (B) channel
+    input  logic                          axi_b_valid_i,
+    input  b_chan_t                       axi_b_i,
+    output logic                          axi_b_ready_o
+);
+
+    logic                aw_lock;
+    axi_pkg::atop_t      aw_atop;
+    axi_pkg::cache_t     aw_cache;
+    hpdcache_mem_error_e b_error;
+    //  Atomic commands: STEX sets the lock bit, the other listed operations select an ATOP code
+    always_comb
+    begin : atop_comb
+        aw_lock = 1'b0;
+        aw_atop = '0;
+        case (req_i.mem_req_command)
+            HPDCACHE_MEM_ATOMIC: begin
+                case (req_i.mem_req_atomic)
+                    HPDCACHE_MEM_ATOMIC_STEX: aw_lock = 1'b1;
+                    HPDCACHE_MEM_ATOMIC_ADD:
+                        aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END,
+                                   axi_pkg::ATOP_ADD};
+                    HPDCACHE_MEM_ATOMIC_CLR:
+                        aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END,
+                                   axi_pkg::ATOP_CLR};
+                    HPDCACHE_MEM_ATOMIC_SET:
+                        aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END,
+                                   axi_pkg::ATOP_SET};
+                    HPDCACHE_MEM_ATOMIC_EOR:
+                        aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END,
+                                   axi_pkg::ATOP_EOR};
+                    HPDCACHE_MEM_ATOMIC_SMAX:
+                        aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END,
+                                   axi_pkg::ATOP_SMAX};
+                    HPDCACHE_MEM_ATOMIC_SMIN:
+                        aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END,
+                                   axi_pkg::ATOP_SMIN};
+                    HPDCACHE_MEM_ATOMIC_UMAX:
+                        aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END,
+                                   axi_pkg::ATOP_UMAX};
+                    HPDCACHE_MEM_ATOMIC_UMIN:
+                        aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END,
+                                   axi_pkg::ATOP_UMIN};
+                    HPDCACHE_MEM_ATOMIC_SWAP: aw_atop = axi_pkg::ATOP_ATOMICSWAP;
+                endcase
+            end
+        endcase
+    end
+    //  Cache attributes are set only for cacheable requests that do not set the lock bit
+    //  (bufferable, modifiable and both allocate attributes); otherwise all-zero
+    assign aw_cache = (req_i.mem_req_cacheable && !aw_lock)
+                    ? (axi_pkg::CACHE_BUFFERABLE | axi_pkg::CACHE_MODIFIABLE |
+                       axi_pkg::CACHE_RD_ALLOC   | axi_pkg::CACHE_WR_ALLOC)
+                    : '0;
+    //  Request -> AW channel: handshake is passed through, bursts are always INCR
+    assign req_ready_o     = axi_aw_ready_i;
+    assign axi_aw_valid_o  = req_valid_i;
+    assign axi_aw_o.id     = req_i.mem_req_id;
+    assign axi_aw_o.addr   = req_i.mem_req_addr;
+    assign axi_aw_o.len    = req_i.mem_req_len;
+    assign axi_aw_o.size   = req_i.mem_req_size;
+    assign axi_aw_o.burst  = axi_pkg::BURST_INCR;
+    assign axi_aw_o.lock   = aw_lock;
+    assign axi_aw_o.cache  = aw_cache;
+    assign axi_aw_o.prot   = '0;
+    assign axi_aw_o.qos    = '0;
+    assign axi_aw_o.region = '0;
+    assign axi_aw_o.atop   = aw_atop;
+    assign axi_aw_o.user   = '0;
+    //  Request data -> W channel: handshake is passed through, byte enables become strobes
+    assign req_data_ready_o = axi_w_ready_i;
+    assign axi_w_valid_o    = req_data_valid_i;
+    assign axi_w_o.data     = req_data_i.mem_req_w_data;
+    assign axi_w_o.strb     = req_data_i.mem_req_w_be;
+    assign axi_w_o.last     = req_data_i.mem_req_w_last;
+    assign axi_w_o.user     = '0;
+    //  SLVERR and DECERR responses are reported as NOK, any other code as OK
+    always_comb
+    begin : resp_decode_comb
+        b_error = HPDCACHE_MEM_RESP_OK;
+        case (axi_b_i.resp)
+            axi_pkg::RESP_SLVERR,
+            axi_pkg::RESP_DECERR: b_error = HPDCACHE_MEM_RESP_NOK;
+        endcase
+    end
+    //  B channel -> response: handshake is passed through; is_atomic flags an EXOKAY response
+    assign axi_b_ready_o               = resp_ready_i;
+    assign resp_valid_o                = axi_b_valid_i;
+    assign resp_o.mem_resp_w_error     = b_error;
+    assign resp_o.mem_resp_w_id        = axi_b_i.id;
+    assign resp_o.mem_resp_w_is_atomic = (axi_b_i.resp == axi_pkg::RESP_EXOKAY);
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/miss_handler.sv b/test/type_param/core/cache_subsystem/miss_handler.sv
new file mode 100644
index 0000000..4755d0b
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/miss_handler.sv
@@ -0,0 +1,826 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 12.11.2017
+// Description: Handles cache misses.
+
+// --------------
+// MISS Handler
+// --------------
+
+module miss_handler
+  import ariane_pkg::*;
+  import std_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg   = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           NR_PORTS  = 4,
+    parameter type                   axi_req_t = logic,
+    parameter type                   axi_rsp_t = logic
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,  // flush request
+    output logic flush_ack_o,  // acknowledge successful flush
+    output logic miss_o,
+    input logic busy_i,  // dcache is busy with something
+    // Bypass or miss
+    input logic [NR_PORTS-1:0][$bits(miss_req_t)-1:0] miss_req_i,
+    // Bypass handling
+    output logic [NR_PORTS-1:0] bypass_gnt_o,
+    output logic [NR_PORTS-1:0] bypass_valid_o,
+    output logic [NR_PORTS-1:0][63:0] bypass_data_o,
+
+    // AXI port
+    output axi_req_t axi_bypass_o,
+    input  axi_rsp_t axi_bypass_i,
+
+    // Miss handling (~> cacheline refill)
+    output logic [NR_PORTS-1:0] miss_gnt_o,
+    output logic [NR_PORTS-1:0] active_serving_o,
+
+    output logic     [63:0] critical_word_o,
+    output logic            critical_word_valid_o,
+    output axi_req_t        axi_data_o,
+    input  axi_rsp_t        axi_data_i,
+
+    input logic [NR_PORTS-1:0][55:0] mshr_addr_i,
+    output logic [NR_PORTS-1:0] mshr_addr_matches_o,
+    output logic [NR_PORTS-1:0] mshr_index_matches_o,
+    // AMO
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+    // Port to SRAMs, for refill and eviction
+    output logic [DCACHE_SET_ASSOC-1:0] req_o,
+    output logic [DCACHE_INDEX_WIDTH-1:0] addr_o,  // address into cache array
+    output cache_line_t data_o,
+    output cl_be_t be_o,
+    input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
+    output logic we_o
+);
+
+  // Three MSHR ports + AMO port
+  parameter NR_BYPASS_PORTS = NR_PORTS + 1;
+
+  // FSM states
+  enum logic [3:0] {
+    IDLE,                // 0
+    FLUSHING,            // 1
+    FLUSH,               // 2
+    WB_CACHELINE_FLUSH,  // 3
+    FLUSH_REQ_STATUS,    // 4
+    WB_CACHELINE_MISS,   // 5
+    WAIT_GNT_SRAM,       // 6
+    MISS,                // 7
+    REQ_CACHELINE,       // 8
+    MISS_REPL,           // 9
+    SAVE_CACHELINE,      // A
+    INIT,                // B
+    AMO_REQ,             // C
+    AMO_WAIT_RESP        // D
+  }
+      state_d, state_q;
+
+  // Registers
+  mshr_t mshr_d, mshr_q;
+  logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
+  logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
+  // cache line to evict
+  cache_line_t evict_cl_d, evict_cl_q;
+
+  logic serve_amo_d, serve_amo_q;
+  // Request from one FSM
+  logic                [                  NR_PORTS-1:0]       miss_req_valid;
+  logic                [                  NR_PORTS-1:0]       miss_req_bypass;
+  logic                [                  NR_PORTS-1:0][63:0] miss_req_addr;
+  logic                [                  NR_PORTS-1:0][63:0] miss_req_wdata;
+  logic                [                  NR_PORTS-1:0]       miss_req_we;
+  logic                [                  NR_PORTS-1:0][ 7:0] miss_req_be;
+  logic                [                  NR_PORTS-1:0][ 1:0] miss_req_size;
+
+  // Bypass AMO port
+  bypass_req_t                                                amo_bypass_req;
+  bypass_rsp_t                                                amo_bypass_rsp;
+
+  // Bypass ports <-> Arbiter
+  bypass_req_t         [           NR_BYPASS_PORTS-1:0]       bypass_ports_req;
+  bypass_rsp_t         [           NR_BYPASS_PORTS-1:0]       bypass_ports_rsp;
+
+  // Arbiter <-> Bypass AXI adapter
+  bypass_req_t                                                bypass_adapter_req;
+  bypass_rsp_t                                                bypass_adapter_rsp;
+
+  // Cache Line Refill <-> AXI
+  logic                                                       req_fsm_miss_valid;
+  logic                [                          63:0]       req_fsm_miss_addr;
+  logic                [         DCACHE_LINE_WIDTH-1:0]       req_fsm_miss_wdata;
+  logic                                                       req_fsm_miss_we;
+  logic                [     (DCACHE_LINE_WIDTH/8)-1:0]       req_fsm_miss_be;
+  ariane_pkg::ad_req_t                                        req_fsm_miss_req;
+  logic                [                           1:0]       req_fsm_miss_size;
+
+  logic                                                       gnt_miss_fsm;
+  logic                                                       valid_miss_fsm;
+  logic                [    (DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm;
+
+  // Cache Management <-> LFSR
+  logic                                                       lfsr_enable;
+  logic                [          DCACHE_SET_ASSOC-1:0]       lfsr_oh;
+  logic                [$clog2(DCACHE_SET_ASSOC-1)-1:0]       lfsr_bin;
+  // AMOs
+  ariane_pkg::amo_t                                           amo_op;
+  logic                [                          63:0]       amo_operand_b;
+
+  // ------------------------------
+  // Cache Management
+  // ------------------------------
+  always_comb begin : cache_management
+    automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way;
+
+    for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin
+      evict_way[i] = data_i[i].valid & data_i[i].dirty;
+      valid_way[i] = data_i[i].valid;
+    end
+    // ----------------------
+    // Default Assignments
+    // ----------------------
+    // memory array
+    req_o                       = '0;
+    addr_o                      = '0;
+    data_o                      = '0;
+    be_o                        = '0;
+    we_o                        = '0;
+    // Cache controller
+    miss_gnt_o                  = '0;
+    active_serving_o            = '0;
+    // LFSR replacement unit
+    lfsr_enable                 = 1'b0;
+    // to AXI refill
+    req_fsm_miss_valid          = 1'b0;
+    req_fsm_miss_addr           = '0;
+    req_fsm_miss_wdata          = '0;
+    req_fsm_miss_we             = 1'b0;
+    req_fsm_miss_be             = '0;
+    req_fsm_miss_req            = ariane_pkg::CACHE_LINE_REQ;
+    req_fsm_miss_size           = 2'b11;
+    // to AXI bypass
+    amo_bypass_req.req          = 1'b0;
+    amo_bypass_req.reqtype      = ariane_pkg::SINGLE_REQ;
+    amo_bypass_req.amo          = ariane_pkg::AMO_NONE;
+    amo_bypass_req.addr         = '0;
+    amo_bypass_req.we           = 1'b0;
+    amo_bypass_req.wdata        = '0;
+    amo_bypass_req.be           = '0;
+    amo_bypass_req.size         = 2'b11;
+    amo_bypass_req.id           = 4'b1011;
+    // core
+    flush_ack_o                 = 1'b0;
+    miss_o                      = 1'b0;  // to performance counter
+    serve_amo_d                 = serve_amo_q;
+    // --------------------------------
+    // Flush and Miss operation
+    // --------------------------------
+    state_d                     = state_q;
+    cnt_d                       = cnt_q;
+    evict_way_d                 = evict_way_q;
+    evict_cl_d                  = evict_cl_q;
+    mshr_d                      = mshr_q;
+    // communicate to the requester which unit we are currently serving
+    active_serving_o[mshr_q.id] = mshr_q.valid;
+    // AMOs
+    amo_resp_o.ack              = 1'b0;
+    amo_resp_o.result           = '0;
+    amo_operand_b               = '0;
+
+    case (state_q)
+
+      IDLE: begin
+        // lowest priority are AMOs, wait until everything else is served before going for the AMOs
+        if (amo_req_i.req && !busy_i) begin
+          // 1. Flush the cache
+          state_d = FLUSH_REQ_STATUS;
+          serve_amo_d = 1'b1;
+          cnt_d = '0;
+        end
+        // check if we want to flush and can flush e.g.: we are not busy anymore
+        // TODO: Check that the busy flag is indeed needed
+        if (flush_i && !busy_i) begin
+          state_d = FLUSH_REQ_STATUS;
+          cnt_d   = '0;
+        end
+
+        // check if one of the state machines missed
+        for (int unsigned i = 0; i < NR_PORTS; i++) begin
+          // here comes the refill portion of code
+          if (miss_req_valid[i] && !miss_req_bypass[i]) begin
+            state_d      = MISS;
+            // we are taking another request so don't take the AMO
+            serve_amo_d  = 1'b0;
+            // save to MSHR
+            mshr_d.valid = 1'b1;
+            mshr_d.we    = miss_req_we[i];
+            mshr_d.id    = i;
+            mshr_d.addr  = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0];
+            mshr_d.wdata = miss_req_wdata[i];
+            mshr_d.be    = miss_req_be[i];
+            break;
+          end
+        end
+      end
+
+      //  ~> we missed on the cache
+      MISS: begin
+        // 1. Check if there is an empty cache-line
+        // 2. If not -> evict one
+        req_o   = '1;
+        addr_o  = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
+        state_d = MISS_REPL;
+        miss_o  = 1'b1;
+      end
+
+      // ~> second miss cycle
+      MISS_REPL: begin
+        // if all are valid we need to evict one, pseudo random from LFSR
+        if (&valid_way) begin
+          lfsr_enable = 1'b1;
+          evict_way_d = lfsr_oh;
+          // do we need to write back the cache line?
+          if (data_i[lfsr_bin].dirty) begin
+            state_d = WB_CACHELINE_MISS;
+            evict_cl_d.tag = data_i[lfsr_bin].tag;
+            evict_cl_d.data = data_i[lfsr_bin].data;
+            cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
+            // no - we can request a cache line now
+          end else state_d = REQ_CACHELINE;
+          // we have at least one free way
+        end else begin
+          // get victim cache-line by looking for the first non-valid bit
+          evict_way_d = get_victim_cl(~valid_way);
+          state_d = REQ_CACHELINE;
+        end
+      end
+
+      // ~> we can just load the cache-line, the way is stored in evict_way_q
+      REQ_CACHELINE: begin
+        req_fsm_miss_valid = 1'b1;
+        req_fsm_miss_addr  = mshr_q.addr;
+
+        if (gnt_miss_fsm) begin
+          state_d = SAVE_CACHELINE;
+          miss_gnt_o[mshr_q.id] = 1'b1;
+        end
+      end
+
+      // ~> replace the cacheline
+      SAVE_CACHELINE: begin
+        // calculate cacheline offset
+        automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
+        cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6;
+        // we've got a valid response from refill unit
+        if (valid_miss_fsm) begin
+
+          addr_o       = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
+          req_o        = evict_way_q;
+          we_o         = 1'b1;
+          be_o         = '1;
+          be_o.vldrty  = evict_way_q;
+          data_o.tag   = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
+          data_o.data  = data_miss_fsm;
+          data_o.valid = 1'b1;
+          data_o.dirty = 1'b0;
+
+          // is this a write?
+          if (mshr_q.we) begin
+            // Yes, so save the updated data now
+            for (int i = 0; i < 8; i++) begin
+              // check if we really want to write the corresponding byte
+              if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i];
+            end
+            // its immediately dirty if we write
+            data_o.dirty = 1'b1;
+          end
+          // reset MSHR
+          mshr_d.valid = 1'b0;
+          // go back to idle
+          state_d = IDLE;
+        end
+      end
+
+      // ------------------------------
+      // Write Back Operation
+      // ------------------------------
+      // ~> evict a cache line from way saved in evict_way_q
+      WB_CACHELINE_FLUSH, WB_CACHELINE_MISS: begin
+
+        req_fsm_miss_valid = 1'b1;
+        req_fsm_miss_addr = {
+          evict_cl_q.tag,
+          cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET],
+          {{DCACHE_BYTE_OFFSET} {1'b0}}
+        };
+        req_fsm_miss_be = '1;
+        req_fsm_miss_we = 1'b1;
+        req_fsm_miss_wdata = evict_cl_q.data;
+
+        // we've got a grant --> this is timing critical, think about it
+        if (gnt_miss_fsm) begin
+          // write status array
+          addr_o       = cnt_q;
+          req_o        = 1'b1;
+          we_o         = 1'b1;
+          data_o.valid = INVALIDATE_ON_FLUSH ? 1'b0 : 1'b1;
+          // invalidate
+          be_o.vldrty  = evict_way_q;
+          // go back to handling the miss or flushing, depending on where we came from
+          state_d      = (state_q == WB_CACHELINE_MISS) ? MISS : FLUSH_REQ_STATUS;
+        end
+      end
+
+      // ------------------------------
+      // Flushing & Initialization
+      // ------------------------------
+      // ~> make another request to check the same cache-line if there are still some valid entries
+      FLUSH_REQ_STATUS: begin
+        req_o   = '1;
+        addr_o  = cnt_q;
+        state_d = FLUSHING;
+      end
+
+      FLUSHING: begin
+        // this has priority
+        // at least one of the cache lines is dirty
+        if (|evict_way) begin
+          // evict cache line, look for the first cache-line which is dirty
+          evict_way_d = get_victim_cl(evict_way);
+          evict_cl_d  = data_i[one_hot_to_bin(evict_way)];
+          state_d     = WB_CACHELINE_FLUSH;
+          // not dirty ~> increment and continue
+        end else begin
+          // increment and re-request
+          cnt_d       = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
+          state_d     = FLUSH_REQ_STATUS;
+          addr_o      = cnt_q;
+          req_o       = 1'b1;
+          be_o.vldrty = INVALIDATE_ON_FLUSH ? '1 : '0;
+          we_o        = 1'b1;
+          // finished with flushing operation, go back to idle
+          if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) begin
+            // only acknowledge if the flush wasn't triggered by an atomic
+            flush_ack_o = ~serve_amo_q;
+            // if we are flushing because of an AMO go to serve it
+            if (serve_amo_q) begin
+              state_d = AMO_REQ;
+              serve_amo_d = 1'b0;
+            end else begin
+              state_d = IDLE;
+            end
+          end
+        end
+      end
+
+      // ~> only called after reset
+      INIT: begin
+        // initialize status array
+        addr_o      = cnt_q;
+        req_o       = 1'b1;
+        we_o        = 1'b1;
+        // only write the dirty array
+        be_o.vldrty = '1;
+        cnt_d       = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
+        // finished initialization
+        if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) state_d = IDLE;
+      end
+      // ----------------------
+      // AMOs
+      // ----------------------
+      // ~> we are here because we need to do the AMO, the cache is clean at this point
+      AMO_REQ: begin
+        amo_bypass_req.req     = 1'b1;
+        amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ;
+        amo_bypass_req.amo     = amo_req_i.amo_op;
+        // address is in operand a
+        amo_bypass_req.addr    = amo_req_i.operand_a;
+        if (amo_req_i.amo_op != AMO_LR) begin
+          amo_bypass_req.we = 1'b1;
+        end
+        amo_bypass_req.size = amo_req_i.size;
+        // AXI implements CLR op instead of AND, negate operand
+        if (amo_req_i.amo_op == AMO_AND) begin
+          amo_operand_b = ~amo_req_i.operand_b;
+        end else begin
+          amo_operand_b = amo_req_i.operand_b;
+        end
+        // align data and byte-enable to correct byte lanes
+        amo_bypass_req.wdata = amo_operand_b;
+        if (amo_req_i.size == 2'b11) begin
+          // 64b transfer
+          amo_bypass_req.be = 8'b11111111;
+        end else begin
+          // 32b transfer
+          if (amo_req_i.operand_a[2:0] == '0) begin
+            // 64b aligned -> activate lower 4 byte lanes
+            amo_bypass_req.be = 8'b00001111;
+          end else begin
+            // 64b unaligned -> activate upper 4 byte lanes
+            amo_bypass_req.be = 8'b11110000;
+            amo_bypass_req.wdata = amo_operand_b[31:0] << 32;
+          end
+        end
+
+        // when request is accepted, wait for response
+        if (amo_bypass_rsp.gnt) begin
+          if (amo_bypass_rsp.valid) begin
+            state_d = IDLE;
+            amo_resp_o.ack = 1'b1;
+            amo_resp_o.result = amo_bypass_rsp.rdata;
+          end else begin
+            state_d = AMO_WAIT_RESP;
+          end
+        end
+      end
+      AMO_WAIT_RESP: begin
+        if (amo_bypass_rsp.valid) begin
+          state_d = IDLE;
+          amo_resp_o.ack = 1'b1;
+          // Request is assumed to be still valid (ack not granted yet)
+          if (amo_req_i.size == 2'b10) begin
+            // 32b request
+            logic [31:0] halfword;
+            if (amo_req_i.operand_a[2:0] == '0) begin
+              // 64b aligned -> activate lower 4 byte lanes
+              halfword = amo_bypass_rsp.rdata[31:0];
+            end else begin
+              // 64b unaligned -> activate upper 4 byte lanes
+              halfword = amo_bypass_rsp.rdata[63:32];
+            end
+            // Sign-extend 32b requests as per RISC-V spec
+            amo_resp_o.result = {{32{halfword[31]}}, halfword};
+          end else begin
+            // 64b request
+            amo_resp_o.result = amo_bypass_rsp.rdata;
+          end
+        end
+      end
+    endcase
+  end
+
+  // check MSHR for aliasing
+  always_comb begin
+    // default: no requesting port aliases with the outstanding miss
+    mshr_index_matches_o = 'b0;
+    mshr_addr_matches_o  = 'b0;
+
+    // an invalid MSHR can never match, so every comparison is gated by its valid bit
+    if (mshr_q.valid) begin
+      for (int port = 0; port < NR_PORTS; port++) begin
+        // compare address bits [55:DCACHE_BYTE_OFFSET]
+        if (mshr_q.addr[55:DCACHE_BYTE_OFFSET] == mshr_addr_i[port][55:DCACHE_BYTE_OFFSET])
+          mshr_addr_matches_o[port] = 1'b1;
+        // compare only the index bits [DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]
+        if (mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_addr_i[port][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET])
+          mshr_index_matches_o[port] = 1'b1;
+      end
+    end
+  end
+  // --------------------
+  // Sequential Process
+  // --------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      state_q     <= INIT;  // the FSM leaves reset in the INIT state
+      cnt_q       <= '0;
+      mshr_q      <= '0;
+      serve_amo_q <= 1'b0;
+      evict_cl_q  <= '0;
+      evict_way_q <= '0;
+    end else begin
+      state_q     <= state_d;  // otherwise every register takes its next-state value
+      cnt_q       <= cnt_d;
+      mshr_q      <= mshr_d;
+      serve_amo_q <= serve_amo_d;
+      evict_cl_q  <= evict_cl_d;
+      evict_way_q <= evict_way_d;
+    end
+  end
+
+  //pragma translate_off
+`ifndef VERILATOR
+  // check that at most one eviction way is selected ($onehot0 accepts one-hot or all-zero)
+  assert property (@(posedge clk_i) $onehot0(evict_way_q))
+  else $warning("Evict-way should be one-hot encoded");
+`endif
+  //pragma translate_on
+
+  // ----------------------
+  // Pack bypass ports
+  // ----------------------
+  always_comb begin
+    logic [$clog2(NR_BYPASS_PORTS)-1:0] id;
+
+    // Pack the MSHR (miss request) ports first; only valid requests flagged as bypass are forwarded
+    for (id = 0; id < NR_PORTS; id++) begin
+      bypass_ports_req[id].req     = miss_req_valid[id] & miss_req_bypass[id];
+      bypass_ports_req[id].reqtype = ariane_pkg::SINGLE_REQ;
+      bypass_ports_req[id].amo     = AMO_NONE;
+      bypass_ports_req[id].id      = 4'b1000 | 4'(id);  // request ID is 1000 OR-ed with the port index
+      bypass_ports_req[id].addr    = miss_req_addr[id];
+      bypass_ports_req[id].wdata   = miss_req_wdata[id];
+      bypass_ports_req[id].we      = miss_req_we[id];
+      bypass_ports_req[id].be      = miss_req_be[id];
+      bypass_ports_req[id].size    = miss_req_size[id];
+
+      bypass_gnt_o[id]             = bypass_ports_rsp[id].gnt;
+      bypass_valid_o[id]           = bypass_ports_rsp[id].valid;
+      bypass_data_o[id]            = bypass_ports_rsp[id].rdata;
+    end
+
+    // AMO port has lowest priority; it takes the slot after the miss ports (the loop exits with id == NR_PORTS)
+    bypass_ports_req[id] = amo_bypass_req;
+    amo_bypass_rsp       = bypass_ports_rsp[id];
+  end
+
+  // ----------------------
+  // Arbitrate bypass ports
+  // ----------------------
+  axi_adapter_arbiter #(
+      .req_t              (bypass_req_t),
+      .rsp_t              (bypass_rsp_t),
+      .NR_PORTS           (NR_BYPASS_PORTS),
+      .MAX_OUTSTANDING_REQ(CVA6Cfg.MaxOutstandingStores)
+  ) i_bypass_arbiter (
+      .clk_i,
+      .rst_ni,
+      // requests: every bypass port in, the arbitrated request out
+      .req_i(bypass_ports_req),
+      .req_o(bypass_adapter_req),
+      // responses: adapter response in, per-port responses out
+      .rsp_i(bypass_adapter_rsp),
+      .rsp_o(bypass_ports_rsp)
+  );
+
+  // ----------------------
+  // Bypass AXI Interface
+  // ----------------------
+  // Cast bypass_adapter_req.addr to axi_adapter port size
+  logic [riscv::XLEN-1:0] bypass_addr;
+  assign bypass_addr = bypass_adapter_req.addr;
+
+  axi_adapter #(
+      .axi_req_t            (axi_req_t),
+      .axi_rsp_t            (axi_rsp_t),
+      .CVA6Cfg              (CVA6Cfg),
+      .CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET),
+      .DATA_WIDTH           (64)
+  ) i_bypass_axi_adapter (
+      .clk_i,
+      .rst_ni,
+      .req_i                (bypass_adapter_req.req),
+      .gnt_o                (bypass_adapter_rsp.gnt),
+      .type_i               (bypass_adapter_req.reqtype),
+      .amo_i                (bypass_adapter_req.amo),
+      .size_i               (bypass_adapter_req.size),
+      .addr_i               (bypass_addr),
+      .we_i                 (bypass_adapter_req.we),
+      .be_i                 (bypass_adapter_req.be),
+      .wdata_i              (bypass_adapter_req.wdata),
+      .id_i                 ({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, bypass_adapter_req.id}),
+      .valid_o              (bypass_adapter_rsp.valid),
+      .rdata_o              (bypass_adapter_rsp.rdata),
+      .id_o                 (),  // left open: not used, single outstanding request in arbiter
+      .critical_word_o      (),  // left open: not used for single requests
+      .critical_word_valid_o(),  // left open: not used for single requests
+      .axi_req_o            (axi_bypass_o),
+      .axi_resp_i           (axi_bypass_i)
+  );
+
+  // ----------------------
+  // Cache Line AXI Refill
+  // ----------------------
+  // Cast req_fsm_miss_addr to axi_adapter port size
+  logic [riscv::XLEN-1:0] miss_addr;
+  assign miss_addr = req_fsm_miss_addr;
+
+  axi_adapter #(
+      .axi_req_t            (axi_req_t),
+      .axi_rsp_t            (axi_rsp_t),
+      .CVA6Cfg              (CVA6Cfg),
+      .CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET),
+      .DATA_WIDTH           (DCACHE_LINE_WIDTH)
+  ) i_miss_axi_adapter (
+      .clk_i                (clk_i),
+      .rst_ni               (rst_ni),
+      .req_i                (req_fsm_miss_valid),
+      .gnt_o                (gnt_miss_fsm),
+      .type_i               (req_fsm_miss_req),
+      .amo_i                (AMO_NONE),
+      .size_i               (req_fsm_miss_size),
+      .addr_i               (miss_addr),
+      .we_i                 (req_fsm_miss_we),
+      .be_i                 (req_fsm_miss_be),
+      .wdata_i              (req_fsm_miss_wdata),
+      .id_i                 ({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, 4'b0111}),  // fixed ID 0111
+      .valid_o              (valid_miss_fsm),
+      .rdata_o              (data_miss_fsm),
+      .id_o                 (),  // left open
+      .critical_word_o      (critical_word_o),
+      .critical_word_valid_o(critical_word_valid_o),
+      .axi_req_o            (axi_data_o),
+      .axi_resp_i           (axi_data_i)
+  );
+
+  // -----------------
+  // Replacement LFSR
+  // -----------------
+  lfsr_8bit #(
+      .WIDTH(DCACHE_SET_ASSOC)  // sized by the cache associativity
+  ) i_lfsr (
+      .en_i          (lfsr_enable),  // raised in MISS_REPL when all ways are valid
+      .refill_way_oh (lfsr_oh),  // one-hot victim way, captured into evict_way_d
+      .refill_way_bin(lfsr_bin),  // victim way as an index, used to select data_i[...]
+      .*  // remaining ports bind to same-named signals (presumably clk_i/rst_ni — confirm against lfsr_8bit)
+  );
+
+  // -----------------
+  // Struct Split
+  // -----------------
+  // Hack as system verilog support in modelsim seems to be buggy here
+  always_comb begin
+    automatic miss_req_t unpacked;
+
+    for (int unsigned port = 0; port < NR_PORTS; port++) begin
+      unpacked              = miss_req_t'(miss_req_i[port]);  // view the raw port bits as a struct
+      miss_req_size[port]   = unpacked.size;
+      miss_req_be[port]     = unpacked.be;
+      miss_req_we[port]     = unpacked.we;
+      miss_req_wdata[port]  = unpacked.wdata;
+      miss_req_addr[port]   = unpacked.addr;
+      miss_req_bypass[port] = unpacked.bypass;
+      miss_req_valid[port]  = unpacked.valid;
+    end
+  end
+endmodule
+
+// --------------
+// AXI Arbiter
+// --------------
+//
+// Description: Arbitrates access to AXI refill/bypass
+//
+module axi_adapter_arbiter #(
+    parameter NR_PORTS = 4,  // number of master (requesting) ports
+    parameter MAX_OUTSTANDING_REQ = 0,  // NOTE(review): with the default 0 the SERVING limit compares against -1 — confirm callers override
+    parameter type req_t = std_cache_pkg::bypass_req_t,  // request type; .req and .addr are used in this module
+    parameter type rsp_t = std_cache_pkg::bypass_rsp_t  // response type; .gnt, .valid and .rdata are used in this module
+) (
+    input  logic                clk_i,   // Clock
+    input  logic                rst_ni,  // Asynchronous reset active low
+    // Master ports
+    input  req_t [NR_PORTS-1:0] req_i,
+    output rsp_t [NR_PORTS-1:0] rsp_o,
+    // Slave port
+    output req_t                req_o,
+    input  rsp_t                rsp_i
+);
+  // counter width: $clog2(MAX_OUTSTANDING_REQ + 1), but never less than one bit
+  localparam MAX_OUTSTANDING_CNT_WIDTH = $clog2(
+      MAX_OUTSTANDING_REQ + 1
+  ) > 0 ? $clog2(
+      MAX_OUTSTANDING_REQ + 1
+  ) : 1;
+
+  typedef logic [MAX_OUTSTANDING_CNT_WIDTH-1:0] outstanding_cnt_t;
+
+  enum logic {
+    IDLE,
+    SERVING
+  }
+      state_d, state_q;
+
+  req_t req_d, req_q;  // registered copy of the request being presented on req_o
+  logic [NR_PORTS-1:0] sel_d, sel_q;  // index of the selected port; NOTE(review): NR_PORTS bits wide although used as an index
+  outstanding_cnt_t outstanding_cnt_d, outstanding_cnt_q;  // granted requests whose response is still pending
+
+  logic [NR_PORTS-1:0] req_flat;  // one .req bit per master port
+  logic any_unselected_port_valid;  // some port other than sel_q is requesting
+
+  for (genvar i = 0; i < NR_PORTS; i++) begin : gen_req_flat
+    assign req_flat[i] = req_i[i].req;
+  end
+  assign any_unselected_port_valid = |(req_flat & ~(1 << sel_q));
+
+  // Next-state logic: blocking assignments below are order dependent (req_o and outstanding_cnt_d are read after being updated)
+  always_comb begin
+    sel_d = sel_q;
+    outstanding_cnt_d = outstanding_cnt_q;
+
+    state_d = state_q;
+    req_d = req_q;
+
+    req_o = req_q;  // by default present the registered request
+
+    rsp_o = '0;  // by default no grant and no valid on any port
+    rsp_o[sel_q].rdata = rsp_i.rdata;  // read data is always routed to the currently selected port
+
+    case (state_q)
+
+      IDLE: begin
+        // wait for incoming requests; the lowest-indexed requesting port wins (the loop stops at the first hit)
+        for (int unsigned i = 0; i < NR_PORTS; i++) begin
+          if (req_i[i].req == 1'b1) begin
+            sel_d   = i[$bits(sel_d)-1:0];
+            state_d = SERVING;
+            break;
+          end
+        end
+
+        req_d = req_i[sel_d];
+        req_o = req_i[sel_d];  // forward the chosen request in the same cycle
+        rsp_o[sel_d].gnt = req_i[sel_d].req;  // the master is granted as soon as it is selected
+
+        // Count outstanding transactions, i.e. requests which have been
+        // granted but response hasn't arrived yet
+        if (req_o.req && rsp_i.gnt) begin
+          req_d.req = 1'b0;  // accepted by the slave: stop presenting it next cycle
+          outstanding_cnt_d += 1;
+        end
+      end
+
+      SERVING: begin
+        // We can accept multiple outstanding transactions from same port.
+        // To ensure fairness, we allow this only if all other ports are idle
+        if ((!req_o.req) && !any_unselected_port_valid &&
+          (outstanding_cnt_q != (MAX_OUTSTANDING_REQ - 1))) begin
+          if (req_i[sel_q].req) begin
+            req_d = req_i[sel_q];
+            req_o = req_i[sel_q];
+            rsp_o[sel_q].gnt = 1'b1;
+            state_d = SERVING;
+          end
+        end
+
+        // Count outstanding transactions, i.e. requests which have been
+        // granted but response hasn't arrived yet
+        if (req_o.req && rsp_i.gnt) begin
+          req_d.req = 1'b0;
+          outstanding_cnt_d += 1;
+        end
+        if (rsp_i.valid) begin
+          outstanding_cnt_d -= 1;
+          rsp_o[sel_q].valid = 1'b1;
+          // back to IDLE once nothing is outstanding and no request is left waiting for a grant
+          if ((outstanding_cnt_d == 0) && (!req_o.req || rsp_i.gnt)) state_d = IDLE;
+        end
+      end
+
+      default:  /* default */;
+    endcase
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      state_q           <= IDLE;
+      sel_q             <= '0;
+      req_q             <= '0;
+      outstanding_cnt_q <= '0;
+    end else begin
+      state_q           <= state_d;
+      sel_q             <= sel_d;
+      req_q             <= req_d;
+      outstanding_cnt_q <= outstanding_cnt_d;
+    end
+  end
+  // ------------
+  // Assertions
+  // ------------
+
+  //pragma translate_off
+`ifndef VERILATOR
+  // make sure that we eventually get an rvalid after we received a grant
+  assert property (@(posedge clk_i) rsp_i.gnt |-> ##[1:$] rsp_i.valid)
+  else begin
+    $error("There was a grant without a rvalid");
+    $stop();
+  end
+  // assert that the slave never grants while no request is being presented on req_o
+  assert property (@(negedge clk_i) rsp_i.gnt |-> req_o.req)  // NOTE(review): falling-edge clock, unlike the other assertions — confirm intent
+  else begin
+    $error("There was a grant without a request.");
+    $stop();
+  end
+  // assert that the address does not contain X when request is sent
+  assert property (@(posedge clk_i) (req_o.req) |-> (!$isunknown(req_o.addr)))
+  else begin
+    $error("address contains X when request is set");
+    $stop();
+  end
+
+`endif
+  //pragma translate_on
+endmodule
diff --git a/test/type_param/core/cache_subsystem/std_cache_subsystem.sv b/test/type_param/core/cache_subsystem/std_cache_subsystem.sv
new file mode 100644
index 0000000..45ba8bd
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/std_cache_subsystem.sv
@@ -0,0 +1,315 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba    <zarubaf@iis.ee.ethz.ch>, ETH Zurich
+//         Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 15.08.2018
+// Description: Standard Ariane cache subsystem with instruction cache and
+//              write-back data cache.
+
+
+module std_cache_subsystem
+  import ariane_pkg::*;
+  import std_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned NumPorts = 4,  // number of D$ request ports
+    parameter type axi_ar_chan_t = logic,  // payload type carried by the AR arbiter
+    parameter type axi_aw_chan_t = logic,  // payload type carried by the AW arbiter
+    parameter type axi_w_chan_t = logic,  // payload type carried by the W mux
+    parameter type axi_req_t = logic,
+    parameter type axi_rsp_t = logic
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input riscv::priv_lvl_t priv_lvl_i,
+    // I$
+    input logic icache_en_i,  // enable icache (or bypass e.g: in debug mode)
+    input logic icache_flush_i,  // flush the icache, flush and kill have to be asserted together
+    output logic icache_miss_o,  // to performance counter
+    // address translation requests
+    input icache_areq_t icache_areq_i,  // to/from frontend
+    output icache_arsp_t icache_areq_o,
+    // data requests
+    input icache_dreq_t icache_dreq_i,  // to/from frontend
+    output icache_drsp_t icache_dreq_o,
+    // AMOs
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+    // D$
+    // Cache management
+    input logic dcache_enable_i,  // from CSR
+    input logic dcache_flush_i,  // high until acknowledged
+    output logic                           dcache_flush_ack_o,     // send a single cycle acknowledge signal when the cache is flushed
+    output logic dcache_miss_o,  // we missed on a ld/st
+    output logic                           wbuffer_empty_o,        // statically set to 1, as there is no wbuffer in this cache system
+    // Request ports
+    input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i,  // to/from LSU
+    output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o,  // to/from LSU
+    // memory side
+    output axi_req_t axi_req_o,
+    input axi_rsp_t axi_resp_i
+);
+
+  assign wbuffer_empty_o = 1'b1;
+  // per-master AXI bundles (I$, D$ bypass, D$ refill); merged onto axi_req_o / axi_resp_i below
+  axi_req_t axi_req_icache;
+  axi_rsp_t axi_resp_icache;
+  axi_req_t axi_req_bypass;
+  axi_rsp_t axi_resp_bypass;
+  axi_req_t axi_req_data;
+  axi_rsp_t axi_resp_data;
+
+  cva6_icache_axi_wrapper #(
+      .CVA6Cfg  (CVA6Cfg),
+      .axi_req_t(axi_req_t),
+      .axi_rsp_t(axi_rsp_t)
+  ) i_cva6_icache_axi_wrapper (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .priv_lvl_i(priv_lvl_i),
+      .flush_i   (icache_flush_i),
+      .en_i      (icache_en_i),
+      .miss_o    (icache_miss_o),
+      .areq_i    (icache_areq_i),
+      .areq_o    (icache_areq_o),
+      .dreq_i    (icache_dreq_i),
+      .dreq_o    (icache_dreq_o),
+      .axi_req_o (axi_req_icache),
+      .axi_resp_i(axi_resp_icache)
+  );
+
+  // decreasing priority
+  // Port 0: PTW
+  // Port 1: Load Unit
+  // Port 2: Accelerator
+  // Port 3: Store Unit
+  std_nbdcache #(
+      .CVA6Cfg  (CVA6Cfg),
+      .NumPorts (NumPorts),
+      .axi_req_t(axi_req_t),
+      .axi_rsp_t(axi_rsp_t)
+  ) i_nbdcache (
+      .clk_i,
+      .rst_ni,
+      .enable_i    (dcache_enable_i),
+      .flush_i     (dcache_flush_i),
+      .flush_ack_o (dcache_flush_ack_o),
+      .miss_o      (dcache_miss_o),
+      .axi_bypass_o(axi_req_bypass),
+      .axi_bypass_i(axi_resp_bypass),
+      .axi_data_o  (axi_req_data),
+      .axi_data_i  (axi_resp_data),
+      .req_ports_i (dcache_req_ports_i),
+      .req_ports_o (dcache_req_ports_o),
+      .amo_req_i,
+      .amo_resp_o
+  );
+
+  // -----------------------
+  // Arbitrate AXI Ports
+  // -----------------------
+  logic [1:0] w_select, w_select_fifo, w_select_arbiter;  // W-mux selects: from AW ID, from FIFO head, final
+  logic [1:0] w_fifo_usage;
+  logic w_fifo_empty, w_fifo_full;
+
+
+  // AR Channel
+  stream_arbiter #(
+      .DATA_T(axi_ar_chan_t),
+      .N_INP (3)
+  ) i_stream_arbiter_ar (
+      .clk_i,
+      .rst_ni,
+      .inp_data_i ({axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar}),
+      .inp_valid_i({axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid}),
+      .inp_ready_o({axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready}),
+      .oup_data_o (axi_req_o.ar),
+      .oup_valid_o(axi_req_o.ar_valid),
+      .oup_ready_i(axi_resp_i.ar_ready)
+  );
+
+  // AW Channel
+  stream_arbiter #(
+      .DATA_T(axi_aw_chan_t),
+      .N_INP (3)
+  ) i_stream_arbiter_aw (
+      .clk_i,
+      .rst_ni,
+      .inp_data_i ({axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw}),
+      .inp_valid_i({axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid}),
+      .inp_ready_o({axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready}),
+      .oup_data_o (axi_req_o.aw),
+      .oup_valid_o(axi_req_o.aw_valid),
+      .oup_ready_i(axi_resp_i.aw_ready)
+  );
+
+  // WID has been removed in AXI 4 so we need to keep track which AW request has been accepted
+  // to forward the correct write data.
+  always_comb begin
+    w_select = 0;
+    unique casez (axi_req_o.aw.id)
+      4'b0111: w_select = 2;  // dcache
+      4'b1???: w_select = 1;  // bypass
+      default: w_select = 0;  // icache
+    endcase
+  end
+
+  // W Channel
+  fifo_v3 #(
+      .DATA_WIDTH  (2),
+      // we can have a maximum of 4 outstanding transactions as each port is blocking
+      .DEPTH       (4),
+      .FALL_THROUGH(1'b1)
+  ) i_fifo_w_channel (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (w_fifo_full),
+      .empty_o   (),                                                          // leave open
+      .usage_o   (w_fifo_usage),
+      .data_i    (w_select),
+      // a new transaction was requested and granted
+      .push_i    (axi_req_o.aw_valid & axi_resp_i.aw_ready),
+      // write ID to select the output MUX
+      .data_o    (w_select_fifo),
+      // transaction has finished
+      .pop_i     (axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last)
+  );
+
+  // In fall-through mode, the empty_o will be low when push_i is high (on zero usage).
+  // We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero.
+  assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full;
+
+  // icache will never write so select it as default (e.g.: when no arbitration is active)
+  // this is equal to setting it to zero
+  assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo;
+
+  stream_mux #(
+      .DATA_T(axi_w_chan_t),
+      .N_INP (3)
+  ) i_stream_mux_w (
+      .inp_data_i ({axi_req_data.w, axi_req_bypass.w, axi_req_icache.w}),
+      .inp_valid_i({axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid}),
+      .inp_ready_o({axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready}),
+      .inp_sel_i  (w_select_arbiter),
+      .oup_data_o (axi_req_o.w),
+      .oup_valid_o(axi_req_o.w_valid),
+      .oup_ready_i(axi_resp_i.w_ready)
+  );
+
+  // Route responses based on ID
+  // 0000 -> I$
+  // 0111 -> D$
+  // 1??? -> Bypass
+  // R Channel
+  assign axi_resp_icache.r = axi_resp_i.r;
+  assign axi_resp_bypass.r = axi_resp_i.r;
+  assign axi_resp_data.r   = axi_resp_i.r;
+
+  logic [1:0] r_select;
+  // select values index the demux concatenations below: 0 = dcache, 1 = bypass, 2 = icache
+  always_comb begin
+    r_select = 0;
+    unique casez (axi_resp_i.r.id)
+      4'b0111: r_select = 0;  // dcache
+      4'b1???: r_select = 1;  // bypass
+      4'b0000: r_select = 2;  // icache
+      default: r_select = 0;
+    endcase
+  end
+
+  stream_demux #(
+      .N_OUP(3)
+  ) i_stream_demux_r (
+      .inp_valid_i(axi_resp_i.r_valid),
+      .inp_ready_o(axi_req_o.r_ready),
+      .oup_sel_i  (r_select),
+      .oup_valid_o({axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid}),
+      .oup_ready_i({axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready})
+  );
+
+  // B Channel
+  logic [1:0] b_select;
+
+  assign axi_resp_icache.b = axi_resp_i.b;
+  assign axi_resp_bypass.b = axi_resp_i.b;
+  assign axi_resp_data.b   = axi_resp_i.b;
+
+  always_comb begin
+    b_select = 0;
+    unique casez (axi_resp_i.b.id)
+      4'b0111: b_select = 0;  // dcache
+      4'b1???: b_select = 1;  // bypass
+      4'b0000: b_select = 2;  // icache
+      default: b_select = 0;
+    endcase
+  end
+
+  stream_demux #(
+      .N_OUP(3)
+  ) i_stream_demux_b (
+      .inp_valid_i(axi_resp_i.b_valid),
+      .inp_ready_o(axi_req_o.b_ready),
+      .oup_sel_i  (b_select),
+      .oup_valid_o({axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid}),
+      .oup_ready_i({axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready})
+  );
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+  a_invalid_instruction_fetch :
+  assert property (
+    @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
+  else
+    $warning(
+        // no leading number here: only $fatal takes a finish_number as its first argument
+        "[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X",
+        icache_dreq_o.vaddr,
+        icache_dreq_o.data
+    );
+
+  a_invalid_write_data :
+  assert property (
+    @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> |dcache_req_ports_i[NumPorts-1].data_be |-> (|dcache_req_ports_i[NumPorts-1].data_wdata) !== 1'hX)
+  else
+    $warning(
+        // no leading number here: only $fatal takes a finish_number as its first argument
+        "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
+        {
+          dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
+        },
+        dcache_req_ports_i[NumPorts-1].data_be,
+        dcache_req_ports_i[NumPorts-1].data_wdata
+    );
+  generate
+    for (genvar j = 0; j < NumPorts - 1; j++) begin
+      a_invalid_read_data :
+      assert property (
+          @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
+      else
+        $warning(
+            // no leading number here: only $fatal takes a finish_number as its first argument
+            "[l1 dcache] reading invalid data on port %01d: data=%016X",
+            j,
+            dcache_req_ports_o[j].data_rdata
+        );
+    end
+  endgenerate
+
+`endif
+  //pragma translate_on
+endmodule  // std_cache_subsystem
diff --git a/test/type_param/core/cache_subsystem/std_nbdcache.sv b/test/type_param/core/cache_subsystem/std_nbdcache.sv
new file mode 100644
index 0000000..367c67c
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/std_nbdcache.sv
@@ -0,0 +1,279 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 13.10.2017
+// Description: Nonblocking private L1 dcache
+
+
+module std_nbdcache
+  import std_cache_pkg::*;
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,  // forwarded to cache_ctrl, miss_handler and tag_cmp
+    parameter int unsigned NumPorts = 4,  // number of request ports; one cache_ctrl is generated per port
+    parameter type axi_req_t = logic,  // AXI request type, forwarded to miss_handler
+    parameter type axi_rsp_t = logic  // AXI response type, forwarded to miss_handler
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    // Cache management
+    input logic enable_i,  // from CSR
+    input logic flush_i,  // high until acknowledged
+    output logic flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
+    output logic miss_o,  // we missed on a LD/ST
+    // AMOs
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+    // Request ports
+    input dcache_req_i_t [NumPorts-1:0] req_ports_i,  // request ports
+    output dcache_req_o_t [NumPorts-1:0] req_ports_o,  // request ports
+    // Cache AXI refill port
+    output axi_req_t axi_data_o,
+    input axi_rsp_t axi_data_i,
+    output axi_req_t axi_bypass_o,
+    input axi_rsp_t axi_bypass_i
+);
+
+  // (std_cache_pkg::* is already imported in the module header; no second import is needed here)
+
+  // -------------------------------
+  // Controller <-> Arbiter
+  // -------------------------------
+  // [0] Miss handler
+  // [1] PTW
+  // [2] Load Unit
+  // [3] Accelerator
+  // [4] Store unit
+  logic        [            NumPorts:0][  DCACHE_SET_ASSOC-1:0] req;
+  logic        [            NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr;
+  logic        [            NumPorts:0]                         gnt;
+  cache_line_t [  DCACHE_SET_ASSOC-1:0]                         rdata;
+  logic        [            NumPorts:0][  DCACHE_TAG_WIDTH-1:0] tag;
+
+  cache_line_t [            NumPorts:0]                         wdata;
+  logic        [            NumPorts:0]                         we;
+  cl_be_t      [            NumPorts:0]                         be;
+  logic        [  DCACHE_SET_ASSOC-1:0]                         hit_way;
+  // -------------------------------
+  // Controller <-> Miss unit
+  // -------------------------------
+  logic        [          NumPorts-1:0]                         busy;
+  logic        [          NumPorts-1:0][                  55:0] mshr_addr;
+  logic        [          NumPorts-1:0]                         mshr_addr_matches;
+  logic        [          NumPorts-1:0]                         mshr_index_matches;
+  logic        [                  63:0]                         critical_word;
+  logic                                                         critical_word_valid;
+
+  logic        [          NumPorts-1:0][ $bits(miss_req_t)-1:0] miss_req;
+  logic        [          NumPorts-1:0]                         miss_gnt;
+  logic        [          NumPorts-1:0]                         active_serving;
+
+  logic        [          NumPorts-1:0]                         bypass_gnt;
+  logic        [          NumPorts-1:0]                         bypass_valid;
+  logic        [          NumPorts-1:0][                  63:0] bypass_data;
+  // -------------------------------
+  // Arbiter <-> Data/tag RAMs
+  // -------------------------------
+  logic        [  DCACHE_SET_ASSOC-1:0]                         req_ram;
+  logic        [DCACHE_INDEX_WIDTH-1:0]                         addr_ram;
+  logic                                                         we_ram;
+  cache_line_t                                                  wdata_ram;
+  cache_line_t [  DCACHE_SET_ASSOC-1:0]                         rdata_ram;
+  cl_be_t                                                       be_ram;
+
+  // ------------------
+  // Cache Controller
+  // ------------------
+  generate
+    for (genvar i = 0; i < NumPorts; i++) begin : master_ports
+      cache_ctrl #(
+          .CVA6Cfg(CVA6Cfg)
+      ) i_cache_ctrl (
+          .bypass_i  (~enable_i),
+          .busy_o    (busy[i]),
+          // from core
+          .req_port_i(req_ports_i[i]),
+          .req_port_o(req_ports_o[i]),
+          // to SRAM array
+          .req_o     (req[i+1]),
+          .addr_o    (addr[i+1]),
+          .gnt_i     (gnt[i+1]),
+          .data_i    (rdata),
+          .tag_o     (tag[i+1]),
+          .data_o    (wdata[i+1]),
+          .we_o      (we[i+1]),
+          .be_o      (be[i+1]),
+          .hit_way_i (hit_way),
+          // miss handler interface (arbiter slot 0 belongs to the miss handler, hence i+1 above)
+          .miss_req_o           (miss_req[i]),
+          .miss_gnt_i           (miss_gnt[i]),
+          .active_serving_i     (active_serving[i]),
+          .critical_word_i      (critical_word),
+          .critical_word_valid_i(critical_word_valid),
+          .bypass_gnt_i         (bypass_gnt[i]),
+          .bypass_valid_i       (bypass_valid[i]),
+          .bypass_data_i        (bypass_data[i]),
+          // MSHR address lookup
+          .mshr_addr_o         (mshr_addr[i]),
+          .mshr_addr_matches_i (mshr_addr_matches[i]),
+          .mshr_index_matches_i(mshr_index_matches[i]),
+          .*
+      );
+    end
+  endgenerate
+
+  // ------------------
+  // Miss Handling Unit
+  // ------------------
+  miss_handler #(
+      .CVA6Cfg  (CVA6Cfg),
+      .NR_PORTS (NumPorts),
+      .axi_req_t(axi_req_t),
+      .axi_rsp_t(axi_rsp_t)
+  ) i_miss_handler (
+      .flush_i              (flush_i),
+      .busy_i               (|busy),
+      // AMOs
+      .amo_req_i            (amo_req_i),
+      .amo_resp_o           (amo_resp_o),
+      .miss_req_i           (miss_req),
+      .miss_gnt_o           (miss_gnt),
+      .bypass_gnt_o         (bypass_gnt),
+      .bypass_valid_o       (bypass_valid),
+      .bypass_data_o        (bypass_data),
+      .critical_word_o      (critical_word),
+      .critical_word_valid_o(critical_word_valid),
+      .mshr_addr_i          (mshr_addr),
+      .mshr_addr_matches_o  (mshr_addr_matches),
+      .mshr_index_matches_o (mshr_index_matches),
+      .active_serving_o     (active_serving),
+      .req_o                (req[0]),
+      .addr_o               (addr[0]),
+      .data_i               (rdata),
+      .be_o                 (be[0]),
+      .data_o               (wdata[0]),
+      .we_o                 (we[0]),
+      .axi_bypass_o,
+      .axi_bypass_i,
+      .axi_data_o,
+      .axi_data_i,
+      .*
+  );
+
+  assign tag[0] = '0;  // no tag output of the miss handler is wired to arbiter slot 0
+
+  // --------------
+  // Memory Arrays
+  // --------------
+  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
+    sram #(
+        .DATA_WIDTH(DCACHE_LINE_WIDTH),
+        .NUM_WORDS (DCACHE_NUM_WORDS)
+    ) data_sram (
+        .req_i  (req_ram[i]),
+        .rst_ni (rst_ni),
+        .we_i   (we_ram),
+        .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
+        .wuser_i('0),
+        .wdata_i(wdata_ram.data),
+        .be_i   (be_ram.data),
+        .ruser_o(),
+        .rdata_o(rdata_ram[i].data),
+        .*
+    );
+
+    sram #(
+        .DATA_WIDTH(DCACHE_TAG_WIDTH),
+        .NUM_WORDS (DCACHE_NUM_WORDS)
+    ) tag_sram (
+        .req_i  (req_ram[i]),
+        .rst_ni (rst_ni),
+        .we_i   (we_ram),
+        .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
+        .wuser_i('0),
+        .wdata_i(wdata_ram.tag),
+        .be_i   (be_ram.tag),
+        .ruser_o(),
+        .rdata_o(rdata_ram[i].tag),
+        .*
+    );
+
+  end
+
+  // ----------------
+  // Valid/Dirty Regs
+  // ----------------
+
+  // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
+  // note: if you have an SRAM that supports flat bit enables for your target technology,
+  // you can use it here to save the extra 4x overhead introduced by this workaround.
+  logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;  // one byte lane per way; assumes DCACHE_DIRTY_WIDTH == 2*DCACHE_SET_ASSOC - TODO confirm
+  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : vld_dirty_map
+    assign dirty_wdata[8*i]   = wdata_ram.dirty;
+    assign dirty_wdata[8*i+1] = wdata_ram.valid;
+    assign dirty_wdata[8*i+2+:6] = '0;  // tie off the unused padding bits so no X is written to the SRAM
+    assign rdata_ram[i].dirty = dirty_rdata[8*i];
+    assign rdata_ram[i].valid = dirty_rdata[8*i+1];
+  end
+
+  sram #(
+      .USER_WIDTH(1),
+      .DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
+      .NUM_WORDS (DCACHE_NUM_WORDS)
+  ) valid_dirty_sram (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .req_i  (|req_ram),
+      .we_i   (we_ram),
+      .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
+      .wuser_i('0),
+      .wdata_i(dirty_wdata),
+      .be_i   (be_ram.vldrty),
+      .ruser_o(),
+      .rdata_o(dirty_rdata)
+  );
+
+  // ------------------------------------------------
+  // Tag Comparison and memory arbitration
+  // ------------------------------------------------
+  tag_cmp #(
+      .CVA6Cfg         (CVA6Cfg),
+      .NR_PORTS        (NumPorts + 1),
+      .ADDR_WIDTH      (DCACHE_INDEX_WIDTH),
+      .DCACHE_SET_ASSOC(DCACHE_SET_ASSOC)
+  ) i_tag_cmp (
+      .req_i    (req),
+      .gnt_o    (gnt),
+      .addr_i   (addr),
+      .wdata_i  (wdata),
+      .we_i     (we),
+      .be_i     (be),
+      .rdata_o  (rdata),
+      .tag_i    (tag),
+      .hit_way_o(hit_way),
+
+      .req_o  (req_ram),
+      .addr_o (addr_ram),
+      .wdata_o(wdata_ram),
+      .we_o   (we_ram),
+      .be_o   (be_ram),
+      .rdata_i(rdata_ram),
+      .*
+  );
+
+
+  //pragma translate_off
+  initial begin
+    assert (DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16})
+    else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth");
+  end
+  //pragma translate_on
+endmodule
diff --git a/test/type_param/core/cache_subsystem/tag_cmp.sv b/test/type_param/core/cache_subsystem/tag_cmp.sv
new file mode 100644
index 0000000..a378c13
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/tag_cmp.sv
@@ -0,0 +1,106 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// --------------
+// Tag Compare
+// --------------
+//
+// Description: Arbitrates access to cache memories, simplified request grant protocol
+//              checks for hit or miss on cache
+//
+module tag_cmp #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg          = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           NR_PORTS         = 3,
+    parameter int unsigned           ADDR_WIDTH       = 64,
+    parameter type                   l_data_t         = std_cache_pkg::cache_line_t,
+    parameter type                   l_be_t           = std_cache_pkg::cl_be_t,
+    parameter int unsigned           DCACHE_SET_ASSOC = 8
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    // requester side: one entry per port, the lowest requesting index wins the arbitration
+    input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i,
+    output logic [NR_PORTS-1:0] gnt_o,  // set for every port up to and including the first requester
+    input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i,
+    input l_data_t [NR_PORTS-1:0] wdata_i,
+    input logic [NR_PORTS-1:0] we_i,
+    input l_be_t [NR_PORTS-1:0] be_i,
+    output l_data_t [DCACHE_SET_ASSOC-1:0] rdata_o,
+    input  logic    [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
+    output logic [DCACHE_SET_ASSOC-1:0] hit_way_o,  // we've got a hit on the corresponding way
+
+    // memory side: the single access selected by the priority arbiter below
+    output logic    [DCACHE_SET_ASSOC-1:0] req_o,
+    output logic    [      ADDR_WIDTH-1:0] addr_o,
+    output l_data_t                        wdata_o,
+    output logic                           we_o,
+    output l_be_t                          be_o,
+    input  l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i
+);
+
+  assign rdata_o = rdata_i;
+  // one hot encoded
+  logic [NR_PORTS-1:0] id_d, id_q;
+  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag;
+
+  always_comb begin : tag_sel
+    sel_tag = '0;
+    for (int unsigned i = 0; i < NR_PORTS; i++) if (id_q[i]) sel_tag = tag_i[i];
+  end
+
+  for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp
+    assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
+  end
+
+  always_comb begin
+    gnt_o   = '0;
+    id_d    = '0;
+    wdata_o = '0;
+    req_o   = '0;
+    addr_o  = '0;
+    be_o    = '0;
+    we_o    = '0;
+    // Request Side
+    // priority select
+    for (int unsigned i = 0; i < NR_PORTS; i++) begin
+      req_o    = req_i[i];
+      id_d     = (1'b1 << i);
+      gnt_o[i] = 1'b1;
+      addr_o   = addr_i[i];
+      be_o     = be_i[i];
+      we_o     = we_i[i];
+      wdata_o  = wdata_i[i];
+
+      if (req_i[i]) break;
+    end
+  end
+
+`ifndef SYNTHESIS
+`ifndef VERILATOR
+  // assert that cache only hits on one way; kept at module scope (not inside always_comb) so it is
+  // evaluated on every clk_i edge. checked one cycle after all ways have been requested
+  // NOTE(review): &req_i ANDs every way of every port, so this needs all ports requesting all ways at once - confirm intent
+  onehot :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) &req_i |=> $onehot0(hit_way_o))
+  else begin
+    $fatal(1, "Hit should be one-hot encoded");
+  end
+`endif
+`endif
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      id_q <= 0;
+    end else begin
+      id_q <= id_d;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/wt_axi_adapter.sv b/test/type_param/core/cache_subsystem/wt_axi_adapter.sv
new file mode 100644
index 0000000..1647f1d
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_axi_adapter.sv
@@ -0,0 +1,712 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 08.08.2018
+// Description: adapter module to connect the L1D$ and L1I$ to a 64bit AXI bus.
+//
+
+
+module wt_axi_adapter
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned ReqFifoDepth = 2,
+    parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX,
+    parameter type axi_req_t = logic,
+    parameter type axi_rsp_t = logic
+) (
+    input logic clk_i,
+    input logic rst_ni,
+
+    // icache
+    input  logic         icache_data_req_i,
+    output logic         icache_data_ack_o,
+    input  icache_req_t  icache_data_i,
+    // returning packets must be consumed immediately
+    output logic         icache_rtrn_vld_o,
+    output icache_rtrn_t icache_rtrn_o,
+
+    // dcache
+    input  logic         dcache_data_req_i,
+    output logic         dcache_data_ack_o,
+    input  dcache_req_t  dcache_data_i,
+    // returning packets must be consumed immediately
+    output logic         dcache_rtrn_vld_o,
+    output dcache_rtrn_t dcache_rtrn_o,
+
+    // AXI port
+    output axi_req_t axi_req_o,
+    input  axi_rsp_t axi_resp_i,
+
+    // Invalidations
+    input  logic [63:0] inval_addr_i,
+    input  logic        inval_valid_i,
+    output logic        inval_ready_o
+);
+
+  // support up to 512bit cache lines
+  localparam AxiNumWords = (ariane_pkg::ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH > ariane_pkg::DCACHE_LINE_WIDTH)  +
+                           (ariane_pkg::DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH <= ariane_pkg::DCACHE_LINE_WIDTH) ;
+  localparam MaxNumWords = $clog2(CVA6Cfg.AxiDataWidth / 8);
+  localparam AxiRdBlenIcache = ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
+  localparam AxiRdBlenDcache = ariane_pkg::DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
+
+  ///////////////////////////////////////////////////////
+  // request path
+  ///////////////////////////////////////////////////////
+
+  icache_req_t icache_data;
+  logic icache_data_full, icache_data_empty;
+  dcache_req_t dcache_data;
+  logic dcache_data_full, dcache_data_empty;
+
+  logic [1:0] arb_req, arb_ack;
+  logic arb_idx, arb_gnt;
+
+  logic axi_rd_req, axi_rd_gnt;
+  logic axi_wr_req, axi_wr_gnt;
+  logic axi_wr_valid, axi_rd_valid, axi_rd_rdy, axi_wr_rdy;
+  logic axi_rd_lock, axi_wr_lock, axi_rd_exokay, axi_wr_exokay, wr_exokay;
+  logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr, axi_wr_addr;
+  logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen;
+  logic [2:0] axi_rd_size, axi_wr_size;
+  logic [CVA6Cfg.AxiIdWidth-1:0]
+      axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out;
+  logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] axi_wr_data;
+  logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] axi_wr_user;
+  logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
+  logic [CVA6Cfg.AxiUserWidth-1:0] axi_rd_user;
+  logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be;
+  logic [5:0] axi_wr_atop;
+  logic invalidate;
+  logic [$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] amo_off_d, amo_off_q;
+  // AMO generates r beat
+  logic amo_gen_r_d, amo_gen_r_q;
+
+  logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] icache_rtrn_tid_d, icache_rtrn_tid_q;
+  logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_tid_d, dcache_rtrn_tid_q;
+  logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_rd_tid, dcache_rtrn_wr_tid;
+  logic dcache_rd_pop, dcache_wr_pop;
+  logic icache_rd_full, icache_rd_empty;
+  logic dcache_rd_full, dcache_rd_empty;
+  logic dcache_wr_full, dcache_wr_empty;
+
+  assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
+  assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
+
+  // arbiter
+  assign arb_req = {
+    ~(dcache_data_empty | dcache_wr_full | dcache_rd_full), ~(icache_data_empty | icache_rd_full)
+  };
+
+  assign arb_gnt = axi_rd_gnt | axi_wr_gnt;
+
+  rr_arb_tree #(
+      .NumIn    (2),
+      .DataWidth(1),
+      .AxiVldRdy(1'b1),
+      .LockIn   (1'b1)
+  ) i_rr_arb_tree (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (arb_req),
+      .gnt_o  (arb_ack),
+      .data_i ('0),
+      .gnt_i  (arb_gnt),
+      .req_o  (),
+      .data_o (),
+      .idx_o  (arb_idx)
+  );
+
+  // request side
+  always_comb begin : p_axi_req
+    // write channel
+    axi_wr_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx};
+    axi_wr_data[0]  = {(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}};
+    axi_wr_user[0]  = dcache_data.user;
+    // Cast to AXI address width
+    axi_wr_addr  = {{CVA6Cfg.AxiAddrWidth-riscv::PLEN{1'b0}}, dcache_data.paddr};
+    axi_wr_size  = dcache_data.size;
+    axi_wr_req   = 1'b0;
+    axi_wr_blen  = '0;// single word writes
+    axi_wr_be    = '0;
+    axi_wr_lock  = '0;
+    axi_wr_atop  = '0;
+    amo_off_d    = amo_off_q;
+    amo_gen_r_d  = amo_gen_r_q;
+
+    // read channel
+    axi_rd_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx};
+    axi_rd_req   = 1'b0;
+    axi_rd_lock  = '0;
+    axi_rd_blen  = '0;
+
+    if (dcache_data.paddr[2] == 1'b0) begin
+      axi_wr_user = {{64 - CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user};
+    end else begin
+      axi_wr_user = {dcache_data.user, {64 - CVA6Cfg.AxiUserWidth{1'b0}}};
+    end
+
+    // arbiter mux
+    if (arb_idx) begin
+      // Cast to AXI address width
+      axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, dcache_data.paddr};
+      // If dcache_data.size MSB is set, we want to read as much as possible
+      axi_rd_size = dcache_data.size[2] ? MaxNumWords[2:0] : dcache_data.size;
+      if (dcache_data.size[2]) begin
+        axi_rd_blen = AxiRdBlenDcache[$clog2(AxiNumWords)-1:0];
+      end
+    end else begin
+      // Cast to AXI address width
+      axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, icache_data.paddr};
+      axi_rd_size = MaxNumWords[2:0];  // always request max number of words in case of ifill
+      if (!icache_data.nc) begin
+        axi_rd_blen = AxiRdBlenIcache[$clog2(AxiNumWords)-1:0];
+      end
+    end
+
+    // signal that an invalidation message
+    // needs to be generated
+    invalidate = 1'b0;
+
+    // decode message type
+    if (|arb_req) begin
+      if (arb_idx == 0) begin
+        //////////////////////////////////////
+        // IMISS
+        axi_rd_req = 1'b1;
+        //////////////////////////////////////
+      end else begin
+        unique case (dcache_data.rtype)
+          //////////////////////////////////////
+          wt_cache_pkg::DCACHE_LOAD_REQ: begin
+            axi_rd_req = 1'b1;
+          end
+          //////////////////////////////////////
+          wt_cache_pkg::DCACHE_STORE_REQ: begin
+            axi_wr_req = 1'b1;
+            axi_wr_be  = '0;
+            unique case (dcache_data.size[1:0])
+              2'b00:
+              axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1;  // byte
+              2'b01:
+              axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1;  // hword
+              2'b10:
+              axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1;  // word
+              default:
+              if (riscv::IS_XLEN64)
+                axi_wr_be[0][dcache_data.paddr[$clog2(
+                    CVA6Cfg.AxiDataWidth/8
+                )-1:0]+:8] = '1;  // dword
+            endcase
+          end
+          //////////////////////////////////////
+          wt_cache_pkg::DCACHE_ATOMIC_REQ: begin
+            if (CVA6Cfg.RVA) begin
+              // default
+              // push back an invalidation here.
+              // since we only keep one read tx in flight, and since
+              // the dcache drains all writes/reads before executing
+              // an atomic, this is safe.
+              invalidate = arb_gnt;
+              axi_wr_req = 1'b1;
+              axi_wr_be  = '0;
+              unique case (dcache_data.size[1:0])
+                2'b00:
+                axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1;  // byte
+                2'b01:
+                axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] =
+                    '1;  // hword
+                2'b10:
+                axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] =
+                    '1;  // word
+                default:
+                axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] =
+                    '1;  // dword
+              endcase
+              amo_gen_r_d = 1'b1;
+              // need to use a separate ID here, so concat an additional bit
+              axi_wr_id_in[1] = 1'b1;
+
+              unique case (dcache_data.amo_op)
+                AMO_LR: begin
+                  axi_rd_lock     = 1'b1;
+                  axi_rd_req      = 1'b1;
+                  axi_rd_id_in[1] = 1'b1;
+                  // tie to zero in this special case
+                  axi_wr_req      = 1'b0;
+                  axi_wr_be       = '0;
+                end
+                AMO_SC: begin
+                  axi_wr_lock = 1'b1;
+                  amo_gen_r_d = 1'b0;
+                  // needed to properly encode success. store the result at offset within the returned
+                  // AXI data word aligned with the requested word size.
+                  amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-
+                                                1:0] & ~((1 << dcache_data.size[1:0]) - 1);
+                end
+                // RISC-V atops have a load semantic
+                AMO_SWAP: axi_wr_atop = axi_pkg::ATOP_ATOMICSWAP;
+                AMO_ADD:
+                axi_wr_atop = {
+                  axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD
+                };
+                AMO_AND: begin
+                  // in this case we need to invert the data to get a "CLR"
+                  axi_wr_data[0] = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.data}};
+                  axi_wr_user = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.user}};
+                  axi_wr_atop = {
+                    axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR
+                  };
+                end
+                AMO_OR:
+                axi_wr_atop = {
+                  axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET
+                };
+                AMO_XOR:
+                axi_wr_atop = {
+                  axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR
+                };
+                AMO_MAX:
+                axi_wr_atop = {
+                  axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX
+                };
+                AMO_MAXU:
+                axi_wr_atop = {
+                  axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX
+                };
+                AMO_MIN:
+                axi_wr_atop = {
+                  axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN
+                };
+                AMO_MINU:
+                axi_wr_atop = {
+                  axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN
+                };
+                default: ;  // Do nothing
+              endcase
+            end
+          end
+          default: ;  // Do nothing
+          //////////////////////////////////////
+        endcase
+      end
+    end
+  end
+
+  fifo_v3 #(
+      .dtype(icache_req_t),
+      .DEPTH(ReqFifoDepth)
+  ) i_icache_data_fifo (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (icache_data_full),
+      .empty_o   (icache_data_empty),
+      .usage_o   (),
+      .data_i    (icache_data_i),
+      .push_i    (icache_data_ack_o),
+      .data_o    (icache_data),
+      .pop_i     (arb_ack[0])
+  );
+
+  fifo_v3 #(
+      .dtype(dcache_req_t),
+      .DEPTH(ReqFifoDepth)
+  ) i_dcache_data_fifo (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (dcache_data_full),
+      .empty_o   (dcache_data_empty),
+      .usage_o   (),
+      .data_i    (dcache_data_i),
+      .push_i    (dcache_data_ack_o),
+      .data_o    (dcache_data),
+      .pop_i     (arb_ack[1])
+  );
+
+  ///////////////////////////////////////////////////////
+  // meta info feedback fifos
+  ///////////////////////////////////////////////////////
+
+  logic icache_rtrn_rd_en, dcache_rtrn_rd_en;
+  logic icache_rtrn_vld_d, icache_rtrn_vld_q, dcache_rtrn_vld_d, dcache_rtrn_vld_q;
+
+  fifo_v3 #(
+      .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
+      .DEPTH     (MetaFifoDepth)
+  ) i_rd_icache_id (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (icache_rd_full),
+      .empty_o   (icache_rd_empty),
+      .usage_o   (),
+      .data_i    (icache_data.tid),
+      .push_i    (arb_ack[0] & axi_rd_gnt),
+      .data_o    (icache_rtrn_tid_d),
+      .pop_i     (icache_rtrn_vld_d)
+  );
+
+  fifo_v3 #(
+      .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
+      .DEPTH     (MetaFifoDepth)
+  ) i_rd_dcache_id (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (dcache_rd_full),
+      .empty_o   (dcache_rd_empty),
+      .usage_o   (),
+      .data_i    (dcache_data.tid),
+      .push_i    (arb_ack[1] & axi_rd_gnt),
+      .data_o    (dcache_rtrn_rd_tid),
+      .pop_i     (dcache_rd_pop)
+  );
+
+  fifo_v3 #(
+      .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
+      .DEPTH     (MetaFifoDepth)
+  ) i_wr_dcache_id (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (dcache_wr_full),
+      .empty_o   (dcache_wr_empty),
+      .usage_o   (),
+      .data_i    (dcache_data.tid),
+      .push_i    (arb_ack[1] & axi_wr_gnt),
+      .data_o    (dcache_rtrn_wr_tid),
+      .pop_i     (dcache_wr_pop)
+  );
+
+  // select correct tid to return
+  assign dcache_rtrn_tid_d = (dcache_wr_pop) ? dcache_rtrn_wr_tid : dcache_rtrn_rd_tid;
+
+  ///////////////////////////////////////////////////////
+  // return path
+  ///////////////////////////////////////////////////////
+
+  // buffer write responses
+  logic b_full, b_empty, b_push, b_pop;
+  assign axi_wr_rdy = ~b_full;
+  assign b_push     = axi_wr_valid & axi_wr_rdy;
+
+  fifo_v3 #(
+      .DATA_WIDTH  (CVA6Cfg.AxiIdWidth + 1),
+      .DEPTH       (MetaFifoDepth),
+      .FALL_THROUGH(1'b1)
+  ) i_b_fifo (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (b_full),
+      .empty_o   (b_empty),
+      .usage_o   (),
+      .data_i    ({axi_wr_exokay, axi_wr_id_out}),
+      .push_i    (b_push),
+      .data_o    ({wr_exokay, wr_id_out}),
+      .pop_i     (b_pop)
+  );
+
+  // buffer read responses in shift regs
+  logic icache_first_d, icache_first_q, dcache_first_d, dcache_first_q;
+  logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
+      icache_rd_shift_user_d, icache_rd_shift_user_q;
+  logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
+      dcache_rd_shift_user_d, dcache_rd_shift_user_q;
+  logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
+      icache_rd_shift_d, icache_rd_shift_q;
+  logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
+      dcache_rd_shift_d, dcache_rd_shift_q;
+  wt_cache_pkg::dcache_in_t dcache_rtrn_type_d, dcache_rtrn_type_q;
+  wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q;
+  logic dcache_sc_rtrn, axi_rd_last;
+
+  always_comb begin : p_axi_rtrn_shift
+    // icache return port is driven straight from registers; fields not set below stay 0
+    icache_rtrn_o          = '0;
+    icache_rtrn_o.rtype    = wt_cache_pkg::ICACHE_IFILL_ACK;
+    icache_rtrn_o.tid      = icache_rtrn_tid_q;
+    icache_rtrn_o.data     = icache_rd_shift_q;
+    icache_rtrn_o.user     = icache_rd_shift_user_q;
+    icache_rtrn_vld_o      = icache_rtrn_vld_q;
+
+    dcache_rtrn_o          = '0;  // same scheme for the dcache return port
+    dcache_rtrn_o.rtype    = dcache_rtrn_type_q;
+    dcache_rtrn_o.inv      = dcache_rtrn_inv_q;
+    dcache_rtrn_o.tid      = dcache_rtrn_tid_q;
+    dcache_rtrn_o.data     = dcache_rd_shift_q;
+    dcache_rtrn_o.user     = dcache_rd_shift_user_q;
+    dcache_rtrn_vld_o      = dcache_rtrn_vld_q;
+
+    // default: shift registers and first-beat flags hold their value
+    icache_rd_shift_d      = icache_rd_shift_q;
+    icache_rd_shift_user_d = icache_rd_shift_user_q;
+    dcache_rd_shift_d      = dcache_rd_shift_q;
+    dcache_rd_shift_user_d = dcache_rd_shift_user_q;
+    icache_first_d         = icache_first_q;
+    dcache_first_d         = dcache_first_q;
+
+    if (icache_rtrn_rd_en) begin  // an icache read beat is accepted this cycle
+      icache_first_d = axi_rd_last;  // the beat after a last beat is again a first beat
+      if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin  // line is a single beat wide: nothing to shift
+        icache_rd_shift_d[0] = axi_rd_data;
+      end else begin
+        icache_rd_shift_d = {  // new beat enters at the top entry, older beats move one entry down
+          axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
+        };
+      end
+      icache_rd_shift_user_d = {  // NOTE(review): unlike the data path, no single-beat guard here - confirm [N-1:1] is valid when N == 1
+        axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
+      };
+      // on the first beat of a burst the beat is also written to entry 0, so a single word transaction ends up at offset 0
+      if (icache_first_q) begin
+        icache_rd_shift_d[0] = axi_rd_data;
+        icache_rd_shift_user_d[0] = axi_rd_user;
+      end
+    end
+
+    if (dcache_rtrn_rd_en) begin  // a dcache read beat is accepted this cycle
+      dcache_first_d = axi_rd_last;  // the beat after a last beat is again a first beat
+      if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin  // line is a single beat wide: nothing to shift
+        dcache_rd_shift_d[0] = axi_rd_data;
+      end else begin
+        dcache_rd_shift_d = {  // new beat enters at the top entry, older beats move one entry down
+          axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
+        };
+      end
+      dcache_rd_shift_user_d = {  // NOTE(review): unlike the data path, no single-beat guard here - confirm [N-1:1] is valid when N == 1
+        axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
+      };
+      // on the first beat of a burst the beat is also written to entry 0, so a single word transaction ends up at offset 0
+      if (dcache_first_q) begin
+        dcache_rd_shift_d[0] = axi_rd_data;
+        dcache_rd_shift_user_d[0] = axi_rd_user;
+      end
+    end else if (CVA6Cfg.RVA && dcache_sc_rtrn) begin  // only when no dcache read beat is accepted in this cycle
+      // encode lr/sc success in entry 0: bit amo_off_q*8 is 0 when wr_exokay is set and 1 otherwise, all other bits 0
+      dcache_rd_shift_d[0] = '0;
+      dcache_rd_shift_user_d[0] = '0;
+      dcache_rd_shift_d[0][amo_off_q*8] = (wr_exokay) ? '0 : 1'b1;
+      dcache_rd_shift_user_d[0][amo_off_q*8] = (wr_exokay) ? '0 : 1'b1;  // NOTE(review): assumes the user entry is wider than amo_off_q*8 - confirm
+    end
+  end
+
+  // decode virtual read channels of icache
+  always_comb begin : p_axi_rtrn_decode
+    // read beats are not accepted while 'invalidate' is set
+    // note: b's are buffered separately (they are taken from the b fifo via b_empty/b_pop below)
+    axi_rd_rdy        = ~invalidate;
+
+    icache_rtrn_rd_en = 1'b0;  // defaults: no icache beat, no icache return
+    icache_rtrn_vld_d = 1'b0;
+
+    // decode virtual icache channel (read ID 0),
+    // this is independent of the dcache decoding below
+    if (axi_rd_valid && axi_rd_id_out == 0 && axi_rd_rdy) begin
+      icache_rtrn_rd_en = 1'b1;
+      icache_rtrn_vld_d = axi_rd_last;  // the return is flagged valid once the last beat is taken
+    end
+
+    dcache_rtrn_rd_en  = 1'b0;  // defaults for the dcache decoding: nothing returned, nothing popped
+    dcache_rtrn_vld_d  = 1'b0;
+    dcache_rd_pop      = 1'b0;  // presumably pops the dcache read ID fifo declared earlier - confirm
+    dcache_wr_pop      = 1'b0;  // presumably pops the dcache write ID fifo declared earlier - confirm
+    dcache_rtrn_inv_d  = '0;
+    dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK;
+    b_pop              = 1'b0;
+    dcache_sc_rtrn     = 1'b0;
+
+    // Priority 1: external invalidation requests (from coprocessor). This is safe as
+    // there are no other transactions when a coprocessor has pending stores.
+    inval_ready_o      = 1'b0;
+    if (inval_valid_i) begin
+      inval_ready_o         = 1'b1;  // request is accepted in the same cycle it is presented
+      dcache_rtrn_type_d    = wt_cache_pkg::DCACHE_INV_REQ;
+      dcache_rtrn_vld_d     = 1'b1;
+      dcache_rtrn_inv_d.all = 1'b1;
+      dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
+      //////////////////////////////////////
+      // Priority 2 (next branch): self invalidation for atomics.
+      // dcache needs some special treatment for arbitration and decoding of atomics
+      //////////////////////////////////////
+      // this is safe, there is no other read tx in flight than this atomic.
+      // note that this self invalidation is handled in this way due to the
+      // write-through cache architecture, which is aligned with the openpiton
+      // cache subsystem.
+    end else if (CVA6Cfg.RVA && invalidate) begin
+      dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
+      dcache_rtrn_vld_d = 1'b1;
+
+      dcache_rtrn_inv_d.all = 1'b1;
+      dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
+      //////////////////////////////////////
+      // Priority 3 (next branch): dcache read responses, i.e. read ID bit 0 set.
+      // note that in case of atomics, the dcache sequentializes requests and
+      // guarantees that there are no other pending transactions in flight
+    end else if (axi_rd_valid && axi_rd_id_out[0] && axi_rd_rdy) begin
+      dcache_rtrn_rd_en = 1'b1;
+      dcache_rtrn_vld_d = axi_rd_last;  // the return is flagged valid once the last beat is taken
+
+      // read ID bit 1 marks an atomic op
+      if (CVA6Cfg.RVA && axi_rd_id_out[1]) begin
+        dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;
+
+        // check if transaction was issued over write channel and pop that ID (on the last beat)
+        if (!dcache_wr_empty) begin
+          dcache_wr_pop = axi_rd_last;
+          // if this is not the case, there MUST be an id in the read channel (LR)
+        end else begin
+          dcache_rd_pop = axi_rd_last;
+        end
+      end else begin
+        dcache_rd_pop = axi_rd_last;  // regular load: pop on the last beat
+      end
+      //////////////////////////////////////
+      // Priority 4 (next branch): write responses, taken from the b fifo when it is not empty
+    end else if (!b_empty) begin
+      b_pop = 1'b1;
+
+      // write ID bit 1 marks an atomic
+      if (CVA6Cfg.RVA && wr_id_out[1]) begin
+        dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;
+
+        // silently discard b response if we already popped the fifo
+        // with a R beat (iff the amo transaction generated an R beat)
+        if (!amo_gen_r_q) begin
+          dcache_rtrn_vld_d = 1'b1;
+          dcache_wr_pop     = 1'b1;
+          dcache_sc_rtrn    = 1'b1;  // makes p_axi_rtrn_shift encode the exokay-based result into entry 0
+        end
+      end else begin
+        // regular store response
+        dcache_rtrn_type_d = wt_cache_pkg::DCACHE_STORE_ACK;
+        dcache_rtrn_vld_d  = 1'b1;
+        dcache_wr_pop      = 1'b1;
+      end
+    end
+    //////////////////////////////////////
+  end
+
+  // remote invalidations are not supported yet (this needs a cache coherence protocol)
+  // note that the atomic transactions would also need a "master exclusive monitor" in that case
+  // assign icache_rtrn_o.inv.idx  = '0;
+  // assign icache_rtrn_o.inv.way  = '0;
+  // assign icache_rtrn_o.inv.vld  = '0;
+  // assign icache_rtrn_o.inv.all  = '0;
+
+  // assign dcache_rtrn_o.inv.idx  = '0;
+  // assign dcache_rtrn_o.inv.way  = '0;
+  // assign dcache_rtrn_o.inv.vld  = '0;
+  // assign dcache_rtrn_o.inv.all  = '0;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf  // state of the read-return path
+    if (!rst_ni) begin
+      icache_first_q <= 1'b1;  // icache return path: next beat is a first beat after reset
+      icache_rd_shift_q <= '0;
+      icache_rd_shift_user_q <= '0;
+      icache_rtrn_vld_q <= '0;
+      icache_rtrn_tid_q <= '0;
+      dcache_first_q <= 1'b1;  // dcache return path: next beat is a first beat after reset
+      dcache_rd_shift_q <= '0;
+      dcache_rd_shift_user_q <= '0;
+      dcache_rtrn_vld_q <= '0;
+      dcache_rtrn_tid_q <= '0;
+      dcache_rtrn_type_q <= wt_cache_pkg::DCACHE_LOAD_ACK;
+      dcache_rtrn_inv_q <= '0;
+      amo_off_q <= '0;  // atomics bookkeeping
+      amo_gen_r_q <= 1'b0;
+    end else begin
+      icache_first_q <= icache_first_d;  // icache return path
+      icache_rd_shift_q <= icache_rd_shift_d;
+      icache_rd_shift_user_q <= icache_rd_shift_user_d;
+      icache_rtrn_vld_q <= icache_rtrn_vld_d;
+      icache_rtrn_tid_q <= icache_rtrn_tid_d;
+      dcache_first_q <= dcache_first_d;  // dcache return path
+      dcache_rd_shift_q <= dcache_rd_shift_d;
+      dcache_rd_shift_user_q <= dcache_rd_shift_user_d;
+      dcache_rtrn_vld_q <= dcache_rtrn_vld_d;
+      dcache_rtrn_tid_q <= dcache_rtrn_tid_d;
+      dcache_rtrn_type_q <= dcache_rtrn_type_d;
+      dcache_rtrn_inv_q <= dcache_rtrn_inv_d;
+      amo_off_q <= amo_off_d;  // atomics bookkeeping
+      amo_gen_r_q <= amo_gen_r_d;
+    end
+  end
+
+
+  ///////////////////////////////////////////////////////
+  // axi protocol shim
+  ///////////////////////////////////////////////////////
+
+  axi_shim #(
+      .CVA6Cfg(CVA6Cfg),
+      .AxiNumWords(AxiNumWords),
+      .axi_req_t(axi_req_t),
+      .axi_rsp_t(axi_rsp_t)
+  ) i_axi_shim (
+      .clk_i(clk_i),
+      .rst_ni(rst_ni),
+      .rd_req_i(axi_rd_req),  // read request side
+      .rd_gnt_o(axi_rd_gnt),
+      .rd_addr_i(axi_rd_addr),
+      .rd_blen_i(axi_rd_blen),
+      .rd_size_i(axi_rd_size),
+      .rd_id_i(axi_rd_id_in),
+      .rd_lock_i(axi_rd_lock),
+      .rd_rdy_i(axi_rd_rdy),  // read response side
+      .rd_valid_o(axi_rd_valid),
+      .rd_last_o(axi_rd_last),
+      .rd_data_o(axi_rd_data),
+      .rd_user_o(axi_rd_user),
+      .rd_id_o(axi_rd_id_out),
+      .rd_exokay_o(axi_rd_exokay),
+      .wr_req_i(axi_wr_req),  // write request side
+      .wr_gnt_o(axi_wr_gnt),
+      .wr_addr_i(axi_wr_addr),
+      .wr_data_i(axi_wr_data),
+      .wr_user_i(axi_wr_user),
+      .wr_be_i(axi_wr_be),
+      .wr_blen_i(axi_wr_blen),
+      .wr_size_i(axi_wr_size),
+      .wr_id_i(axi_wr_id_in),
+      .wr_lock_i(axi_wr_lock),
+      .wr_atop_i(axi_wr_atop),
+      .wr_rdy_i(axi_wr_rdy),  // write response side
+      .wr_valid_o(axi_wr_valid),
+      .wr_id_o(axi_wr_id_out),
+      .wr_exokay_o(axi_wr_exokay),
+      .axi_req_o(axi_req_o),  // AXI port of the enclosing module
+      .axi_resp_i(axi_resp_i)
+  );
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+`endif
+  //pragma translate_on
+
+endmodule  // wt_axi_adapter
diff --git a/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv b/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv
new file mode 100644
index 0000000..ec09467
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv
@@ -0,0 +1,233 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 15.08.2018
+// Description: Ariane cache subsystem that is compatible with the OpenPiton
+//              coherent memory system.
+//
+//              Define PITON_ARIANE if you want to use this cache.
+//              Define DCACHE_TYPE if you want to use this cache
+//              with a standard 64 bit AXI interface instead of the OpenPiton
+//              L1.5 interface.
+
+
+module wt_cache_subsystem  // I$ (cva6_icache) + write-through D$ (wt_dcache) + memory-side adapter
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg    = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           NumPorts   = 4,  // number of dcache request ports, forwarded to wt_dcache
+    parameter type                   noc_req_t  = logic,  // memory-side request type (passed on as axi_req_t)
+    parameter type                   noc_resp_t = logic  // memory-side response type (passed on as axi_rsp_t)
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    // I$
+    input logic icache_en_i,  // enable icache (or bypass e.g: in debug mode)
+    input logic icache_flush_i,  // flush the icache, flush and kill have to be asserted together
+    output logic icache_miss_o,  // to performance counter
+    // address translation requests
+    input icache_areq_t icache_areq_i,  // to/from frontend
+    output icache_arsp_t icache_areq_o,
+    // data requests
+    input icache_dreq_t icache_dreq_i,  // to/from frontend
+    output icache_drsp_t icache_dreq_o,
+    // D$
+    // Cache management
+    input logic dcache_enable_i,  // from CSR
+    input logic dcache_flush_i,  // high until acknowledged
+    output logic dcache_flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
+    output logic dcache_miss_o,  // we missed on a ld/st
+    // For Performance Counter
+    output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
+    // AMO interface
+    input amo_req_t dcache_amo_req_i,
+    output amo_resp_t dcache_amo_resp_o,
+    // Request ports
+    input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i,  // to/from LSU
+    output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o,  // to/from LSU
+    // writebuffer status
+    output logic wbuffer_empty_o,
+    output logic wbuffer_not_ni_o,
+    // memory side
+    output noc_req_t noc_req_o,
+    input noc_resp_t noc_resp_i,
+    // Invalidations (only connected when the AXI adapter is used, see below)
+    input logic [63:0] inval_addr_i,
+    input logic inval_valid_i,
+    output logic inval_ready_o
+    // TODO: interrupt interface
+);
+
+  logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld;
+  wt_cache_pkg::icache_req_t  icache_adapter;  // icache -> adapter request
+  wt_cache_pkg::icache_rtrn_t adapter_icache;  // adapter -> icache return
+
+  // same handshake/request/return set for the dcache
+  logic dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld;
+  wt_cache_pkg::dcache_req_t  dcache_adapter;  // dcache -> adapter request
+  wt_cache_pkg::dcache_rtrn_t adapter_dcache;  // adapter -> dcache return
+
+  cva6_icache #(
+      // use ID 0 for icache reads
+      .CVA6Cfg(CVA6Cfg),
+      .RdTxId (0)
+  ) i_cva6_icache (
+      .clk_i         (clk_i),
+      .rst_ni        (rst_ni),
+      .flush_i       (icache_flush_i),
+      .en_i          (icache_en_i),
+      .miss_o        (icache_miss_o),
+      .areq_i        (icache_areq_i),
+      .areq_o        (icache_areq_o),
+      .dreq_i        (icache_dreq_i),
+      .dreq_o        (icache_dreq_o),
+      .mem_rtrn_vld_i(adapter_icache_rtrn_vld),
+      .mem_rtrn_i    (adapter_icache),
+      .mem_data_req_o(icache_adapter_data_req),
+      .mem_data_ack_i(adapter_icache_data_ack),
+      .mem_data_o    (icache_adapter)
+  );
+
+  // Note:
+  // Ports 0 .. NumPorts-2 go to the read controllers inside wt_dcache (or are tied off there).
+  // Port NumPorts-1 is write only and goes into the merging write buffer.
+  // NumPorts is forwarded so that the request port arrays of both modules always have equal size.
+  wt_dcache #(
+      .CVA6Cfg  (CVA6Cfg),
+      .NumPorts (NumPorts),
+      // use ID 1 for dcache reads and amos. note that the writebuffer
+      // uses all IDs up to DCACHE_MAX_TX-1 for write transactions.
+      .RdAmoTxId(1)
+  ) i_wt_dcache (
+      .clk_i           (clk_i),
+      .rst_ni          (rst_ni),
+      .enable_i        (dcache_enable_i),
+      .flush_i         (dcache_flush_i),
+      .flush_ack_o     (dcache_flush_ack_o),
+      .miss_o          (dcache_miss_o),
+      .wbuffer_empty_o (wbuffer_empty_o),
+      .wbuffer_not_ni_o(wbuffer_not_ni_o),
+      .amo_req_i       (dcache_amo_req_i),
+      .amo_resp_o      (dcache_amo_resp_o),
+      .req_ports_i     (dcache_req_ports_i),
+      .req_ports_o     (dcache_req_ports_o),
+      .miss_vld_bits_o (miss_vld_bits_o),
+      .mem_rtrn_vld_i  (adapter_dcache_rtrn_vld),
+      .mem_rtrn_i      (adapter_dcache),
+      .mem_data_req_o  (dcache_adapter_data_req),
+      .mem_data_ack_i  (adapter_dcache_data_ack),
+      .mem_data_o      (dcache_adapter)
+  );
+
+
+  ///////////////////////////////////////////////////////
+  // memory plumbing, either use 64bit AXI port or native
+  // L15 cache interface (derived from OpenSPARC CCX).
+  // Selected at compile time with the PITON_ARIANE define.
+  ///////////////////////////////////////////////////////
+
+`ifdef PITON_ARIANE
+  wt_l15_adapter #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_adapter (
+      .clk_i            (clk_i),
+      .rst_ni           (rst_ni),
+      .icache_data_req_i(icache_adapter_data_req),
+      .icache_data_ack_o(adapter_icache_data_ack),
+      .icache_data_i    (icache_adapter),
+      .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
+      .icache_rtrn_o    (adapter_icache),
+      .dcache_data_req_i(dcache_adapter_data_req),
+      .dcache_data_ack_o(adapter_dcache_data_ack),
+      .dcache_data_i    (dcache_adapter),
+      .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
+      .dcache_rtrn_o    (adapter_dcache),
+      .l15_req_o        (noc_req_o),
+      .l15_rtrn_i       (noc_resp_i)
+  );  // NOTE(review): inval_* ports are not connected on this path, so inval_ready_o is left undriven here
+`else
+  wt_axi_adapter #(
+      .CVA6Cfg  (CVA6Cfg),
+      .axi_req_t(noc_req_t),
+      .axi_rsp_t(noc_resp_t)
+  ) i_adapter (
+      .clk_i            (clk_i),
+      .rst_ni           (rst_ni),
+      .icache_data_req_i(icache_adapter_data_req),
+      .icache_data_ack_o(adapter_icache_data_ack),
+      .icache_data_i    (icache_adapter),
+      .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
+      .icache_rtrn_o    (adapter_icache),
+      .dcache_data_req_i(dcache_adapter_data_req),
+      .dcache_data_ack_o(adapter_dcache_data_ack),
+      .dcache_data_i    (dcache_adapter),
+      .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
+      .dcache_rtrn_o    (adapter_dcache),
+      .axi_req_o        (noc_req_o),
+      .axi_resp_i       (noc_resp_i),
+      .inval_addr_i     (inval_addr_i),
+      .inval_valid_i    (inval_valid_i),
+      .inval_ready_o    (inval_ready_o)
+  );
+`endif
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+  // warn when a valid instruction fetch response reduces to X ($warning takes no finish number)
+  a_invalid_instruction_fetch :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
+  else
+    $warning(
+        "[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X",
+        icache_dreq_o.vaddr,
+        icache_dreq_o.data
+    );
+
+  // per byte lane of the store port (NumPorts-1): warn when an enabled byte of a request reduces to X
+  for (genvar j = 0; j < riscv::XLEN / 8; j++) begin : gen_invalid_write_assertion
+    a_invalid_write_data :
+    assert property (
+      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX))
+    else
+      $warning(
+          "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X, databe=%016X",
+          {
+            dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
+          },
+          dcache_req_ports_i[NumPorts-1].data_be,
+          dcache_req_ports_i[NumPorts-1].data_wdata,
+          dcache_req_ports_i[NumPorts-1].data_be & dcache_req_ports_i[NumPorts-1].data_wdata
+      );
+  end
+
+
+  // per read port (0 .. NumPorts-2): warn when valid, non-killed read data reduces to X
+  for (genvar j = 0; j < NumPorts - 1; j++) begin : gen_assertion
+    a_invalid_read_data :
+    assert property (
+      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
+    else
+      $warning(
+          "[l1 dcache] reading invalid data on port %01d: data=%016X",
+          j,
+          dcache_req_ports_o[j].data_rdata
+      );
+  end
+`endif
+  //pragma translate_on
+
+
+endmodule  // wt_cache_subsystem
diff --git a/test/type_param/core/cache_subsystem/wt_dcache.sv b/test/type_param/core/cache_subsystem/wt_dcache.sv
new file mode 100644
index 0000000..af672d8
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_dcache.sv
@@ -0,0 +1,360 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 13.09.2018
+// Description: Write-Through Data cache that is compatible with openpiton.
+
+
+module wt_dcache
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned NumPorts = 4,  // request ports: NumPorts-1 read ports plus one store port
+    // Transaction ID shared by read and AMO requests.
+    // The write buffer uses all IDs up to DCACHE_MAX_TX-1 for its write transactions.
+    parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni, // Asynchronous reset active low
+
+    // Cache management
+    input logic enable_i,  // from CSR
+    input logic flush_i,  // high until acknowledged
+    output logic flush_ack_o,  // single cycle acknowledge once the cache is flushed
+    output logic miss_o,  // we missed on a ld/st
+    output logic wbuffer_empty_o,
+    output logic wbuffer_not_ni_o,
+
+    // AMO interface
+    input  amo_req_t  amo_req_i,
+    output amo_resp_t amo_resp_o,
+
+    // Request ports
+    input  dcache_req_i_t [NumPorts-1:0] req_ports_i,
+    output dcache_req_o_t [NumPorts-1:0] req_ports_o,
+
+    output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
+
+    input  logic         mem_rtrn_vld_i,
+    input  dcache_rtrn_t mem_rtrn_i,
+    output logic         mem_data_req_o,
+    input  logic         mem_data_ack_i,
+    output dcache_req_t  mem_data_o
+);
+
+  // cache enable, produced by the miss unit and consumed by the controllers and the write buffer
+  logic cache_en;
+
+  // miss unit / write buffer -> memory arrays (cache line and single word write ports)
+  logic wr_cl_vld;
+  logic wr_cl_nc;
+  logic [DCACHE_SET_ASSOC-1:0] wr_cl_we;
+  logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx;
+  logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
+  logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data;
+  logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user;
+  logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be;
+  logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits;
+  logic [DCACHE_SET_ASSOC-1:0] wr_req;
+  logic wr_ack;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx;
+  logic [DCACHE_OFFSET_WIDTH-1:0] wr_off;
+  riscv::xlen_t wr_data;
+  logic [(riscv::XLEN/8)-1:0] wr_data_be;
+  logic [DCACHE_USER_WIDTH-1:0] wr_user;
+
+  // per-port miss interface between the controllers / write buffer and the miss unit
+  logic [NumPorts-1:0] miss_req;
+  logic [NumPorts-1:0] miss_ack;
+  logic [NumPorts-1:0] miss_nc;
+  logic [NumPorts-1:0] miss_we;
+  logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata;
+  logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser;
+  logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr;
+  logic [NumPorts-1:0][2:0] miss_size;
+  logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id;
+  logic [NumPorts-1:0] miss_replay;
+  logic [NumPorts-1:0] miss_rtrn_vld;
+  logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id;
+
+  // per-port read interface between the controllers / write buffer and the memory arrays
+  logic [NumPorts-1:0] rd_prio;
+  logic [NumPorts-1:0] rd_tag_only;
+  logic [NumPorts-1:0] rd_req;
+  logic [NumPorts-1:0] rd_ack;
+  logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag;
+  logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
+  logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
+  riscv::xlen_t rd_data;
+  logic [DCACHE_USER_WIDTH-1:0] rd_user;
+  logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits;
+  logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh;
+
+  // write buffer -> miss unit: transaction addresses and valid flags
+  logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr;
+  logic [DCACHE_MAX_TX-1:0] tx_vld;
+
+  // write buffer -> memory arrays: buffer contents used for forwarding
+  wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data;
+
+
+  ///////////////////////////////////////////////////////
+  // miss handling unit
+  ///////////////////////////////////////////////////////
+
+  wt_dcache_missunit #(
+      .CVA6Cfg(CVA6Cfg),
+      .AmoTxId(RdAmoTxId),
+      .NumPorts(NumPorts)
+  ) i_wt_dcache_missunit (
+      .clk_i(clk_i),
+      .rst_ni(rst_ni),
+      .enable_i(enable_i),
+      .flush_i(flush_i),
+      .flush_ack_o(flush_ack_o),
+      .miss_o(miss_o),
+      .wbuffer_empty_i(wbuffer_empty_o),
+      .cache_en_o(cache_en),
+      // atomics
+      .amo_req_i(amo_req_i),
+      .amo_resp_o(amo_resp_o),
+      // per-port miss interface
+      .miss_req_i(miss_req),
+      .miss_ack_o(miss_ack),
+      .miss_nc_i(miss_nc),
+      .miss_we_i(miss_we),
+      .miss_wdata_i(miss_wdata),
+      .miss_wuser_i(miss_wuser),
+      .miss_paddr_i(miss_paddr),
+      .miss_vld_bits_i(miss_vld_bits_o),
+      .miss_size_i(miss_size),
+      .miss_id_i(miss_id),
+      .miss_replay_o(miss_replay),
+      .miss_rtrn_vld_o(miss_rtrn_vld),
+      .miss_rtrn_id_o(miss_rtrn_id),
+      // transaction info coming from the write buffer
+      .tx_paddr_i(tx_paddr),
+      .tx_vld_i(tx_vld),
+      // cache line write port of the memory arrays
+      .wr_cl_vld_o(wr_cl_vld),
+      .wr_cl_nc_o(wr_cl_nc),
+      .wr_cl_we_o(wr_cl_we),
+      .wr_cl_tag_o(wr_cl_tag),
+      .wr_cl_idx_o(wr_cl_idx),
+      .wr_cl_off_o(wr_cl_off),
+      .wr_cl_data_o(wr_cl_data),
+      .wr_cl_user_o(wr_cl_user),
+      .wr_cl_data_be_o(wr_cl_data_be),
+      .wr_vld_bits_o(wr_vld_bits),
+      // memory-side request/return channel of this module
+      .mem_rtrn_vld_i(mem_rtrn_vld_i),
+      .mem_rtrn_i(mem_rtrn_i),
+      .mem_data_req_o(mem_data_req_o),
+      .mem_data_ack_i(mem_data_ack_i),
+      .mem_data_o(mem_data_o)
+  );
+
+  ///////////////////////////////////////////////////////
+  // read controllers (LD unit and PTW/MMU)
+  ///////////////////////////////////////////////////////
+
+  // port 0 is used by the MMU, port 1 by READ access requests
+  for (genvar p = 0; p < NumPorts - 1; p++) begin : gen_rd_ports
+    // a controller is built for port 1 always, for port 0 with an MMU, for port 2 with an accelerator
+    if ((p == 1) || (p == 0 && MMU_PRESENT) || (p == 2 && CVA6Cfg.EnableAccelerator)) begin
+      assign rd_prio[p] = 1'b1;  // instantiated read ports are high priority
+      wt_dcache_ctrl #(
+          .CVA6Cfg(CVA6Cfg),
+          .RdTxId(RdAmoTxId)
+      ) i_wt_dcache_ctrl (
+          .clk_i(clk_i),
+          .rst_ni(rst_ni),
+          .cache_en_i(cache_en),
+          // request port from/to the core
+          .req_port_i(req_ports_i[p]),
+          .req_port_o(req_ports_o[p]),
+          // miss unit interface
+          .miss_req_o(miss_req[p]),
+          .miss_ack_i(miss_ack[p]),
+          .miss_we_o(miss_we[p]),
+          .miss_wdata_o(miss_wdata[p]),
+          .miss_wuser_o(miss_wuser[p]),
+          .miss_vld_bits_o(miss_vld_bits_o[p]),
+          .miss_paddr_o(miss_paddr[p]),
+          .miss_nc_o(miss_nc[p]),
+          .miss_size_o(miss_size[p]),
+          .miss_id_o(miss_id[p]),
+          .miss_replay_i(miss_replay[p]),
+          .miss_rtrn_vld_i(miss_rtrn_vld[p]),
+          // used to detect readout mux collisions
+          .wr_cl_vld_i(wr_cl_vld),
+          // read port of the memory arrays
+          .rd_tag_o(rd_tag[p]),
+          .rd_idx_o(rd_idx[p]),
+          .rd_off_o(rd_off[p]),
+          .rd_req_o(rd_req[p]),
+          .rd_tag_only_o(rd_tag_only[p]),
+          .rd_ack_i(rd_ack[p]),
+          .rd_data_i(rd_data),
+          .rd_user_i(rd_user),
+          .rd_vld_bits_i(rd_vld_bits),
+          .rd_hit_oh_i(rd_hit_oh)
+      );
+    end else begin
+      assign rd_prio[p] = 1'b0;  // no controller for this port: every output is tied to zero
+      assign req_ports_o[p] = '0;
+      assign miss_req[p] = 1'b0;
+      assign miss_we[p] = 1'b0;
+      assign miss_wdata[p] = '0;
+      assign miss_wuser[p] = '0;
+      assign miss_vld_bits_o[p] = '0;
+      assign miss_paddr[p] = '0;
+      assign miss_nc[p] = 1'b0;
+      assign miss_size[p] = '0;
+      assign miss_id[p] = '0;
+      assign rd_tag[p] = '0;
+      assign rd_idx[p] = '0;
+      assign rd_off[p] = '0;
+      assign rd_req[p] = 1'b0;
+      assign rd_tag_only[p] = 1'b0;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // store unit controller
+  ///////////////////////////////////////////////////////
+
+  // the write buffer's read port (last port) is low priority
+  assign rd_prio[NumPorts-1] = 1'b0;
+
+  wt_dcache_wbuffer #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_wt_dcache_wbuffer (
+      .clk_i(clk_i),
+      .rst_ni(rst_ni),
+      .empty_o(wbuffer_empty_o),
+      .not_ni_o(wbuffer_not_ni_o),
+      // TODO: fix this
+      .cache_en_i(cache_en),
+      // (alternative kept for reference: tie cache_en_i to '0)
+      // store request port from/to the core (last port)
+      .req_port_i(req_ports_i[NumPorts-1]),
+      .req_port_o(req_ports_o[NumPorts-1]),
+      // miss unit interface
+      .miss_req_o(miss_req[NumPorts-1]),
+      .miss_ack_i(miss_ack[NumPorts-1]),
+      .miss_we_o(miss_we[NumPorts-1]),
+      .miss_wdata_o(miss_wdata[NumPorts-1]),
+      .miss_wuser_o(miss_wuser[NumPorts-1]),
+      .miss_vld_bits_o(miss_vld_bits_o[NumPorts-1]),
+      .miss_paddr_o(miss_paddr[NumPorts-1]),
+      .miss_nc_o(miss_nc[NumPorts-1]),
+      .miss_size_o(miss_size[NumPorts-1]),
+      .miss_id_o(miss_id[NumPorts-1]),
+      .miss_rtrn_vld_i(miss_rtrn_vld[NumPorts-1]),
+      .miss_rtrn_id_i(miss_rtrn_id),
+      // read port of the memory arrays
+      .rd_tag_o(rd_tag[NumPorts-1]),
+      .rd_idx_o(rd_idx[NumPorts-1]),
+      .rd_off_o(rd_off[NumPorts-1]),
+      .rd_req_o(rd_req[NumPorts-1]),
+      .rd_tag_only_o(rd_tag_only[NumPorts-1]),
+      .rd_ack_i(rd_ack[NumPorts-1]),
+      .rd_data_i(rd_data),
+      .rd_vld_bits_i(rd_vld_bits),
+      .rd_hit_oh_i(rd_hit_oh),
+      // incoming invalidations/cache refills
+      .wr_cl_vld_i(wr_cl_vld),
+      .wr_cl_idx_i(wr_cl_idx),
+      // single word write port of the memory arrays
+      .wr_req_o(wr_req),
+      .wr_ack_i(wr_ack),
+      .wr_idx_o(wr_idx),
+      .wr_off_o(wr_off),
+      .wr_data_o(wr_data),
+      .wr_user_o(wr_user),
+      .wr_data_be_o(wr_data_be),
+      // buffer contents for forwarding, transaction info for the miss unit
+      .wbuffer_data_o(wbuffer_data),
+      .tx_paddr_o(tx_paddr),
+      .tx_vld_o(tx_vld)
+  );
+
+  ///////////////////////////////////////////////////////
+  // memory arrays, arbitration and tag comparison
+  ///////////////////////////////////////////////////////
+
+  wt_dcache_mem #(
+      .CVA6Cfg(CVA6Cfg),
+      .NumPorts(NumPorts)
+  ) i_wt_dcache_mem (
+      .clk_i(clk_i),
+      .rst_ni(rst_ni),
+      // read ports (one per request port)
+      .rd_prio_i(rd_prio),
+      .rd_tag_i(rd_tag),
+      .rd_idx_i(rd_idx),
+      .rd_off_i(rd_off),
+      .rd_req_i(rd_req),
+      .rd_tag_only_i(rd_tag_only),
+      .rd_ack_o(rd_ack),
+      .rd_vld_bits_o(rd_vld_bits),
+      .rd_hit_oh_o(rd_hit_oh),
+      .rd_data_o(rd_data),
+      .rd_user_o(rd_user),
+      // cache line write port (driven by the miss unit)
+      .wr_cl_vld_i(wr_cl_vld),
+      .wr_cl_nc_i(wr_cl_nc),
+      .wr_cl_we_i(wr_cl_we),
+      .wr_cl_tag_i(wr_cl_tag),
+      .wr_cl_idx_i(wr_cl_idx),
+      .wr_cl_off_i(wr_cl_off),
+      .wr_cl_data_i(wr_cl_data),
+      .wr_cl_user_i(wr_cl_user),
+      .wr_cl_data_be_i(wr_cl_data_be),
+      .wr_vld_bits_i(wr_vld_bits),
+      // single word write port (driven by the write buffer)
+      .wr_req_i(wr_req),
+      .wr_ack_o(wr_ack),
+      .wr_idx_i(wr_idx),
+      .wr_off_i(wr_off),
+      .wr_data_i(wr_data),
+      .wr_user_i(wr_user),
+      .wr_data_be_i(wr_data_be),
+      // write buffer contents for forwarding
+      .wbuffer_data_i(wbuffer_data)
+  );
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  // flush/write-buffer consistency and parameter sanity checks
+
+
+  //pragma translate_off
+`ifndef VERILATOR
+  flush :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o)
+  else $fatal(1, "[l1 dcache] flushed cache implies flushed wbuffer");
+
+  initial begin
+    // reject unsupported parameterizations at time zero
+    assert (DCACHE_INDEX_WIDTH <= 12)
+    else $fatal(1, "[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache
diff --git a/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv b/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv
new file mode 100644
index 0000000..b5973df
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv
@@ -0,0 +1,299 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 13.09.2018
+// Description: DCache controller for read port
+
+
+module wt_dcache_ctrl  // per-read-port FSM: looks up the cache memory and falls back to the miss handler
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,  // passed to is_inside_cacheable_regions()
+    parameter logic [CACHE_ID_WIDTH-1:0]  RdTxId    = 1  // constant value driven onto miss_id_o
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic cache_en_i,
+    // core request ports
+    input dcache_req_i_t req_port_i,
+    output dcache_req_o_t req_port_o,
+    // interface to miss handler
+    output logic miss_req_o,
+    input logic miss_ack_i,
+    output logic miss_we_o,  // unused (set to 0)
+    output riscv::xlen_t miss_wdata_o,  // unused (set to 0)
+    output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,  // unused (set to 0)
+    output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,  // valid bits at the missed index
+    output logic [riscv::PLEN-1:0] miss_paddr_o,
+    output logic miss_nc_o,  // request to I/O space
+    output logic [2:0] miss_size_o,  // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline
+    output logic [CACHE_ID_WIDTH-1:0] miss_id_o,  // set to constant ID
+    input logic miss_replay_i,  // request collided with pending miss - have to replay the request
+    input  logic                            miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
+    // used to detect readout mux collisions
+    input logic wr_cl_vld_i,
+    // cache memory interface
+    output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o,  // tag in - comes one cycle later
+    output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
+    output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
+    output logic rd_req_o,  // read the word at offset off_i[:3] in all ways
+    output logic rd_tag_only_o,  // set to zero here
+    input logic rd_ack_i,
+    input riscv::xlen_t rd_data_i,
+    input logic [DCACHE_USER_WIDTH-1:0] rd_user_i,
+    input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
+    input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
+);
+
+  // controller FSM
+  typedef enum logic [2:0] {
+    IDLE,  // wait for req_port_i.data_req
+    READ,  // lookup issued; wait for tag_valid, then decide hit / miss / replay
+    MISS_REQ,  // request pending at the miss handler
+    MISS_WAIT,  // miss acked; wait for miss_rtrn_vld_i
+    KILL_MISS,  // request killed after the miss was acked; drain until miss_rtrn_vld_i
+    KILL_MISS_ACK,  // request killed before the miss handler acked or replayed
+    REPLAY_REQ,  // re-issue the read until rd_ack_i
+    REPLAY_READ  // same handling as READ, but the tag is not saved again
+  } state_e;
+  state_e state_d, state_q;
+
+  logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q;
+  logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q;
+  logic [DCACHE_TID_WIDTH-1:0] id_d, id_q;
+  logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q;
+  logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q;
+  logic [1:0] data_size_d, data_size_q;
+
+  ///////////////////////////////////////////////////////
+  // misc
+  ///////////////////////////////////////////////////////
+
+  // map address to tag/idx/offset and save
+  assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q;  // sample valid bits the cycle after a read request
+  assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q;
+  assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q;
+  assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0]                  : address_off_q;
+  assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q;
+  assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q;
+  assign rd_tag_o = address_tag_d;  // the _d values are forwarded, i.e. no register stage towards the memory
+  assign rd_idx_o = address_idx_d;
+  assign rd_off_o = address_off_d;
+
+  assign req_port_o.data_rdata = rd_data_i;
+  assign req_port_o.data_ruser = rd_user_i;
+  assign req_port_o.data_rid = id_q;
+
+  // to miss unit
+  assign miss_vld_bits_o = vld_data_q;
+  assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q};
+  assign miss_size_o = (miss_nc_o) ? {1'b0, data_size_q} : 3'b111;  // NC: size of the request, otherwise 111 (cacheline)
+
+  // noncacheable if request goes to I/O space, or if cache is disabled
+  assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions(
+      CVA6Cfg,
+      {{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}}
+  ));
+
+
+  assign miss_we_o = '0;
+  assign miss_wdata_o = '0;
+  assign miss_wuser_o = '0;
+  assign miss_id_o = RdTxId;
+  assign rd_req_d = rd_req_o;  // delayed copies of the read handshake, used in vld_data_d and the replay check
+  assign rd_ack_d = rd_ack_i;
+  assign rd_tag_only_o = '0;
+
+  ///////////////////////////////////////////////////////
+  // main control logic
+  ///////////////////////////////////////////////////////
+
+  always_comb begin : p_fsm
+    // default assignment
+    state_d                = state_q;
+    save_tag               = 1'b0;
+    rd_req_o               = 1'b0;
+    miss_req_o             = 1'b0;
+    req_port_o.data_rvalid = 1'b0;
+    req_port_o.data_gnt    = 1'b0;
+
+    // interfaces
+    unique case (state_q)
+      //////////////////////////////////
+      // wait for an incoming request
+      IDLE: begin
+        if (req_port_i.data_req) begin
+          rd_req_o = 1'b1;
+          // if read ack then ack the `req_port_o`, and goto `READ` state
+          if (rd_ack_i) begin
+            state_d = READ;
+            req_port_o.data_gnt = 1'b1;
+          end
+        end
+      end
+      //////////////////////////////////
+      // check whether we have a hit
+      // in case the cache is disabled,
+      // or in case the address is NC, we
+      // reuse the miss mechanism to handle
+      // the request
+      READ, REPLAY_READ: begin
+        // speculatively request cache line
+        rd_req_o = 1'b1;
+
+        // kill -> go back to IDLE
+        if (req_port_i.kill_req) begin
+          state_d = IDLE;
+          req_port_o.data_rvalid = 1'b1;  // rvalid is raised for the killed request as well
+        end else if (req_port_i.tag_valid | state_q == REPLAY_READ) begin
+          save_tag = (state_q != REPLAY_READ);  // on a replay address_tag_q keeps the previously saved tag
+          if (wr_cl_vld_i || !rd_ack_q) begin  // cacheline write active, or last cycle's read was not acked
+            state_d = REPLAY_REQ;
+            // we've got a hit
+          end else if ((|rd_hit_oh_i) && cache_en_i) begin
+            state_d = IDLE;
+            req_port_o.data_rvalid = 1'b1;
+            // we can handle another request
+            if (rd_ack_i && req_port_i.data_req) begin
+              state_d = READ;
+              req_port_o.data_gnt = 1'b1;
+            end
+            // we've got a miss
+          end else begin
+            state_d = MISS_REQ;
+          end
+        end
+      end
+      //////////////////////////////////
+      // issue request
+      MISS_REQ: begin
+        miss_req_o = 1'b1;
+
+        if (req_port_i.kill_req) begin
+          req_port_o.data_rvalid = 1'b1;
+          if (miss_ack_i) begin
+            state_d = KILL_MISS;  // already acked: wait for the return before going idle
+          end else begin
+            state_d = KILL_MISS_ACK;  // not yet acked: keep requesting until ack or replay
+          end
+        end else if (miss_replay_i) begin
+          state_d = REPLAY_REQ;
+        end else if (miss_ack_i) begin
+          state_d = MISS_WAIT;
+        end
+      end
+      //////////////////////////////////
+      // wait until the memory transaction
+      // returns.
+      MISS_WAIT: begin
+        if (req_port_i.kill_req) begin
+          req_port_o.data_rvalid = 1'b1;
+          if (miss_rtrn_vld_i) begin
+            state_d = IDLE;  // kill and return coincide: nothing left to drain
+          end else begin
+            state_d = KILL_MISS;
+          end
+        end else if (miss_rtrn_vld_i) begin
+          state_d = IDLE;
+          req_port_o.data_rvalid = 1'b1;
+        end
+      end
+      //////////////////////////////////
+      // replay read request
+      REPLAY_REQ: begin
+        rd_req_o = 1'b1;
+        if (req_port_i.kill_req) begin
+          req_port_o.data_rvalid = 1'b1;
+          state_d = IDLE;
+        end else if (rd_ack_i) begin
+          state_d = REPLAY_READ;
+        end
+      end
+      //////////////////////////////////
+      KILL_MISS_ACK: begin
+        miss_req_o = 1'b1;
+        // in this case the miss handler did not issue
+        // a transaction and we can safely go to idle
+        if (miss_replay_i) begin
+          state_d = IDLE;
+        end else if (miss_ack_i) begin
+          state_d = KILL_MISS;
+        end
+      end
+      //////////////////////////////////
+      // killed miss,
+      // wait until miss unit responds and
+      // go back to idle
+      KILL_MISS: begin
+        if (miss_rtrn_vld_i) begin
+          state_d = IDLE;
+        end
+      end
+      default: begin
+        // we should never get here
+        state_d = IDLE;
+      end
+    endcase  // state_q
+  end
+
+  ///////////////////////////////////////////////////////
+  // ff's
+  ///////////////////////////////////////////////////////
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      state_q       <= IDLE;
+      address_tag_q <= '0;
+      address_idx_q <= '0;
+      address_off_q <= '0;
+      id_q          <= '0;
+      vld_data_q    <= '0;
+      data_size_q   <= '0;
+      rd_req_q      <= '0;
+      rd_ack_q      <= '0;
+    end else begin
+      state_q       <= state_d;
+      address_tag_q <= address_tag_d;
+      address_idx_q <= address_idx_d;
+      address_off_q <= address_off_d;
+      id_q          <= id_d;
+      vld_data_q    <= vld_data_d;
+      data_size_q   <= data_size_d;
+      rd_req_q      <= rd_req_d;
+      rd_ack_q      <= rd_ack_d;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+  // NOTE(review): the antecedent is `!rd_ack_i` (cycle without read ack) - confirm this polarity is intended
+  hot1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0(
+      rd_hit_oh_i
+  ))
+  else $fatal(1, "[l1 dcache ctrl] rd_hit_oh_i signal must be hot1");
+
+  initial begin
+    // assert wrong parameterizations
+    assert (DCACHE_INDEX_WIDTH <= 12)
+    else
+      $fatal(1, "[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages");
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache_ctrl
diff --git a/test/type_param/core/cache_subsystem/wt_dcache_mem.sv b/test/type_param/core/cache_subsystem/wt_dcache_mem.sv
new file mode 100644
index 0000000..b2b41c3
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_dcache_mem.sv
@@ -0,0 +1,428 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 13.09.2018
+// Description: Memory arrays, arbiter and tag comparison for WT dcache.
+//
+//
+// Notes: 1) all ports can trigger a readout of all ways, and the way where the tag hits is selected
+//
+//        2) only port0 can write full cache lines. higher ports are read only. also, port0 can only read the tag array,
+//           and does not trigger a cache line readout.
+//
+//        3) the single word write port is a separate port without access to the tag memory.
+//           these single word writes can interleave with read operations if they go to different
+//           cacheline offsets, since each word offset is placed into a different SRAM bank.
+//
+//        4) Read ports with same priority are RR arbitrated. but high prio ports (rd_prio_i[port_nr] = 1'b1) will stall
+//           low prio ports (rd_prio_i[port_nr] = 1'b0)
+
+
+module wt_dcache_mem  // data/tag SRAM arrays, read-port arbitration, tag compare and wbuffer forwarding
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg  = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           NumPorts = 3
+) (
+    input logic clk_i,
+    input logic rst_ni,
+
+    // ports
+    input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i,  // tag in - comes one cycle later
+    input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i,
+    input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i,
+    input logic [NumPorts-1:0] rd_req_i,  // read the word at offset off_i[:3] in all ways
+    input  logic  [NumPorts-1:0]                              rd_tag_only_i,      // only do a tag/valid lookup, no access to data arrays
+    input logic [NumPorts-1:0] rd_prio_i,  // 0: low prio, 1: high prio
+    output logic [NumPorts-1:0] rd_ack_o,
+    output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o,
+    output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o,
+    output riscv::xlen_t rd_data_o,
+    output logic [DCACHE_USER_WIDTH-1:0] rd_user_o,
+
+    // only available on port 0, uses address signals of port 0
+    input logic                              wr_cl_vld_i,
+    input logic                              wr_cl_nc_i,       // noncacheable access
+    input logic [      DCACHE_SET_ASSOC-1:0] wr_cl_we_i,       // writes a full cacheline
+    input logic [      DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i,
+    input logic [   DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
+    input logic [   DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i,
+    input logic [     DCACHE_LINE_WIDTH-1:0] wr_cl_data_i,
+    input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i,
+    input logic [   DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i,
+    input logic [      DCACHE_SET_ASSOC-1:0] wr_vld_bits_i,
+
+    // separate port for single word write, no tag access
+    input logic [DCACHE_SET_ASSOC-1:0] wr_req_i,  // write a single word to offset off_i[:3]
+    output logic wr_ack_o,
+    input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i,
+    input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i,
+    input riscv::xlen_t wr_data_i,
+    input logic [DCACHE_USER_WIDTH-1:0] wr_user_i,
+    input logic [(riscv::XLEN/8)-1:0] wr_data_be_i,
+
+    // forwarded wbuffer
+    input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i
+);
+
+  // functions
+  function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh(  // binary bank number -> one-hot bank mask
+      input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in);
+    logic [DCACHE_NUM_BANKS-1:0] out;
+    out     = '0;
+    out[in] = 1'b1;
+    return out;
+  endfunction
+
+  // number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter
+  // is not needed. Therefore, increment it by one to avoid reverse range select during elaboration.
+  localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? $clog2(
+      CVA6Cfg.AxiDataWidth / 8
+  ) + 1 : $clog2(
+      CVA6Cfg.AxiDataWidth / 8
+  );
+
+  logic [DCACHE_NUM_BANKS-1:0]                                               bank_req;
+  logic [DCACHE_NUM_BANKS-1:0]                                               bank_we;
+  logic [DCACHE_NUM_BANKS-1:0][   DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be;
+  logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0]                      bank_idx;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q;
+  logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q;
+
+  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata;  //
+  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata;  //
+  logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl;  // selected word from each cacheline
+  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser;  //
+  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser;  //
+  logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0]                      ruser_cl;          // selected word from each cacheline
+
+  logic [DCACHE_TAG_WIDTH-1:0] rd_tag;
+  logic [DCACHE_SET_ASSOC-1:0] vld_req;  // bit enable for valid regs
+  logic vld_we;  // valid bits write enable
+  logic [DCACHE_SET_ASSOC-1:0] vld_wdata;  // valid bits to write
+  logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0]            tag_rdata;                    // these are the tags coming from the tagmem
+  logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr;  // valid bit
+
+  logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q;
+
+  logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh;
+  logic [  (riscv::XLEN/8)-1:0] wbuffer_be;
+  riscv::xlen_t wbuffer_rdata, rdata;
+  logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser;
+  logic [riscv::PLEN-1:0] wbuffer_cmp_addr;
+
+  logic cmp_en_d, cmp_en_q;
+  logic rd_acked;
+  logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio;
+
+  ///////////////////////////////////////////////////////
+  // arbiter
+  ///////////////////////////////////////////////////////
+
+  // Priority is highest for lowest read port index
+  //
+  // SRAM bank mapping:
+  //
+  // Bank 0                   Bank 1
+  // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] ..
+
+  // byte enable mapping
+  for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_bank
+    for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : gen_bank_way
+      assign bank_be[k][j]   = (wr_cl_we_i[j] & wr_cl_vld_i)  ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] :
+                               (wr_req_i[j]   & wr_ack_o)     ? wr_data_be_i              :
+                                                                '0;
+      assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ?  wr_cl_data_i[k*riscv::XLEN +: riscv::XLEN] :
+                                                                 wr_data_i;
+      assign bank_wuser[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ?  wr_cl_user_i[k*DCACHE_USER_WIDTH +: DCACHE_USER_WIDTH] :
+                                                                 wr_user_i;
+    end
+  end
+
+  assign vld_wdata     = wr_vld_bits_i;
+  assign vld_addr      = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
+  assign rd_tag        = rd_tag_i[vld_sel_q];  //delayed by one cycle
+  assign bank_off_d    = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
+  assign bank_idx_d    = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
+  assign vld_req       = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0;
+
+
+  // priority masking
+  // disable low prio requests when any of the high prio reqs is present
+  assign rd_req_prio   = rd_req_i & rd_prio_i;
+  assign rd_req_masked = (|rd_req_prio) ? rd_req_prio : rd_req_i;
+
+  logic rd_req;  // req_o of the arbiter below
+  rr_arb_tree #(
+      .NumIn    (NumPorts),
+      .DataWidth(1)
+  ) i_rr_arb_tree (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (rd_req_masked),
+      .gnt_o  (rd_ack_o),
+      .data_i ('0),
+      .gnt_i  (~wr_cl_vld_i),
+      .req_o  (rd_req),
+      .data_o (),
+      .idx_o  (vld_sel_d)
+  );
+
+  assign rd_acked = rd_req & ~wr_cl_vld_i;  // a read only proceeds while no cacheline write is valid
+
+  always_comb begin : p_bank_req
+    vld_we   = wr_cl_vld_i;
+    bank_req = '0;
+    wr_ack_o = '0;
+    bank_we  = '0;
+    bank_idx = '{default: wr_idx_i};
+    // flag read ports whose word offset matches the word offset of the single-word write
+    for (int k = 0; k < NumPorts; k++) begin
+      bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
+    end
+    // a full cacheline write claims every bank; otherwise a read and a word write may share the cycle
+    if (wr_cl_vld_i & |wr_cl_we_i) begin
+      bank_req = '1;
+      bank_we  = '1;
+      bank_idx = '{default: wr_cl_idx_i};
+    end else begin
+      if (rd_acked) begin
+        if (!rd_tag_only_i[vld_sel_d]) begin
+          bank_req =
+              dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
+          bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d];
+        end
+      end
+
+      if (|wr_req_i) begin
+        if (rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin
+          wr_ack_o = 1'b1;
+          bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
+          bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
+        end
+      end
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // tag comparison, hit generation, readout muxes
+  ///////////////////////////////////////////////////////
+
+  logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off;
+  logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off;  // NOTE(review): not referenced elsewhere in this module
+  logic [                  $clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
+  logic [                   $clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx;
+
+  assign cmp_en_d = (|vld_req) & ~vld_we;
+
+  // word tag comparison in write buffer
+  assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
+                                            {rd_tag, bank_idx_q, bank_off_q};
+  // hit generation
+  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
+    // tag comparison of ways >0
+    assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q;
+    // byte offset mux of ways >0
+    assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
+    assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
+  end
+
+  for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit
+    assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & ({{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_data_i[k].wtag} == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES));
+  end
+
+  lzc #(
+      .WIDTH(DCACHE_WBUF_DEPTH)
+  ) i_lzc_wbuffer_hit (
+      .in_i   (wbuffer_hit_oh),
+      .cnt_o  (wbuffer_hit_idx),
+      .empty_o()
+  );
+
+  lzc #(
+      .WIDTH(DCACHE_SET_ASSOC)
+  ) i_lzc_rd_hit (
+      .in_i   (rd_hit_oh_o),
+      .cnt_o  (rd_hit_idx),
+      .empty_o()
+  );
+
+  assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data;
+  assign wbuffer_ruser = wbuffer_data_i[wbuffer_hit_idx].user;
+  assign wbuffer_be    = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0;
+
+  if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
+    // In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read
+    assign wr_cl_off     = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? '0 :
+                              {{DCACHE_OFFSET_WIDTH-AXI_OFFSET_WIDTH{1'b0}}, wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]} :
+                              wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
+  end else begin : gen_piton_offset
+    assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];  // word-granular slice matching the width of wr_cl_off (presumably 3 for XLEN=64 - confirm in riscv_pkg)
+  end
+
+  always_comb begin
+    if (wr_cl_vld_i) begin
+      rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN+:riscv::XLEN];
+      ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH+:DCACHE_USER_WIDTH];
+    end else begin
+      rdata = rdata_cl[rd_hit_idx];
+      ruser = ruser_cl[rd_hit_idx];
+    end
+  end
+
+  // overlay bytes that hit in the write buffer
+  for (genvar k = 0; k < (riscv::XLEN / 8); k++) begin : gen_rd_data
+    assign rd_data_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k+:8] : rdata[8*k+:8];
+  end
+  for (genvar k = 0; k < DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user
+    assign rd_user_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k+:8] : ruser[8*k+:8];
+  end
+
+  ///////////////////////////////////////////////////////
+  // memory arrays and regs
+  ///////////////////////////////////////////////////////
+
+  logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata[DCACHE_SET_ASSOC-1:0];  // per way: {valid bit, tag}
+
+  for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks
+    // Data RAM
+    sram #(
+        .USER_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH),
+        .DATA_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN),
+        .USER_EN   (ariane_pkg::DATA_USER_EN),
+        .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
+    ) i_data_sram (
+        .clk_i  (clk_i),
+        .rst_ni (rst_ni),
+        .req_i  (bank_req[k]),
+        .we_i   (bank_we[k]),
+        .addr_i (bank_idx[k]),
+        .wuser_i(bank_wuser[k]),
+        .wdata_i(bank_wdata[k]),
+        .be_i   (bank_be[k]),
+        .ruser_o(bank_ruser[k]),
+        .rdata_o(bank_rdata[k])
+    );
+  end
+
+  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_srams
+
+    assign tag_rdata[i]     = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0];
+    assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH];
+
+    // Tag RAM
+    sram #(
+        // tag + valid bit
+        .DATA_WIDTH(ariane_pkg::DCACHE_TAG_WIDTH + 1),
+        .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
+    ) i_tag_sram (
+        .clk_i  (clk_i),
+        .rst_ni (rst_ni),
+        .req_i  (vld_req[i]),
+        .we_i   (vld_we),
+        .addr_i (vld_addr),
+        .wuser_i('0),
+        .wdata_i({vld_wdata[i], wr_cl_tag_i}),
+        .be_i   ('1),
+        .ruser_o(),
+        .rdata_o(vld_tag_rdata[i])
+    );
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      bank_idx_q <= '0;
+      bank_off_q <= '0;
+      vld_sel_q  <= '0;
+      cmp_en_q   <= '0;
+    end else begin
+      bank_idx_q <= bank_idx_d;
+      bank_off_q <= bank_off_d;
+      vld_sel_q  <= vld_sel_d;
+      cmp_en_q   <= cmp_en_d;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    cach_line_width_axi :
+    assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth)
+    else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width");
+  end
+
+  initial begin
+    axi_xlen :
+    assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN)
+    else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN");
+  end
+
+  initial begin
+    cach_line_width_xlen :
+    assert (DCACHE_LINE_WIDTH > riscv::XLEN)
+    else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN");
+  end
+
+  hit_hot1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
+      rd_hit_oh_o
+  ))
+  else $fatal(1, "[l1 dcache] rd_hit_oh_o signal must be hot1");
+
+  word_write_hot1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
+  else $fatal(1, "[l1 dcache] wr_req_i signal must be hot1");
+
+  wbuffer_hit_hot1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
+      wbuffer_hit_oh
+  ))
+  else $fatal(1, "[l1 dcache] wbuffer_hit_oh signal must be hot1");
+
+  // this is only used for verification!
+  logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
+  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
+  logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
+    if (!rst_ni) begin
+      vld_mirror <= '{default: '0};
+      tag_mirror <= '{default: '0};
+    end else begin
+      for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin
+        if (vld_req[i] & vld_we) begin
+          vld_mirror[vld_addr][i] <= vld_wdata[i];
+          tag_mirror[vld_addr][i] <= wr_cl_tag_i;
+        end
+      end
+    end
+  end
+
+  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test
+    assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
+  end
+
+  tag_write_duplicate :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
+  else $fatal(1, "[l1 dcache] cannot allocate a CL that is already present in the cache");
+
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache_mem
diff --git a/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv b/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv
new file mode 100644
index 0000000..3e06a92
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv
@@ -0,0 +1,645 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 13.09.2018
+// Description: miss controller for WT dcache. Note that the current assumption
+// is that the port with the highest index issues writes instead of reads.
+
+
+module wt_dcache_missunit
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1,  // TX id to be used for AMOs
+    parameter int unsigned NumPorts = 4  // number of miss ports (the assertions require at least 2)
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    // cache management, signals from/to core
+    input logic enable_i,  // from CSR
+    input  logic                                       flush_i,     // flush request, this waits for pending tx (write, read) to finish and will clear the cache
+    output logic flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
+    output logic miss_o,  // we missed on a ld/st
+    // local cache management signals
+    input logic wbuffer_empty_i,
+    output logic cache_en_o,  // local cache enable signal
+    // AMO interface
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+    // miss handling interface (ld, ptw, wbuffer)
+    input logic [NumPorts-1:0] miss_req_i,
+    output logic [NumPorts-1:0] miss_ack_o,
+    input logic [NumPorts-1:0] miss_nc_i,
+    input logic [NumPorts-1:0] miss_we_i,
+    input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i,
+    input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i,
+    input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i,
+    input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
+    input logic [NumPorts-1:0][2:0] miss_size_i,
+    input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i,  // used as transaction ID
+    // signals that the request collided with a pending read
+    output logic [NumPorts-1:0] miss_replay_o,
+    // signals response from memory
+    output logic [NumPorts-1:0] miss_rtrn_vld_o,
+    output logic [CACHE_ID_WIDTH-1:0]                  miss_rtrn_id_o,     // only used for writes, set to zero for reads
+    // from writebuffer
+    input  logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0]  tx_paddr_i,         // used to check for address collisions with read operations
+    input  logic [DCACHE_MAX_TX-1:0]                   tx_vld_i,           // used to check for address collisions with read operations
+    // write interface to cache memory
+    output logic wr_cl_vld_o,  // set on a load response or whenever any way write enable is set
+    output logic wr_cl_nc_o,  // nc flag of the MSHR entry (mshr_q.nc)
+    output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o,  // per-way write enable
+    output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
+    output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
+    output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
+    output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
+    output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o,
+    output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,
+    output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o,
+    // memory interface
+    input logic mem_rtrn_vld_i,
+    input dcache_rtrn_t mem_rtrn_i,
+    output logic mem_data_req_o,
+    input logic mem_data_ack_i,
+    output dcache_req_t mem_data_o
+);
+
+  // helper: converts a binary way index into the corresponding one-hot way mask
+  function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh(
+      input logic [L1D_WAY_WIDTH-1:0] in);
+    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] way_onehot;
+    way_onehot     = '0;  // start with no way selected ...
+    way_onehot[in] = 1'b1;  // ... then raise only the bit addressed by `in`
+    return way_onehot;
+  endfunction : dcache_way_bin2oh
+
+  // clears the low-order bits of the physical address according to the size encoding:
+  // 000: bytes (address is returned unchanged)
+  // 001: hword (bit 0 cleared)
+  // 010: word  (bits 1:0 cleared)
+  // 011: dword (bits 2:0 cleared)
+  // 111: DCACHE line (bits DCACHE_OFFSET_WIDTH-1:0 cleared); any other encoding: unchanged
+  function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(input logic [riscv::PLEN-1:0] paddr,
+                                                            input logic [2:0] size);
+    logic [riscv::PLEN-1:0] aligned;
+    aligned = paddr;
+    unique case (size)
+      3'b001:  aligned[0:0] = '0;
+      3'b010:  aligned[1:0] = '0;
+      3'b011:  aligned[2:0] = '0;
+      3'b111:  aligned[DCACHE_OFFSET_WIDTH-1:0] = '0;
+      default: ;
+    endcase
+    return aligned;
+  endfunction : paddrSizeAlign
+
+  // controller FSM
+  typedef enum logic [2:0] {
+    IDLE,  // wait for flush requests, AMO ops and misses
+    DRAIN,  // only handle stores until the MSHR is cleared and the wbuffer is empty
+    AMO,  // send out the AMO request
+    FLUSH,  // flush the cache (this is also the reset state)
+    STORE_WAIT,  // keep the store request asserted until it is acked
+    LOAD_WAIT,  // keep the load request asserted until it is acked
+    AMO_WAIT  // block until the AMO response returns
+  } state_e;
+  state_e state_d, state_q;
+
+  // MSHR for reads, the fields are captured from the arbitrated miss port while mshr_allocate is set
+  typedef struct packed {
+    logic [riscv::PLEN-1:0]              paddr;
+    logic [2:0]                          size;
+    logic [DCACHE_SET_ASSOC-1:0]         vld_bits;
+    logic [CACHE_ID_WIDTH-1:0]           id;
+    logic                                nc;
+    logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;  // way that receives the refill
+    logic [$clog2(NumPorts)-1:0]         miss_port_idx;  // port that gets miss_rtrn_vld_o on the load response
+  } mshr_t;
+
+  mshr_t mshr_d, mshr_q;
+  logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way;
+  logic mshr_vld_d, mshr_vld_q, mshr_vld_q1;
+  logic mshr_allocate;
+  logic update_lfsr, all_ways_valid;
+
+  logic enable_d, enable_q;
+  logic flush_ack_d, flush_ack_q;
+  logic flush_en, flush_done;
+  logic mask_reads, lock_reqs;
+  logic amo_sel, miss_is_write;
+  logic amo_req_d, amo_req_q;
+  logic [63:0] amo_rtrn_mux;
+  riscv::xlen_t amo_data, amo_data_a, amo_data_b;
+  riscv::xlen_t amo_user;  //DCACHE USER ? DATA_USER_WIDTH
+  logic [riscv::PLEN-1:0] tmp_paddr;
+  logic [$clog2(NumPorts)-1:0] miss_port_idx;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;
+  logic [NumPorts-1:0] miss_req_masked_d, miss_req_masked_q;
+
+  logic inv_vld, inv_vld_all, cl_write_en;
+  logic load_ack, store_ack, amo_ack;
+
+  logic [NumPorts-1:0] mshr_rdrd_collision_d, mshr_rdrd_collision_q;
+  logic [NumPorts-1:0] mshr_rdrd_collision;
+  logic tx_rdwr_collision, mshr_rdwr_collision;
+
+  ///////////////////////////////////////////////////////
+  // input arbitration and general control sigs
+  ///////////////////////////////////////////////////////
+  // cnt_q counts up while flush_en is set (it is used as the cache index during a flush) and is cleared otherwise
+  assign cache_en_o = enable_q;
+  assign cnt_d = (flush_en) ? cnt_q + 1 : '0;
+  assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS - 1);
+  // request vector seen by the arbiter: frozen while lock_reqs is set, writes only while mask_reads is set
+  assign miss_req_masked_d = (lock_reqs)  ? miss_req_masked_q      :
+                             (mask_reads) ? miss_we_i & miss_req_i : miss_req_i;
+  assign miss_is_write = miss_we_i[miss_port_idx];
+
+  // read port arbiter, selects miss_port_idx from the masked request vector
+  lzc #(
+      .WIDTH(NumPorts)
+  ) i_lzc_reqs (
+      .in_i   (miss_req_masked_d),
+      .cnt_o  (miss_port_idx),
+      .empty_o()
+  );
+  // acknowledge the arbitrated port when a memory request is accepted; AMO requests are not acked here
+  always_comb begin : p_ack
+    miss_ack_o = '0;
+    if (!amo_sel) begin
+      miss_ack_o[miss_port_idx] = mem_data_ack_i & mem_data_req_o;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // MSHR and way replacement logic (only for read ops)
+  ///////////////////////////////////////////////////////
+
+  // find invalid cache line (the inverted valid bits of the arbitrated port are searched)
+  lzc #(
+      .WIDTH(ariane_pkg::DCACHE_SET_ASSOC)
+  ) i_lzc_inv (
+      .in_i   (~miss_vld_bits_i[miss_port_idx]),
+      .cnt_o  (inv_way),
+      .empty_o(all_ways_valid)
+  );
+
+  // generate random way index, only advanced when update_lfsr is set
+  lfsr #(
+      .LfsrWidth(8),
+      .OutWidth ($clog2(ariane_pkg::DCACHE_SET_ASSOC))
+  ) i_lfsr_inv (
+      .clk_i (clk_i),
+      .rst_ni(rst_ni),
+      .en_i  (update_lfsr),
+      .out_o (rnd_way)
+  );
+  // replace the random way if all ways are valid, otherwise fill the invalid way that was found
+  assign repl_way             = (all_ways_valid) ? rnd_way : inv_way;
+  // the MSHR fields are loaded while mshr_allocate is set and hold their value otherwise
+  assign mshr_d.size          = (mshr_allocate) ? miss_size_i[miss_port_idx] : mshr_q.size;
+  assign mshr_d.paddr         = (mshr_allocate) ? miss_paddr_i[miss_port_idx] : mshr_q.paddr;
+  assign mshr_d.vld_bits      = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
+  assign mshr_d.id            = (mshr_allocate) ? miss_id_i[miss_port_idx] : mshr_q.id;
+  assign mshr_d.nc            = (mshr_allocate) ? miss_nc_i[miss_port_idx] : mshr_q.nc;
+  assign mshr_d.repl_way      = (mshr_allocate) ? repl_way : mshr_q.repl_way;
+  assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;
+
+  // currently we only have one outstanding read TX, hence an incoming load clears the MSHR
+  assign mshr_vld_d           = (mshr_allocate) ? 1'b1 : (load_ack) ? 1'b0 : mshr_vld_q;
+  // miss_o is only raised in the allocation cycle, and only if the allocated request is not nc
+  assign miss_o               = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;
+
+  // read/read collision: CL address of port k matches the MSHR (valid now or in the previous cycle); sticky while miss_req_i[k] is set
+  for (genvar k = 0; k < NumPorts; k++) begin : gen_rdrd_collision
+    assign mshr_rdrd_collision[k]   = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
+    assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];
+  end
+
+  // read/write collision, stalls the corresponding request
+  // write port[NumPorts-1] collides with MSHR_Q
+  assign mshr_rdwr_collision = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && mshr_vld_q;
+
+  // read collides with inflight TX (CL address of the arbitrated port matches any valid tx_paddr_i entry)
+  always_comb begin : p_tx_coll
+    tx_rdwr_collision = 1'b0;
+    for (int k = 0; k < DCACHE_MAX_TX; k++) begin
+      tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k];
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // to memory
+  ///////////////////////////////////////////////////////
+
+  // if size = 32bit word, select appropriate offset, replicate for openpiton...
+  // amo_data_a: lower 32 bit of operand_b (replicated into both halves for XLEN64), amo_data_b: full operand_b
+  if (CVA6Cfg.RVA) begin
+    if (riscv::IS_XLEN64) begin : gen_amo_64b_data
+      assign amo_data_a = {amo_req_i.operand_b[0+:32], amo_req_i.operand_b[0+:32]};
+      assign amo_data_b = amo_req_i.operand_b;
+    end else begin : gen_amo_32b_data
+      assign amo_data_a = amo_req_i.operand_b[0+:32];
+    end
+  end
+  // select the AMO write data and user bits that are sent to memory while amo_sel is set
+  always_comb begin
+    amo_data = '0;  // defaults keep this block purely combinational (no latches) when RVA is disabled
+    amo_user = '0;  // also the value used when DATA_USER_EN is not set
+    if (CVA6Cfg.RVA) begin
+      if (riscv::IS_XLEN64) begin
+        if (amo_req_i.size == 2'b10) begin
+          amo_data = amo_data_a;  // 32 bit AMO: replicated word
+        end else begin
+          amo_data = amo_data_b;  // full-width operand
+        end
+      end else begin
+        amo_data = amo_data_a;
+      end
+      if (ariane_pkg::DATA_USER_EN) begin
+        amo_user = amo_data;  // user bits mirror the AMO data
+      end
+    end
+  end
+
+  if (CVA6Cfg.RVA) begin
+    // note: openpiton returns a full cacheline!
+    if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_rtrn_mux
+      if (CVA6Cfg.AxiDataWidth > 64) begin
+        assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2(CVA6Cfg.AxiDataWidth/8)-1:3]*64+:64];
+      end else begin
+        assign amo_rtrn_mux = mem_rtrn_i.data[0+:64];
+      end
+    end else begin : gen_piton_rtrn_mux
+      assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64+:64];
+    end
+    // always sign extend 32bit values (the word is selected with operand_a[2])
+    assign amo_resp_o.result = (amo_req_i.size==2'b10) ? {{32{amo_rtrn_mux[amo_req_i.operand_a[2]*32 + 31]}},amo_rtrn_mux[amo_req_i.operand_a[2]*32 +: 32]} :
+                                                       amo_rtrn_mux ;
+    assign amo_req_d = amo_req_i.req;
+  end else begin  // no atomics: tie off the AMO response path instead of leaving it undriven
+    assign amo_resp_o.result = '0;
+    assign amo_req_d         = 1'b0;  // amo_req_q would otherwise sample an undriven net
+  end
+
+  // outgoing memory requests (AMOs are always uncached); amo_sel switches from the arbitrated miss port to the AMO request
+  assign mem_data_o.tid = (CVA6Cfg.RVA && amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
+  assign mem_data_o.nc = (CVA6Cfg.RVA && amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
+  assign mem_data_o.way = (CVA6Cfg.RVA && amo_sel) ? '0 : repl_way;
+  assign mem_data_o.data = (CVA6Cfg.RVA && amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
+  assign mem_data_o.user = (CVA6Cfg.RVA && amo_sel) ? amo_user : miss_wuser_i[miss_port_idx];
+  assign mem_data_o.size   = (CVA6Cfg.RVA && amo_sel) ? {1'b0, amo_req_i.size} : miss_size_i [miss_port_idx];
+  assign mem_data_o.amo_op = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.amo_op : AMO_NONE;
+  // the request address is aligned to the request size before it is sent out
+  assign tmp_paddr         = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx];
+  assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size);
+
+  ///////////////////////////////////////////////////////
+  // back-off mechanism for LR/SC completion guarantee
+  ///////////////////////////////////////////////////////
+  // set_i: an SC failed, clr_i: an SC passed; sc_backoff_over gates the issue of LR requests in the AMO state
+  logic sc_fail, sc_pass, sc_backoff_over;
+  exp_backoff #(
+      .Seed  (3),
+      .MaxExp(16)
+  ) i_exp_backoff (
+      .clk_i,
+      .rst_ni,
+      .set_i    (sc_fail),
+      .clr_i    (sc_pass),
+      .is_zero_o(sc_backoff_over)
+  );
+
+  ///////////////////////////////////////////////////////
+  // responses from memory
+  ///////////////////////////////////////////////////////
+
+  // keep track of pending stores
+  logic store_sent;
+  logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q;
+  assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ);
+  // counter: +1 for every accepted store request, -1 for every store ack, unchanged if both coincide
+  assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q     :
+                             (store_ack)               ? stores_inflight_q - 1 :
+                             (store_sent)              ? stores_inflight_q + 1 :
+                                                         stores_inflight_q;
+
+  // incoming responses: decode the response type into the ack / invalidation strobes (all default to 0)
+  always_comb begin : p_rtrn_logic
+    load_ack        = 1'b0;
+    store_ack       = 1'b0;
+    amo_ack         = 1'b0;
+    inv_vld         = 1'b0;
+    inv_vld_all     = 1'b0;
+    sc_fail         = 1'b0;
+    sc_pass         = 1'b0;
+    miss_rtrn_vld_o = '0;
+    if (mem_rtrn_vld_i) begin
+      unique case (mem_rtrn_i.rtype)
+        DCACHE_LOAD_ACK: begin
+          if (mshr_vld_q) begin  // load responses are only accepted while the MSHR is valid
+            load_ack = 1'b1;
+            miss_rtrn_vld_o[mshr_q.miss_port_idx] = 1'b1;  // notify the port that allocated the MSHR
+          end
+        end
+        DCACHE_STORE_ACK: begin
+          if (stores_inflight_q > 0) begin  // store acks are only accepted while stores are pending
+            store_ack = 1'b1;
+            miss_rtrn_vld_o[NumPorts-1] = 1'b1;  // the last port is the write port
+          end
+        end
+        DCACHE_ATOMIC_ACK: begin
+          if (CVA6Cfg.RVA) begin
+            if (amo_req_q) begin
+              amo_ack = 1'b1;
+              // need to set SC backoff counter if
+              // this op failed
+              if (amo_req_i.amo_op == AMO_SC) begin
+                if (amo_resp_o.result > 0) begin  // a nonzero result is treated as a failed SC
+                  sc_fail = 1'b1;
+                end else begin
+                  sc_pass = 1'b1;
+                end
+              end
+            end
+          end
+        end
+        DCACHE_INV_REQ: begin
+          inv_vld     = mem_rtrn_i.inv.vld | mem_rtrn_i.inv.all;
+          inv_vld_all = mem_rtrn_i.inv.all;
+        end
+        // TODO:
+        // DCACHE_INT_REQ: begin
+        // end
+        default: begin
+        end
+      endcase
+    end
+  end
+
+  // to write buffer
+  assign miss_rtrn_id_o = mem_rtrn_i.tid;
+
+  ///////////////////////////////////////////////////////
+  // writes to cache memory
+  ///////////////////////////////////////////////////////
+
+  // cacheline write port
+  assign wr_cl_nc_o = mshr_q.nc;
+  assign wr_cl_vld_o = load_ack | (|wr_cl_we_o);
+  // way write enables: all ways on flush / invalidate-all, else the invalidated way, else the refill way
+  assign wr_cl_we_o = (flush_en) ? '1 : (inv_vld_all) ? '1 : (inv_vld) ? dcache_way_bin2oh(
+      mem_rtrn_i.inv.way
+  ) : (cl_write_en) ? dcache_way_bin2oh(
+      mshr_q.repl_way
+  ) : '0;
+  // new valid bits: cleared on flush and invalidation, one-hot refill way when a cacheline is written
+  assign wr_vld_bits_o = (flush_en) ? '0 : (inv_vld) ? '0 : (cl_write_en) ? dcache_way_bin2oh(
+      mshr_q.repl_way
+  ) : '0;
+  // index: flush counter, else index of the invalidation request, else index bits of the MSHR address
+  assign wr_cl_idx_o     = (flush_en) ? cnt_q                                                        :
+                           (inv_vld)  ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
+                                        mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
+
+  assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
+  assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
+  assign wr_cl_data_o = mem_rtrn_i.data;
+  assign wr_cl_user_o = mem_rtrn_i.user;
+  assign wr_cl_data_be_o = (cl_write_en) ? '1 : '0;// we only write complete cachelines into the memory
+
+  // only non-NC responses write to the cache
+  assign cl_write_en = load_ack & ~mshr_q.nc;
+
+  ///////////////////////////////////////////////////////
+  // main control logic for generating tx
+  ///////////////////////////////////////////////////////
+
+  always_comb begin : p_fsm
+    // default assignments (overridden per state below)
+    state_d          = state_q;
+
+    flush_ack_o      = 1'b0;
+    mem_data_o.rtype = DCACHE_LOAD_REQ;
+    mem_data_req_o   = 1'b0;
+    amo_resp_o.ack   = 1'b0;
+    miss_replay_o    = '0;
+
+    // disabling cache is possible anytime, enabling goes via flush
+    enable_d         = enable_q & enable_i;
+    flush_ack_d      = flush_ack_q;
+    flush_en         = 1'b0;
+    amo_sel          = 1'b0;
+    update_lfsr      = 1'b0;
+    mshr_allocate    = 1'b0;
+    lock_reqs        = 1'b0;
+    mask_reads       = mshr_vld_q;  // read requests are masked while the MSHR is occupied
+
+    // interfaces
+    unique case (state_q)
+      //////////////////////////////////
+      // wait for misses / amo ops
+      IDLE: begin
+        if (flush_i || (enable_i && !enable_q)) begin
+          if (wbuffer_empty_i && !mshr_vld_q) begin
+            flush_ack_d = flush_i;  // only acknowledge if the flush was requested via flush_i
+            state_d     = FLUSH;
+          end else begin
+            state_d = DRAIN;
+          end
+        end else if (CVA6Cfg.RVA && amo_req_i.req) begin
+          if (wbuffer_empty_i && !mshr_vld_q) begin
+            state_d = AMO;
+          end else begin
+            state_d = DRAIN;
+          end
+          // we've got a miss to handle
+        end else if (|miss_req_masked_d) begin
+          // this is a write miss, just pass through (but check whether write collides with MSHR)
+          if (miss_is_write) begin
+            // stall in case this write collides with the MSHR address
+            if (!mshr_rdwr_collision) begin
+              mem_data_req_o   = 1'b1;
+              mem_data_o.rtype = DCACHE_STORE_REQ;
+              if (!mem_data_ack_i) begin
+                state_d = STORE_WAIT;
+              end
+            end
+            // this is a read miss, can only allocate 1 MSHR
+            // in case of a load_ack we can accept a new miss, since the MSHR is being cleared
+          end else if (!mshr_vld_q || load_ack) begin
+            // replay the read request in case the address has collided with MSHR during the time the request was pending
+            // i.e., the cache state may have been updated in the mean time due to a refill at the same CL address
+            if (mshr_rdrd_collision_d[miss_port_idx]) begin
+              miss_replay_o[miss_port_idx] = 1'b1;
+              // stall in case this CL address overlaps with a write TX that is in flight
+            end else if (!tx_rdwr_collision) begin
+              mem_data_req_o   = 1'b1;
+              mem_data_o.rtype = DCACHE_LOAD_REQ;
+              update_lfsr      = all_ways_valid & mem_data_ack_i;  // need to evict a random way
+              mshr_allocate    = mem_data_ack_i;
+              if (!mem_data_ack_i) begin
+                state_d = LOAD_WAIT;
+              end
+            end
+          end
+        end
+      end
+      //////////////////////////////////
+      // store request stays asserted (requests are locked) until it is acked
+      STORE_WAIT: begin
+        lock_reqs        = 1'b1;
+        mem_data_req_o   = 1'b1;
+        mem_data_o.rtype = DCACHE_STORE_REQ;
+        if (mem_data_ack_i) begin
+          state_d = IDLE;
+        end
+      end
+      //////////////////////////////////
+      // load request stays asserted (requests are locked) until it is acked, then the MSHR is allocated
+      LOAD_WAIT: begin
+        lock_reqs        = 1'b1;
+        mem_data_req_o   = 1'b1;
+        mem_data_o.rtype = DCACHE_LOAD_REQ;
+        if (mem_data_ack_i) begin
+          update_lfsr   = all_ways_valid;  // need to evict a random way
+          mshr_allocate = 1'b1;
+          state_d       = IDLE;
+        end
+      end
+      //////////////////////////////////
+      // only handle stores, do not accept new read requests
+      // wait until MSHR is cleared and wbuffer is empty
+      DRAIN: begin
+        mask_reads = 1'b1;
+        // these are writes, check whether they collide with MSHR
+        if (|miss_req_masked_d && !mshr_rdwr_collision) begin
+          mem_data_req_o   = 1'b1;
+          mem_data_o.rtype = DCACHE_STORE_REQ;
+        end
+
+        if (wbuffer_empty_i && !mshr_vld_q) begin
+          state_d = IDLE;
+        end
+      end
+      //////////////////////////////////
+      // flush the cache
+      FLUSH: begin
+        // internal flush signal
+        flush_en = 1'b1;
+        if (flush_done) begin
+          state_d     = IDLE;
+          flush_ack_o = flush_ack_q;
+          flush_ack_d = 1'b0;
+          enable_d    = enable_i;  // the cache enable is only taken over once the flush has completed
+        end
+      end
+      //////////////////////////////////
+      // send out amo op request
+      AMO: begin
+        if (CVA6Cfg.RVA) begin
+          mem_data_o.rtype = DCACHE_ATOMIC_REQ;
+          amo_sel          = 1'b1;
+          // if this is an LR, we need to consult the backoff counter
+          if ((amo_req_i.amo_op != AMO_LR) || sc_backoff_over) begin
+            mem_data_req_o = 1'b1;
+            if (mem_data_ack_i) begin
+              state_d = AMO_WAIT;
+            end
+          end
+        end
+      end
+      //////////////////////////////////
+      // block and wait until AMO OP returns
+      AMO_WAIT: begin
+        if (CVA6Cfg.RVA) begin
+          amo_sel = 1'b1;
+          if (amo_ack) begin
+            amo_resp_o.ack = 1'b1;
+            state_d        = IDLE;
+          end
+        end
+      end
+      //////////////////////////////////
+      default: begin
+        // we should never get here
+        state_d = IDLE;
+      end
+    endcase  // state_q
+  end
+
+  ///////////////////////////////////////////////////////
+  // ff's
+  ///////////////////////////////////////////////////////
+  // state registers with asynchronous active-low reset; all *_q follow their *_d counterpart
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      state_q               <= FLUSH;  // the FSM starts with a cache flush after reset
+      cnt_q                 <= '0;
+      enable_q              <= '0;
+      flush_ack_q           <= '0;
+      mshr_vld_q            <= '0;
+      mshr_vld_q1           <= '0;
+      mshr_q                <= '0;
+      mshr_rdrd_collision_q <= '0;
+      miss_req_masked_q     <= '0;
+      amo_req_q             <= '0;
+      stores_inflight_q     <= '0;
+    end else begin
+      state_q               <= state_d;
+      cnt_q                 <= cnt_d;
+      enable_q              <= enable_d;
+      flush_ack_q           <= flush_ack_d;
+      mshr_vld_q            <= mshr_vld_d;
+      mshr_vld_q1           <= mshr_vld_q;  // MSHR valid delayed by one cycle
+      mshr_q                <= mshr_d;
+      mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
+      miss_req_masked_q     <= miss_req_masked_d;
+      amo_req_q             <= amo_req_d;
+      stores_inflight_q     <= stores_inflight_d;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+  // an accepted load response must carry the transaction ID stored in the MSHR
+  read_tid :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
+  else $fatal(1, "[l1 dcache missunit] TID of load response doesn't match");
+  // ports [NumPorts-2:0] must not set their write enable while requesting
+  read_ports :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0)
+  else $fatal(1, "[l1 dcache missunit] only last port can issue write requests");
+  // port [NumPorts-1] must set its write enable whenever it requests
+  write_port :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1])
+  else $fatal(1, "[l1 dcache missunit] last port can only issue write requests");
+
+  initial begin
+    // reject unsupported parameterizations
+    assert (NumPorts >= 2)
+    else
+      $fatal(
+          1, "[l1 dcache missunit] at least two ports are required (one read port, one write port)"
+      );
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache_missunit
diff --git a/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv b/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv
new file mode 100644
index 0000000..8e9c39d
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv
@@ -0,0 +1,635 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 13.09.2018
+// Description: coalescing write buffer for WT dcache
+//
+// A couple of notes:
+//
+// 1) the write buffer behaves as a fully-associative cache, and is therefore coalescing.
+//    this cache is used by the cache readout logic to forward data to the load unit.
+//
+//    each byte can be in the following states (valid/dirty/txblock):
+//
+//    0/0/0:    invalid -> free entry in the buffer
+//    1/1/0:    valid and dirty, Byte is hence not part of TX in-flight
+//    1/0/1:    valid and not dirty, Byte is part of a TX in-flight
+//    1/1/1:    valid, part of a TX in-flight, and dirty. this means that the byte has been
+//              overwritten while in TX and needs to be retransmitted once the write of that byte returns.
+//    1/0/0:    this would represent a clean state, but is never reached in the wbuffer in the current implementation.
+//              this is because when a TX returns, and the byte is in state [1/0/1], it is written to cache if needed and
+//              its state is immediately cleared to 0/x/x.
+//
+//    this state is used to distinguish between bytes that have been written and not
+//    yet sent to the memory subsystem, and bytes that are part of a transaction.
+//
+// 2) further, each word in the write buffer has a cache state (checked, hit_oh)
+//
+//    checked == 0: unknown cache state
+//    checked == 1: cache state has been looked up, valid way is stored in "hit_oh"
+//
+//    cache invalidations/refills affecting a particular word will clear its word state to 0,
+//    so another lookup has to be done. note that these lookups are triggered as soon as there is
+//    a valid word with checked == 0 in the write buffer.
+//
+// 3) returning write ACKs trigger a cache update if the word is present in the cache, and evict that
+//    word from the write buffer. if the word is not allocated to the cache, it is just evicted from the write buffer.
+//    if the word cache state is VOID, the pipeline is stalled until it is clear whether that word is in the cache or not.
+//
+// 4) we handle NC writes using the writebuffer circuitry. upon an NC request, the writebuffer will first be drained.
+//    then, only the NC word is written into the write buffer and no further write requests are acknowledged until that
+//    word has been evicted from the write buffer.
+
+
+module wt_dcache_wbuffer
+  import ariane_pkg::*;
+  import wt_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni, // Asynchronous reset active low
+
+    input logic cache_en_i,  // writes are treated as NC if disabled
+    output logic empty_o,  // asserted if no data is present in write buffer
+    output logic not_ni_o,  // asserted if no ni data is present in write buffer
+    // core request ports
+    input dcache_req_i_t req_port_i,
+    output dcache_req_o_t req_port_o,
+    // interface to miss handler
+    input logic miss_ack_i,
+    output logic [riscv::PLEN-1:0] miss_paddr_o,
+    output logic miss_req_o,
+    output logic miss_we_o,  // always 1 here
+    output riscv::xlen_t miss_wdata_o,
+    output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,
+    output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,  // unused here (set to 0)
+    output logic miss_nc_o,  // request to I/O space
+    output logic [2:0] miss_size_o,  //
+    output logic [CACHE_ID_WIDTH-1:0]          miss_id_o,       // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
+    // write responses from memory
+    input logic miss_rtrn_vld_i,
+    input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i,  // transaction ID to clear
+    // cache read interface
+    output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o,  // tag in - comes one cycle later
+    output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
+    output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
+    output logic rd_req_o,  // read the word at offset off_i[:3] in all ways
+    output logic rd_tag_only_o,  // set to 1 here as we do not have to read the data arrays
+    input logic rd_ack_i,
+    input riscv::xlen_t rd_data_i,  // unused
+    input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,  // unused
+    input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
+    // cacheline writes
+    input logic wr_cl_vld_i,
+    input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
+    // cache word write interface
+    output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
+    input logic wr_ack_i,
+    output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
+    output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
+    output riscv::xlen_t wr_data_o,
+    output logic [(riscv::XLEN/8)-1:0] wr_data_be_o,
+    output logic [DCACHE_USER_WIDTH-1:0] wr_user_o,
+    // to forwarding logic and miss unit
+    output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
+    output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0]     tx_paddr_o,      // used to check for address collisions with read operations
+    output logic [DCACHE_MAX_TX-1:0] tx_vld_o
+);
+
+  tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;
+  wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
+  logic [DCACHE_WBUF_DEPTH-1:0] valid;
+  logic [DCACHE_WBUF_DEPTH-1:0] dirty;
+  logic [DCACHE_WBUF_DEPTH-1:0] tocheck;
+  logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
+  //logic     [DCACHE_WBUF_DEPTH-1:0][7:0]    bdirty;
+  logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty;
+
+  logic [$clog2(DCACHE_WBUF_DEPTH)-1:0]
+      next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
+  logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
+
+  logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off;
+  logic [(riscv::XLEN/8)-1:0] tx_be;
+  logic [riscv::PLEN-1:0] wr_paddr, rd_paddr, extract_tag;
+  logic [DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q;
+  logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q;
+  logic check_en_d, check_en_q, check_en_q1;
+  logic full, dirty_rd_en, rdy;
+  logic rtrn_empty, evict;
+  logic [DCACHE_WBUF_DEPTH-1:0] ni_pending_d, ni_pending_q;
+  logic wbuffer_wren;
+  logic free_tx_slots;
+
+  logic wr_cl_vld_q, wr_cl_vld_d;
+  logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d;
+
+  logic [riscv::PLEN-1:0] debug_paddr[DCACHE_WBUF_DEPTH-1:0];
+
+  wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux;
+
+  ///////////////////////////////////////////////////////
+  // misc
+  ///////////////////////////////////////////////////////
+  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag;
+  logic is_nc_miss;
+  logic is_ni;
+  assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH];
+  assign is_nc_miss = !config_pkg::is_inside_cacheable_regions(
+      CVA6Cfg,
+      {
+        {64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}}
+      }
+  );
+  assign miss_nc_o = !cache_en_i || is_nc_miss;
+  // Non-idempotent if request goes to NI region
+  assign is_ni = config_pkg::is_inside_nonidempotent_regions(
+      CVA6Cfg,
+      {
+        {64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}},
+        req_port_i.address_tag,
+        {DCACHE_INDEX_WIDTH{1'b0}}
+      }
+  );
+
+  assign miss_we_o = 1'b1;
+  assign miss_vld_bits_o = '0;
+  assign wbuffer_data_o = wbuffer_q;
+
+  for (genvar k = 0; k < DCACHE_MAX_TX; k++) begin : gen_tx_vld
+    assign tx_vld_o[k] = tx_stat_q[k].vld;
+    // word-aligned physical address of the entry in flight: append the byte offset bits instead of
+    // shifting inside a concatenation (a self-determined shift drops the upper bits of wtag)
+    assign tx_paddr_o[k] = {wbuffer_q[tx_stat_q[k].ptr].wtag, {riscv::XLEN_ALIGN_BYTES{1'b0}}};
+  end
+
+  ///////////////////////////////////////////////////////
+  // openpiton does not understand byte enable sigs
+  // need to convert to the four cases:
+  // 00: byte
+  // 01: halfword
+  // 10: word
+  // 11: dword
+  // non-contiguous writes need to be serialized!
+  // e.g. merged dwords with BE like this: 8'b01001100
+  ///////////////////////////////////////////////////////
+
+  // get byte offset
+  lzc #(
+      .WIDTH(riscv::XLEN / 8)
+  ) i_vld_bdirty (
+      .in_i   (bdirty[dirty_ptr]),
+      .cnt_o  (bdirty_off),
+      .empty_o()
+  );
+
+  // add the offset to the physical base address of this buffer entry
+  assign miss_paddr_o = {wbuffer_dirty_mux.wtag, bdirty_off};
+  assign miss_id_o = tx_id;
+
+  // is there any dirty word to be transmitted, and is there a free TX slot?
+  assign miss_req_o = (|dirty) && free_tx_slots;
+
+  // get size of aligned words, and the corresponding byte enables
+  // note: openpiton can only handle aligned offsets + size, and hence
+  // we have to split unaligned data into multiple transfers (see toSize64)
+  // e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000
+  if (riscv::IS_XLEN64) begin : gen_size_64b
+    assign miss_size_o = {1'b0, toSize64(bdirty[dirty_ptr])};
+  end else begin : gen_size_32b
+    assign miss_size_o = {1'b0, toSize32(bdirty[dirty_ptr])};
+  end
+
+  // replicate transfers shorter than a dword
+  assign miss_wdata_o = riscv::IS_XLEN64 ? repData64(
+      wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
+  ) : repData32(
+      wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
+  );
+  if (ariane_pkg::DATA_USER_EN) begin
+    assign miss_wuser_o = riscv::IS_XLEN64 ? repData64(
+        wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
+    ) : repData32(
+        wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
+    );
+  end else begin
+    assign miss_wuser_o = '0;
+  end
+
+  assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8(
+      bdirty_off, miss_size_o[1:0]
+  ) : to_byte_enable4(
+      bdirty_off, miss_size_o[1:0]
+  );
+
+  ///////////////////////////////////////////////////////
+  // TX status registers and ID counters
+  ///////////////////////////////////////////////////////
+
+  // TODO: make this fall through if timing permits it
+  fifo_v3 #(
+      .FALL_THROUGH(1'b0),
+      .DATA_WIDTH  ($clog2(DCACHE_MAX_TX)),
+      .DEPTH       (DCACHE_MAX_TX)
+  ) i_rtrn_id_fifo (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (),
+      .empty_o   (rtrn_empty),
+      .usage_o   (),
+      .data_i    (miss_rtrn_id_i),
+      .push_i    (miss_rtrn_vld_i),
+      .data_o    (rtrn_id),
+      .pop_i     (evict)
+  );
+
+  always_comb begin : p_tx_stat
+    // defaults: hold all TX slots, no eviction, no cache write request
+    wr_req_o  = '0;
+    evict     = 1'b0;
+    tx_stat_d = tx_stat_q;
+
+    // a returned TX can only be retired once the cache state of its word has been checked
+    if ((!rtrn_empty) && wbuffer_q[rtrn_ptr].checked) begin
+      // clean bytes to write and CL present: request the cache update, retire upon acknowledge
+      if ((|wr_data_be_o) && (|wbuffer_q[rtrn_ptr].hit_oh)) begin
+        wr_req_o = wbuffer_q[rtrn_ptr].hit_oh;
+        if (wr_ack_i) evict = 1'b1;
+      end else begin
+        // nothing to write into the cache: retire right away
+        evict = 1'b1;
+      end
+      // retiring a TX releases its slot
+      if (evict) tx_stat_d[rtrn_id].vld = 1'b0;
+    end
+
+    // allocate a new entry
+    // (evaluated after the release above, hence it has the last word on the slot it targets)
+    if (dirty_rd_en) begin
+      tx_stat_d[tx_id].ptr = dirty_ptr;
+      tx_stat_d[tx_id].be  = tx_be;
+      tx_stat_d[tx_id].vld = 1'b1;
+    end
+  end
+
+  assign free_tx_slots = |(~tx_vld_o);
+
+  // next free TX slot ID to allocate (the requesters are the currently invalid TX slots)
+  rr_arb_tree #(
+      .NumIn    (DCACHE_MAX_TX),
+      .LockIn   (1'b1),
+      .DataWidth(1)
+  ) i_tx_id_rr (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (~tx_vld_o),
+      .gnt_o  (),
+      .data_i ('0),
+      .gnt_i  (dirty_rd_en),
+      .req_o  (),
+      .data_o (),
+      .idx_o  (tx_id)
+  );
+
+  ///////////////////////////////////////////////////////
+  // cache readout & update
+  ///////////////////////////////////////////////////////
+
+  // tag of the word to look up (registered into rd_tag_q, see rd_tag_o below)
+  assign extract_tag = rd_paddr >> DCACHE_INDEX_WIDTH;
+  assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0];
+  // trigger TAG readout in cache
+  assign rd_tag_only_o = 1'b1;
+  // word-aligned address of the entry to check; the offset bits are appended rather than shifted
+  // in, because a shift inside a concatenation is self-determined and drops the upper wtag bits
+  assign rd_paddr = {wbuffer_check_mux.wtag, {riscv::XLEN_ALIGN_BYTES{1'b0}}};
+  assign rd_req_o = |tocheck;
+  assign rd_tag_o = rd_tag_q;  // delayed by one cycle
+  assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
+  assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
+  assign check_en_d = rd_req_o & rd_ack_i;
+
+  // cache update port
+  assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
+  // if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache
+  // when the TX returns
+  assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
+  // word-aligned address of the returning entry; the offset bits are appended rather than shifted
+  // in, because a shift inside a concatenation is self-determined and drops the upper wtag bits
+  assign wr_paddr = {wbuffer_q[rtrn_ptr].wtag, {riscv::XLEN_ALIGN_BYTES{1'b0}}};
+  assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
+  assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
+  assign wr_data_o = wbuffer_q[rtrn_ptr].data;
+  assign wr_user_o = wbuffer_q[rtrn_ptr].user;
+
+
+  ///////////////////////////////////////////////////////
+  // readout of status bits, index calculation
+  ///////////////////////////////////////////////////////
+
+  logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp;
+
+  assign wr_cl_vld_d = wr_cl_vld_i;
+  assign wr_cl_idx_d = wr_cl_idx_i;
+
+  for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_flags
+    // only for debug, will be pruned
+    // (offset bits are appended: a shift inside a concatenation would drop the upper wtag bits)
+    if (CVA6Cfg.DebugEn) begin
+      assign debug_paddr[k] = {wbuffer_q[k].wtag, {riscv::XLEN_ALIGN_BYTES{1'b0}}};
+    end
+
+    // dirty bytes that are ready for transmission.
+    // note that we cannot retransmit a word that is already in-flight
+    // since the multiple transactions might overtake each other in the memory system!
+    assign bdirty[k] = (|wbuffer_q[k].txblock) ? '0 : wbuffer_q[k].dirty & wbuffer_q[k].valid;
+
+    // word-level flags: entry has bytes to transmit / entry holds at least one valid byte
+    assign dirty[k] = |bdirty[k];
+    assign valid[k] = |wbuffer_q[k].valid;
+    // the incoming store hits this entry if the entry is valid and the word addresses match
+    assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]});
+
+    // checks if an invalidation/cache refill hits a particular word
+    // note: an invalidation can hit multiple words!
+    // need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal...
+    assign wtag_comp[k] = wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES-1:DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES];
+    assign inval_hit[k]  = (wr_cl_vld_d & valid[k] & (wtag_comp[k] == wr_cl_idx_d)) |
+                           (wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q));
+
+    // these words have to be looked up in the cache
+    assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
+  end
+
+  assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr;
+  assign rdy    = (|wbuffer_hit_oh) | (~full);
+
+  // next free entry in the buffer
+  lzc #(
+      .WIDTH(DCACHE_WBUF_DEPTH)
+  ) i_vld_lzc (
+      .in_i   (~valid),
+      .cnt_o  (next_ptr),
+      .empty_o(full)
+  );
+
+  // get index of hit
+  lzc #(
+      .WIDTH(DCACHE_WBUF_DEPTH)
+  ) i_hit_lzc (
+      .in_i   (wbuffer_hit_oh),
+      .cnt_o  (hit_ptr),
+      .empty_o()
+  );
+
+  // next dirty word to serve
+  rr_arb_tree #(
+      .NumIn   (DCACHE_WBUF_DEPTH),
+      .LockIn  (1'b1),
+      .DataType(wbuffer_t)
+  ) i_dirty_rr (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (dirty),
+      .gnt_o  (),
+      .data_i (wbuffer_q),
+      .gnt_i  (dirty_rd_en),
+      .req_o  (),
+      .data_o (wbuffer_dirty_mux),
+      .idx_o  (dirty_ptr)
+  );
+
+  // next word to lookup in the cache
+  rr_arb_tree #(
+      .NumIn   (DCACHE_WBUF_DEPTH),
+      .DataType(wbuffer_t)
+  ) i_clean_rr (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (tocheck),
+      .gnt_o  (),
+      .data_i (wbuffer_q),
+      .gnt_i  (check_en_d),
+      .req_o  (),
+      .data_o (wbuffer_check_mux),
+      .idx_o  (check_ptr_d)
+  );
+
+  ///////////////////////////////////////////////////////
+  // update logic
+  ///////////////////////////////////////////////////////
+
+  assign req_port_o.data_rvalid = '0;
+  assign req_port_o.data_rdata  = '0;
+  assign req_port_o.data_ruser  = '0;
+  assign req_port_o.data_rid    = '0;
+
+  assign rd_hit_oh_d = rd_hit_oh_i;
+
+  logic ni_inside, ni_conflict;
+  assign ni_inside = |ni_pending_q;
+  assign ni_conflict = CVA6Cfg.NonIdemPotenceEn && is_ni && ni_inside;
+  assign not_ni_o = !ni_inside;
+  assign empty_o    = !(|valid);
+
+  // TODO: rewrite and separate into MUXES and write strobe logic
+  always_comb begin : p_buffer
+    wbuffer_d           = wbuffer_q;
+    ni_pending_d        = ni_pending_q;
+    dirty_rd_en         = 1'b0;
+    req_port_o.data_gnt = 1'b0;
+    wbuffer_wren        = 1'b0;
+    // the defaults above hold the buffer state; the updates below are applied in order
+    // TAG lookup returned (check_en_q1): record the result if the word still holds valid bytes
+    if (check_en_q1) begin
+      if (|wbuffer_q[check_ptr_q1].valid) begin
+        wbuffer_d[check_ptr_q1].checked = 1'b1;
+        wbuffer_d[check_ptr_q1].hit_oh  = rd_hit_oh_q;
+      end
+    end
+
+    // if an invalidation or cache line refill comes in and hits on the write buffer,
+    // we have to discard our knowledge of the corresponding cacheline state
+    for (int k = 0; k < DCACHE_WBUF_DEPTH; k++) begin
+      if (inval_hit[k]) begin
+        wbuffer_d[k].checked = 1'b0;
+      end
+    end
+
+    // once TX write response came back, we can clear the TX block. if it was not dirty, we
+    // can completely evict it - otherwise we have to leave it there for retransmission
+    if (evict) begin
+      for (int k = 0; k < (riscv::XLEN / 8); k++) begin
+        if (tx_stat_q[rtrn_id].be[k]) begin
+          wbuffer_d[rtrn_ptr].txblock[k] = 1'b0;
+          if (!wbuffer_q[rtrn_ptr].dirty[k]) begin
+            wbuffer_d[rtrn_ptr].valid[k] = 1'b0;
+
+            // NOTE: this is not strictly needed, but makes it much
+            // easier to debug, since no invalid data remains in the buffer
+            // wbuffer_d[rtrn_ptr].data[k*8 +:8] = '0;
+          end
+        end
+      end
+      // if all bytes are evicted, clear the cache status flag and the NI pending flag
+      if (wbuffer_d[rtrn_ptr].valid == 0) begin
+        wbuffer_d[rtrn_ptr].checked = 1'b0;
+        ni_pending_d[rtrn_ptr] = 1'b0;
+      end
+    end
+
+    // request accepted by the miss handler: move the transmitted bytes from dirty to txblock
+    if (miss_req_o && miss_ack_i) begin
+      dirty_rd_en = 1'b1;
+      for (int k = 0; k < (riscv::XLEN / 8); k++) begin
+        if (tx_be[k]) begin
+          wbuffer_d[dirty_ptr].dirty[k]   = 1'b0;
+          wbuffer_d[dirty_ptr].txblock[k] = 1'b1;
+        end
+      end
+    end
+
+    // write new word into the buffer (last update, so it overrides the ones above on conflicts)
+    if (req_port_i.data_req && rdy) begin
+      // in case we have an NI address, need to drain the buffer first
+      // in case we are serving an NI address,  we block until it is written to memory
+      if (!ni_conflict) begin  // no NI request colliding with a pending NI entry
+        wbuffer_wren = 1'b1;
+
+        req_port_o.data_gnt = 1'b1;
+        ni_pending_d[wr_ptr] = is_ni;
+
+        wbuffer_d[wr_ptr].checked = 1'b0;
+        wbuffer_d[wr_ptr].wtag = {
+          req_port_i.address_tag,
+          req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]
+        };
+
+        // merge the enabled bytes: mark them valid and dirty and capture their data/user bits
+        for (int k = 0; k < (riscv::XLEN / 8); k++) begin
+          if (req_port_i.data_be[k]) begin
+            wbuffer_d[wr_ptr].valid[k]     = 1'b1;
+            wbuffer_d[wr_ptr].dirty[k]     = 1'b1;
+            wbuffer_d[wr_ptr].data[k*8+:8] = req_port_i.data_wdata[k*8+:8];
+            if (ariane_pkg::DATA_USER_EN) begin
+              wbuffer_d[wr_ptr].user[k*8+:8] = req_port_i.data_wuser[k*8+:8];
+            end else begin
+              wbuffer_d[wr_ptr].user[k*8+:8] = '0;
+            end
+          end
+        end
+      end
+    end
+  end
+
+
+  ///////////////////////////////////////////////////////
+  // ff's
+  ///////////////////////////////////////////////////////
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      // buffer contents, TX bookkeeping and NI tracking
+      ni_pending_q <= '0;
+      tx_stat_q    <= '{default: '0};
+      wbuffer_q    <= '{default: '0};
+      // tag lookup pipeline (two stages) and the registered lookup tag/result
+      check_en_q   <= '0; check_en_q1  <= '0;
+      check_ptr_q  <= '0; check_ptr_q1 <= '0;
+      rd_hit_oh_q  <= '0; rd_tag_q     <= '0;
+      // registered copy of the cacheline write inputs
+      wr_cl_idx_q  <= '0; wr_cl_vld_q  <= '0;
+    end else begin
+      ni_pending_q <= ni_pending_d;
+      tx_stat_q    <= tx_stat_d;
+      wbuffer_q    <= wbuffer_d;
+      // the *_q1 stage samples the *_q stage, i.e. it lags the *_d value by two cycles
+      check_en_q   <= check_en_d;
+      check_en_q1  <= check_en_q;
+      check_ptr_q  <= check_ptr_d;
+      check_ptr_q1 <= check_ptr_q;
+      rd_hit_oh_q  <= rd_hit_oh_d;
+      rd_tag_q     <= rd_tag_d;
+      wr_cl_idx_q  <= wr_cl_idx_d;
+      wr_cl_vld_q  <= wr_cl_vld_d;
+    end
+  end
+
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+  hot1 :  // while a store is requested, at most one buffer entry may match its address
+  assert property (@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> $onehot0(
+      wbuffer_hit_oh
+  ))
+  else $fatal(1, "[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1");
+  // a TX slot must not be released and allocated in the same cycle
+  tx_status :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) evict && miss_ack_i && miss_req_o |-> (tx_id != rtrn_id))
+  else $fatal(1, "[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");
+  // an evicted TX slot must be marked valid
+  tx_valid0 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> tx_stat_q[rtrn_id].vld)
+  else $fatal(1, "[l1 dcache wbuffer] evicting invalid transaction slot");
+  // the buffer entry referenced by an evicted TX must hold at least one valid byte
+  tx_valid1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid)
+  else $fatal(1, "[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");
+  // a granted store requires either a free entry or a hit on an existing entry
+  write_full :
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((!full) || (|wbuffer_hit_oh)))
+  else $fatal(1, "[l1 dcache wbuffer] cannot write if full or no hit");
+  // tag_valid of the request port is expected to stay low
+  unused0 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.tag_valid)
+  else $fatal(1, "[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted");
+  // kill_req of the request port is expected to stay low
+  unused1 :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.kill_req)
+  else $fatal(1, "[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");
+  // every byte of every entry must be in one of the states {valid, dirty, txblock} listed below
+  for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_assert1
+    for (genvar j = 0; j < (riscv::XLEN / 8); j++) begin : gen_assert2
+      byteStates :
+      assert property (
+        @(posedge clk_i) disable iff (!rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} )
+      else
+        $fatal(
+            1,
+            "[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
+            j,
+            k,
+            wbuffer_q[k].valid[j],
+            wbuffer_q[k].dirty[j],
+            wbuffer_q[k].txblock[j]
+        );
+    end
+  end
+`endif
+  //pragma translate_on
+
+endmodule  // wt_dcache_wbuffer
diff --git a/test/type_param/core/commit_stage.sv b/test/type_param/core/commit_stage.sv
new file mode 100644
index 0000000..8f168be
--- /dev/null
+++ b/test/type_param/core/commit_stage.sv
@@ -0,0 +1,298 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 15.04.2017
+// Description: Commits to the architectural state resulting from the scoreboard.
+
+
+module commit_stage
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic halt_i,  // request to halt the core
+    input logic flush_dcache_i,  // request to flush dcache -> also flush the pipeline
+    output exception_t exception_o,  // take exception to controller
+    output logic dirty_fp_state_o,  // mark the F state as dirty
+    input logic single_step_i,  // we are in single step debug mode
+    // from scoreboard
+    input  scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,     // the instruction we want to commit
+    output logic [CVA6Cfg.NrCommitPorts-1:0]              commit_ack_o,       // acknowledge that we are indeed committing
+    // to register file
+    output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o,  // register file write address
+    output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o,  // register file write data
+    output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o,  // register file write enable
+    output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o,  // floating point register enable
+    // Atomic memory operations
+    input amo_resp_t amo_resp_i,  // result of AMO operation
+    // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
+    output logic [riscv::VLEN-1:0] pc_o,
+    // to/from CSR file
+    output fu_op csr_op_o,  // decoded CSR operation
+    output riscv::xlen_t csr_wdata_o,  // data to write to CSR
+    input riscv::xlen_t csr_rdata_i,  // data to read from CSR
+    input  exception_t                              csr_exception_i,    // exception or interrupt occurred in CSR stage (the same as commit)
+    output logic csr_write_fflags_o,  // write the fflags CSR
+    // commit signals to ex
+    output logic commit_lsu_o,  // commit the pending store
+    input logic commit_lsu_ready_i,  // commit buffer of LSU is ready
+    output logic [TRANS_ID_BITS-1:0] commit_tran_id_o,  // transaction id of first commit port
+    output logic amo_valid_commit_o,  // valid AMO in commit stage
+    input logic no_st_pending_i,  // there is no store pending
+    output logic commit_csr_o,  // commit the pending CSR instruction
+    output logic fence_i_o,  // flush I$ and pipeline
+    output logic fence_o,  // flush D$ and pipeline
+    output logic flush_commit_o,  // request a pipeline flush
+    output logic sfence_vma_o  // flush TLBs and pipeline
+);
+
+  // ila_0 i_ila_commit (
+  //     .clk(clk_i), // input wire clk
+  //     .probe0(commit_instr_i[0].pc), // input wire [63:0]  probe0
+  //     .probe1(commit_instr_i[1].pc), // input wire [63:0]  probe1
+  //     .probe2(commit_instr_i[0].valid), // input wire [0:0]  probe2
+  //     .probe3(commit_instr_i[1].valid), // input wire [0:0]  probe3
+  //     .probe4(commit_ack_o[0]), // input wire [0:0]  probe4
+  //     .probe5(commit_ack_o[0]), // input wire [0:0]  probe5
+  //     .probe6(1'b0), // input wire [0:0]  probe6
+  //     .probe7(1'b0), // input wire [0:0]  probe7
+  //     .probe8(1'b0), // input wire [0:0]  probe8
+  //     .probe9(1'b0) // input wire [0:0]  probe9
+  // );
+
+  for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr
+    assign waddr_o[i] = commit_instr_i[i].rd[4:0];
+  end
+
+  assign pc_o = commit_instr_i[0].pc;
+  // Dirty the FP state if we are committing anything related to the FPU
+  always_comb begin : dirty_fp_state
+    dirty_fp_state_o = 1'b0;
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
+          commit_instr_i[i].op
+      )));
+      // Check if we issued a vector floating-point instruction to the accellerator
+      dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp;
+    end
+  end
+
+  assign commit_tran_id_o = commit_instr_i[0].trans_id;
+
+  logic instr_0_is_amo;
+  assign instr_0_is_amo = is_amo(commit_instr_i[0].op);
+  // -------------------
+  // Commit Instruction
+  // -------------------
+  // write register file or commit instruction in LSU or CSR Buffer
+  always_comb begin : commit
+    // default assignments
+    commit_ack_o[0] = 1'b0;
+
+    amo_valid_commit_o = 1'b0;
+
+    we_gpr_o[0] = 1'b0;
+    we_fpr_o = '{default: 1'b0};
+    commit_lsu_o = 1'b0;
+    commit_csr_o = 1'b0;
+    // amos will commit on port 0
+    wdata_o[0] = (CVA6Cfg.RVA && amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
+    csr_op_o = ADD;  // this corresponds to a CSR NOP
+    csr_wdata_o = {riscv::XLEN{1'b0}};
+    fence_i_o = 1'b0;
+    fence_o = 1'b0;
+    sfence_vma_o = 1'b0;
+    csr_write_fflags_o = 1'b0;
+    flush_commit_o = 1'b0;
+
+    // we will not commit the instruction if we took an exception
+    // and we do not commit the instruction if we requested a halt
+    if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin
+      // we can definitely write the register file
+      // if the instruction is not committing anything the destination
+      commit_ack_o[0] = 1'b1;
+      if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin
+        we_fpr_o[0] = 1'b1;
+      end else begin
+        we_gpr_o[0] = 1'b1;
+      end
+      // check whether the instruction we retire was a store
+      if ((!CVA6Cfg.RVA && commit_instr_i[0].fu == STORE) || (CVA6Cfg.RVA && commit_instr_i[0].fu == STORE && !instr_0_is_amo)) begin
+        // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
+        if (commit_lsu_ready_i) begin
+          commit_ack_o[0] = 1'b1;
+          commit_lsu_o = 1'b1;
+          // stall in case the store buffer is not able to accept anymore instructions
+        end else begin
+          commit_ack_o[0] = 1'b0;
+        end
+      end
+      // ---------
+      // FPU Flags
+      // ---------
+      if (CVA6Cfg.FpPresent) begin
+        if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
+          // write the CSR with potential exception flags from retiring floating point instruction
+          csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
+          csr_write_fflags_o = 1'b1;
+          commit_ack_o[0] = 1'b1;
+        end
+      end
+      // ---------
+      // CSR Logic
+      // ---------
+      // check whether the instruction we retire was a CSR instruction and it did not
+      // throw an exception
+      if (commit_instr_i[0].fu == CSR) begin
+        // write the CSR file
+        csr_op_o    = commit_instr_i[0].op;
+        csr_wdata_o = commit_instr_i[0].result;
+        if (!csr_exception_i.valid) begin
+          commit_csr_o = 1'b1;
+          wdata_o[0] = csr_rdata_i;
+          commit_ack_o[0] = 1'b1;
+        end else begin
+          commit_ack_o[0] = 1'b0;
+          we_gpr_o[0] = 1'b0;
+        end
+      end
+      // ------------------
+      // SFENCE.VMA Logic
+      // ------------------
+      // sfence.vma is idempotent so we can safely re-execute it after returning
+      // from interrupt service routine
+      // check if this instruction was a SFENCE_VMA
+      if (CVA6Cfg.RVS && commit_instr_i[0].op == SFENCE_VMA) begin
+        // no store pending so we can flush the TLBs and pipeline
+        sfence_vma_o = no_st_pending_i;
+        // wait for the store buffer to drain until flushing the pipeline
+        commit_ack_o[0] = no_st_pending_i;
+      end
+      // ------------------
+      // FENCE.I Logic
+      // ------------------
+      // fence.i is idempotent so we can safely re-execute it after returning
+      // from interrupt service routine
+      // Fence synchronizes data and instruction streams. That means that we need to flush the private icache
+      // and the private dcache. This is the most expensive instruction.
+      if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && DCACHE_TYPE == int'(config_pkg::WB) && commit_instr_i[0].fu != STORE)) begin
+        commit_ack_o[0] = no_st_pending_i;
+        // tell the controller to flush the I$
+        fence_i_o = no_st_pending_i;
+      end
+      // ------------------
+      // FENCE Logic
+      // ------------------
+      // fence is idempotent so we can safely re-execute it after returning
+      // from interrupt service routine
+      if (commit_instr_i[0].op == FENCE) begin
+        commit_ack_o[0] = no_st_pending_i;
+        // tell the controller to flush the D$
+        fence_o = no_st_pending_i;
+      end
+      // ------------------
+      // AMO
+      // ------------------
+      if (CVA6Cfg.RVA && instr_0_is_amo) begin
+        // AMO finished
+        commit_ack_o[0] = amo_resp_i.ack;
+        // flush the pipeline
+        flush_commit_o = amo_resp_i.ack;
+        amo_valid_commit_o = 1'b1;
+        we_gpr_o[0] = amo_resp_i.ack;
+      end
+    end
+
+    if (CVA6Cfg.NrCommitPorts > 1) begin
+
+      commit_ack_o[1] = 1'b0;
+      we_gpr_o[1]     = 1'b0;
+      wdata_o[1]      = commit_instr_i[1].result;
+
+      // -----------------
+      // Commit Port 2
+      // -----------------
+      // check if the second instruction can be committed as well and the first wasn't a CSR instruction
+      // also if we are in single step mode don't retire the second instruction
+      if (commit_ack_o[0] && commit_instr_i[1].valid
+                                && !halt_i
+                                && !(commit_instr_i[0].fu inside {CSR})
+                                && !flush_dcache_i
+                                && !instr_0_is_amo
+                                && !single_step_i) begin
+        // only if the first instruction didn't throw an exception and this instruction won't throw an exception
+        // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
+        if (!exception_o.valid && !commit_instr_i[1].ex.valid
+                                       && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin
+
+          if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op)) we_fpr_o[1] = 1'b1;
+          else we_gpr_o[1] = 1'b1;
+
+          commit_ack_o[1] = 1'b1;
+
+          // additionally check if we are retiring an FPU instruction because we need to make sure that we write all
+          // exception flags
+          if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
+            if (csr_write_fflags_o)
+              csr_wdata_o = {
+                {riscv::XLEN - 5{1'b0}},
+                (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])
+              };
+            else csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]};
+
+            csr_write_fflags_o = 1'b1;
+          end
+        end
+      end
+    end
+  end
+
+  // -----------------------------
+  // Exception & Interrupt Logic
+  // -----------------------------
+  // here we know for sure that we are taking the exception
+  always_comb begin : exception_handling
+    // Multiple simultaneous interrupts and traps at the same privilege level are handled in the following decreasing
+    // priority order: external interrupts, software interrupts, timer interrupts, then finally any synchronous traps. (1.10 p.30)
+    // interrupts are correctly prioritized in the CSR reg file, exceptions are prioritized here
+    exception_o.valid = 1'b0;
+    exception_o.cause = '0;
+    exception_o.tval  = '0;
+    // we need a valid instruction in the commit stage
+    if (commit_instr_i[0].valid) begin
+      // ------------------------
+      // check for CSR exception
+      // ------------------------
+      if (csr_exception_i.valid) begin
+        exception_o      = csr_exception_i;
+        // if no earlier exception happened the commit instruction will still contain
+        // the instruction bits from the ID stage. If a earlier exception happened we don't care
+        // as we will overwrite it anyway in the next IF bl
+        exception_o.tval = commit_instr_i[0].ex.tval;
+      end
+      // ------------------------
+      // Earlier Exceptions
+      // ------------------------
+      // but we give precedence to exceptions which happened earlier e.g.: instruction page
+      // faults for example
+      if (commit_instr_i[0].ex.valid) begin
+        exception_o = commit_instr_i[0].ex;
+      end
+    end
+    // Don't take any exceptions iff:
+    // - If we halted the processor
+    if (halt_i) begin
+      exception_o.valid = 1'b0;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/compressed_decoder.sv b/test/type_param/core/compressed_decoder.sv
new file mode 100644
index 0000000..c218a83
--- /dev/null
+++ b/test/type_param/core/compressed_decoder.sv
@@ -0,0 +1,935 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.                                          //
+//
+// Author:         Florian Zaruba - zarubaf@iis.ee.ethz.ch
+// Engineer:       Sven Stucki - svstucki@student.ethz.ch
+//
+// Design Name:    Compressed instruction decoder
+// Project Name:   zero-riscy
+// Language:       SystemVerilog
+//
+// Description:    Decodes RISC-V compressed (16-bit) instructions into their
+//                 32-bit uncompressed equivalents. This module is fully combinatorial.
+
+
+module compressed_decoder #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic [31:0] instr_i,          // fetched instruction word (compressed encoding in [15:0])
+    output logic [31:0] instr_o,          // expanded instruction, or instr_i if illegal/uncompressed
+    output logic        illegal_instr_o,  // set for reserved or unsupported compressed encodings
+    output logic        is_compressed_o   // set when instr_i[1:0] selects quadrant C0, C1 or C2
+);
+
+  // -------------------
+  // Compressed Decoder
+  // -------------------
+  // Expands the compressed instruction held in instr_i[15:0] into its 32-bit equivalent.
+  // Words whose instr_i[1:0] matches none of the C0/C1/C2 quadrants pass through unchanged.
+  always_comb begin
+    // defaults: assume a legal compressed instruction and pass the input through
+    illegal_instr_o = 1'b0;
+    is_compressed_o = 1'b1;
+    instr_o         = instr_i;
+
+    // I: |    imm[11:0]    | rs1 | funct3 |    rd    | opcode |
+    // S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode |
+    unique case (instr_i[1:0])
+      // C0
+      riscv::OpcodeC0: begin
+        unique case (instr_i[15:13])
+          riscv::OpcodeC0Addi4spn: begin
+            // c.addi4spn -> addi rd', x2, imm
+            instr_o = {
+              2'b0,
+              instr_i[10:7],
+              instr_i[12:11],
+              instr_i[5],
+              instr_i[6],
+              2'b00,
+              5'h02,
+              3'b000,
+              2'b01,
+              instr_i[4:2],
+              riscv::OpcodeOpImm
+            };
+            if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
+          end
+
+          riscv::OpcodeC0Fld: begin
+            if (CVA6Cfg.FpPresent) begin
+              // c.fld -> fld rd', imm(rs1')
+              // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
+              instr_o = {
+                4'b0,
+                instr_i[6:5],
+                instr_i[12:10],
+                3'b000,
+                2'b01,
+                instr_i[9:7],
+                3'b011,
+                2'b01,
+                instr_i[4:2],
+                riscv::OpcodeLoadFp
+              };
+            end else begin
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC0Lw: begin
+            // c.lw -> lw rd', imm(rs1')
+            instr_o = {
+              5'b0,
+              instr_i[5],
+              instr_i[12:10],
+              instr_i[6],
+              2'b00,
+              2'b01,
+              instr_i[9:7],
+              3'b010,
+              2'b01,
+              instr_i[4:2],
+              riscv::OpcodeLoad
+            };
+          end
+
+          riscv::OpcodeC0Ld: begin
+            // RV64
+            //   c.ld -> ld rd', imm(rs1')
+            // RV32
+            //   c.flw -> flw fprd', imm(rs1')
+            if (riscv::IS_XLEN64) begin
+              // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
+              instr_o = {
+                4'b0,
+                instr_i[6:5],
+                instr_i[12:10],
+                3'b000,
+                2'b01,
+                instr_i[9:7],
+                3'b011,
+                2'b01,
+                instr_i[4:2],
+                riscv::OpcodeLoad
+              };
+            end else begin
+              if (CVA6Cfg.FpPresent) begin
+                // CFLW: | funct3 (change to LW) | imm[5:3] | rs1' | imm[2|6] | rd' | C0 |
+                instr_o = {
+                  5'b0,
+                  instr_i[5],
+                  instr_i[12:10],
+                  instr_i[6],
+                  2'b00,
+                  2'b01,
+                  instr_i[9:7],
+                  3'b010,
+                  2'b01,
+                  instr_i[4:2],
+                  riscv::OpcodeLoadFp
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end
+          end
+
+          riscv::OpcodeC0Zcb: begin
+            if (CVA6Cfg.RVZCB) begin
+              unique case (instr_i[12:10])
+                3'b000: begin
+                  // c.lbu -> lbu rd', uimm(rs1')
+                  instr_o = {
+                    10'b0,
+                    instr_i[5],
+                    instr_i[6],
+                    2'b01,
+                    instr_i[9:7],
+                    3'b100,
+                    2'b01,
+                    instr_i[4:2],
+                    riscv::OpcodeLoad
+                  };
+                end
+
+                3'b001: begin
+                  if (instr_i[6]) begin
+                    // c.lh -> lh rd', uimm(rs1')
+                    instr_o = {
+                      10'b0,
+                      instr_i[5],
+                      1'b0,
+                      2'b01,
+                      instr_i[9:7],
+                      3'b001,
+                      2'b01,
+                      instr_i[4:2],
+                      riscv::OpcodeLoad
+                    };
+                  end else begin
+                    // c.lhu -> lhu rd', uimm(rs1')
+                    instr_o = {
+                      10'b0,
+                      instr_i[5],
+                      1'b0,
+                      2'b01,
+                      instr_i[9:7],
+                      3'b101,
+                      2'b01,
+                      instr_i[4:2],
+                      riscv::OpcodeLoad
+                    };
+                  end
+                end
+
+                3'b010: begin
+                  // c.sb -> sb rs2', uimm(rs1')
+                  instr_o = {
+                    7'b0,
+                    2'b01,
+                    instr_i[4:2],
+                    2'b01,
+                    instr_i[9:7],
+                    3'b000,
+                    3'b0,
+                    instr_i[5],
+                    instr_i[6],
+                    riscv::OpcodeStore
+                  };
+                end
+
+                3'b011: begin
+                  // c.sh -> sh rs2', uimm(rs1')
+                  instr_o = {
+                    7'b0,
+                    2'b01,
+                    instr_i[4:2],
+                    2'b01,
+                    instr_i[9:7],
+                    3'b001,
+                    3'b0,
+                    instr_i[5],
+                    1'b0,
+                    riscv::OpcodeStore
+                  };
+                end
+
+                default: begin
+                  illegal_instr_o = 1'b1;
+                end
+              endcase
+
+            end else begin
+              instr_o = instr_i;
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC0Fsd: begin
+            if (CVA6Cfg.FpPresent) begin
+              // c.fsd -> fsd rs2', imm(rs1')
+              instr_o = {
+                4'b0,
+                instr_i[6:5],
+                instr_i[12],
+                2'b01,
+                instr_i[4:2],
+                2'b01,
+                instr_i[9:7],
+                3'b011,
+                instr_i[11:10],
+                3'b000,
+                riscv::OpcodeStoreFp
+              };
+            end else begin
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC0Sw: begin
+            // c.sw -> sw rs2', imm(rs1')
+            instr_o = {
+              5'b0,
+              instr_i[5],
+              instr_i[12],
+              2'b01,
+              instr_i[4:2],
+              2'b01,
+              instr_i[9:7],
+              3'b010,
+              instr_i[11:10],
+              instr_i[6],
+              2'b00,
+              riscv::OpcodeStore
+            };
+          end
+
+          riscv::OpcodeC0Sd: begin
+            // RV64
+            //   c.sd -> sd rs2', imm(rs1')
+            // RV32
+            //   c.fsw -> fsw fprs2', imm(rs1')
+            if (riscv::IS_XLEN64) begin
+              instr_o = {
+                4'b0,
+                instr_i[6:5],
+                instr_i[12],
+                2'b01,
+                instr_i[4:2],
+                2'b01,
+                instr_i[9:7],
+                3'b011,
+                instr_i[11:10],
+                3'b000,
+                riscv::OpcodeStore
+              };
+            end else begin
+              if (CVA6Cfg.FpPresent) begin
+                instr_o = {
+                  5'b0,
+                  instr_i[5],
+                  instr_i[12],
+                  2'b01,
+                  instr_i[4:2],
+                  2'b01,
+                  instr_i[9:7],
+                  3'b010,
+                  instr_i[11:10],
+                  instr_i[6],
+                  2'b00,
+                  riscv::OpcodeStoreFp
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end
+          end
+
+          default: begin
+            illegal_instr_o = 1'b1;
+          end
+        endcase
+      end
+
+      // C1
+      riscv::OpcodeC1: begin
+        unique case (instr_i[15:13])
+          riscv::OpcodeC1Addi: begin
+            // c.addi -> addi rd, rd, nzimm
+            // c.nop -> addi 0, 0, 0
+            instr_o = {
+              {6{instr_i[12]}},
+              instr_i[12],
+              instr_i[6:2],
+              instr_i[11:7],
+              3'b0,
+              instr_i[11:7],
+              riscv::OpcodeOpImm
+            };
+          end
+
+          riscv::OpcodeC1Addiw: begin  // or riscv::OpcodeC1Jal for RV32IC
+            if (riscv::IS_XLEN64) begin
+              // c.addiw -> addiw rd, rd, nzimm for RV64IC
+              if (instr_i[11:7] != 5'h0) begin  // only valid if the destination is not r0
+                instr_o = {
+                  {6{instr_i[12]}},
+                  instr_i[12],
+                  instr_i[6:2],
+                  instr_i[11:7],
+                  3'b0,
+                  instr_i[11:7],
+                  riscv::OpcodeOpImm32
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end else begin
+              // c.jal -> jal x1, imm for RV32IC only
+              instr_o = {
+                instr_i[12],
+                instr_i[8],
+                instr_i[10:9],
+                instr_i[6],
+                instr_i[7],
+                instr_i[2],
+                instr_i[11],
+                instr_i[5:3],
+                {9{instr_i[12]}},
+                5'b1,
+                riscv::OpcodeJal
+              };
+            end
+          end
+
+          riscv::OpcodeC1Li: begin
+            // c.li -> addi rd, x0, nzimm
+            instr_o = {
+              {6{instr_i[12]}},
+              instr_i[12],
+              instr_i[6:2],
+              5'b0,
+              3'b0,
+              instr_i[11:7],
+              riscv::OpcodeOpImm
+            };
+          end
+
+          riscv::OpcodeC1LuiAddi16sp: begin
+            // c.lui -> lui rd, imm
+            instr_o = {{15{instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui};
+
+            if (instr_i[11:7] == 5'h02) begin
+              // c.addi16sp -> addi x2, x2, nzimm
+              instr_o = {
+                {3{instr_i[12]}},
+                instr_i[4:3],
+                instr_i[5],
+                instr_i[2],
+                instr_i[6],
+                4'b0,
+                5'h02,
+                3'b000,
+                5'h02,
+                riscv::OpcodeOpImm
+              };
+            end
+
+            if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
+          end
+
+          riscv::OpcodeC1MiscAlu: begin
+            unique case (instr_i[11:10])
+              2'b00, 2'b01: begin
+                // 00: c.srli -> srli rd, rd, shamt
+                // 01: c.srai -> srai rd, rd, shamt
+                instr_o = {
+                  1'b0,
+                  instr_i[10],
+                  4'b0,
+                  instr_i[12],
+                  instr_i[6:2],
+                  2'b01,
+                  instr_i[9:7],
+                  3'b101,
+                  2'b01,
+                  instr_i[9:7],
+                  riscv::OpcodeOpImm
+                };
+              end
+
+              2'b10: begin
+                // c.andi -> andi rd, rd, imm
+                instr_o = {
+                  {6{instr_i[12]}},
+                  instr_i[12],
+                  instr_i[6:2],
+                  2'b01,
+                  instr_i[9:7],
+                  3'b111,
+                  2'b01,
+                  instr_i[9:7],
+                  riscv::OpcodeOpImm
+                };
+              end
+
+              2'b11: begin
+                unique case ({instr_i[12], instr_i[6:5]})
+                  3'b000: begin
+                    // c.sub -> sub rd', rd', rs2'
+                    instr_o = {
+                      2'b01,
+                      5'b0,
+                      2'b01,
+                      instr_i[4:2],
+                      2'b01,
+                      instr_i[9:7],
+                      3'b000,
+                      2'b01,
+                      instr_i[9:7],
+                      riscv::OpcodeOp
+                    };
+                  end
+
+                  3'b001: begin
+                    // c.xor -> xor rd', rd', rs2'
+                    instr_o = {
+                      7'b0,
+                      2'b01,
+                      instr_i[4:2],
+                      2'b01,
+                      instr_i[9:7],
+                      3'b100,
+                      2'b01,
+                      instr_i[9:7],
+                      riscv::OpcodeOp
+                    };
+                  end
+
+                  3'b010: begin
+                    // c.or  -> or  rd', rd', rs2'
+                    instr_o = {
+                      7'b0,
+                      2'b01,
+                      instr_i[4:2],
+                      2'b01,
+                      instr_i[9:7],
+                      3'b110,
+                      2'b01,
+                      instr_i[9:7],
+                      riscv::OpcodeOp
+                    };
+                  end
+
+                  3'b011: begin
+                    // c.and -> and rd', rd', rs2'
+                    instr_o = {
+                      7'b0,
+                      2'b01,
+                      instr_i[4:2],
+                      2'b01,
+                      instr_i[9:7],
+                      3'b111,
+                      2'b01,
+                      instr_i[9:7],
+                      riscv::OpcodeOp
+                    };
+                  end
+
+                  3'b100: begin
+                    if (riscv::IS_XLEN64) begin
+                      // c.subw -> subw rd', rd', rs2'
+                      instr_o = {
+                        2'b01,
+                        5'b0,
+                        2'b01,
+                        instr_i[4:2],
+                        2'b01,
+                        instr_i[9:7],
+                        3'b000,
+                        2'b01,
+                        instr_i[9:7],
+                        riscv::OpcodeOp32
+                      };
+                    end else begin
+                      illegal_instr_o = 1'b1;
+                    end
+                  end
+
+                  3'b101: begin
+                    if (riscv::IS_XLEN64) begin
+                      // c.addw -> addw rd', rd', rs2'
+                      instr_o = {
+                        2'b00,
+                        5'b0,
+                        2'b01,
+                        instr_i[4:2],
+                        2'b01,
+                        instr_i[9:7],
+                        3'b000,
+                        2'b01,
+                        instr_i[9:7],
+                        riscv::OpcodeOp32
+                      };
+                    end else begin
+                      illegal_instr_o = 1'b1;
+                    end
+                  end
+
+                  3'b110: begin
+                    if (CVA6Cfg.RVZCB) begin
+                      // c.mul -> mul rd', rd', rs2'
+                      instr_o = {
+                        6'b0,
+                        1'b1,
+                        2'b01,
+                        instr_i[4:2],
+                        2'b01,
+                        instr_i[9:7],
+                        3'b000,
+                        2'b01,
+                        instr_i[9:7],
+                        riscv::OpcodeOp
+                      };
+                    end else begin
+                      instr_o = instr_i;
+                      illegal_instr_o = 1'b1;
+                    end
+                  end
+
+                  3'b111: begin
+                    // Zcb unary operations, selected by instr_i[4:2]
+                    if (CVA6Cfg.RVZCB) begin
+                      unique case (instr_i[4:2])
+                        3'b000: begin
+                          // c.zext.b -> andi rd', rd', 0xff
+                          instr_o = {
+                            4'b0,
+                            8'hFF,
+                            2'b01,
+                            instr_i[9:7],
+                            3'b111,
+                            2'b01,
+                            instr_i[9:7],
+                            riscv::OpcodeOpImm
+                          };
+                        end
+
+                        3'b001: begin
+                          if (CVA6Cfg.RVB) begin
+                            // c.sext.b -> sext.b rd', rd'
+                            instr_o = {
+                              7'h30,
+                              5'h4,
+                              2'b01,
+                              instr_i[9:7],
+                              3'b001,
+                              2'b01,
+                              instr_i[9:7],
+                              riscv::OpcodeOpImm
+                            };
+                          end else illegal_instr_o = 1'b1;
+                        end
+
+                        3'b010: begin
+                          if (CVA6Cfg.RVB) begin
+                            // c.zext.h -> zext.h rd', rd'
+                            if (riscv::IS_XLEN64) begin
+                              instr_o = {
+                                7'h4,
+                                5'h0,
+                                2'b01,
+                                instr_i[9:7],
+                                3'b100,
+                                2'b01,
+                                instr_i[9:7],
+                                riscv::OpcodeOp32
+                              };
+                            end else begin
+                              instr_o = {
+                                7'h4,
+                                5'h0,
+                                2'b01,
+                                instr_i[9:7],
+                                3'b100,
+                                2'b01,
+                                instr_i[9:7],
+                                riscv::OpcodeOp
+                              };
+                            end
+                          end else illegal_instr_o = 1'b1;
+                        end
+
+                        3'b011: begin
+                          if (CVA6Cfg.RVB) begin
+                            // c.sext.h -> sext.h rd', rd'
+                            instr_o = {
+                              7'h30,
+                              5'h5,
+                              2'b01,
+                              instr_i[9:7],
+                              3'b001,
+                              2'b01,
+                              instr_i[9:7],
+                              riscv::OpcodeOpImm
+                            };
+                          end else illegal_instr_o = 1'b1;
+                        end
+
+                        3'b100: begin
+                          if (CVA6Cfg.RVB) begin
+                            // c.zext.w -> add.uw
+                            if (riscv::IS_XLEN64) begin
+                              instr_o = {
+                                7'h4,
+                                5'h0,
+                                2'b01,
+                                instr_i[9:7],
+                                3'b000,
+                                2'b01,
+                                instr_i[9:7],
+                                riscv::OpcodeOp32
+                              };
+                            end else begin
+                              illegal_instr_o = 1'b1;
+                            end
+                          end else illegal_instr_o = 1'b1;
+                        end
+
+                        3'b101: begin
+                          // c.not -> xori rd', rd', -1
+                          instr_o = {
+                            12'hFFF,
+                            2'b01,
+                            instr_i[9:7],
+                            3'b100,
+                            2'b01,
+                            instr_i[9:7],
+                            riscv::OpcodeOpImm
+                          };
+                        end
+
+                        default: begin
+                          instr_o = instr_i;
+                          illegal_instr_o = 1'b1;
+                        end
+                      endcase
+                    end else begin
+                      // Zcb disabled: reserved encoding, flag it like the c.mul slot above
+                      instr_o = instr_i;
+                      illegal_instr_o = 1'b1;
+                    end
+                  end
+                endcase
+              end
+            endcase
+          end
+
+          riscv::OpcodeC1J: begin
+            // 101: c.j   -> jal x0, imm
+            instr_o = {
+              instr_i[12],
+              instr_i[8],
+              instr_i[10:9],
+              instr_i[6],
+              instr_i[7],
+              instr_i[2],
+              instr_i[11],
+              instr_i[5:3],
+              {9{instr_i[12]}},
+              4'b0,
+              ~instr_i[15],
+              riscv::OpcodeJal
+            };
+          end
+
+          riscv::OpcodeC1Beqz, riscv::OpcodeC1Bnez: begin
+            // 0: c.beqz -> beq rs1', x0, imm
+            // 1: c.bnez -> bne rs1', x0, imm
+            instr_o = {
+              {4{instr_i[12]}},
+              instr_i[6:5],
+              instr_i[2],
+              5'b0,
+              2'b01,
+              instr_i[9:7],
+              2'b00,
+              instr_i[13],
+              instr_i[11:10],
+              instr_i[4:3],
+              instr_i[12],
+              riscv::OpcodeBranch
+            };
+          end
+        endcase
+      end
+
+      // C2
+      riscv::OpcodeC2: begin
+        unique case (instr_i[15:13])
+          riscv::OpcodeC2Slli: begin
+            // c.slli -> slli rd, rd, shamt
+            instr_o = {
+              6'b0,
+              instr_i[12],
+              instr_i[6:2],
+              instr_i[11:7],
+              3'b001,
+              instr_i[11:7],
+              riscv::OpcodeOpImm
+            };
+          end
+
+          riscv::OpcodeC2Fldsp: begin
+            if (CVA6Cfg.FpPresent) begin
+              // c.fldsp -> fld rd, imm(x2)
+              instr_o = {
+                3'b0,
+                instr_i[4:2],
+                instr_i[12],
+                instr_i[6:5],
+                3'b000,
+                5'h02,
+                3'b011,
+                instr_i[11:7],
+                riscv::OpcodeLoadFp
+              };
+            end else begin
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC2Lwsp: begin
+            // c.lwsp -> lw rd, imm(x2)
+            instr_o = {
+              4'b0,
+              instr_i[3:2],
+              instr_i[12],
+              instr_i[6:4],
+              2'b00,
+              5'h02,
+              3'b010,
+              instr_i[11:7],
+              riscv::OpcodeLoad
+            };
+            if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+          end
+
+          riscv::OpcodeC2Ldsp: begin
+            // RV64
+            //   c.ldsp -> ld rd, imm(x2)
+            // RV32
+            //   c.flwsp -> flw fprd, imm(x2)
+            if (riscv::IS_XLEN64) begin
+              instr_o = {
+                3'b0,
+                instr_i[4:2],
+                instr_i[12],
+                instr_i[6:5],
+                3'b000,
+                5'h02,
+                3'b011,
+                instr_i[11:7],
+                riscv::OpcodeLoad
+              };
+              if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
+            end else begin
+              if (CVA6Cfg.FpPresent) begin
+                instr_o = {
+                  4'b0,
+                  instr_i[3:2],
+                  instr_i[12],
+                  instr_i[6:4],
+                  2'b00,
+                  5'h02,
+                  3'b010,
+                  instr_i[11:7],
+                  riscv::OpcodeLoadFp
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end
+          end
+
+          riscv::OpcodeC2JalrMvAdd: begin
+            if (instr_i[12] == 1'b0) begin
+              // c.mv -> add rd/rs1, x0, rs2
+              instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOp};
+
+              if (instr_i[6:2] == 5'b0) begin
+                // c.jr -> jalr x0, rd/rs1, 0
+                instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, riscv::OpcodeJalr};
+                // rs1 != 0
+                illegal_instr_o = (instr_i[11:7] != '0) ? 1'b0 : 1'b1;
+              end
+            end else begin
+              // c.add -> add rd, rd, rs2
+              instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOp};
+
+              if (instr_i[6:2] == 5'b0) begin
+                if (instr_i[11:7] == 5'b0) begin
+                  // c.ebreak -> ebreak
+                  instr_o = {32'h00_10_00_73};
+                end else begin
+                  // c.jalr -> jalr x1, rs1, 0
+                  instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, riscv::OpcodeJalr};
+                end
+              end
+            end
+          end
+
+          riscv::OpcodeC2Fsdsp: begin
+            if (CVA6Cfg.FpPresent) begin
+              // c.fsdsp -> fsd rs2, imm(x2)
+              instr_o = {
+                3'b0,
+                instr_i[9:7],
+                instr_i[12],
+                instr_i[6:2],
+                5'h02,
+                3'b011,
+                instr_i[11:10],
+                3'b000,
+                riscv::OpcodeStoreFp
+              };
+            end else begin
+              illegal_instr_o = 1'b1;
+            end
+          end
+
+          riscv::OpcodeC2Swsp: begin
+            // c.swsp -> sw rs2, imm(x2)
+            instr_o = {
+              4'b0,
+              instr_i[8:7],
+              instr_i[12],
+              instr_i[6:2],
+              5'h02,
+              3'b010,
+              instr_i[11:9],
+              2'b00,
+              riscv::OpcodeStore
+            };
+          end
+
+          riscv::OpcodeC2Sdsp: begin
+            // RV64
+            //   c.sdsp -> sd rs2, imm(x2)
+            // RV32
+            //   c.fswsp -> fsw fprs2, imm(x2)
+            if (riscv::IS_XLEN64) begin
+              instr_o = {
+                3'b0,
+                instr_i[9:7],
+                instr_i[12],
+                instr_i[6:2],
+                5'h02,
+                3'b011,
+                instr_i[11:10],
+                3'b000,
+                riscv::OpcodeStore
+              };
+            end else begin
+              if (CVA6Cfg.FpPresent) begin
+                instr_o = {
+                  4'b0,
+                  instr_i[8:7],
+                  instr_i[12],
+                  instr_i[6:2],
+                  5'h02,
+                  3'b010,
+                  instr_i[11:9],
+                  2'b00,
+                  riscv::OpcodeStoreFp
+                };
+              end else begin
+                illegal_instr_o = 1'b1;
+              end
+            end
+          end
+
+          default: begin
+            illegal_instr_o = 1'b1;
+          end
+        endcase
+      end
+
+      // normal instruction
+      default: is_compressed_o = 1'b0;
+    endcase
+
+    // If the instruction was flagged illegal, output the offending instruction word unmodified
+    if (illegal_instr_o) begin
+      instr_o = instr_i;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/controller.sv b/test/type_param/core/controller.sv
new file mode 100644
index 0000000..c2db321
--- /dev/null
+++ b/test/type_param/core/controller.sv
@@ -0,0 +1,194 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.05.2017
+// Description: Flush controller
+
+
+module controller
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic clk_i,  // Clock, registers update on the rising edge
+    input  logic rst_ni,  // Asynchronous reset, active low
+    output logic set_pc_commit_o,         // Set PC on PC Gen
+    output logic flush_if_o,              // Flush the IF stage
+    output logic flush_unissued_instr_o,  // Flush un-issued instructions of the scoreboard
+    output logic flush_id_o,              // Flush ID stage
+    output logic flush_ex_o,              // Flush EX stage
+    output logic flush_bp_o,              // Flush branch predictors
+    output logic flush_icache_o,          // Flush ICache
+    output logic flush_dcache_o,          // Flush DCache (registered copy of flush_dcache)
+    input  logic flush_dcache_ack_i,      // Acknowledge the whole DCache Flush
+    output logic flush_tlb_o,             // Flush TLBs
+
+    input logic halt_csr_i,  // Halt request from CSR (WFI instruction)
+    input logic halt_acc_i,  // Halt request from accelerator dispatcher
+    output logic halt_o,  // Halt signal to commit stage
+    input logic eret_i,  // Return from exception
+    input logic ex_valid_i,  // We got an exception, flush the pipeline
+    input logic set_debug_pc_i,  // set the debug pc from CSR
+    input  bp_resolve_t     resolved_branch_i,      // We got a resolved branch, check if we need to flush the front-end
+    input logic flush_csr_i,  // We got an instruction which altered the CSR, flush the pipeline
+    input logic fence_i_i,  // fence.i in
+    input logic fence_i,  // fence in
+    input logic sfence_vma_i,  // We got an instruction to flush the TLBs and pipeline
+    input logic flush_commit_i,  // Flush request from commit stage
+    input logic flush_acc_i  // Flush request from accelerator
+);
+
+  // active fence - high if we are currently flushing the dcache
+  logic fence_active_d, fence_active_q;
+  logic flush_dcache;  // combinational dcache flush request, registered into flush_dcache_o
+
+  // ------------
+  // Flush CTRL
+  // ------------
+  always_comb begin : flush_ctrl
+    // defaults: hold the fence state and request no flush; the conditions below
+    // are evaluated in order, so a later block overrides an earlier assignment
+    fence_active_d         = fence_active_q;
+    set_pc_commit_o        = 1'b0;
+    flush_if_o             = 1'b0;
+    flush_unissued_instr_o = 1'b0;
+    flush_id_o             = 1'b0;
+    flush_ex_o             = 1'b0;
+    flush_dcache           = 1'b0;
+    flush_icache_o         = 1'b0;
+    flush_tlb_o            = 1'b0;
+    flush_bp_o             = 1'b0;
+    // ------------
+    // Mis-predict
+    // ------------
+    // flush on mispredict
+    if (resolved_branch_i.is_mispredict) begin
+      // flush only un-issued instructions
+      flush_unissued_instr_o = 1'b1;
+      // and if stage
+      flush_if_o             = 1'b1;
+    end
+
+    // ---------------------------------
+    // FENCE
+    // ---------------------------------
+    if (fence_i) begin
+      // this can be seen as a CSR instruction with side-effect
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+      // the dcache flush is only requested when DCACHE_TYPE is config_pkg::WB;
+      // it is not needed when we have a write-through cache
+      if (DCACHE_TYPE == int'(config_pkg::WB)) begin
+        flush_dcache   = 1'b1;
+        fence_active_d = 1'b1;
+      end
+    end
+
+    // ---------------------------------
+    // FENCE.I
+    // ---------------------------------
+    if (fence_i_i) begin
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+      flush_icache_o         = 1'b1;
+      // the dcache flush is only requested when DCACHE_TYPE is config_pkg::WB;
+      // it is not needed when we have a write-through cache
+      if (DCACHE_TYPE == int'(config_pkg::WB)) begin
+        flush_dcache   = 1'b1;
+        fence_active_d = 1'b1;
+      end
+    end
+
+    // the acknowledge handshake only exists when DCACHE_TYPE is config_pkg::WB;
+    // it is not needed when we have a write-through cache
+    if (DCACHE_TYPE == int'(config_pkg::WB)) begin
+      // wait for the acknowledge here
+      if (flush_dcache_ack_i && fence_active_q) begin
+        fence_active_d = 1'b0;
+        // keep the flush dcache signal high as long as we didn't get the acknowledge from the cache
+      end else if (fence_active_q) begin
+        flush_dcache = 1'b1;
+      end
+    end
+    // ---------------------------------
+    // SFENCE.VMA
+    // ---------------------------------
+    if (CVA6Cfg.RVS && sfence_vma_i) begin
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+
+      flush_tlb_o            = 1'b1;
+    end
+
+    // Set PC to commit stage and flush pipeline
+    if (flush_csr_i || flush_acc_i) begin
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+    end else if (CVA6Cfg.RVA && flush_commit_i) begin
+      set_pc_commit_o        = 1'b1;
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+    end
+
+    // ---------------------------------
+    // 1. Exception
+    // 2. Return from exception (or set_debug_pc_i when CVA6Cfg.DebugEn is set)
+    // ---------------------------------
+    if (ex_valid_i || eret_i || (CVA6Cfg.DebugEn && set_debug_pc_i)) begin
+      // don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
+      // for the PC Gen stage but instead tells it to take the PC we gave it
+      set_pc_commit_o        = 1'b0;  // evaluated last, so this overrides every request above
+      flush_if_o             = 1'b1;
+      flush_unissued_instr_o = 1'b1;
+      flush_id_o             = 1'b1;
+      flush_ex_o             = 1'b1;
+      // this potentially reduces performance, but is needed
+      // to suppress speculative fetches to virtual memory from
+      // machine mode. TODO: remove when PMA checkers have been
+      // added to the system
+      flush_bp_o             = 1'b1;
+    end
+  end
+
+  // ----------------------
+  // Halt Logic
+  // ----------------------
+  always_comb begin
+    // halt on a CSR or accelerator halt request, or while a fence is active (config_pkg::WB only)
+    halt_o = halt_csr_i || halt_acc_i || (DCACHE_TYPE == int'(config_pkg::WB) && fence_active_q);
+  end
+
+  // ----------------------
+  // Registers
+  // ----------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      fence_active_q <= 1'b0;
+      flush_dcache_o <= 1'b0;
+    end else begin
+      fence_active_q <= fence_active_d;
+      // register on the flush signal, this signal might be critical
+      flush_dcache_o <= flush_dcache;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/csr_buffer.sv b/test/type_param/core/csr_buffer.sv
new file mode 100644
index 0000000..57be04d
--- /dev/null
+++ b/test/type_param/core/csr_buffer.sv
@@ -0,0 +1,76 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 05.05.2017
+// Description: Buffer to hold CSR address, this acts like a functional unit
+//              to the scoreboard.
+
+
+module csr_buffer
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,   // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // Clears the valid bit of the buffered entry
+
+    input fu_data_t fu_data_i,  // operand_a is forwarded as result, operand_b[11:0] is the CSR address
+
+    output logic                csr_ready_o,   // FU is ready e.g. not busy
+    input  logic                csr_valid_i,   // Input is valid
+    output riscv::xlen_t        csr_result_o,
+    input  logic                csr_commit_i,  // commit the pending CSR OP
+    // to CSR file
+    output logic         [11:0] csr_addr_o     // CSR address to commit stage
+);
+  // this is a single entry store buffer for the address of the CSR
+  // which we are going to need in the commit stage
+  struct packed {
+    logic [11:0] csr_address;
+    logic        valid;
+  }
+      csr_reg_n, csr_reg_q;
+
+  // control logic, scoreboard signals
+  assign csr_result_o = fu_data_i.operand_a;  // combinational pass-through, not buffered
+  assign csr_addr_o   = csr_reg_q.csr_address;  // always driven from the registered entry
+
+  // write logic
+  always_comb begin : write
+    csr_reg_n   = csr_reg_q;
+    // by default we are ready
+    csr_ready_o = 1'b1;
+    // if we have a valid uncommitted csr req or are just getting one WITHOUT a commit in, we are not ready
+    if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i) csr_ready_o = 1'b0;
+    // if we got a valid from the scoreboard
+    // store the CSR address
+    if (csr_valid_i) begin
+      csr_reg_n.csr_address = fu_data_i.operand_b[11:0];
+      csr_reg_n.valid       = 1'b1;
+    end
+    // if we get a commit and no new valid instruction -> clear the valid bit
+    if (csr_commit_i && ~csr_valid_i) begin
+      csr_reg_n.valid = 1'b0;
+    end
+    // clear the valid bit if we flushed; evaluated last, so it overrides the assignments above
+    if (flush_i) csr_reg_n.valid = 1'b0;
+  end
+  // sequential process
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      csr_reg_q <= '{default: 0};
+    end else begin
+      csr_reg_q <= csr_reg_n;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/csr_regfile.sv b/test/type_param/core/csr_regfile.sv
new file mode 100644
index 0000000..88c1644
--- /dev/null
+++ b/test/type_param/core/csr_regfile.sv
@@ -0,0 +1,1646 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 05.05.2017
+// Description: CSR Register File as specified by RISC-V
+
+
+module csr_regfile
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg        = config_pkg::cva6_cfg_empty,
+    parameter int                    AsidWidth      = 1,
+    parameter int unsigned           MHPMCounterNum = 6
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic time_irq_i,  // Timer threw a interrupt
+    // send a flush request out if a CSR with a side effect has changed (e.g. written)
+    output logic flush_o,
+    output logic halt_csr_o,  // halt requested
+    // commit acknowledge
+    input  scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit
+    input  logic [CVA6Cfg.NrCommitPorts-1:0]              commit_ack_i,   // Commit acknowledged a instruction -> increase instret CSR
+    // Core and Cluster ID
+    input  logic[riscv::VLEN-1:0] boot_addr_i,                // Address from which to start booting, mtvec is set to the same address
+    input  logic[riscv::XLEN-1:0] hart_id_i,                  // Hart id in a multicore environment (reflected in a CSR)
+    // we are taking an exception
+    input exception_t ex_i,  // We've got an exception from the commit stage, take it
+
+    input fu_op csr_op_i,  // Operation to perform on the CSR file
+    input logic [11:0] csr_addr_i,  // Address of the register to read/write
+    input logic [riscv::XLEN-1:0] csr_wdata_i,  // Write data in
+    output logic [riscv::XLEN-1:0] csr_rdata_o,  // Read data out
+    input logic dirty_fp_state_i,  // Mark the FP sate as dirty
+    input  logic                  csr_write_fflags_i,         // Write fflags register e.g.: we are retiring a floating point instruction
+    input logic dirty_v_state_i,  // Mark the V state as dirty
+    input logic [riscv::VLEN-1:0] pc_i,  // PC of instruction accessing the CSR
+    output exception_t csr_exception_o,  // attempts to access a CSR without appropriate privilege
+                                         // level or to write  a read-only register also
+                                         // raises illegal instruction exceptions.
+    // Interrupts/Exceptions
+    output logic  [riscv::VLEN-1:0] epc_o,                    // Output the exception PC to PC Gen, the correct CSR (mepc, sepc) is set accordingly
+    output logic eret_o,  // Return from exception, set the PC of epc_o
+    output logic  [riscv::VLEN-1:0] trap_vector_base_o,       // Output base of exception vector, correct CSR is output (mtvec, stvec)
+    output riscv::priv_lvl_t priv_lvl_o,  // Current privilege level the CPU is in
+    // FP Imprecise exceptions
+    input  logic            [4:0] acc_fflags_ex_i,            // Imprecise FP exception from the accelerator (fcsr.fflags format)
+    input logic acc_fflags_ex_valid_i,  // An FP exception from the accelerator occurred
+    // FPU
+    output riscv::xs_t fs_o,  // Floating point extension status
+    output logic [4:0] fflags_o,  // Floating-Point Accured Exceptions
+    output logic [2:0] frm_o,  // Floating-Point Dynamic Rounding Mode
+    output logic [6:0] fprec_o,  // Floating-Point Precision Control
+    // Vector extension
+    output riscv::xs_t vs_o,  // Vector extension status
+    // Decoder
+    output irq_ctrl_t irq_ctrl_o,  // interrupt management to id stage
+    // MMU
+    output logic en_translation_o,  // enable VA translation
+    output logic en_ld_st_translation_o,  // enable VA translation for load and stores
+    output riscv::priv_lvl_t      ld_st_priv_lvl_o,           // Privilege level at which load and stores should happen
+    output logic sum_o,
+    output logic mxr_o,
+    output logic [riscv::PPNW-1:0] satp_ppn_o,
+    output logic [AsidWidth-1:0] asid_o,
+    // external interrupts
+    input logic [1:0] irq_i,  // external interrupt in
+    input logic ipi_i,  // inter processor interrupt -> connected to machine mode sw
+    input logic debug_req_i,  // debug request in
+    output logic set_debug_pc_o,
+    // Virtualization Support
+    output logic tvm_o,  // trap virtual memory
+    output logic tw_o,  // timeout wait
+    output logic tsr_o,  // trap sret
+    output logic debug_mode_o,  // we are in debug mode -> that will change some decoding
+    output logic single_step_o,  // we are in single-step mode
+    // Caches
+    output logic icache_en_o,  // L1 ICache Enable
+    output logic dcache_en_o,  // L1 DCache Enable
+    // Accelerator
+    output logic acc_cons_en_o,  // Accelerator memory consistent mode
+    // Performance Counter
+    output logic [11:0] perf_addr_o,  // read/write address to performance counter module
+    output logic [riscv::XLEN-1:0] perf_data_o,  // write data to performance counter module
+    input logic [riscv::XLEN-1:0] perf_data_i,  // read data from performance counter module
+    output logic perf_we_o,
+    // PMPs
+    output riscv::pmpcfg_t [15:0] pmpcfg_o,  // PMP configuration containing pmpcfg for max 16 PMPs
+    output logic [15:0][riscv::PLEN-3:0] pmpaddr_o,  // PMP addresses
+    output logic [31:0] mcountinhibit_o
+);
+  // internal signal to keep track of access exceptions
+  logic read_access_exception, update_access_exception, privilege_violation;
+  logic csr_we, csr_read;
+  riscv::xlen_t csr_wdata, csr_rdata;
+  riscv::priv_lvl_t trap_to_priv_lvl;
+  // register for enabling load store address translation, this is critical, hence the register
+  logic en_ld_st_translation_d, en_ld_st_translation_q;
+  logic mprv;
+  logic mret;  // return from M-mode exception
+  logic sret;  // return from S-mode exception
+  logic dret;  // return from debug mode
+  // CSR write causes us to mark the FPU state as dirty
+  logic dirty_fp_state_csr;
+  riscv::mstatus_rv_t mstatus_q, mstatus_d;
+  riscv::xlen_t mstatus_extended;
+  riscv::satp_t satp_q, satp_d;
+  riscv::dcsr_t dcsr_q, dcsr_d;
+  riscv::csr_t csr_addr;
+  // privilege level register
+  riscv::priv_lvl_t priv_lvl_d, priv_lvl_q;
+  // we are in debug
+  logic debug_mode_q, debug_mode_d;
+  logic mtvec_rst_load_q;  // used to determine whether we came out of reset
+
+  riscv::xlen_t dpc_q, dpc_d;
+  riscv::xlen_t dscratch0_q, dscratch0_d;
+  riscv::xlen_t dscratch1_q, dscratch1_d;
+  riscv::xlen_t mtvec_q, mtvec_d;
+  riscv::xlen_t medeleg_q, medeleg_d;
+  riscv::xlen_t mideleg_q, mideleg_d;
+  riscv::xlen_t mip_q, mip_d;
+  riscv::xlen_t mie_q, mie_d;
+  riscv::xlen_t mcounteren_q, mcounteren_d;
+  riscv::xlen_t mscratch_q, mscratch_d;
+  riscv::xlen_t mepc_q, mepc_d;
+  riscv::xlen_t mcause_q, mcause_d;
+  riscv::xlen_t mtval_q, mtval_d;
+  logic fiom_d, fiom_q;
+
+  riscv::xlen_t stvec_q, stvec_d;
+  riscv::xlen_t scounteren_q, scounteren_d;
+  riscv::xlen_t sscratch_q, sscratch_d;
+  riscv::xlen_t sepc_q, sepc_d;
+  riscv::xlen_t scause_q, scause_d;
+  riscv::xlen_t stval_q, stval_d;
+  riscv::xlen_t dcache_q, dcache_d;
+  riscv::xlen_t icache_q, icache_d;
+  riscv::xlen_t acc_cons_q, acc_cons_d;
+
+  logic wfi_d, wfi_q;
+
+  logic [63:0] cycle_q, cycle_d;
+  logic [63:0] instret_q, instret_d;
+
+  riscv::pmpcfg_t [15:0] pmpcfg_q, pmpcfg_d;
+  logic [15:0][riscv::PLEN-3:0] pmpaddr_q, pmpaddr_d;
+  logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q;
+  logic [3:0] index;
+
+  localparam riscv::xlen_t IsaCode = (riscv::XLEN'(CVA6Cfg.RVA) <<  0)                // A - Atomic Instructions extension
+  | (riscv::XLEN'(CVA6Cfg.RVB) << 1)  // C - Bitmanip extension
+  | (riscv::XLEN'(CVA6Cfg.RVC) << 2)  // C - Compressed extension
+  | (riscv::XLEN'(CVA6Cfg.RVD) << 3)  // D - Double precision floating-point extension
+  | (riscv::XLEN'(CVA6Cfg.RVF) << 5)  // F - Single precision floating-point extension
+  | (riscv::XLEN'(1) << 8)  // I - RV32I/64I/128I base ISA
+  | (riscv::XLEN'(1) << 12)  // M - Integer Multiply/Divide extension
+  | (riscv::XLEN'(0) << 13)  // N - User level interrupts supported
+  | (riscv::XLEN'(CVA6Cfg.RVS) << 18)  // S - Supervisor mode implemented
+  | (riscv::XLEN'(CVA6Cfg.RVU) << 20)  // U - User mode implemented
+  | (riscv::XLEN'(CVA6Cfg.RVV) << 21)  // V - Vector extension
+  | (riscv::XLEN'(CVA6Cfg.NSX) << 23)  // X - Non-standard extensions present
+  | ((riscv::XLEN == 64 ? 2 : 1) << riscv::XLEN - 2);  // MXL
+
+  assign pmpcfg_o  = pmpcfg_q[15:0];
+  assign pmpaddr_o = pmpaddr_q;
+
+  riscv::fcsr_t fcsr_q, fcsr_d;
+  // ----------------
+  // Assignments
+  // ----------------
+  assign csr_addr = riscv::csr_t'(csr_addr_i);
+  assign fs_o = mstatus_q.fs;
+  assign vs_o = mstatus_q.vs;
+  // ----------------
+  // CSR Read logic
+  // ----------------
+  assign mstatus_extended = riscv::IS_XLEN64 ? mstatus_q[riscv::XLEN-1:0] :
+                              {mstatus_q.sd, mstatus_q.wpri3[7:0], mstatus_q[22:0]};
+
+
+  always_comb begin : csr_read_process
+    // a read access exception can only occur if we attempt to read a CSR which does not exist
+    read_access_exception = 1'b0;
+    csr_rdata = '0;
+    perf_addr_o = csr_addr.address[11:0];
+    index = '0;
+
+    if (csr_read) begin
+      unique case (csr_addr.address)
+        riscv::CSR_FFLAGS: begin
+          if (CVA6Cfg.FpPresent) begin
+            csr_rdata = {{riscv::XLEN - 5{1'b0}}, fcsr_q.fflags};
+          end else begin
+            read_access_exception = 1'b1;
+          end
+        end
+        riscv::CSR_FRM: begin
+          if (CVA6Cfg.FpPresent) begin
+            csr_rdata = {{riscv::XLEN - 3{1'b0}}, fcsr_q.frm};
+          end else begin
+            read_access_exception = 1'b1;
+          end
+        end
+        riscv::CSR_FCSR: begin
+          if (CVA6Cfg.FpPresent) begin
+            csr_rdata = {{riscv::XLEN - 8{1'b0}}, fcsr_q.frm, fcsr_q.fflags};
+          end else begin
+            read_access_exception = 1'b1;
+          end
+        end
+        // non-standard extension
+        riscv::CSR_FTRAN: begin
+          if (CVA6Cfg.FpPresent) begin
+            csr_rdata = {{riscv::XLEN - 7{1'b0}}, fcsr_q.fprec};
+          end else begin
+            read_access_exception = 1'b1;
+          end
+        end
+        // debug registers
+        riscv::CSR_DCSR:
+        if (CVA6Cfg.DebugEn) csr_rdata = {{riscv::XLEN - 32{1'b0}}, dcsr_q};
+        else read_access_exception = 1'b1;
+        riscv::CSR_DPC:
+        if (CVA6Cfg.DebugEn) csr_rdata = dpc_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_DSCRATCH0:
+        if (CVA6Cfg.DebugEn) csr_rdata = dscratch0_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_DSCRATCH1:
+        if (CVA6Cfg.DebugEn) csr_rdata = dscratch1_q;
+        else read_access_exception = 1'b1;
+        // trigger module registers
+        riscv::CSR_TSELECT: read_access_exception = 1'b1;  // not implemented
+        riscv::CSR_TDATA1: read_access_exception = 1'b1;  // not implemented
+        riscv::CSR_TDATA2: read_access_exception = 1'b1;  // not implemented
+        riscv::CSR_TDATA3: read_access_exception = 1'b1;  // not implemented
+        // supervisor registers
+        riscv::CSR_SSTATUS: begin
+          if (CVA6Cfg.RVS)
+            csr_rdata = mstatus_extended & ariane_pkg::SMODE_STATUS_READ_MASK[riscv::XLEN-1:0];
+          else read_access_exception = 1'b1;
+        end
+        riscv::CSR_SIE:
+        if (CVA6Cfg.RVS) csr_rdata = mie_q & mideleg_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_SIP:
+        if (CVA6Cfg.RVS) csr_rdata = mip_q & mideleg_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_STVEC:
+        if (CVA6Cfg.RVS) csr_rdata = stvec_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_SCOUNTEREN:
+        if (CVA6Cfg.RVS) csr_rdata = scounteren_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_SSCRATCH:
+        if (CVA6Cfg.RVS) csr_rdata = sscratch_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_SEPC:
+        if (CVA6Cfg.RVS) csr_rdata = sepc_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_SCAUSE:
+        if (CVA6Cfg.RVS) csr_rdata = scause_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_STVAL:
+        if (CVA6Cfg.RVS) csr_rdata = stval_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_SATP: begin
+          if (CVA6Cfg.RVS) begin
+            // intercept reads to SATP if in S-Mode and TVM is enabled
+            if (priv_lvl_o == riscv::PRIV_LVL_S && mstatus_q.tvm) begin
+              read_access_exception = 1'b1;
+            end else begin
+              csr_rdata = satp_q;
+            end
+          end else begin
+            read_access_exception = 1'b1;
+          end
+        end
+        // machine mode registers
+        riscv::CSR_MSTATUS: csr_rdata = mstatus_extended;
+        riscv::CSR_MSTATUSH:
+        if (riscv::XLEN == 32) csr_rdata = '0;
+        else read_access_exception = 1'b1;
+        riscv::CSR_MISA: csr_rdata = IsaCode;
+        riscv::CSR_MEDELEG:
+        if (CVA6Cfg.RVS) csr_rdata = medeleg_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_MIDELEG:
+        if (CVA6Cfg.RVS) csr_rdata = mideleg_q;
+        else read_access_exception = 1'b1;
+        riscv::CSR_MIE: csr_rdata = mie_q;
+        riscv::CSR_MTVEC: csr_rdata = mtvec_q;
+        riscv::CSR_MCOUNTEREN: csr_rdata = mcounteren_q;
+        riscv::CSR_MSCRATCH: csr_rdata = mscratch_q;
+        riscv::CSR_MEPC: csr_rdata = mepc_q;
+        riscv::CSR_MCAUSE: csr_rdata = mcause_q;
+        riscv::CSR_MTVAL: csr_rdata = mtval_q;
+        riscv::CSR_MIP: csr_rdata = mip_q;
+        riscv::CSR_MENVCFG: csr_rdata = '0 | fiom_q;
+        riscv::CSR_MENVCFGH: begin
+          if (riscv::XLEN == 32) csr_rdata = '0;
+          else read_access_exception = 1'b1;
+        end
+        riscv::CSR_MVENDORID: csr_rdata = OPENHWGROUP_MVENDORID;
+        riscv::CSR_MARCHID: csr_rdata = ARIANE_MARCHID;
+        riscv::CSR_MIMPID: csr_rdata = '0;  // not implemented
+        riscv::CSR_MHARTID: csr_rdata = hart_id_i;
+        riscv::CSR_MCONFIGPTR: csr_rdata = '0;  // not implemented
+        riscv::CSR_MCOUNTINHIBIT:
+        csr_rdata = {{(riscv::XLEN - (MHPMCounterNum + 3)) {1'b0}}, mcountinhibit_q};
+        // Counters and Timers
+        riscv::CSR_MCYCLE: csr_rdata = cycle_q[riscv::XLEN-1:0];
+        riscv::CSR_MCYCLEH:
+        if (riscv::XLEN == 32) csr_rdata = cycle_q[63:32];
+        else read_access_exception = 1'b1;
+        riscv::CSR_MINSTRET: csr_rdata = instret_q[riscv::XLEN-1:0];
+        riscv::CSR_MINSTRETH:
+        if (riscv::XLEN == 32) csr_rdata = instret_q[63:32];
+        else read_access_exception = 1'b1;
+        riscv::CSR_CYCLE: csr_rdata = cycle_q[riscv::XLEN-1:0];
+        riscv::CSR_CYCLEH:
+        if (riscv::XLEN == 32) csr_rdata = cycle_q[63:32];
+        else read_access_exception = 1'b1;
+        riscv::CSR_INSTRET: csr_rdata = instret_q[riscv::XLEN-1:0];
+        riscv::CSR_INSTRETH:
+        if (riscv::XLEN == 32) csr_rdata = instret_q[63:32];
+        else read_access_exception = 1'b1;
+        //Event Selector
+        riscv::CSR_MHPM_EVENT_3,
+                riscv::CSR_MHPM_EVENT_4,
+                riscv::CSR_MHPM_EVENT_5,
+                riscv::CSR_MHPM_EVENT_6,
+                riscv::CSR_MHPM_EVENT_7,
+                riscv::CSR_MHPM_EVENT_8,
+                riscv::CSR_MHPM_EVENT_9,
+                riscv::CSR_MHPM_EVENT_10,
+                riscv::CSR_MHPM_EVENT_11,
+                riscv::CSR_MHPM_EVENT_12,
+                riscv::CSR_MHPM_EVENT_13,
+                riscv::CSR_MHPM_EVENT_14,
+                riscv::CSR_MHPM_EVENT_15,
+                riscv::CSR_MHPM_EVENT_16,
+                riscv::CSR_MHPM_EVENT_17,
+                riscv::CSR_MHPM_EVENT_18,
+                riscv::CSR_MHPM_EVENT_19,
+                riscv::CSR_MHPM_EVENT_20,
+                riscv::CSR_MHPM_EVENT_21,
+                riscv::CSR_MHPM_EVENT_22,
+                riscv::CSR_MHPM_EVENT_23,
+                riscv::CSR_MHPM_EVENT_24,
+                riscv::CSR_MHPM_EVENT_25,
+                riscv::CSR_MHPM_EVENT_26,
+                riscv::CSR_MHPM_EVENT_27,
+                riscv::CSR_MHPM_EVENT_28,
+                riscv::CSR_MHPM_EVENT_29,
+                riscv::CSR_MHPM_EVENT_30,
+                riscv::CSR_MHPM_EVENT_31 :
+        csr_rdata = perf_data_i;
+
+        riscv::CSR_MHPM_COUNTER_3,
+                riscv::CSR_MHPM_COUNTER_4,
+                riscv::CSR_MHPM_COUNTER_5,
+                riscv::CSR_MHPM_COUNTER_6,
+                riscv::CSR_MHPM_COUNTER_7,
+                riscv::CSR_MHPM_COUNTER_8,
+                riscv::CSR_MHPM_COUNTER_9,
+                riscv::CSR_MHPM_COUNTER_10,
+                riscv::CSR_MHPM_COUNTER_11,
+                riscv::CSR_MHPM_COUNTER_12,
+                riscv::CSR_MHPM_COUNTER_13,
+                riscv::CSR_MHPM_COUNTER_14,
+                riscv::CSR_MHPM_COUNTER_15,
+                riscv::CSR_MHPM_COUNTER_16,
+                riscv::CSR_MHPM_COUNTER_17,
+                riscv::CSR_MHPM_COUNTER_18,
+                riscv::CSR_MHPM_COUNTER_19,
+                riscv::CSR_MHPM_COUNTER_20,
+                riscv::CSR_MHPM_COUNTER_21,
+                riscv::CSR_MHPM_COUNTER_22,
+                riscv::CSR_MHPM_COUNTER_23,
+                riscv::CSR_MHPM_COUNTER_24,
+                riscv::CSR_MHPM_COUNTER_25,
+                riscv::CSR_MHPM_COUNTER_26,
+                riscv::CSR_MHPM_COUNTER_27,
+                riscv::CSR_MHPM_COUNTER_28,
+                riscv::CSR_MHPM_COUNTER_29,
+                riscv::CSR_MHPM_COUNTER_30,
+                riscv::CSR_MHPM_COUNTER_31 :
+        csr_rdata = perf_data_i;
+
+        riscv::CSR_MHPM_COUNTER_3H,
+                riscv::CSR_MHPM_COUNTER_4H,
+                riscv::CSR_MHPM_COUNTER_5H,
+                riscv::CSR_MHPM_COUNTER_6H,
+                riscv::CSR_MHPM_COUNTER_7H,
+                riscv::CSR_MHPM_COUNTER_8H,
+                riscv::CSR_MHPM_COUNTER_9H,
+                riscv::CSR_MHPM_COUNTER_10H,
+                riscv::CSR_MHPM_COUNTER_11H,
+                riscv::CSR_MHPM_COUNTER_12H,
+                riscv::CSR_MHPM_COUNTER_13H,
+                riscv::CSR_MHPM_COUNTER_14H,
+                riscv::CSR_MHPM_COUNTER_15H,
+                riscv::CSR_MHPM_COUNTER_16H,
+                riscv::CSR_MHPM_COUNTER_17H,
+                riscv::CSR_MHPM_COUNTER_18H,
+                riscv::CSR_MHPM_COUNTER_19H,
+                riscv::CSR_MHPM_COUNTER_20H,
+                riscv::CSR_MHPM_COUNTER_21H,
+                riscv::CSR_MHPM_COUNTER_22H,
+                riscv::CSR_MHPM_COUNTER_23H,
+                riscv::CSR_MHPM_COUNTER_24H,
+                riscv::CSR_MHPM_COUNTER_25H,
+                riscv::CSR_MHPM_COUNTER_26H,
+                riscv::CSR_MHPM_COUNTER_27H,
+                riscv::CSR_MHPM_COUNTER_28H,
+                riscv::CSR_MHPM_COUNTER_29H,
+                riscv::CSR_MHPM_COUNTER_30H,
+                riscv::CSR_MHPM_COUNTER_31H :
+        if (riscv::XLEN == 32) csr_rdata = perf_data_i;
+        else read_access_exception = 1'b1;
+
+        // Performance counters (User Mode - R/O Shadows)
+        riscv::CSR_HPM_COUNTER_3,
+                riscv::CSR_HPM_COUNTER_4,
+                riscv::CSR_HPM_COUNTER_5,
+                riscv::CSR_HPM_COUNTER_6,
+                riscv::CSR_HPM_COUNTER_7,
+                riscv::CSR_HPM_COUNTER_8,
+                riscv::CSR_HPM_COUNTER_9,
+                riscv::CSR_HPM_COUNTER_10,
+                riscv::CSR_HPM_COUNTER_11,
+                riscv::CSR_HPM_COUNTER_12,
+                riscv::CSR_HPM_COUNTER_13,
+                riscv::CSR_HPM_COUNTER_14,
+                riscv::CSR_HPM_COUNTER_15,
+                riscv::CSR_HPM_COUNTER_16,
+                riscv::CSR_HPM_COUNTER_17,
+                riscv::CSR_HPM_COUNTER_18,
+                riscv::CSR_HPM_COUNTER_19,
+                riscv::CSR_HPM_COUNTER_20,
+                riscv::CSR_HPM_COUNTER_21,
+                riscv::CSR_HPM_COUNTER_22,
+                riscv::CSR_HPM_COUNTER_23,
+                riscv::CSR_HPM_COUNTER_24,
+                riscv::CSR_HPM_COUNTER_25,
+                riscv::CSR_HPM_COUNTER_26,
+                riscv::CSR_HPM_COUNTER_27,
+                riscv::CSR_HPM_COUNTER_28,
+                riscv::CSR_HPM_COUNTER_29,
+                riscv::CSR_HPM_COUNTER_30,
+                riscv::CSR_HPM_COUNTER_31 :
+        csr_rdata = perf_data_i;
+
+        riscv::CSR_HPM_COUNTER_3H,
+                riscv::CSR_HPM_COUNTER_4H,
+                riscv::CSR_HPM_COUNTER_5H,
+                riscv::CSR_HPM_COUNTER_6H,
+                riscv::CSR_HPM_COUNTER_7H,
+                riscv::CSR_HPM_COUNTER_8H,
+                riscv::CSR_HPM_COUNTER_9H,
+                riscv::CSR_HPM_COUNTER_10H,
+                riscv::CSR_HPM_COUNTER_11H,
+                riscv::CSR_HPM_COUNTER_12H,
+                riscv::CSR_HPM_COUNTER_13H,
+                riscv::CSR_HPM_COUNTER_14H,
+                riscv::CSR_HPM_COUNTER_15H,
+                riscv::CSR_HPM_COUNTER_16H,
+                riscv::CSR_HPM_COUNTER_17H,
+                riscv::CSR_HPM_COUNTER_18H,
+                riscv::CSR_HPM_COUNTER_19H,
+                riscv::CSR_HPM_COUNTER_20H,
+                riscv::CSR_HPM_COUNTER_21H,
+                riscv::CSR_HPM_COUNTER_22H,
+                riscv::CSR_HPM_COUNTER_23H,
+                riscv::CSR_HPM_COUNTER_24H,
+                riscv::CSR_HPM_COUNTER_25H,
+                riscv::CSR_HPM_COUNTER_26H,
+                riscv::CSR_HPM_COUNTER_27H,
+                riscv::CSR_HPM_COUNTER_28H,
+                riscv::CSR_HPM_COUNTER_29H,
+                riscv::CSR_HPM_COUNTER_30H,
+                riscv::CSR_HPM_COUNTER_31H :
+        if (riscv::XLEN == 32) csr_rdata = perf_data_i;
+        else read_access_exception = 1'b1;
+
+        // custom (non RISC-V) cache control
+        riscv::CSR_DCACHE: csr_rdata = dcache_q;
+        riscv::CSR_ICACHE: csr_rdata = icache_q;
+        // custom (non RISC-V) accelerator memory consistency mode
+        riscv::CSR_ACC_CONS: begin
+          if (CVA6Cfg.EnableAccelerator) begin
+            csr_rdata = acc_cons_q;
+          end else begin
+            read_access_exception = 1'b1;
+          end
+        end
+        // PMPs
+        riscv::CSR_PMPCFG0: csr_rdata = pmpcfg_q[riscv::XLEN/8-1:0];
+        riscv::CSR_PMPCFG1:
+        if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[7:4];
+        else read_access_exception = 1'b1;
+        riscv::CSR_PMPCFG2: csr_rdata = pmpcfg_q[8+:riscv::XLEN/8];
+        riscv::CSR_PMPCFG3:
+        if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[15:12];
+        else read_access_exception = 1'b1;
+        // PMPADDR
+        riscv::CSR_PMPADDR0,
+                riscv::CSR_PMPADDR1,
+                riscv::CSR_PMPADDR2,
+                riscv::CSR_PMPADDR3,
+                riscv::CSR_PMPADDR4,
+                riscv::CSR_PMPADDR5,
+                riscv::CSR_PMPADDR6,
+                riscv::CSR_PMPADDR7,
+                riscv::CSR_PMPADDR8,
+                riscv::CSR_PMPADDR9,
+                riscv::CSR_PMPADDR10,
+                riscv::CSR_PMPADDR11,
+                riscv::CSR_PMPADDR12,
+                riscv::CSR_PMPADDR13,
+                riscv::CSR_PMPADDR14,
+                riscv::CSR_PMPADDR15: begin
+          // index is specified by the lowest four bits of the address
+          index = csr_addr.csr_decode.address[3:0];
+          // Important: we only support granularity 8 bytes (G=1)
+          // -> last bit of pmpaddr must be set 0/1 based on the mode:
+          // NA4, NAPOT: 1
+          // TOR, OFF:   0
+          if (pmpcfg_q[index].addr_mode[1] == 1'b1) csr_rdata = pmpaddr_q[index][riscv::PLEN-3:0];
+          else csr_rdata = {pmpaddr_q[index][riscv::PLEN-3:1], 1'b0};
+        end
+        default: read_access_exception = 1'b1;
+      endcase
+    end
+  end
+  // ---------------------------
+  // CSR Write and update logic
+  // ---------------------------
+  riscv::xlen_t mask;
+  always_comb begin : csr_update
+    automatic riscv::satp_t satp;
+    automatic logic [63:0] instret;
+
+
+    satp            = satp_q;
+    instret         = instret_q;
+
+    mcountinhibit_d = mcountinhibit_q;
+
+    // --------------------
+    // Counters
+    // --------------------
+    cycle_d         = cycle_q;
+    instret_d       = instret_q;
+    if (!debug_mode_q) begin
+      // increase instruction retired counter
+      for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+        if (commit_ack_i[i] && !ex_i.valid && !mcountinhibit_q[2]) instret++;
+      end
+      instret_d = instret;
+      // increment the cycle count
+      if (!mcountinhibit_q[0]) cycle_d = cycle_q + 1'b1;
+      else cycle_d = cycle_q;
+    end
+
+    eret_o                  = 1'b0;
+    flush_o                 = 1'b0;
+    update_access_exception = 1'b0;
+
+    set_debug_pc_o          = 1'b0;
+
+    perf_we_o               = 1'b0;
+    perf_data_o             = 'b0;
+
+    fcsr_d                  = fcsr_q;
+
+    priv_lvl_d              = priv_lvl_q;
+    debug_mode_d            = debug_mode_q;
+    dcsr_d                  = dcsr_q;
+    dpc_d                   = dpc_q;
+    dscratch0_d             = dscratch0_q;
+    dscratch1_d             = dscratch1_q;
+    mstatus_d               = mstatus_q;
+
+    // check whether we come out of reset
+    // this is a workaround. some tools have issues
+    // having boot_addr_i in the asynchronous
+    // reset assignment to mtvec_d, even though
+    // boot_addr_i will be assigned a constant
+    // on the top-level.
+    if (mtvec_rst_load_q) begin
+      mtvec_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, boot_addr_i} + 'h40;
+    end else begin
+      mtvec_d = mtvec_q;
+    end
+
+    medeleg_d              = medeleg_q;
+    mideleg_d              = mideleg_q;
+    mip_d                  = mip_q;
+    mie_d                  = mie_q;
+    mepc_d                 = mepc_q;
+    mcause_d               = mcause_q;
+    mcounteren_d           = mcounteren_q;
+    mscratch_d             = mscratch_q;
+    mtval_d                = mtval_q;
+    fiom_d                 = fiom_q;
+    dcache_d               = dcache_q;
+    icache_d               = icache_q;
+    acc_cons_d             = acc_cons_q;
+
+    sepc_d                 = sepc_q;
+    scause_d               = scause_q;
+    stvec_d                = stvec_q;
+    scounteren_d           = scounteren_q;
+    sscratch_d             = sscratch_q;
+    stval_d                = stval_q;
+    satp_d                 = satp_q;
+
+    en_ld_st_translation_d = en_ld_st_translation_q;
+    dirty_fp_state_csr     = 1'b0;
+
+    pmpcfg_d               = pmpcfg_q;
+    pmpaddr_d              = pmpaddr_q;
+
+    // check for correct access rights and that we are writing
+    if (csr_we) begin
+      unique case (csr_addr.address)
+        // Floating-Point
+        riscv::CSR_FFLAGS: begin
+          if (CVA6Cfg.FpPresent) begin
+            dirty_fp_state_csr = 1'b1;
+            fcsr_d.fflags = csr_wdata[4:0];
+            // this instruction has side-effects
+            flush_o = 1'b1;
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        riscv::CSR_FRM: begin
+          if (CVA6Cfg.FpPresent) begin
+            dirty_fp_state_csr = 1'b1;
+            fcsr_d.frm    = csr_wdata[2:0];
+            // this instruction has side-effects
+            flush_o = 1'b1;
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        riscv::CSR_FCSR: begin
+          if (CVA6Cfg.FpPresent) begin
+            dirty_fp_state_csr = 1'b1;
+            fcsr_d[7:0] = csr_wdata[7:0];  // ignore writes to reserved space
+            // this instruction has side-effects
+            flush_o = 1'b1;
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        riscv::CSR_FTRAN: begin
+          if (CVA6Cfg.FpPresent) begin
+            dirty_fp_state_csr = 1'b1;
+            fcsr_d.fprec = csr_wdata[6:0];  // ignore writes to reserved space
+            // this instruction has side-effects
+            flush_o = 1'b1;
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        // debug CSR
+        riscv::CSR_DCSR: begin
+          if (CVA6Cfg.DebugEn) begin
+            dcsr_d           = csr_wdata[31:0];
+            // debug is implemented
+            dcsr_d.xdebugver = 4'h4;
+            // currently not supported
+            dcsr_d.nmip      = 1'b0;
+            dcsr_d.stopcount = 1'b0;
+            dcsr_d.stoptime  = 1'b0;
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        riscv::CSR_DPC:
+        if (CVA6Cfg.DebugEn) dpc_d = csr_wdata;
+        else update_access_exception = 1'b1;
+        riscv::CSR_DSCRATCH0:
+        if (CVA6Cfg.DebugEn) dscratch0_d = csr_wdata;
+        else update_access_exception = 1'b1;
+        riscv::CSR_DSCRATCH1:
+        if (CVA6Cfg.DebugEn) dscratch1_d = csr_wdata;
+        else update_access_exception = 1'b1;
+        // trigger module CSRs
+        riscv::CSR_TSELECT: update_access_exception = 1'b1;  // not implemented
+        riscv::CSR_TDATA1: update_access_exception = 1'b1;  // not implemented
+        riscv::CSR_TDATA2: update_access_exception = 1'b1;  // not implemented
+        riscv::CSR_TDATA3: update_access_exception = 1'b1;  // not implemented
+        // sstatus is a subset of mstatus - mask it accordingly
+        riscv::CSR_SSTATUS: begin
+          if (CVA6Cfg.RVS) begin
+            mask = ariane_pkg::SMODE_STATUS_WRITE_MASK[riscv::XLEN-1:0];
+            mstatus_d = (mstatus_q & ~{{64-riscv::XLEN{1'b0}}, mask}) | {{64-riscv::XLEN{1'b0}}, (csr_wdata & mask)};
+            // hardwire to zero if floating point extension is not present
+            if (!CVA6Cfg.FpPresent) begin
+              mstatus_d.fs = riscv::Off;
+            end
+            // hardwire to zero if vector extension is not present
+            if (!CVA6Cfg.RVV) begin
+              mstatus_d.vs = riscv::Off;
+            end
+            // this instruction has side-effects
+            flush_o = 1'b1;
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        // even machine mode interrupts can be visible and set-able to supervisor
+        // if the corresponding bit in mideleg is set
+        riscv::CSR_SIE: begin
+          if (CVA6Cfg.RVS) begin
+            // mideleg makes sure that only delegable (and therefore also only implemented) interrupt-enable bits are written
+            mie_d = (mie_q & ~mideleg_q) | (csr_wdata & mideleg_q);
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+
+        riscv::CSR_SIP: begin
+          if (CVA6Cfg.RVS) begin
+            // only the supervisor software interrupt is write-able, iff delegated
+            mask  = riscv::MIP_SSIP & mideleg_q;
+            mip_d = (mip_q & ~mask) | (csr_wdata & mask);
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+
+        riscv::CSR_STVEC:
+        if (CVA6Cfg.RVS) stvec_d = {csr_wdata[riscv::XLEN-1:2], 1'b0, csr_wdata[0]};
+        else update_access_exception = 1'b1;
+        riscv::CSR_SCOUNTEREN:
+        if (CVA6Cfg.RVS) scounteren_d = {{riscv::XLEN - 32{1'b0}}, csr_wdata[31:0]};
+        else update_access_exception = 1'b1;
+        riscv::CSR_SSCRATCH:
+        if (CVA6Cfg.RVS) sscratch_d = csr_wdata;
+        else update_access_exception = 1'b1;
+        riscv::CSR_SEPC:
+        if (CVA6Cfg.RVS) sepc_d = {csr_wdata[riscv::XLEN-1:1], 1'b0};
+        else update_access_exception = 1'b1;
+        riscv::CSR_SCAUSE:
+        if (CVA6Cfg.RVS) scause_d = csr_wdata;
+        else update_access_exception = 1'b1;
+        riscv::CSR_STVAL:
+        if (CVA6Cfg.RVS) stval_d = csr_wdata;
+        else update_access_exception = 1'b1;
+        // supervisor address translation and protection
+        riscv::CSR_SATP: begin
+          if (CVA6Cfg.RVS) begin
+            // intercept SATP writes if in S-Mode and TVM is enabled
+            if (priv_lvl_o == riscv::PRIV_LVL_S && mstatus_q.tvm) update_access_exception = 1'b1;
+            else begin
+              satp      = riscv::satp_t'(csr_wdata);
+              // only make the lower AsidWidth bits stick, that way software can figure out how many ASID bits are supported
+              satp.asid = satp.asid & {{(riscv::ASIDW - AsidWidth) {1'b0}}, {AsidWidth{1'b1}}};
+              // only update if we actually support this mode
+              if (riscv::vm_mode_t'(satp.mode) == riscv::ModeOff ||
+                                riscv::vm_mode_t'(satp.mode) == riscv::MODE_SV)
+                satp_d = satp;
+            end
+            // changing the mode can have side-effects on address translation (e.g.: other instructions), re-fetch
+            // the next instruction by executing a flush
+            flush_o = 1'b1;
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+
+        riscv::CSR_MSTATUS: begin
+          mstatus_d    = {{64 - riscv::XLEN{1'b0}}, csr_wdata};
+          mstatus_d.xs = riscv::Off;
+          if (!CVA6Cfg.FpPresent) begin
+            mstatus_d.fs = riscv::Off;
+          end
+          if (!CVA6Cfg.RVV) begin
+            mstatus_d.vs = riscv::Off;
+          end
+          mstatus_d.wpri3 = 9'b0;
+          mstatus_d.wpri1 = 1'b0;
+          mstatus_d.wpri2 = 1'b0;
+          mstatus_d.wpri0 = 1'b0;
+          mstatus_d.ube   = 1'b0;  // CVA6 is little-endian
+          // this register has side-effects on other registers, flush the pipeline
+          flush_o         = 1'b1;
+        end
+        riscv::CSR_MSTATUSH: if (riscv::XLEN != 32) update_access_exception = 1'b1;
+        // MISA is WARL (Write Any Value, Reads Legal Value)
+        riscv::CSR_MISA: ;
+        // machine exception delegation register
+        // 0 - 15 exceptions supported
+        riscv::CSR_MEDELEG: begin
+          if (CVA6Cfg.RVS) begin
+            mask = (1 << riscv::INSTR_ADDR_MISALIGNED) |
+                             (1 << riscv::BREAKPOINT) |
+                             (1 << riscv::ENV_CALL_UMODE) |
+                             (1 << riscv::INSTR_PAGE_FAULT) |
+                             (1 << riscv::LOAD_PAGE_FAULT) |
+                             (1 << riscv::STORE_PAGE_FAULT);
+            medeleg_d = (medeleg_q & ~mask) | (csr_wdata & mask);
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        // machine interrupt delegation register
+        // we do not support user interrupt delegation
+        riscv::CSR_MIDELEG: begin
+          if (CVA6Cfg.RVS) begin
+            mask = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP;
+            mideleg_d = (mideleg_q & ~mask) | (csr_wdata & mask);
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        // mask the register so that unsupported interrupts can never be set
+        riscv::CSR_MIE: begin
+          mask = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP | riscv::MIP_MSIP | riscv::MIP_MTIP | riscv::MIP_MEIP;
+          mie_d = (mie_q & ~mask) | (csr_wdata & mask); // we only support supervisor and M-mode interrupts
+        end
+
+        riscv::CSR_MTVEC: begin
+          mtvec_d = {csr_wdata[riscv::XLEN-1:2], 1'b0, csr_wdata[0]};
+          // we are in vector mode, this implementation requires the additional
+          // alignment constraint of 64 * 4 bytes
+          if (csr_wdata[0]) mtvec_d = {csr_wdata[riscv::XLEN-1:8], 7'b0, csr_wdata[0]};
+        end
+        riscv::CSR_MCOUNTEREN: mcounteren_d = {{riscv::XLEN - 32{1'b0}}, csr_wdata[31:0]};
+
+        riscv::CSR_MSCRATCH: mscratch_d = csr_wdata;
+        riscv::CSR_MEPC: mepc_d = {csr_wdata[riscv::XLEN-1:1], 1'b0};
+        riscv::CSR_MCAUSE: mcause_d = csr_wdata;
+        riscv::CSR_MTVAL: mtval_d = csr_wdata;
+        riscv::CSR_MIP: begin
+          mask  = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP;
+          mip_d = (mip_q & ~mask) | (csr_wdata & mask);
+        end
+        riscv::CSR_MENVCFG: if (CVA6Cfg.RVS) fiom_d = csr_wdata[0];
+        riscv::CSR_MENVCFGH: begin
+          if (riscv::XLEN != 32) update_access_exception = 1'b1;
+        end
+        riscv::CSR_MCOUNTINHIBIT:
+        mcountinhibit_d = {csr_wdata[MHPMCounterNum+2:2], 1'b0, csr_wdata[0]};
+        // performance counters
+        riscv::CSR_MCYCLE: cycle_d[riscv::XLEN-1:0] = csr_wdata;
+        riscv::CSR_MCYCLEH:
+        if (riscv::XLEN == 32) cycle_d[63:32] = csr_wdata;
+        else update_access_exception = 1'b1;
+        riscv::CSR_MINSTRET: instret_d[riscv::XLEN-1:0] = csr_wdata;
+        riscv::CSR_MINSTRETH:
+        if (riscv::XLEN == 32) instret_d[63:32] = csr_wdata;
+        else update_access_exception = 1'b1;
+        //Event Selector
+        riscv::CSR_MHPM_EVENT_3,
+                riscv::CSR_MHPM_EVENT_4,
+                riscv::CSR_MHPM_EVENT_5,
+                riscv::CSR_MHPM_EVENT_6,
+                riscv::CSR_MHPM_EVENT_7,
+                riscv::CSR_MHPM_EVENT_8,
+                riscv::CSR_MHPM_EVENT_9,
+                riscv::CSR_MHPM_EVENT_10,
+                riscv::CSR_MHPM_EVENT_11,
+                riscv::CSR_MHPM_EVENT_12,
+                riscv::CSR_MHPM_EVENT_13,
+                riscv::CSR_MHPM_EVENT_14,
+                riscv::CSR_MHPM_EVENT_15,
+                riscv::CSR_MHPM_EVENT_16,
+                riscv::CSR_MHPM_EVENT_17,
+                riscv::CSR_MHPM_EVENT_18,
+                riscv::CSR_MHPM_EVENT_19,
+                riscv::CSR_MHPM_EVENT_20,
+                riscv::CSR_MHPM_EVENT_21,
+                riscv::CSR_MHPM_EVENT_22,
+                riscv::CSR_MHPM_EVENT_23,
+                riscv::CSR_MHPM_EVENT_24,
+                riscv::CSR_MHPM_EVENT_25,
+                riscv::CSR_MHPM_EVENT_26,
+                riscv::CSR_MHPM_EVENT_27,
+                riscv::CSR_MHPM_EVENT_28,
+                riscv::CSR_MHPM_EVENT_29,
+                riscv::CSR_MHPM_EVENT_30,
+                riscv::CSR_MHPM_EVENT_31 :     begin
+          perf_we_o   = 1'b1;
+          perf_data_o = csr_wdata;
+        end
+
+        riscv::CSR_MHPM_COUNTER_3,
+                riscv::CSR_MHPM_COUNTER_4,
+                riscv::CSR_MHPM_COUNTER_5,
+                riscv::CSR_MHPM_COUNTER_6,
+                riscv::CSR_MHPM_COUNTER_7,
+                riscv::CSR_MHPM_COUNTER_8,
+                riscv::CSR_MHPM_COUNTER_9,
+                riscv::CSR_MHPM_COUNTER_10,
+                riscv::CSR_MHPM_COUNTER_11,
+                riscv::CSR_MHPM_COUNTER_12,
+                riscv::CSR_MHPM_COUNTER_13,
+                riscv::CSR_MHPM_COUNTER_14,
+                riscv::CSR_MHPM_COUNTER_15,
+                riscv::CSR_MHPM_COUNTER_16,
+                riscv::CSR_MHPM_COUNTER_17,
+                riscv::CSR_MHPM_COUNTER_18,
+                riscv::CSR_MHPM_COUNTER_19,
+                riscv::CSR_MHPM_COUNTER_20,
+                riscv::CSR_MHPM_COUNTER_21,
+                riscv::CSR_MHPM_COUNTER_22,
+                riscv::CSR_MHPM_COUNTER_23,
+                riscv::CSR_MHPM_COUNTER_24,
+                riscv::CSR_MHPM_COUNTER_25,
+                riscv::CSR_MHPM_COUNTER_26,
+                riscv::CSR_MHPM_COUNTER_27,
+                riscv::CSR_MHPM_COUNTER_28,
+                riscv::CSR_MHPM_COUNTER_29,
+                riscv::CSR_MHPM_COUNTER_30,
+                riscv::CSR_MHPM_COUNTER_31 :  begin
+          perf_we_o   = 1'b1;
+          perf_data_o = csr_wdata;
+        end
+
+        riscv::CSR_MHPM_COUNTER_3H,
+                riscv::CSR_MHPM_COUNTER_4H,
+                riscv::CSR_MHPM_COUNTER_5H,
+                riscv::CSR_MHPM_COUNTER_6H,
+                riscv::CSR_MHPM_COUNTER_7H,
+                riscv::CSR_MHPM_COUNTER_8H,
+                riscv::CSR_MHPM_COUNTER_9H,
+                riscv::CSR_MHPM_COUNTER_10H,
+                riscv::CSR_MHPM_COUNTER_11H,
+                riscv::CSR_MHPM_COUNTER_12H,
+                riscv::CSR_MHPM_COUNTER_13H,
+                riscv::CSR_MHPM_COUNTER_14H,
+                riscv::CSR_MHPM_COUNTER_15H,
+                riscv::CSR_MHPM_COUNTER_16H,
+                riscv::CSR_MHPM_COUNTER_17H,
+                riscv::CSR_MHPM_COUNTER_18H,
+                riscv::CSR_MHPM_COUNTER_19H,
+                riscv::CSR_MHPM_COUNTER_20H,
+                riscv::CSR_MHPM_COUNTER_21H,
+                riscv::CSR_MHPM_COUNTER_22H,
+                riscv::CSR_MHPM_COUNTER_23H,
+                riscv::CSR_MHPM_COUNTER_24H,
+                riscv::CSR_MHPM_COUNTER_25H,
+                riscv::CSR_MHPM_COUNTER_26H,
+                riscv::CSR_MHPM_COUNTER_27H,
+                riscv::CSR_MHPM_COUNTER_28H,
+                riscv::CSR_MHPM_COUNTER_29H,
+                riscv::CSR_MHPM_COUNTER_30H,
+                riscv::CSR_MHPM_COUNTER_31H :  begin
+          perf_we_o = 1'b1;
+          if (riscv::XLEN == 32) perf_data_o = csr_wdata;
+          else update_access_exception = 1'b1;
+        end
+
+        riscv::CSR_DCACHE: dcache_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]};  // enable bit
+        riscv::CSR_ICACHE: icache_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]};  // enable bit
+        riscv::CSR_ACC_CONS: begin
+          if (CVA6Cfg.EnableAccelerator) begin
+            acc_cons_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]};  // enable bit
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        // PMP locked logic
+        // 1. refuse to update any locked entry
+        // 2. also refuse to update the entry below a locked TOR entry
+        // Note that writes to pmpcfg below a locked TOR entry are valid
+        riscv::CSR_PMPCFG0:
+        for (int i = 0; i < (riscv::XLEN / 8); i++)
+        if (!pmpcfg_q[i].locked) pmpcfg_d[i] = csr_wdata[i*8+:8];
+        riscv::CSR_PMPCFG1: begin
+          if (riscv::XLEN == 32) begin
+            for (int i = 0; i < 4; i++)
+            if (!pmpcfg_q[i+4].locked) pmpcfg_d[i+4] = csr_wdata[i*8+:8];
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        riscv::CSR_PMPCFG2:
+        for (int i = 0; i < (riscv::XLEN / 8); i++)
+        if (!pmpcfg_q[i+8].locked) pmpcfg_d[i+8] = csr_wdata[i*8+:8];
+        riscv::CSR_PMPCFG3: begin
+          if (riscv::XLEN == 32) begin
+            for (int i = 0; i < 4; i++)
+            if (!pmpcfg_q[i+12].locked) pmpcfg_d[i+12] = csr_wdata[i*8+:8];
+          end else begin
+            update_access_exception = 1'b1;
+          end
+        end
+        riscv::CSR_PMPADDR0,
+                riscv::CSR_PMPADDR1,
+                riscv::CSR_PMPADDR2,
+                riscv::CSR_PMPADDR3,
+                riscv::CSR_PMPADDR4,
+                riscv::CSR_PMPADDR5,
+                riscv::CSR_PMPADDR6,
+                riscv::CSR_PMPADDR7,
+                riscv::CSR_PMPADDR8,
+                riscv::CSR_PMPADDR9,
+                riscv::CSR_PMPADDR10,
+                riscv::CSR_PMPADDR11,
+                riscv::CSR_PMPADDR12,
+                riscv::CSR_PMPADDR13,
+                riscv::CSR_PMPADDR14,
+                riscv::CSR_PMPADDR15:  begin
+          // PMP entry index: the lowest four bits of the CSR address select pmpaddr0..pmpaddr15
+          automatic logic [3:0] index = csr_addr.csr_decode.address[3:0];
+          // drop the write if this entry is locked, or if the entry above (index+1) is locked and is itself in TOR mode
+          if (!pmpcfg_q[index].locked && !(pmpcfg_q[index+1].locked && pmpcfg_q[index+1].addr_mode == riscv::TOR)) begin
+            pmpaddr_d[index] = csr_wdata[riscv::PLEN-3:0];
+          end
+        end
+        default: update_access_exception = 1'b1;
+      endcase
+    end
+
+    mstatus_d.sxl = riscv::XLEN_64;
+    mstatus_d.uxl = riscv::XLEN_64;
+
+    // mark the floating point extension register as dirty
+    if (CVA6Cfg.FpPresent && (dirty_fp_state_csr || dirty_fp_state_i)) begin
+      mstatus_d.fs = riscv::Dirty;
+    end
+    // mark the vector extension register as dirty
+    if (CVA6Cfg.RVV && dirty_v_state_i) begin
+      mstatus_d.vs = riscv::Dirty;
+    end
+    // hardwired extension registers
+    mstatus_d.sd = (mstatus_q.xs == riscv::Dirty) | (mstatus_q.fs == riscv::Dirty);
+
+    // reserve PMPCFG bits 5 and 6 (hardwire to 0)
+    for (int i = 0; i < CVA6Cfg.NrPMPEntries; i++) pmpcfg_d[i].reserved = 2'b0;
+
+    // write the floating point status register
+    if (CVA6Cfg.FpPresent && csr_write_fflags_i) begin
+      fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags;
+    end
+
+    // ----------------------------
+    // Accelerator FP imprecise exceptions
+    // ----------------------------
+
+    // Update fflags as soon as a FP exception occurs in the accelerator
+    // The exception is imprecise, and the fcsr.fflags update always happens immediately
+    if (CVA6Cfg.EnableAccelerator) begin
+      fcsr_d.fflags |= acc_fflags_ex_valid_i ? acc_fflags_ex_i : 5'b0;
+    end
+
+    // ---------------------
+    // External Interrupts
+    // ---------------------
+    // Machine Mode External Interrupt Pending
+    mip_d[riscv::IRQ_M_EXT] = irq_i[0];
+    // Machine software interrupt
+    mip_d[riscv::IRQ_M_SOFT] = ipi_i;
+    // Timer interrupt pending, coming from platform timer
+    mip_d[riscv::IRQ_M_TIMER] = time_irq_i;
+
+    // -----------------------
+    // Manage Exception Stack
+    // -----------------------
+    // update exception CSRs
+    // we got an exception: update the cause, epc and tval registers
+    trap_to_priv_lvl = riscv::PRIV_LVL_M;
+    // Exception is taken and we are not in debug mode
+    // exceptions in debug mode don't update any fields
+    if ((CVA6Cfg.DebugEn && !debug_mode_q && ex_i.cause != riscv::DEBUG_REQUEST && ex_i.valid) || (!CVA6Cfg.DebugEn && ex_i.valid)) begin
+      // do not flush, flush is reserved for CSR writes with side effects
+      flush_o = 1'b0;
+      // figure out where to trap to
+      // a m-mode trap might be delegated if we are taking it in S mode
+      // first figure out if this was an exception or an interrupt e.g.: look at bit (XLEN-1)
+      // the cause register can only be $clog2(riscv::XLEN) bits long (as we only support XLEN exceptions)
+      if (CVA6Cfg.RVS && ((ex_i.cause[riscv::XLEN-1] && mideleg_q[ex_i.cause[$clog2(
+              riscv::XLEN
+          )-1:0]]) || (~ex_i.cause[riscv::XLEN-1] && medeleg_q[ex_i.cause[$clog2(
+              riscv::XLEN
+          )-1:0]]))) begin
+        // traps never transition from a more-privileged mode to a less privileged mode
+        // so if we are already in M mode, stay there
+        if (priv_lvl_o == riscv::PRIV_LVL_M) trap_to_priv_lvl = riscv::PRIV_LVL_M;
+        else trap_to_priv_lvl = riscv::PRIV_LVL_S;
+      end
+
+      // trap to supervisor mode
+      if (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S) begin
+        // update sstatus
+        mstatus_d.sie = 1'b0;
+        mstatus_d.spie = mstatus_q.sie;
+        // this can either be user or supervisor mode
+        mstatus_d.spp = priv_lvl_q[0];
+        // set cause
+        scause_d = ex_i.cause;
+        // set epc
+        sepc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};
+        // set mtval or stval
+        stval_d        = (ariane_pkg::ZERO_TVAL
+                                  && (ex_i.cause inside {
+                                    riscv::ILLEGAL_INSTR,
+                                    riscv::BREAKPOINT,
+                                    riscv::ENV_CALL_UMODE,
+                                    riscv::ENV_CALL_SMODE,
+                                    riscv::ENV_CALL_MMODE
+                                  } || ex_i.cause[riscv::XLEN-1])) ? '0 : ex_i.tval;
+        // trap to machine mode
+      end else begin
+        // update mstatus
+        mstatus_d.mie = 1'b0;
+        mstatus_d.mpie = mstatus_q.mie;
+        // save the previous privilege mode
+        mstatus_d.mpp = priv_lvl_q;
+        mcause_d = ex_i.cause;
+        // set epc
+        mepc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};
+        // set mtval or stval
+        mtval_d        = (ariane_pkg::ZERO_TVAL
+                                  && (ex_i.cause inside {
+                                    riscv::ILLEGAL_INSTR,
+                                    riscv::BREAKPOINT,
+                                    riscv::ENV_CALL_UMODE,
+                                    riscv::ENV_CALL_SMODE,
+                                    riscv::ENV_CALL_MMODE
+                                  } || ex_i.cause[riscv::XLEN-1])) ? '0 : ex_i.tval;
+      end
+
+      priv_lvl_d = trap_to_priv_lvl;
+    end
+
+    // ------------------------------
+    // Debug
+    // ------------------------------
+    // Explains why Debug Mode was entered.
+    // When there are multiple reasons to enter Debug Mode in a single cycle, hardware should set cause to the cause with the highest priority.
+    // 1: An ebreak instruction was executed. (priority 3)
+    // 2: The Trigger Module caused a breakpoint exception. (priority 4)
+    // 3: The debugger requested entry to Debug Mode. (priority 2)
+    // 4: The hart single stepped because step was set. (priority 1)
+    // we are currently not in debug mode and could potentially enter
+    if (!debug_mode_q) begin
+      dcsr_d.prv = priv_lvl_o;
+      // trigger module fired
+
+      // caused by a breakpoint
+      if (CVA6Cfg.DebugEn && ex_i.valid && ex_i.cause == riscv::BREAKPOINT) begin
+        dcsr_d.prv = priv_lvl_o;
+        // check that we actually want to enter debug depending on the privilege level we are currently in
+        unique case (priv_lvl_o)
+          riscv::PRIV_LVL_M: begin
+            debug_mode_d   = dcsr_q.ebreakm;
+            set_debug_pc_o = dcsr_q.ebreakm;
+          end
+          riscv::PRIV_LVL_S: begin
+            if (CVA6Cfg.RVS) begin
+              debug_mode_d   = dcsr_q.ebreaks;
+              set_debug_pc_o = dcsr_q.ebreaks;
+            end
+          end
+          riscv::PRIV_LVL_U: begin
+            if (CVA6Cfg.RVU) begin
+              debug_mode_d   = dcsr_q.ebreaku;
+              set_debug_pc_o = dcsr_q.ebreaku;
+            end
+          end
+          default: ;
+        endcase
+        // save the PC of this instruction, i.e. the next one to be executed
+        dpc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};
+        dcsr_d.cause = ariane_pkg::CauseBreakpoint;
+      end
+
+      // we've got a debug request
+      if (CVA6Cfg.DebugEn && ex_i.valid && ex_i.cause == riscv::DEBUG_REQUEST) begin
+        dcsr_d.prv = priv_lvl_o;
+        // save the PC
+        dpc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};
+        // enter debug mode
+        debug_mode_d = 1'b1;
+        // jump to the base address
+        set_debug_pc_o = 1'b1;
+        // save the cause as external debug request
+        dcsr_d.cause = ariane_pkg::CauseRequest;
+      end
+
+      // single step enable and we just retired an instruction
+      if (CVA6Cfg.DebugEn && dcsr_q.step && commit_ack_i[0]) begin
+        dcsr_d.prv = priv_lvl_o;
+        // valid CTRL flow change
+        if (commit_instr_i[0].fu == CTRL_FLOW) begin
+          // we saved the correct target address during execute
+          dpc_d = {
+            {riscv::XLEN - riscv::VLEN{commit_instr_i[0].bp.predict_address[riscv::VLEN-1]}},
+            commit_instr_i[0].bp.predict_address
+          };
+          // exception valid
+        end else if (ex_i.valid) begin
+          dpc_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, trap_vector_base_o};
+          // return from environment
+        end else if (eret_o) begin
+          dpc_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, epc_o};
+          // consecutive PC
+        end else begin
+          dpc_d = {
+            {riscv::XLEN - riscv::VLEN{commit_instr_i[0].pc[riscv::VLEN-1]}},
+            commit_instr_i[0].pc + (commit_instr_i[0].is_compressed ? 'h2 : 'h4)
+          };
+        end
+        debug_mode_d   = 1'b1;
+        set_debug_pc_o = 1'b1;
+        dcsr_d.cause   = ariane_pkg::CauseSingleStep;
+      end
+    end
+    // go in halt-state again when we encounter an exception
+    if (CVA6Cfg.DebugEn && debug_mode_q && ex_i.valid && ex_i.cause == riscv::BREAKPOINT) begin
+      set_debug_pc_o = 1'b1;
+    end
+
+    // ------------------------------
+    // MPRV - Modify Privilege Level
+    // ------------------------------
+    // Set the address translation at which the load and stores should occur
+    // we can use the previous values since changing the address translation will always involve a pipeline flush
+    if (ariane_pkg::MMU_PRESENT && mprv && CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == riscv::MODE_SV && (mstatus_q.mpp != riscv::PRIV_LVL_M))
+      en_ld_st_translation_d = 1'b1;
+    else  // otherwise we go with the regular settings
+      en_ld_st_translation_d = en_translation_o;
+
+    ld_st_priv_lvl_o = (mprv) ? mstatus_q.mpp : priv_lvl_o;
+    en_ld_st_translation_o = en_ld_st_translation_q;
+    // ------------------------------
+    // Return from Environment
+    // ------------------------------
+    // When executing an xRET instruction, supposing xPP holds the value y, xIE is set to xPIE; the privilege
+    // mode is changed to y; xPIE is set to 1; and xPP is set to U
+    if (mret) begin
+      // return from exception, IF doesn't care from where we are returning
+      eret_o         = 1'b1;
+      // return to the previous privilege level and restore all enable flags
+      // get the previous machine interrupt enable flag
+      mstatus_d.mie  = mstatus_q.mpie;
+      // restore the previous privilege level
+      priv_lvl_d     = mstatus_q.mpp;
+      // set mpp to user mode
+      mstatus_d.mpp  = riscv::PRIV_LVL_U;
+      // set mpie to 1
+      mstatus_d.mpie = 1'b1;
+    end
+
+    if (CVA6Cfg.RVS && sret) begin
+      // return from exception, IF doesn't care from where we are returning
+      eret_o         = 1'b1;
+      // return the previous supervisor interrupt enable flag
+      mstatus_d.sie  = mstatus_q.spie;
+      // restore the previous privilege level
+      priv_lvl_d     = riscv::priv_lvl_t'({1'b0, mstatus_q.spp});
+      // set spp to user mode
+      mstatus_d.spp  = 1'b0;
+      // set spie to 1
+      mstatus_d.spie = 1'b1;
+    end
+
+    // return from debug mode
+    if (CVA6Cfg.DebugEn && dret) begin
+      // return from exception, IF doesn't care from where we are returning
+      eret_o       = 1'b1;
+      // restore the previous privilege level
+      priv_lvl_d   = riscv::priv_lvl_t'(dcsr_q.prv);
+      // actually return from debug mode
+      debug_mode_d = 1'b0;
+    end
+  end
+
+  // ---------------------------
+  // CSR OP Select Logic
+  // ---------------------------
+  always_comb begin : csr_op_logic
+    // defaults: a plain write of the incoming operand, no return instruction
+    mret      = 1'b0;
+    sret      = 1'b0;
+    dret      = 1'b0;
+    csr_we    = 1'b1;
+    csr_read  = 1'b1;
+    csr_wdata = csr_wdata_i;
+
+    unique case (csr_op_i)
+      CSR_WRITE: csr_wdata = csr_wdata_i;
+      CSR_SET:   csr_wdata = csr_rdata | csr_wdata_i;
+      CSR_CLEAR: csr_wdata = csr_rdata & (~csr_wdata_i);
+      CSR_READ:  csr_we = 1'b0;
+      MRET: begin
+        // a return must not have any CSR read or write side-effects
+        csr_read = 1'b0;
+        csr_we   = 1'b0;
+        mret     = 1'b1;  // signal a return from machine mode
+      end
+      default: begin
+        // every remaining operation, SRET and DRET included, leaves the CSRs
+        // untouched: no read and no write side-effects
+        csr_read = 1'b0;
+        csr_we   = 1'b0;
+        if (CVA6Cfg.RVS && csr_op_i == SRET) begin
+          // signal a return from supervisor mode, only decoded when
+          // CVA6Cfg.RVS is set
+          sret = 1'b1;
+        end else if (CVA6Cfg.DebugEn && csr_op_i == DRET) begin
+          // signal a return from debug mode, only decoded when
+          // CVA6Cfg.DebugEn is set
+          dret = 1'b1;
+        end
+      end
+    endcase
+    // if we are violating our privileges do not update the architectural state
+    if (privilege_violation) begin
+      csr_read = 1'b0;
+      csr_we   = 1'b0;
+    end
+  end
+
+  assign irq_ctrl_o.mie = mie_q;  // current mie register
+  assign irq_ctrl_o.mip = mip_q;  // current mip register
+  assign irq_ctrl_o.sie = mstatus_q.sie;  // sie bit of mstatus
+  assign irq_ctrl_o.mideleg = mideleg_q;  // current mideleg register
+  assign irq_ctrl_o.global_enable = (~debug_mode_q)  // never while in debug mode
+      // while single stepping (dcsr.step) interrupts are only enabled if dcsr.stepie is set;
+      // this term is always true if we don't support DEBUG mode
+      & (~CVA6Cfg.DebugEn | (~dcsr_q.step | dcsr_q.stepie))
+                                    & ((mstatus_q.mie & (priv_lvl_o == riscv::PRIV_LVL_M))
+                                    | (priv_lvl_o != riscv::PRIV_LVL_M));
+
+  always_comb begin : privilege_check
+    // -----------------
+    // Privilege Check
+    // -----------------
+    // only CSR accesses are checked; the check has precedence over interrupts
+    privilege_violation = 1'b0;
+    if (csr_op_i inside {CSR_WRITE, CSR_SET, CSR_CLEAR, CSR_READ}) begin
+      // the current privilege level must cover the level the CSR decodes to
+      if (riscv::priv_lvl_t'(priv_lvl_o & csr_addr.csr_decode.priv_lvl) != csr_addr.csr_decode.priv_lvl) begin
+        privilege_violation = 1'b1;
+      end
+      // debug CSRs (0x7b0 - 0x7bf) are only accessible in debug mode and thus
+      // never accessible when debug support is not configured
+      if (csr_addr_i[11:4] == 8'h7b && (!CVA6Cfg.DebugEn || !debug_mode_q)) begin
+        privilege_violation = 1'b1;
+      end
+      // counter CSRs (address range C00 to C1F) are gated by the counter-enable registers
+      if (csr_addr_i inside {[riscv::CSR_CYCLE : riscv::CSR_HPM_COUNTER_31]}) begin
+        if (CVA6Cfg.RVS && priv_lvl_o == riscv::PRIV_LVL_S) begin
+          privilege_violation = ~mcounteren_q[csr_addr_i[4:0]];
+        end else if (CVA6Cfg.RVU && priv_lvl_o == riscv::PRIV_LVL_U) begin
+          privilege_violation = ~(mcounteren_q[csr_addr_i[4:0]] & scounteren_q[csr_addr_i[4:0]]);
+        end else if (priv_lvl_o == riscv::PRIV_LVL_M) begin
+          privilege_violation = 1'b0;
+        end
+      end
+    end
+  end
+  // ----------------------
+  // CSR Exception Control
+  // ----------------------
+  always_comb begin : exception_ctrl
+    // default: nothing to report, every field is cleared
+    csr_exception_o = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0};
+
+    // ----------------------------------
+    // Illegal Access (decode exception)
+    // ----------------------------------
+    // Two sources end up here and both are reported as an illegal
+    // instruction:
+    //  - an access exception flagged while reading or updating a CSR
+    //  - a privilege violation
+    if (update_access_exception || read_access_exception || privilege_violation) begin
+      // the tval field is deliberately left untouched: it is filled in by the
+      // commit stage, which spares the extra wiring from commit to CSR and
+      // back to commit
+      csr_exception_o.valid = 1'b1;
+      csr_exception_o.cause = riscv::ILLEGAL_INSTR;
+    end
+  end
+
+  // -------------------
+  // Wait for Interrupt
+  // -------------------
+  always_comb begin : wfi_ctrl
+    // by default keep the current state of the wait-for-interrupt register
+    wfi_d = wfi_q;
+    // wake up on any pending and enabled interrupt, on a debug request (when
+    // debug is supported) or on irq_i[1]
+    if (|(mip_q & mie_q) || (CVA6Cfg.DebugEn && debug_req_i) || irq_i[1]) begin
+      wfi_d = 1'b0;
+      // otherwise a WFI without a pending exception stalls the core, unless we
+      // are in debug mode (which requires debug support to be configured)
+    end else if (csr_op_i == WFI && !ex_i.valid && !(CVA6Cfg.DebugEn && debug_mode_q)) begin
+      wfi_d = 1'b1;
+    end
+  end
+
+  // output assignments dependent on privilege mode
+  always_comb begin : priv_output
+    trap_vector_base_o = {mtvec_q[riscv::VLEN-1:2], 2'b0};  // default: mtvec base, low two bits cleared
+    // traps taken to supervisor mode use the stvec base instead
+    if (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S) begin
+      trap_vector_base_o = {stvec_q[riscv::VLEN-1:2], 2'b0};
+    end
+
+    // if we are in debug mode jump to a specific address (DmBaseAddress + ExceptionAddress)
+    if (CVA6Cfg.DebugEn && debug_mode_q) begin
+      trap_vector_base_o = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.ExceptionAddress[riscv::VLEN-1:0];
+    end
+
+    // In vectored mode the target is BASE + 4 * cause. To spare the costly
+    // addition the cause is simply inserted into bits [7:2], which imposes an
+    // additional alignment constraint of 64 * 4 bytes on the base. This only
+    // applies when the MSB of the cause is set and vectored mode (bit 0 of
+    // the tvec) is activated for the privilege level we are jumping to.
+    if (ex_i.cause[riscv::XLEN-1] &&
+                ((((CVA6Cfg.RVS || CVA6Cfg.RVU) && trap_to_priv_lvl == riscv::PRIV_LVL_M && mtvec_q[0]) || (!CVA6Cfg.RVS && !CVA6Cfg.RVU && mtvec_q[0]))
+               || (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S && stvec_q[0]))) begin
+      trap_vector_base_o[7:2] = ex_i.cause[5:0];
+    end
+
+    epc_o = mepc_q[riscv::VLEN-1:0];  // default: return to mepc
+    // we are returning from supervisor mode, so take the sepc register
+    if (CVA6Cfg.RVS && sret) begin
+      epc_o = sepc_q[riscv::VLEN-1:0];
+    end
+    // we are returning from debug mode, so take the dpc register
+    if (CVA6Cfg.DebugEn && dret) begin
+      epc_o = dpc_q[riscv::VLEN-1:0];
+    end
+  end
+
+  // -------------------
+  // Output Assignments
+  // -------------------
+  always_comb begin
+    // When the SEIP bit is read with a CSRRW, CSRRS, or CSRRC instruction, the value
+    // returned in the rd destination register contains the logical-OR of the software-writable
+    // bit and the interrupt signal from the interrupt controller (irq_i[1]).
+    csr_rdata_o = csr_rdata;  // default: return the register value unmodified
+
+    unique case (csr_addr.address)
+      riscv::CSR_MIP:
+      csr_rdata_o = csr_rdata | ({{riscv::XLEN - 1{1'b0}}, irq_i[1]} << riscv::IRQ_S_EXT);
+      // for sip the interrupt line is only visible if this bit is delegated in mideleg
+      riscv::CSR_SIP: begin
+        if (CVA6Cfg.RVS) begin
+          csr_rdata_o = csr_rdata
+                              | ({{riscv::XLEN-1{1'b0}}, (irq_i[1] & mideleg_q[riscv::IRQ_S_EXT])} << riscv::IRQ_S_EXT);
+        end
+      end
+      default: ;  // every other CSR keeps the default above
+    endcase
+  end
+
+  // in debug mode we execute with privilege level M
+  assign priv_lvl_o = (CVA6Cfg.DebugEn && debug_mode_q) ? riscv::PRIV_LVL_M : priv_lvl_q;
+  // FPU outputs
+  assign fflags_o = fcsr_q.fflags;
+  assign frm_o = fcsr_q.frm;
+  assign fprec_o = fcsr_q.fprec;
+  // MMU outputs
+  assign satp_ppn_o = satp_q.ppn;
+  assign asid_o = satp_q.asid[AsidWidth-1:0];
+  assign sum_o = mstatus_q.sum;
+  // translate only if S-mode is configured, satp selects riscv::MODE_SV and we are not in M-mode
+  assign en_translation_o = ((CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == riscv::MODE_SV) &&
+                               priv_lvl_o != riscv::PRIV_LVL_M)
+                              ? 1'b1
+                              : 1'b0;
+  assign mxr_o = mstatus_q.mxr;
+  assign tvm_o = mstatus_q.tvm;
+  assign tw_o = mstatus_q.tw;
+  assign tsr_o = mstatus_q.tsr;
+  assign halt_csr_o = wfi_q;  // driven directly by the wait-for-interrupt register
+`ifdef PITON_ARIANE
+  assign icache_en_o = icache_q[0];
+`else
+  assign icache_en_o = icache_q[0] & (~debug_mode_q);  // instruction cache is off in debug mode
+`endif
+  assign dcache_en_o = dcache_q[0];
+  assign acc_cons_en_o = CVA6Cfg.EnableAccelerator ? acc_cons_q[0] : 1'b0;
+
+  // in debug mode mprv is only taken into account if dcsr.mprven is set
+  assign mprv = (CVA6Cfg.DebugEn && debug_mode_q && !dcsr_q.mprven) ? 1'b0 : mstatus_q.mprv;
+  assign debug_mode_o = debug_mode_q;
+  assign single_step_o = dcsr_q.step;
+  assign mcountinhibit_o = {{29 - MHPMCounterNum{1'b0}}, mcountinhibit_q};  // pad with leading zeros
+
+  // sequential process
+  always_ff @(posedge clk_i or negedge rst_ni) begin  // asynchronous, active-low reset
+    if (~rst_ni) begin
+      priv_lvl_q   <= riscv::PRIV_LVL_M;  // reset into machine mode
+      // floating-point registers
+      fcsr_q       <= '0;
+      // debug signals
+      debug_mode_q <= 1'b0;
+      if (CVA6Cfg.DebugEn) begin  // remaining debug registers are only reset if debug is configured
+        dcsr_q           <= '0;
+        dcsr_q.prv       <= riscv::PRIV_LVL_M;
+        dcsr_q.xdebugver <= 4'h4;
+        dpc_q            <= '0;
+        dscratch0_q      <= {riscv::XLEN{1'b0}};
+        dscratch1_q      <= {riscv::XLEN{1'b0}};
+      end
+      // machine mode registers
+      mstatus_q        <= 64'b0;
+      // set to boot address + direct mode + 4 byte offset which is the initial trap
+      mtvec_rst_load_q <= 1'b1;
+      mtvec_q          <= '0;
+      mip_q            <= {riscv::XLEN{1'b0}};
+      mie_q            <= {riscv::XLEN{1'b0}};
+      mepc_q           <= {riscv::XLEN{1'b0}};
+      mcause_q         <= {riscv::XLEN{1'b0}};
+      mcounteren_q     <= {riscv::XLEN{1'b0}};
+      mscratch_q       <= {riscv::XLEN{1'b0}};
+      mtval_q          <= {riscv::XLEN{1'b0}};
+      fiom_q           <= '0;
+      dcache_q         <= {{riscv::XLEN - 1{1'b0}}, 1'b1};  // bit 0 set: data cache enabled
+      icache_q         <= {{riscv::XLEN - 1{1'b0}}, 1'b1};  // bit 0 set: instruction cache enabled
+      mcountinhibit_q  <= '0;
+      acc_cons_q       <= {{riscv::XLEN - 1{1'b0}}, CVA6Cfg.EnableAccelerator};
+      // supervisor mode registers
+      if (CVA6Cfg.RVS) begin
+        medeleg_q    <= {riscv::XLEN{1'b0}};
+        mideleg_q    <= {riscv::XLEN{1'b0}};
+        sepc_q       <= {riscv::XLEN{1'b0}};
+        scause_q     <= {riscv::XLEN{1'b0}};
+        stvec_q      <= {riscv::XLEN{1'b0}};
+        scounteren_q <= {riscv::XLEN{1'b0}};
+        sscratch_q   <= {riscv::XLEN{1'b0}};
+        stval_q      <= {riscv::XLEN{1'b0}};
+        satp_q       <= {riscv::XLEN{1'b0}};
+      end
+      // timer and counters
+      cycle_q                <= 64'b0;
+      instret_q              <= 64'b0;
+      // aux registers
+      en_ld_st_translation_q <= 1'b0;
+      // wait for interrupt
+      wfi_q                  <= 1'b0;
+      // pmp: configured entries take their reset values from CVA6Cfg, all others are zero
+      for (int i = 0; i < 16; i++) begin
+        if (i < CVA6Cfg.NrPMPEntries) begin
+          pmpcfg_q[i]  <= riscv::pmpcfg_t'(CVA6Cfg.PMPCfgRstVal[i]);
+          pmpaddr_q[i] <= CVA6Cfg.PMPAddrRstVal[i][riscv::PLEN-3:0];
+        end else begin
+          pmpcfg_q[i]  <= '0;
+          pmpaddr_q[i] <= '0;
+        end
+      end
+    end else begin
+      priv_lvl_q <= priv_lvl_d;
+      // floating-point registers
+      fcsr_q     <= fcsr_d;
+      // debug signals, only updated if debug is configured
+      if (CVA6Cfg.DebugEn) begin
+        debug_mode_q <= debug_mode_d;
+        dcsr_q       <= dcsr_d;
+        dpc_q        <= dpc_d;
+        dscratch0_q  <= dscratch0_d;
+        dscratch1_q  <= dscratch1_d;
+      end
+      // machine mode registers
+      mstatus_q        <= mstatus_d;
+      mtvec_rst_load_q <= 1'b0;  // only high until the first clock edge after reset
+      mtvec_q          <= mtvec_d;
+      mip_q            <= mip_d;
+      mie_q            <= mie_d;
+      mepc_q           <= mepc_d;
+      mcause_q         <= mcause_d;
+      mcounteren_q     <= mcounteren_d;
+      mscratch_q       <= mscratch_d;
+      mtval_q          <= mtval_d;
+      fiom_q           <= fiom_d;
+      dcache_q         <= dcache_d;
+      icache_q         <= icache_d;
+      mcountinhibit_q  <= mcountinhibit_d;
+      acc_cons_q       <= acc_cons_d;
+      // supervisor mode registers
+      if (CVA6Cfg.RVS) begin
+        medeleg_q    <= medeleg_d;
+        mideleg_q    <= mideleg_d;
+        sepc_q       <= sepc_d;
+        scause_q     <= scause_d;
+        stvec_q      <= stvec_d;
+        scounteren_q <= scounteren_d;
+        sscratch_q   <= sscratch_d;
+        stval_q      <= stval_d;
+        satp_q       <= satp_d;
+      end
+      // timer and counters
+      cycle_q                <= cycle_d;
+      instret_q              <= instret_d;
+      // aux registers
+      en_ld_st_translation_q <= en_ld_st_translation_d;
+      // wait for interrupt
+      wfi_q                  <= wfi_d;
+      // pmp
+      for (int i = 0; i < 16; i++) begin
+        if (i < CVA6Cfg.NrPMPEntries) begin
+          // We only support >=8-byte granularity: cfg updates selecting NA4 or R=0/W=1, or to a read-only entry, are dropped
+          if(!CVA6Cfg.PMPEntryReadOnly[i] && pmpcfg_d[i].addr_mode != riscv::NA4 && !(pmpcfg_d[i].access_type.r == '0 && pmpcfg_d[i].access_type.w == '1)) begin
+            pmpcfg_q[i] <= pmpcfg_d[i];
+          end else begin
+            pmpcfg_q[i] <= pmpcfg_q[i];
+          end
+          if (!CVA6Cfg.PMPEntryReadOnly[i]) begin
+            pmpaddr_q[i] <= pmpaddr_d[i];
+          end else begin
+            pmpaddr_q[i] <= pmpaddr_q[i];
+          end
+        end else begin
+          pmpcfg_q[i]  <= '0;
+          pmpaddr_q[i] <= '0;
+        end
+      end
+    end
+  end
+
+  //-------------
+  // Assertions
+  //-------------
+  //pragma translate_off
+  // check that eret and ex are never valid together (disabled while rst_ni is low or unknown)
+  assert property (@(posedge clk_i) disable iff (!rst_ni !== '0) !(eret_o && ex_i.valid))
+  else begin
+    $error("eret and exception should never be valid at the same time");
+    $stop();
+  end
+  //pragma translate_on
+endmodule
diff --git a/test/type_param/core/cva6.sv b/test/type_param/core/cva6.sv
new file mode 100644
index 0000000..4b9ccf0
--- /dev/null
+++ b/test/type_param/core/cva6.sv
@@ -0,0 +1,1401 @@
+// Copyright 2017-2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.03.2017
+// Description: CVA6 Top-level module
+
+
+module cva6
+  import ariane_pkg::*;
+#(
+    // CVA6 config
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg,
+    parameter bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace),
+    // RVFI
+    parameter type rvfi_probes_t = struct packed {
+      logic [TRANS_ID_BITS-1:0]                                               issue_pointer;
+      logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0]                    commit_pointer;
+      logic                                                                   flush_unissued_instr;
+      logic                                                                   decoded_instr_valid;
+      logic                                                                   flush;
+      logic                                                                   decoded_instr_ack;
+      logic                                                                   issue_instr_ack;
+      logic                                                                   fetch_entry_valid;
+      logic [31:0]                                                            instruction;
+      logic                                                                   is_compressed;
+      riscv::xlen_t                                                           rs1_forwarding;
+      riscv::xlen_t                                                           rs2_forwarding;
+      scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0]                          commit_instr;
+      exception_t                                                             ex_commit;
+      riscv::priv_lvl_t                                                       priv_lvl;
+      lsu_ctrl_t                                                              lsu_ctrl;
+      logic [((CVA6Cfg.CvxifEn || CVA6Cfg.RVV) ? 5 : 4)-1:0][riscv::XLEN-1:0] wbdata;
+      logic [CVA6Cfg.NrCommitPorts-1:0]                                       commit_ack;
+      logic [riscv::PLEN-1:0]                                                 mem_paddr;
+      logic                                                                   debug_mode;
+      logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0]                      wdata;
+    },
+
+    // AXI types
+    parameter type axi_ar_chan_t = struct packed {
+      logic [CVA6Cfg.AxiIdWidth-1:0]   id;
+      logic [CVA6Cfg.AxiAddrWidth-1:0] addr;
+      axi_pkg::len_t                   len;
+      axi_pkg::size_t                  size;
+      axi_pkg::burst_t                 burst;
+      logic                            lock;
+      axi_pkg::cache_t                 cache;
+      axi_pkg::prot_t                  prot;
+      axi_pkg::qos_t                   qos;
+      axi_pkg::region_t                region;
+      logic [CVA6Cfg.AxiUserWidth-1:0] user;
+    },
+    parameter type axi_aw_chan_t = struct packed {
+      logic [CVA6Cfg.AxiIdWidth-1:0]   id;
+      logic [CVA6Cfg.AxiAddrWidth-1:0] addr;
+      axi_pkg::len_t                   len;
+      axi_pkg::size_t                  size;
+      axi_pkg::burst_t                 burst;
+      logic                            lock;
+      axi_pkg::cache_t                 cache;
+      axi_pkg::prot_t                  prot;
+      axi_pkg::qos_t                   qos;
+      axi_pkg::region_t                region;
+      axi_pkg::atop_t                  atop;
+      logic [CVA6Cfg.AxiUserWidth-1:0] user;
+    },
+    parameter type axi_w_chan_t = struct packed {
+      logic [CVA6Cfg.AxiDataWidth-1:0]     data;
+      logic [(CVA6Cfg.AxiDataWidth/8)-1:0] strb;
+      logic                                last;
+      logic [CVA6Cfg.AxiUserWidth-1:0]     user;
+    },
+    parameter type b_chan_t = struct packed {
+      logic [CVA6Cfg.AxiIdWidth-1:0]   id;
+      axi_pkg::resp_t                  resp;
+      logic [CVA6Cfg.AxiUserWidth-1:0] user;
+    },
+    parameter type r_chan_t = struct packed {
+      logic [CVA6Cfg.AxiIdWidth-1:0]   id;
+      logic [CVA6Cfg.AxiDataWidth-1:0] data;
+      axi_pkg::resp_t                  resp;
+      logic                            last;
+      logic [CVA6Cfg.AxiUserWidth-1:0] user;
+    },
+    parameter type noc_req_t = struct packed {
+      axi_aw_chan_t aw;
+      logic         aw_valid;
+      axi_w_chan_t  w;
+      logic         w_valid;
+      logic         b_ready;
+      axi_ar_chan_t ar;
+      logic         ar_valid;
+      logic         r_ready;
+    },
+    parameter type noc_resp_t = struct packed {
+      logic    aw_ready;
+      logic    ar_ready;
+      logic    w_ready;
+      logic    b_valid;
+      b_chan_t b;
+      logic    r_valid;
+      r_chan_t r;
+    },
+    //
+    parameter type acc_cfg_t = logic,
+    parameter acc_cfg_t AccCfg = '0,
+    parameter type cvxif_req_t = cvxif_pkg::cvxif_req_t,
+    parameter type cvxif_resp_t = cvxif_pkg::cvxif_resp_t
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    // Core ID, Cluster ID and boot address are considered more or less static
+    input logic [riscv::VLEN-1:0] boot_addr_i,  // reset boot address
+    input  logic [riscv::XLEN-1:0]       hart_id_i,    // hart id in a multicore environment (reflected in a CSR)
+    // Interrupt inputs
+    input logic [1:0] irq_i,  // level sensitive IR lines, mip & sip (async)
+    input logic ipi_i,  // inter-processor interrupts (async)
+    // Timer facilities
+    input logic time_irq_i,  // timer interrupt in (async)
+    input logic debug_req_i,  // debug request (async)
+    // RISC-V formal interface port (`rvfi`):
+    // Can be left open when formal tracing is not needed.
+    output rvfi_probes_t rvfi_probes_o,
+    output cvxif_req_t cvxif_req_o,
+    input cvxif_resp_t cvxif_resp_i,
+    // memory side
+    output noc_req_t noc_req_o,
+    input noc_resp_t noc_resp_i
+);
+
+  // ------------------------------------------
+  // CVA6 configuration
+  // ------------------------------------------
+  // Extended config
+  localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn;
+  localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn;
+  localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8;
+  localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec;  // Are non-standard extensions present?
+  localparam int unsigned FLen = RVD ? 64 :  // D ext.
+  RVF ? 32 :  // F ext.
+  CVA6Cfg.XF16 ? 16 :  // Xf16 ext.
+  CVA6Cfg.XF16ALT ? 16 :  // Xf16alt ext.
+  CVA6Cfg.XF8 ? 8 :  // Xf8 ext.
+  1;  // Unused in case of no FP
+
+  // Transprecision floating-point extensions configuration
+  localparam bit RVFVec     = RVF             & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled
+  localparam bit XF16Vec    = CVA6Cfg.XF16    & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled
+  localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled
+  localparam bit XF8Vec     = CVA6Cfg.XF8     & CVA6Cfg.XFVec & FLen>8;  // FP8 vectors available if vectors and larger fmt enabled
+
+  localparam bit EnableAccelerator = CVA6Cfg.RVV;  // Currently only used by V extension (Ara)
+  localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4;
+
+  localparam NrRgprPorts = 2;
+
+  localparam bit NonIdemPotenceEn = CVA6Cfg.NrNonIdempotentRules && CVA6Cfg.NonIdempotentLength;  // set when both the rule count and NonIdempotentLength are non-zero
+
+  localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = {
+    CVA6Cfg.NrCommitPorts,
+    CVA6Cfg.AxiAddrWidth,
+    CVA6Cfg.AxiDataWidth,
+    CVA6Cfg.AxiIdWidth,
+    CVA6Cfg.AxiUserWidth,
+    CVA6Cfg.NrLoadBufEntries,
+    CVA6Cfg.FpuEn,
+    CVA6Cfg.XF16,
+    CVA6Cfg.XF16ALT,
+    CVA6Cfg.XF8,
+    CVA6Cfg.RVA,
+    CVA6Cfg.RVB,
+    CVA6Cfg.RVV,
+    CVA6Cfg.RVC,
+    CVA6Cfg.RVZCB,
+    CVA6Cfg.XFVec,
+    CVA6Cfg.CvxifEn,
+    CVA6Cfg.ZiCondExtEn,
+    // Extended
+    bit'(RVF),
+    bit'(RVD),
+    bit'(FpPresent),
+    bit'(NSX),
+    unsigned'(FLen),
+    bit'(RVFVec),
+    bit'(XF16Vec),
+    bit'(XF16ALTVec),
+    bit'(XF8Vec),
+    unsigned'(NrRgprPorts),
+    unsigned'(NrWbPorts),
+    bit'(EnableAccelerator),
+    CVA6Cfg.RVS,
+    CVA6Cfg.RVU,
+    CVA6Cfg.HaltAddress,
+    CVA6Cfg.ExceptionAddress,
+    CVA6Cfg.RASDepth,
+    CVA6Cfg.BTBEntries,
+    CVA6Cfg.BHTEntries,
+    CVA6Cfg.DmBaseAddress,
+    CVA6Cfg.NrPMPEntries,
+    CVA6Cfg.PMPCfgRstVal,
+    CVA6Cfg.PMPAddrRstVal,
+    CVA6Cfg.PMPEntryReadOnly,
+    CVA6Cfg.NOCType,
+    CVA6Cfg.NrNonIdempotentRules,
+    CVA6Cfg.NonIdempotentAddrBase,
+    CVA6Cfg.NonIdempotentLength,
+    CVA6Cfg.NrExecuteRegionRules,
+    CVA6Cfg.ExecuteRegionAddrBase,
+    CVA6Cfg.ExecuteRegionLength,
+    CVA6Cfg.NrCachedRegionRules,
+    CVA6Cfg.CachedRegionAddrBase,
+    CVA6Cfg.CachedRegionLength,
+    CVA6Cfg.MaxOutstandingStores,
+    CVA6Cfg.DebugEn,
+    NonIdemPotenceEn,
+    CVA6Cfg.AxiBurstWriteEn
+  };
+
+
+  // ------------------------------------------
+  // Global Signals
+  // Signals connecting more than one module
+  // ------------------------------------------
+  riscv::priv_lvl_t                                   priv_lvl;
+  exception_t                                         ex_commit;  // exception from commit stage
+  bp_resolve_t                                        resolved_branch;
+  logic             [                riscv::VLEN-1:0] pc_commit;
+  logic                                               eret;
+  logic             [CVA6ExtendCfg.NrCommitPorts-1:0] commit_ack;
+
+  localparam NumPorts = 4;
+  cvxif_pkg::cvxif_req_t cvxif_req;
+  cvxif_pkg::cvxif_resp_t cvxif_resp;
+
+  // --------------
+  // PCGEN <-> CSR
+  // --------------
+  logic [riscv::VLEN-1:0] trap_vector_base_commit_pcgen;
+  logic [riscv::VLEN-1:0] epc_commit_pcgen;
+  // --------------
+  // IF <-> ID
+  // --------------
+  fetch_entry_t fetch_entry_if_id;
+  logic fetch_valid_if_id;
+  logic fetch_ready_id_if;
+
+  // --------------
+  // ID <-> ISSUE
+  // --------------
+  scoreboard_entry_t issue_entry_id_issue;
+  logic issue_entry_valid_id_issue;
+  logic is_ctrl_fow_id_issue;
+  logic issue_instr_issue_id;
+
+  // --------------
+  // ISSUE <-> EX
+  // --------------
+  logic [riscv::VLEN-1:0] rs1_forwarding_id_ex;  // unregistered version of fu_data_o.operanda
+  logic [riscv::VLEN-1:0] rs2_forwarding_id_ex;  // unregistered version of fu_data_o.operandb
+
+  fu_data_t fu_data_id_ex;
+  logic [riscv::VLEN-1:0] pc_id_ex;
+  logic is_compressed_instr_id_ex;
+  // fixed latency units
+  logic flu_ready_ex_id;
+  logic [TRANS_ID_BITS-1:0] flu_trans_id_ex_id;
+  logic flu_valid_ex_id;
+  riscv::xlen_t flu_result_ex_id;
+  exception_t flu_exception_ex_id;
+  // ALU
+  logic alu_valid_id_ex;
+  // Branches and Jumps
+  logic branch_valid_id_ex;
+
+  branchpredict_sbe_t branch_predict_id_ex;
+  logic resolve_branch_ex_id;
+  // LSU
+  logic lsu_valid_id_ex;
+  logic lsu_ready_ex_id;
+
+  logic [TRANS_ID_BITS-1:0] load_trans_id_ex_id;
+  riscv::xlen_t load_result_ex_id;
+  logic load_valid_ex_id;
+  exception_t load_exception_ex_id;
+
+  riscv::xlen_t store_result_ex_id;
+  logic [TRANS_ID_BITS-1:0] store_trans_id_ex_id;
+  logic store_valid_ex_id;
+  exception_t store_exception_ex_id;
+  // MULT
+  logic mult_valid_id_ex;
+  // FPU
+  logic fpu_ready_ex_id;
+  logic fpu_valid_id_ex;
+  logic [1:0] fpu_fmt_id_ex;
+  logic [2:0] fpu_rm_id_ex;
+  logic [TRANS_ID_BITS-1:0] fpu_trans_id_ex_id;
+  riscv::xlen_t fpu_result_ex_id;
+  logic fpu_valid_ex_id;
+  exception_t fpu_exception_ex_id;
+  // Accelerator
+  logic stall_acc_id;
+  scoreboard_entry_t issue_instr_id_acc;
+  logic issue_instr_hs_id_acc;
+  logic [TRANS_ID_BITS-1:0] acc_trans_id_ex_id;
+  riscv::xlen_t acc_result_ex_id;
+  logic acc_valid_ex_id;
+  exception_t acc_exception_ex_id;
+  logic halt_acc_ctrl;
+  logic [4:0] acc_resp_fflags;
+  logic acc_resp_fflags_valid;
+  // CSR
+  logic csr_valid_id_ex;
+  // CVXIF
+  logic [TRANS_ID_BITS-1:0] x_trans_id_ex_id;
+  riscv::xlen_t x_result_ex_id;
+  logic x_valid_ex_id;
+  exception_t x_exception_ex_id;
+  logic x_we_ex_id;
+  logic x_issue_valid_id_ex;
+  logic x_issue_ready_ex_id;
+  logic [31:0] x_off_instr_id_ex;
+  // --------------
+  // EX <-> COMMIT
+  // --------------
+  // CSR Commit
+  logic csr_commit_commit_ex;
+  logic dirty_fp_state;
+  logic dirty_v_state;
+  // LSU Commit
+  logic lsu_commit_commit_ex;
+  logic lsu_commit_ready_ex_commit;
+  logic [TRANS_ID_BITS-1:0] lsu_commit_trans_id;
+  logic stall_st_pending_ex;
+  logic no_st_pending_ex;
+  logic no_st_pending_commit;
+  logic amo_valid_commit;
+  // ACCEL Commit
+  logic acc_valid_acc_ex;
+  // --------------
+  // ID <-> COMMIT
+  // --------------
+  scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0] commit_instr_id_commit;
+  // --------------
+  // RVFI
+  // --------------
+  logic [TRANS_ID_BITS-1:0] rvfi_issue_pointer;
+  logic [CVA6ExtendCfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] rvfi_commit_pointer;
+  // --------------
+  // COMMIT <-> ID
+  // --------------
+  logic [CVA6ExtendCfg.NrCommitPorts-1:0][4:0] waddr_commit_id;
+  logic [CVA6ExtendCfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_commit_id;
+  logic [CVA6ExtendCfg.NrCommitPorts-1:0] we_gpr_commit_id;
+  logic [CVA6ExtendCfg.NrCommitPorts-1:0] we_fpr_commit_id;
+  // --------------
+  // CSR <-> *
+  // --------------
+  logic [4:0] fflags_csr_commit;
+  riscv::xs_t fs;
+  logic [2:0] frm_csr_id_issue_ex;
+  logic [6:0] fprec_csr_ex;
+  riscv::xs_t vs;
+  logic enable_translation_csr_ex;
+  logic en_ld_st_translation_csr_ex;
+  riscv::priv_lvl_t ld_st_priv_lvl_csr_ex;
+  logic sum_csr_ex;
+  logic mxr_csr_ex;
+  logic [riscv::PPNW-1:0] satp_ppn_csr_ex;
+  logic [ASID_WIDTH-1:0] asid_csr_ex;
+  logic [11:0] csr_addr_ex_csr;
+  fu_op csr_op_commit_csr;
+  riscv::xlen_t csr_wdata_commit_csr;
+  riscv::xlen_t csr_rdata_csr_commit;
+  exception_t csr_exception_csr_commit;
+  logic tvm_csr_id;
+  logic tw_csr_id;
+  logic tsr_csr_id;
+  irq_ctrl_t irq_ctrl_csr_id;
+  logic dcache_en_csr_nbdcache;
+  logic csr_write_fflags_commit_cs;
+  logic icache_en_csr;
+  logic acc_cons_en_csr;
+  logic debug_mode;
+  logic single_step_csr_commit;
+  riscv::pmpcfg_t [15:0] pmpcfg;
+  logic [15:0][riscv::PLEN-3:0] pmpaddr;
+  logic [31:0] mcountinhibit_csr_perf;
+  // ----------------------------
+  // Performance Counters <-> *
+  // ----------------------------
+  logic [11:0] addr_csr_perf;
+  riscv::xlen_t data_csr_perf, data_perf_csr;
+  logic                                                  we_csr_perf;
+
+  logic                                                  icache_flush_ctrl_cache;
+  logic                                                  itlb_miss_ex_perf;
+  logic                                                  dtlb_miss_ex_perf;
+  logic                                                  dcache_miss_cache_perf;
+  logic                                                  icache_miss_cache_perf;
+  logic          [   NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits;
+  logic                                                  stall_issue;
+  // --------------
+  // CTRL <-> *
+  // --------------
+  logic                                                  set_pc_ctrl_pcgen;
+  logic                                                  flush_csr_ctrl;
+  logic                                                  flush_unissued_instr_ctrl_id;
+  logic                                                  flush_ctrl_if;
+  logic                                                  flush_ctrl_id;
+  logic                                                  flush_ctrl_ex;
+  logic                                                  flush_ctrl_bp;
+  logic                                                  flush_tlb_ctrl_ex;
+  logic                                                  fence_i_commit_controller;
+  logic                                                  fence_commit_controller;
+  logic                                                  sfence_vma_commit_controller;
+  logic                                                  halt_ctrl;
+  logic                                                  halt_csr_ctrl;
+  logic                                                  dcache_flush_ctrl_cache;
+  logic                                                  dcache_flush_ack_cache_ctrl;
+  logic                                                  set_debug_pc;
+  logic                                                  flush_commit;
+  logic                                                  flush_acc;
+
+  icache_areq_t                                          icache_areq_ex_cache;
+  icache_arsp_t                                          icache_areq_cache_ex;
+  icache_dreq_t                                          icache_dreq_if_cache;
+  icache_drsp_t                                          icache_dreq_cache_if;
+
+  amo_req_t                                              amo_req;
+  amo_resp_t                                             amo_resp;
+  logic                                                  sb_full;
+
+  // ----------------
+  // DCache <-> *
+  // ----------------
+  dcache_req_i_t [            2:0]                       dcache_req_ports_ex_cache;
+  dcache_req_o_t [            2:0]                       dcache_req_ports_cache_ex;
+  dcache_req_i_t [            1:0]                       dcache_req_ports_acc_cache;
+  dcache_req_o_t [            1:0]                       dcache_req_ports_cache_acc;
+  logic                                                  dcache_commit_wbuffer_empty;
+  logic                                                  dcache_commit_wbuffer_not_ni;
+
+  //RVFI
+  lsu_ctrl_t                                             rvfi_lsu_ctrl;
+  logic          [riscv::PLEN-1:0]                       rvfi_mem_paddr;
+  logic                                                  rvfi_is_compressed;
+  rvfi_probes_t                                          rvfi_probes;
+
+
+  // Accelerator port
+  logic          [           63:0]                       inval_addr;
+  logic                                                  inval_valid;
+  logic                                                  inval_ready;
+
+  // --------------
+  // Frontend: exchanges icache_dreq_* with the cache subsystem and hands fetch entries to ID
+  // --------------
+  frontend #(
+      .CVA6Cfg(CVA6ExtendCfg)
+  ) i_frontend (
+      .flush_i            (flush_ctrl_if),                  // not entirely correct
+      .flush_bp_i         (1'b0),  // tied low here; the controller's flush_ctrl_bp is not routed in
+      .halt_i             (halt_ctrl),
+      .debug_mode_i       (debug_mode),
+      .boot_addr_i        (boot_addr_i[riscv::VLEN-1:0]),  // only the low VLEN bits are passed on
+      .icache_dreq_i      (icache_dreq_cache_if),
+      .icache_dreq_o      (icache_dreq_if_cache),
+      .resolved_branch_i  (resolved_branch),
+      .pc_commit_i        (pc_commit),
+      .set_pc_commit_i    (set_pc_ctrl_pcgen),
+      .set_debug_pc_i     (set_debug_pc),
+      .epc_i              (epc_commit_pcgen),
+      .eret_i             (eret),
+      .trap_vector_base_i (trap_vector_base_commit_pcgen),
+      .ex_valid_i         (ex_commit.valid),
+      .fetch_entry_o      (fetch_entry_if_id),
+      .fetch_entry_valid_o(fetch_valid_if_id),
+      .fetch_entry_ready_i(fetch_ready_id_if),
+      .*  // every remaining port binds implicitly to the same-named signal in this scope
+  );
+
+  // ---------
+  // ID: takes fetch entries from the frontend and offers issue entries to the issue stage
+  // ---------
+  id_stage #(
+      .CVA6Cfg(CVA6ExtendCfg)
+  ) id_stage_i (
+      .clk_i,
+      .rst_ni,
+      .flush_i(flush_ctrl_if),  // same controller flush net that also drives the frontend
+      .debug_req_i,
+
+      .fetch_entry_i      (fetch_entry_if_id),
+      .fetch_entry_valid_i(fetch_valid_if_id),
+      .fetch_entry_ready_o(fetch_ready_id_if),
+
+      .issue_entry_o      (issue_entry_id_issue),
+      .issue_entry_valid_o(issue_entry_valid_id_issue),
+      .is_ctrl_flow_o     (is_ctrl_fow_id_issue),  // net name is spelled "fow" (sic) at both ends
+      .issue_instr_ack_i  (issue_instr_issue_id),
+
+      .rvfi_is_compressed_o(rvfi_is_compressed),
+
+      .priv_lvl_i  (priv_lvl),
+      .fs_i        (fs),
+      .frm_i       (frm_csr_id_issue_ex),
+      .vs_i        (vs),
+      .irq_i       (irq_i),
+      .irq_ctrl_i  (irq_ctrl_csr_id),
+      .debug_mode_i(debug_mode),
+      .tvm_i       (tvm_csr_id),
+      .tw_i        (tw_csr_id),
+      .tsr_i       (tsr_csr_id)
+  );
+
+  logic [NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_ex_id;  // write-back transaction IDs
+  logic [NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_ex_id;  // write-back result data
+  exception_t [NrWbPorts-1:0] ex_ex_ex_id;  // exception from execute, ex_stage to id_stage
+  logic [NrWbPorts-1:0] wt_valid_ex_id;  // write-back valid strobes
+  // Bundle per-unit write-back signals; all four vectors share one slot order (leftmost = top index).
+  if (CVA6ExtendCfg.CvxifEn) begin : gen_wb_cvxif  // order: {x, flu, load, store, fpu}
+    assign trans_id_ex_id = {
+      x_trans_id_ex_id,
+      flu_trans_id_ex_id,
+      load_trans_id_ex_id,
+      store_trans_id_ex_id,
+      fpu_trans_id_ex_id
+    };
+    assign wbdata_ex_id = {
+      x_result_ex_id, flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id
+    };
+    assign ex_ex_ex_id = {
+      x_exception_ex_id,
+      flu_exception_ex_id,
+      load_exception_ex_id,
+      store_exception_ex_id,
+      fpu_exception_ex_id
+    };
+    assign wt_valid_ex_id = {
+      x_valid_ex_id, flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id
+    };
+  end else if (CVA6ExtendCfg.EnableAccelerator) begin : gen_wb_acc  // {flu, load, store, fpu, acc}
+    assign trans_id_ex_id = {
+      flu_trans_id_ex_id,
+      load_trans_id_ex_id,
+      store_trans_id_ex_id,
+      fpu_trans_id_ex_id,
+      acc_trans_id_ex_id
+    };
+    assign wbdata_ex_id = {
+      flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id, acc_result_ex_id
+    };
+    assign ex_ex_ex_id = {
+      flu_exception_ex_id,
+      load_exception_ex_id,
+      store_exception_ex_id,
+      fpu_exception_ex_id,
+      acc_exception_ex_id
+    };
+    assign wt_valid_ex_id = {
+      flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id, acc_valid_ex_id
+    };
+  end else begin : gen_wb_default  // neither extension: {flu, load, store, fpu}
+    assign trans_id_ex_id = {
+      flu_trans_id_ex_id, load_trans_id_ex_id, store_trans_id_ex_id, fpu_trans_id_ex_id
+    };
+    assign wbdata_ex_id = {
+      flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id
+    };
+    assign ex_ex_ex_id = {
+      flu_exception_ex_id, load_exception_ex_id, store_exception_ex_id, fpu_exception_ex_id
+    };
+    assign wt_valid_ex_id = {flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id};
+  end
+
+  if (CVA6ExtendCfg.CvxifEn && CVA6ExtendCfg.EnableAccelerator) begin : gen_err_xif_and_acc
+    $error("X-interface and accelerator port cannot be enabled at the same time.");
+  end
+
+  // ---------
+  // Issue: receives issue entries from ID, drives the *_valid_o strobes to EX, takes write-back in
+  // ---------
+  issue_stage #(
+      .CVA6Cfg(CVA6ExtendCfg)
+  ) issue_stage_i (
+      .clk_i,
+      .rst_ni,
+      .sb_full_o             (sb_full),
+      .flush_unissued_instr_i(flush_unissued_instr_ctrl_id),
+      .flush_i               (flush_ctrl_id),
+      .stall_i               (stall_acc_id),  // from acc_dispatcher; '0 when no accelerator
+      // ID Stage
+      .decoded_instr_i       (issue_entry_id_issue),
+      .decoded_instr_valid_i (issue_entry_valid_id_issue),
+      .is_ctrl_flow_i        (is_ctrl_fow_id_issue),
+      .decoded_instr_ack_o   (issue_instr_issue_id),
+      // Functional Units
+      .rs1_forwarding_o      (rs1_forwarding_id_ex),
+      .rs2_forwarding_o      (rs2_forwarding_id_ex),
+      .fu_data_o             (fu_data_id_ex),
+      .pc_o                  (pc_id_ex),
+      .is_compressed_instr_o (is_compressed_instr_id_ex),
+      // fixed latency unit ready
+      .flu_ready_i           (flu_ready_ex_id),
+      // ALU
+      .alu_valid_o           (alu_valid_id_ex),
+      // Branches and Jumps
+      .branch_valid_o        (branch_valid_id_ex),            // branch is valid
+      .branch_predict_o      (branch_predict_id_ex),          // branch predict to ex
+      .resolve_branch_i      (resolve_branch_ex_id),          // in order to resolve the branch
+      // LSU
+      .lsu_ready_i           (lsu_ready_ex_id),
+      .lsu_valid_o           (lsu_valid_id_ex),
+      // Multiplier
+      .mult_valid_o          (mult_valid_id_ex),
+      // FPU
+      .fpu_ready_i           (fpu_ready_ex_id),
+      .fpu_valid_o           (fpu_valid_id_ex),
+      .fpu_fmt_o             (fpu_fmt_id_ex),
+      .fpu_rm_o              (fpu_rm_id_ex),
+      // CSR
+      .csr_valid_o           (csr_valid_id_ex),
+      // CVXIF
+      .x_issue_valid_o       (x_issue_valid_id_ex),
+      .x_issue_ready_i       (x_issue_ready_ex_id),
+      .x_off_instr_o         (x_off_instr_id_ex),
+      // Accelerator
+      .issue_instr_o         (issue_instr_id_acc),
+      .issue_instr_hs_o      (issue_instr_hs_id_acc),
+      // Commit
+      .resolved_branch_i     (resolved_branch),
+      .trans_id_i            (trans_id_ex_id),  // NrWbPorts-wide write-back bundles, see assigns above
+      .wbdata_i              (wbdata_ex_id),
+      .ex_ex_i               (ex_ex_ex_id),
+      .wt_valid_i            (wt_valid_ex_id),
+      .x_we_i                (x_we_ex_id),
+
+      .waddr_i              (waddr_commit_id),
+      .wdata_i              (wdata_commit_id),
+      .we_gpr_i             (we_gpr_commit_id),
+      .we_fpr_i             (we_fpr_commit_id),
+      .commit_instr_o       (commit_instr_id_commit),
+      .commit_ack_i         (commit_ack),
+      // Performance Counters
+      .stall_issue_o        (stall_issue),
+      //RVFI
+      .rvfi_issue_pointer_o (rvfi_issue_pointer),
+      .rvfi_commit_pointer_o(rvfi_commit_pointer),
+      .*  // every remaining port binds implicitly to the same-named signal in this scope
+  );
+
+  // ---------
+  // EX: FLU/LSU/FPU/CVXIF-facing ports; every port is connected explicitly (no .* wildcard)
+  // ---------
+  ex_stage #(
+      .CVA6Cfg   (CVA6ExtendCfg),
+      .ASID_WIDTH(ASID_WIDTH)
+  ) ex_stage_i (
+      .clk_i                (clk_i),
+      .rst_ni               (rst_ni),
+      .debug_mode_i         (debug_mode),
+      .flush_i              (flush_ctrl_ex),
+      .rs1_forwarding_i     (rs1_forwarding_id_ex),
+      .rs2_forwarding_i     (rs2_forwarding_id_ex),
+      .fu_data_i            (fu_data_id_ex),
+      .pc_i                 (pc_id_ex),
+      .is_compressed_instr_i(is_compressed_instr_id_ex),
+      // fixed latency units
+      .flu_result_o         (flu_result_ex_id),
+      .flu_trans_id_o       (flu_trans_id_ex_id),
+      .flu_valid_o          (flu_valid_ex_id),
+      .flu_exception_o      (flu_exception_ex_id),
+      .flu_ready_o          (flu_ready_ex_id),
+      // ALU
+      .alu_valid_i          (alu_valid_id_ex),
+      // Branches and Jumps
+      .branch_valid_i       (branch_valid_id_ex),
+      .branch_predict_i     (branch_predict_id_ex),       // branch predict to ex
+      .resolved_branch_o    (resolved_branch),
+      .resolve_branch_o     (resolve_branch_ex_id),
+      // CSR
+      .csr_valid_i          (csr_valid_id_ex),
+      .csr_addr_o           (csr_addr_ex_csr),
+      .csr_commit_i         (csr_commit_commit_ex),       // from commit
+      // MULT
+      .mult_valid_i         (mult_valid_id_ex),
+      // LSU
+      .lsu_ready_o          (lsu_ready_ex_id),
+      .lsu_valid_i          (lsu_valid_id_ex),
+
+      .load_result_o   (load_result_ex_id),
+      .load_trans_id_o (load_trans_id_ex_id),
+      .load_valid_o    (load_valid_ex_id),
+      .load_exception_o(load_exception_ex_id),
+
+      .store_result_o   (store_result_ex_id),
+      .store_trans_id_o (store_trans_id_ex_id),
+      .store_valid_o    (store_valid_ex_id),
+      .store_exception_o(store_exception_ex_id),
+
+      .lsu_commit_i           (lsu_commit_commit_ex),          // from commit
+      .lsu_commit_ready_o     (lsu_commit_ready_ex_commit),    // to commit
+      .commit_tran_id_i       (lsu_commit_trans_id),           // from commit
+      .stall_st_pending_i     (stall_st_pending_ex),           // from acc_dispatcher, '0 without it
+      .no_st_pending_o        (no_st_pending_ex),
+      // FPU
+      .fpu_ready_o            (fpu_ready_ex_id),
+      .fpu_valid_i            (fpu_valid_id_ex),
+      .fpu_fmt_i              (fpu_fmt_id_ex),
+      .fpu_rm_i               (fpu_rm_id_ex),
+      .fpu_frm_i              (frm_csr_id_issue_ex),
+      .fpu_prec_i             (fprec_csr_ex),
+      .fpu_trans_id_o         (fpu_trans_id_ex_id),
+      .fpu_result_o           (fpu_result_ex_id),
+      .fpu_valid_o            (fpu_valid_ex_id),
+      .fpu_exception_o        (fpu_exception_ex_id),
+      .amo_valid_commit_i     (amo_valid_commit),
+      .amo_req_o              (amo_req),
+      .amo_resp_i             (amo_resp),
+      // CoreV-X-Interface
+      .x_valid_i              (x_issue_valid_id_ex),
+      .x_ready_o              (x_issue_ready_ex_id),
+      .x_off_instr_i          (x_off_instr_id_ex),
+      .x_trans_id_o           (x_trans_id_ex_id),
+      .x_exception_o          (x_exception_ex_id),
+      .x_result_o             (x_result_ex_id),
+      .x_valid_o              (x_valid_ex_id),
+      .x_we_o                 (x_we_ex_id),
+      .cvxif_req_o            (cvxif_req),   // internal nets; fed through to the cvxif_req_o /
+      .cvxif_resp_i           (cvxif_resp),  // cvxif_resp_i module ports only in gen_no_accelerator
+      // Accelerator
+      .acc_valid_i            (acc_valid_acc_ex),
+      // Performance counters
+      .itlb_miss_o            (itlb_miss_ex_perf),
+      .dtlb_miss_o            (dtlb_miss_ex_perf),
+      // Memory Management
+      .enable_translation_i   (enable_translation_csr_ex),     // from CSR
+      .en_ld_st_translation_i (en_ld_st_translation_csr_ex),
+      .flush_tlb_i            (flush_tlb_ctrl_ex),
+      .priv_lvl_i             (priv_lvl),                      // from CSR
+      .ld_st_priv_lvl_i       (ld_st_priv_lvl_csr_ex),         // from CSR
+      .sum_i                  (sum_csr_ex),                    // from CSR
+      .mxr_i                  (mxr_csr_ex),                    // from CSR
+      .satp_ppn_i             (satp_ppn_csr_ex),               // from CSR
+      .asid_i                 (asid_csr_ex),                   // from CSR
+      .icache_areq_i          (icache_areq_cache_ex),
+      .icache_areq_o          (icache_areq_ex_cache),
+      // DCACHE interfaces
+      .dcache_req_ports_i     (dcache_req_ports_cache_ex),
+      .dcache_req_ports_o     (dcache_req_ports_ex_cache),
+      .dcache_wbuffer_empty_i (dcache_commit_wbuffer_empty),
+      .dcache_wbuffer_not_ni_i(dcache_commit_wbuffer_not_ni),
+      // PMP
+      .pmpcfg_i               (pmpcfg),
+      .pmpaddr_i              (pmpaddr),
+      //RVFI
+      .rvfi_lsu_ctrl_o        (rvfi_lsu_ctrl),
+      .rvfi_mem_paddr_o       (rvfi_mem_paddr)
+  );
+
+  // ---------
+  // Commit
+  // ---------
+
+  // Stores only count as drained once both EX (no_st_pending_ex) and the D$ write buffer
+  // are empty; this combined flag is used e.g. for fence instructions.
+  assign no_st_pending_commit = no_st_pending_ex & dcache_commit_wbuffer_empty;
+
+  commit_stage #(
+      .CVA6Cfg(CVA6ExtendCfg)
+  ) commit_stage_i (
+      .clk_i,
+      .rst_ni,
+      .halt_i            (halt_ctrl),
+      .flush_dcache_i    (dcache_flush_ctrl_cache),
+      .exception_o       (ex_commit),
+      .dirty_fp_state_o  (dirty_fp_state),
+      .single_step_i     (single_step_csr_commit),
+      .commit_instr_i    (commit_instr_id_commit),
+      .commit_ack_o      (commit_ack),
+      .no_st_pending_i   (no_st_pending_commit),  // combined EX + write-buffer status from above
+      .waddr_o           (waddr_commit_id),
+      .wdata_o           (wdata_commit_id),
+      .we_gpr_o          (we_gpr_commit_id),
+      .we_fpr_o          (we_fpr_commit_id),
+      .commit_lsu_o      (lsu_commit_commit_ex),
+      .commit_lsu_ready_i(lsu_commit_ready_ex_commit),
+      .commit_tran_id_o  (lsu_commit_trans_id),
+      .amo_valid_commit_o(amo_valid_commit),
+      .amo_resp_i        (amo_resp),
+      .commit_csr_o      (csr_commit_commit_ex),
+      .pc_o              (pc_commit),
+      .csr_op_o          (csr_op_commit_csr),
+      .csr_wdata_o       (csr_wdata_commit_csr),
+      .csr_rdata_i       (csr_rdata_csr_commit),
+      .csr_write_fflags_o(csr_write_fflags_commit_cs),
+      .csr_exception_i   (csr_exception_csr_commit),
+      .fence_i_o         (fence_i_commit_controller),
+      .fence_o           (fence_commit_controller),
+      .sfence_vma_o      (sfence_vma_commit_controller),
+      .flush_commit_o    (flush_commit),
+      .*  // every remaining port binds implicitly to the same-named signal in this scope
+  );
+
+  // ---------
+  // CSR: drives the *_csr_* control nets declared under "CSR <-> *" and the perf-counter bus
+  // ---------
+  csr_regfile #(
+      .CVA6Cfg       (CVA6ExtendCfg),
+      .AsidWidth     (ASID_WIDTH),
+      .MHPMCounterNum(MHPMCounterNum)
+  ) csr_regfile_i (
+      .flush_o               (flush_csr_ctrl),
+      .halt_csr_o            (halt_csr_ctrl),
+      .commit_instr_i        (commit_instr_id_commit),
+      .commit_ack_i          (commit_ack),
+      .boot_addr_i           (boot_addr_i[riscv::VLEN-1:0]),  // only the low VLEN bits are passed on
+      .hart_id_i             (hart_id_i[riscv::XLEN-1:0]),  // only the low XLEN bits are passed on
+      .ex_i                  (ex_commit),
+      .csr_op_i              (csr_op_commit_csr),
+      .csr_write_fflags_i    (csr_write_fflags_commit_cs),
+      .dirty_fp_state_i      (dirty_fp_state),
+      .dirty_v_state_i       (dirty_v_state),  // from acc_dispatcher; '0 when no accelerator
+      .csr_addr_i            (csr_addr_ex_csr),
+      .csr_wdata_i           (csr_wdata_commit_csr),
+      .csr_rdata_o           (csr_rdata_csr_commit),
+      .pc_i                  (pc_commit),
+      .csr_exception_o       (csr_exception_csr_commit),
+      .epc_o                 (epc_commit_pcgen),
+      .eret_o                (eret),
+      .set_debug_pc_o        (set_debug_pc),
+      .trap_vector_base_o    (trap_vector_base_commit_pcgen),
+      .priv_lvl_o            (priv_lvl),
+      .acc_fflags_ex_i       (acc_resp_fflags),  // from acc_dispatcher; '0 when no accelerator
+      .acc_fflags_ex_valid_i (acc_resp_fflags_valid),
+      .fs_o                  (fs),
+      .fflags_o              (fflags_csr_commit),
+      .frm_o                 (frm_csr_id_issue_ex),
+      .fprec_o               (fprec_csr_ex),
+      .vs_o                  (vs),
+      .irq_ctrl_o            (irq_ctrl_csr_id),
+      .ld_st_priv_lvl_o      (ld_st_priv_lvl_csr_ex),
+      .en_translation_o      (enable_translation_csr_ex),
+      .en_ld_st_translation_o(en_ld_st_translation_csr_ex),
+      .sum_o                 (sum_csr_ex),
+      .mxr_o                 (mxr_csr_ex),
+      .satp_ppn_o            (satp_ppn_csr_ex),
+      .asid_o                (asid_csr_ex),
+      .tvm_o                 (tvm_csr_id),
+      .tw_o                  (tw_csr_id),
+      .tsr_o                 (tsr_csr_id),
+      .debug_mode_o          (debug_mode),
+      .single_step_o         (single_step_csr_commit),
+      .dcache_en_o           (dcache_en_csr_nbdcache),
+      .icache_en_o           (icache_en_csr),
+      .acc_cons_en_o         (acc_cons_en_csr),
+      .perf_addr_o           (addr_csr_perf),
+      .perf_data_o           (data_csr_perf),
+      .perf_data_i           (data_perf_csr),  // forced to '0 in gen_no_perf_counter
+      .perf_we_o             (we_csr_perf),
+      .pmpcfg_o              (pmpcfg),
+      .pmpaddr_o             (pmpaddr),
+      .mcountinhibit_o       (mcountinhibit_csr_perf),
+      .debug_req_i,  // .name shorthand: connects to the signal of the same name
+      .ipi_i,
+      .irq_i,
+      .time_irq_i,
+      .*  // every remaining port binds implicitly to the same-named signal in this scope
+  );
+
+  // ------------------------
+  // Performance Counters (instantiated only when PERF_COUNTER_EN is set)
+  // ------------------------
+  if (PERF_COUNTER_EN) begin : gen_perf_counter
+    perf_counters #(
+        .CVA6Cfg (CVA6ExtendCfg),
+        .NumPorts(NumPorts)
+    ) perf_counters_i (
+        .clk_i         (clk_i),
+        .rst_ni        (rst_ni),
+        .debug_mode_i  (debug_mode),
+        .addr_i        (addr_csr_perf),
+        .we_i          (we_csr_perf),
+        .data_i        (data_csr_perf),
+        .data_o        (data_perf_csr),
+        .commit_instr_i(commit_instr_id_commit),
+        .commit_ack_i  (commit_ack),
+
+        .l1_icache_miss_i   (icache_miss_cache_perf),
+        .l1_dcache_miss_i   (dcache_miss_cache_perf),
+        .itlb_miss_i        (itlb_miss_ex_perf),
+        .dtlb_miss_i        (dtlb_miss_ex_perf),
+        .sb_full_i          (sb_full),
+        .if_empty_i         (~fetch_valid_if_id),  // frontend currently offers no valid fetch entry
+        .ex_i               (ex_commit),
+        .eret_i             (eret),
+        .resolved_branch_i  (resolved_branch),
+        .branch_exceptions_i(flu_exception_ex_id),
+        .l1_icache_access_i (icache_dreq_if_cache),
+        .l1_dcache_access_i (dcache_req_ports_ex_cache),
+        .miss_vld_bits_i    (miss_vld_bits),  // NOTE(review): only gen_cache_wt connects a driver here
+        .i_tlb_flush_i      (flush_tlb_ctrl_ex),
+        .stall_issue_i      (stall_issue),
+        .mcountinhibit_i    (mcountinhibit_csr_perf)
+    );
+  end : gen_perf_counter
+  else begin : gen_no_perf_counter
+    assign data_perf_csr = '0;  // csr_regfile's perf_data_i is fed zeros when counters are absent
+  end : gen_no_perf_counter
+
+  // ------------
+  // Controller: turns commit/CSR/accelerator events into the per-stage flush and halt nets
+  // ------------
+  controller #(
+      .CVA6Cfg(CVA6ExtendCfg)
+  ) controller_i (
+      // flush ports
+      .set_pc_commit_o       (set_pc_ctrl_pcgen),
+      .flush_unissued_instr_o(flush_unissued_instr_ctrl_id),
+      .flush_if_o            (flush_ctrl_if),  // flushes both the frontend and id_stage
+      .flush_id_o            (flush_ctrl_id),  // flushes issue_stage
+      .flush_ex_o            (flush_ctrl_ex),
+      .flush_bp_o            (flush_ctrl_bp),  // the frontend ties flush_bp_i to 1'b0 instead
+      .flush_tlb_o           (flush_tlb_ctrl_ex),
+      .flush_dcache_o        (dcache_flush_ctrl_cache),
+      .flush_dcache_ack_i    (dcache_flush_ack_cache_ctrl),
+
+      .halt_csr_i       (halt_csr_ctrl),
+      .halt_acc_i       (halt_acc_ctrl),  // from acc_dispatcher; '0 when no accelerator
+      .halt_o           (halt_ctrl),
+      // control ports
+      .eret_i           (eret),
+      .ex_valid_i       (ex_commit.valid),
+      .set_debug_pc_i   (set_debug_pc),
+      .flush_csr_i      (flush_csr_ctrl),
+      .resolved_branch_i(resolved_branch),
+      .fence_i_i        (fence_i_commit_controller),
+      .fence_i          (fence_commit_controller),
+      .sfence_vma_i     (sfence_vma_commit_controller),
+      .flush_commit_i   (flush_commit),
+      .flush_acc_i      (flush_acc),  // from acc_dispatcher; '0 when no accelerator
+
+      .flush_icache_o(icache_flush_ctrl_cache),
+      .*  // every remaining port binds implicitly to the same-named signal in this scope
+  );
+
+  // -------------------
+  // Cache Subsystem
+  // -------------------
+
+  // Acc dispatcher and store buffer share a dcache request port.
+  // Store buffer always has priority access over acc dispatcher.
+  dcache_req_i_t [NumPorts-1:0] dcache_req_to_cache;
+  dcache_req_o_t [NumPorts-1:0] dcache_req_from_cache;
+
+  // D$ request: cache ports 0/1 <- EX ports 0/1, port 2 <- acc port 0, port 3 is the shared one
+  assign dcache_req_to_cache[0] = dcache_req_ports_ex_cache[0];
+  assign dcache_req_to_cache[1] = dcache_req_ports_ex_cache[1];
+  assign dcache_req_to_cache[2] = dcache_req_ports_acc_cache[0];
+  assign dcache_req_to_cache[3] = dcache_req_ports_ex_cache[2].data_req ? dcache_req_ports_ex_cache [2] :
+                                                                          dcache_req_ports_acc_cache[1];
+
+  // D$ response: routed back with the same port mapping as the requests
+  assign dcache_req_ports_cache_ex[0] = dcache_req_from_cache[0];
+  assign dcache_req_ports_cache_ex[1] = dcache_req_from_cache[1];
+  assign dcache_req_ports_cache_acc[0] = dcache_req_from_cache[2];
+  always_comb begin : gen_dcache_req_store_data_gnt
+    dcache_req_ports_cache_ex[2]  = dcache_req_from_cache[3];  // both sharers first get a full copy
+    dcache_req_ports_cache_acc[1] = dcache_req_from_cache[3];  // of the shared port's response
+
+    // Set gnt signal: data_gnt is masked so that only the requester selected above can see it
+    dcache_req_ports_cache_ex[2].data_gnt &= dcache_req_ports_ex_cache[2].data_req;
+    dcache_req_ports_cache_acc[1].data_gnt &= !dcache_req_ports_ex_cache[2].data_req;
+  end
+
+  if (DCACHE_TYPE == int'(config_pkg::WT)) begin : gen_cache_wt
+    // this is a cache subsystem that is compatible with OpenPiton
+    wt_cache_subsystem #(
+        .CVA6Cfg   (CVA6ExtendCfg),
+        .NumPorts  (NumPorts),
+        .noc_req_t (noc_req_t),
+        .noc_resp_t(noc_resp_t)
+    ) i_cache_subsystem (
+        // to D$
+        .clk_i             (clk_i),
+        .rst_ni            (rst_ni),
+        // I$
+        .icache_en_i       (icache_en_csr),
+        .icache_flush_i    (icache_flush_ctrl_cache),
+        .icache_miss_o     (icache_miss_cache_perf),
+        .icache_areq_i     (icache_areq_ex_cache),
+        .icache_areq_o     (icache_areq_cache_ex),
+        .icache_dreq_i     (icache_dreq_if_cache),
+        .icache_dreq_o     (icache_dreq_cache_if),
+        // D$
+        .dcache_enable_i   (dcache_en_csr_nbdcache),
+        .dcache_flush_i    (dcache_flush_ctrl_cache),
+        .dcache_flush_ack_o(dcache_flush_ack_cache_ctrl),
+        // to commit stage
+        .dcache_amo_req_i  (amo_req),
+        .dcache_amo_resp_o (amo_resp),
+        // from PTW, Load Unit  and Store Unit
+        .dcache_miss_o     (dcache_miss_cache_perf),
+        .miss_vld_bits_o   (miss_vld_bits),
+        .dcache_req_ports_i(dcache_req_to_cache),
+        .dcache_req_ports_o(dcache_req_from_cache),
+        // write buffer status
+        .wbuffer_empty_o   (dcache_commit_wbuffer_empty),
+        .wbuffer_not_ni_o  (dcache_commit_wbuffer_not_ni),
+        // memory side
+        .noc_req_o         (noc_req_o),
+        .noc_resp_i        (noc_resp_i),
+        .inval_addr_i      (inval_addr),  // only this cache type consumes the invalidation port
+        .inval_valid_i     (inval_valid),
+        .inval_ready_o     (inval_ready)
+    );
+  end else if (DCACHE_TYPE == int'(config_pkg::HPDCACHE)) begin : gen_cache_hpd
+    cva6_hpdcache_subsystem #(
+        .CVA6Cfg   (CVA6ExtendCfg),
+        .NumPorts  (NumPorts),
+        .noc_req_t (noc_req_t),
+        .noc_resp_t(noc_resp_t),
+        .cmo_req_t (logic  /*FIXME*/),
+        .cmo_rsp_t (logic  /*FIXME*/)
+    ) i_cache_subsystem (
+        .clk_i (clk_i),
+        .rst_ni(rst_ni),
+
+        .icache_en_i   (icache_en_csr),
+        .icache_flush_i(icache_flush_ctrl_cache),
+        .icache_miss_o (icache_miss_cache_perf),
+        .icache_areq_i (icache_areq_ex_cache),
+        .icache_areq_o (icache_areq_cache_ex),
+        .icache_dreq_i (icache_dreq_if_cache),
+        .icache_dreq_o (icache_dreq_cache_if),
+
+        .dcache_enable_i   (dcache_en_csr_nbdcache),
+        .dcache_flush_i    (dcache_flush_ctrl_cache),
+        .dcache_flush_ack_o(dcache_flush_ack_cache_ctrl),
+        .dcache_miss_o     (dcache_miss_cache_perf),  // NOTE(review): miss_vld_bits has no driver here
+
+        .dcache_amo_req_i (amo_req),
+        .dcache_amo_resp_o(amo_resp),
+
+        .dcache_cmo_req_i ('0  /*FIXME*/),
+        .dcache_cmo_resp_o(  /*FIXME*/),
+
+        .dcache_req_ports_i(dcache_req_to_cache),
+        .dcache_req_ports_o(dcache_req_from_cache),
+
+        .wbuffer_empty_o (dcache_commit_wbuffer_empty),
+        .wbuffer_not_ni_o(dcache_commit_wbuffer_not_ni),
+
+        .hwpf_base_set_i    ('0  /*FIXME*/),
+        .hwpf_base_i        ('0  /*FIXME*/),
+        .hwpf_base_o        (  /*FIXME*/),
+        .hwpf_param_set_i   ('0  /*FIXME*/),
+        .hwpf_param_i       ('0  /*FIXME*/),
+        .hwpf_param_o       (  /*FIXME*/),
+        .hwpf_throttle_set_i('0  /*FIXME*/),
+        .hwpf_throttle_i    ('0  /*FIXME*/),
+        .hwpf_throttle_o    (  /*FIXME*/),
+        .hwpf_status_o      (  /*FIXME*/),
+
+        .noc_req_o (noc_req_o),
+        .noc_resp_i(noc_resp_i)
+    );
+    assign inval_ready = 1'b1;  // always ready; inval_addr/inval_valid are not connected above
+  end else begin : gen_cache_wb
+    std_cache_subsystem #(
+        // note: this only works with one cacheable region
+        // not as important since this cache subsystem is about to be
+        // deprecated
+        .CVA6Cfg      (CVA6ExtendCfg),
+        .NumPorts     (NumPorts),
+        .axi_ar_chan_t(axi_ar_chan_t),
+        .axi_aw_chan_t(axi_aw_chan_t),
+        .axi_w_chan_t (axi_w_chan_t),
+        .axi_req_t    (noc_req_t),
+        .axi_rsp_t    (noc_resp_t)
+    ) i_cache_subsystem (
+        // to D$
+        .clk_i             (clk_i),
+        .rst_ni            (rst_ni),
+        .priv_lvl_i        (priv_lvl),
+        // I$
+        .icache_en_i       (icache_en_csr),
+        .icache_flush_i    (icache_flush_ctrl_cache),
+        .icache_miss_o     (icache_miss_cache_perf),
+        .icache_areq_i     (icache_areq_ex_cache),
+        .icache_areq_o     (icache_areq_cache_ex),
+        .icache_dreq_i     (icache_dreq_if_cache),
+        .icache_dreq_o     (icache_dreq_cache_if),
+        // D$
+        .dcache_enable_i   (dcache_en_csr_nbdcache),
+        .dcache_flush_i    (dcache_flush_ctrl_cache),
+        .dcache_flush_ack_o(dcache_flush_ack_cache_ctrl),
+        // to commit stage
+        .amo_req_i         (amo_req),
+        .amo_resp_o        (amo_resp),
+        .dcache_miss_o     (dcache_miss_cache_perf),  // NOTE(review): miss_vld_bits has no driver here
+        // this is statically set to 1 as the std_cache does not have a wbuffer
+        .wbuffer_empty_o   (dcache_commit_wbuffer_empty),
+        // from PTW, Load Unit  and Store Unit
+        .dcache_req_ports_i(dcache_req_to_cache),
+        .dcache_req_ports_o(dcache_req_from_cache),
+        // memory side
+        .axi_req_o         (noc_req_o),
+        .axi_resp_i        (noc_resp_i)
+    );
+    assign dcache_commit_wbuffer_not_ni = 1'b1;  // no wbuffer_not_ni_o connection in this branch
+    assign inval_ready                  = 1'b1;  // always ready; inval_* inputs not connected above
+  end
+
+  // ----------------
+  // Accelerator: dispatcher when EnableAccelerator is set, constant tie-offs otherwise
+  // ----------------
+
+  if (CVA6ExtendCfg.EnableAccelerator) begin : gen_accelerator
+    acc_dispatcher #(
+        .CVA6Cfg   (CVA6ExtendCfg),
+        .acc_cfg_t (acc_cfg_t),
+        .AccCfg    (AccCfg),
+        .acc_req_t (cvxif_req_t),  // accelerator request/response reuse the CVXIF port types
+        .acc_resp_t(cvxif_resp_t)
+    ) i_acc_dispatcher (
+        .clk_i                 (clk_i),
+        .rst_ni                (rst_ni),
+        .flush_unissued_instr_i(flush_unissued_instr_ctrl_id),
+        .flush_ex_i            (flush_ctrl_ex),
+        .flush_pipeline_o      (flush_acc),
+        .acc_cons_en_i         (acc_cons_en_csr),
+        .acc_fflags_valid_o    (acc_resp_fflags_valid),
+        .acc_fflags_o          (acc_resp_fflags),
+        .ld_st_priv_lvl_i      (ld_st_priv_lvl_csr_ex),
+        .sum_i                 (sum_csr_ex),
+        .pmpcfg_i              (pmpcfg),
+        .pmpaddr_i             (pmpaddr),
+        .fcsr_frm_i            (frm_csr_id_issue_ex),
+        .dirty_v_state_o       (dirty_v_state),
+        .issue_instr_i         (issue_instr_id_acc),
+        .issue_instr_hs_i      (issue_instr_hs_id_acc),
+        .issue_stall_o         (stall_acc_id),
+        .fu_data_i             (fu_data_id_ex),
+        .commit_instr_i        (commit_instr_id_commit),
+        .commit_st_barrier_i   (fence_i_commit_controller | fence_commit_controller),
+        .acc_trans_id_o        (acc_trans_id_ex_id),
+        .acc_result_o          (acc_result_ex_id),
+        .acc_valid_o           (acc_valid_ex_id),
+        .acc_exception_o       (acc_exception_ex_id),
+        .acc_valid_ex_o        (acc_valid_acc_ex),
+        .commit_ack_i          (commit_ack),
+        .acc_stall_st_pending_o(stall_st_pending_ex),
+        .acc_no_st_pending_i   (no_st_pending_commit),
+        .dcache_req_ports_i    (dcache_req_ports_ex_cache),  // the EX -> D$ request ports
+        .ctrl_halt_o           (halt_acc_ctrl),
+        .acc_dcache_req_ports_o(dcache_req_ports_acc_cache),
+        .acc_dcache_req_ports_i(dcache_req_ports_cache_acc),
+        .inval_ready_i         (inval_ready),
+        .inval_valid_o         (inval_valid),
+        .inval_addr_o          (inval_addr),
+        .acc_req_o             (cvxif_req_o),  // the dispatcher drives the cvxif_* module ports directly
+        .acc_resp_i            (cvxif_resp_i)
+    );
+  end : gen_accelerator
+  else begin : gen_no_accelerator
+    assign acc_trans_id_ex_id         = '0;  // every dispatcher output is tied to zero in this branch
+    assign acc_result_ex_id           = '0;
+    assign acc_valid_ex_id            = '0;
+    assign acc_exception_ex_id        = '0;
+    assign acc_resp_fflags            = '0;
+    assign acc_resp_fflags_valid      = '0;
+    assign stall_acc_id               = '0;
+    assign dirty_v_state              = '0;
+    assign acc_valid_acc_ex           = '0;
+    assign halt_acc_ctrl              = '0;
+    assign stall_st_pending_ex        = '0;
+    assign flush_acc                  = '0;
+
+    // D$ connection is unused
+    assign dcache_req_ports_acc_cache = '0;
+
+    // No invalidation interface
+    assign inval_valid                = '0;
+    assign inval_addr                 = '0;
+
+    // Feed through cvxif: ex_stage's internal cvxif_req/cvxif_resp nets reach the module ports
+    assign cvxif_req_o                = cvxif_req;
+    assign cvxif_resp                 = cvxif_resp_i;
+  end : gen_no_accelerator
+
+  // -------------------
+  // Parameter Check (between translate_off/on pragmas; compiled out when VERILATOR is defined)
+  // -------------------
+  // pragma translate_off
+`ifndef VERILATOR
+  initial config_pkg::check_cfg(CVA6Cfg);  // called on the incoming CVA6Cfg, not CVA6ExtendCfg
+`endif
+  // pragma translate_on
+
+  // -------------------
+  // Instruction Tracer
+  // -------------------
+
+  //pragma translate_off
+`ifdef PITON_ARIANE
+  localparam PC_QUEUE_DEPTH = 16;  // DEPTH of each per-commit-port PC FIFO
+  // Commit PCs are queued per commit port (fifo_v3) and merged by rr_arb_tree into piton_pc.
+  logic                                                    piton_pc_vld;
+  logic [                riscv::VLEN-1:0]                  piton_pc;
+  logic [CVA6ExtendCfg.NrCommitPorts-1:0][riscv::VLEN-1:0] pc_data;
+  logic [CVA6ExtendCfg.NrCommitPorts-1:0] pc_pop, pc_empty;
+  // Payload widths follow riscv::VLEN (the declared width of pc_data/piton_pc), not a fixed 64.
+  for (genvar i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin : gen_pc_fifo
+    fifo_v3 #(
+        .DATA_WIDTH(riscv::VLEN),
+        .DEPTH(PC_QUEUE_DEPTH)
+    ) i_pc_fifo (
+        .clk_i     (clk_i),
+        .rst_ni    (rst_ni),
+        .flush_i   ('0),
+        .testmode_i('0),
+        .full_o    (),
+        .empty_o   (pc_empty[i]),
+        .usage_o   (),
+        .data_i    (commit_instr_id_commit[i].pc),
+        .push_i    (commit_ack[i] & ~commit_instr_id_commit[i].ex.valid),  // commits without exception
+        .data_o    (pc_data[i]),
+        .pop_i     (pc_pop[i])
+    );
+  end
+  // Every non-empty FIFO requests; the arbiter's grant pops the selected FIFO.
+  rr_arb_tree #(
+      .NumIn(CVA6ExtendCfg.NrCommitPorts),
+      .DataWidth(riscv::VLEN)
+  ) i_rr_arb_tree (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i('0),
+      .rr_i   ('0),
+      .req_i  (~pc_empty),
+      .gnt_o  (pc_pop),
+      .data_i (pc_data),
+      .gnt_i  (piton_pc_vld),  // req_o is looped back as grant: the output is always accepted
+      .req_o  (piton_pc_vld),
+      .data_o (piton_pc),
+      .idx_o  ()
+  );
+`endif  // PITON_ARIANE
+
+`ifndef VERILATOR
+  instr_tracer_if tracer_if (clk_i);
+  // Full tracer (non-Verilator builds): fill the tracer interface; several fields below are
+  // probed with hierarchical references into sub-module instances. Control signals:
+  assign tracer_if.rstn           = rst_ni;
+  assign tracer_if.flush_unissued = flush_unissued_instr_ctrl_id;
+  assign tracer_if.flush          = flush_ctrl_ex;
+  // fetch (probed inside id_stage_i)
+  assign tracer_if.instruction    = id_stage_i.fetch_entry_i.instruction;
+  assign tracer_if.fetch_valid    = id_stage_i.fetch_entry_valid_i;
+  assign tracer_if.fetch_ack      = id_stage_i.fetch_entry_ready_o;
+  // Issue (probed inside issue_stage_i.i_scoreboard)
+  assign tracer_if.issue_ack      = issue_stage_i.i_scoreboard.issue_ack_i;
+  assign tracer_if.issue_sbe      = issue_stage_i.i_scoreboard.issue_instr_o;
+  // write-back
+  assign tracer_if.waddr          = waddr_commit_id;
+  assign tracer_if.wdata          = wdata_commit_id;
+  assign tracer_if.we_gpr         = we_gpr_commit_id;
+  assign tracer_if.we_fpr         = we_fpr_commit_id;
+  // commit
+  assign tracer_if.commit_instr   = commit_instr_id_commit;
+  assign tracer_if.commit_ack     = commit_ack;
+  // branch predict
+  assign tracer_if.resolve_branch = resolved_branch;
+  // address translation
+  // stores (probed at the store buffer inputs)
+  assign tracer_if.st_valid       = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.valid_i;
+  assign tracer_if.st_paddr       = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.paddr_i;
+  // loads (probed at the load unit)
+  assign tracer_if.ld_valid       = ex_stage_i.lsu_i.i_load_unit.req_port_o.tag_valid;
+  assign tracer_if.ld_kill        = ex_stage_i.lsu_i.i_load_unit.req_port_o.kill_req;
+  assign tracer_if.ld_paddr       = ex_stage_i.lsu_i.i_load_unit.paddr_i;
+  // exceptions
+  assign tracer_if.exception      = commit_stage_i.exception_o;
+  // assign current privilege level
+  assign tracer_if.priv_lvl       = priv_lvl;
+  assign tracer_if.debug_mode     = debug_mode;
+
+  instr_tracer instr_tracer_i (
+      .tracer_if(tracer_if),
+      .hart_id_i  // implicit .name connection to hart_id_i of this scope
+  );
+
+  // mock tracer for Verilator, to be used with spike-dasm
+`else
+  // Writes one text line per acknowledged commit port to the file opened in the initial block.
+  int f;  // descriptor returned by $fopen below
+  logic [63:0] cycles;  // cleared in reset, incremented on every clock edge afterwards
+
+  initial begin
+    string fn;
+    $sformat(fn, "trace_hart_%0.0f.dasm", hart_id_i);  // NOTE(review): real format on hart_id_i
+    f = $fopen(fn, "w");
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      cycles <= 0;
+    end else begin
+      byte mode = "";  // one-character mode tag printed with %s; stays "" if no case below hits
+      if (CVA6Cfg.DebugEn && debug_mode) mode = "D";
+      else begin
+        case (priv_lvl)
+          riscv::PRIV_LVL_M: mode = "M";
+          riscv::PRIV_LVL_S: if (CVA6Cfg.RVS) mode = "S";
+          riscv::PRIV_LVL_U: mode = "U";
+          default: ;  // Do nothing
+        endcase
+      end
+      for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin
+        if (commit_ack[i] && !commit_instr_id_commit[i].ex.valid) begin  // commit, no exception
+          $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, mode,
+                  commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]);
+        end else if (commit_ack[i] && commit_instr_id_commit[i].ex.valid) begin  // with exception
+          if (commit_instr_id_commit[i].ex.cause == 2) begin
+            $fwrite(f, "Exception Cause: Illegal Instructions, DASM(%h) PC=%h\n",
+                    commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].pc);
+          end else begin
+            if (CVA6Cfg.DebugEn && debug_mode) begin  // in debug mode: same line format as a commit
+              $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc,
+                      mode, commit_instr_id_commit[i].ex.tval[31:0],
+                      commit_instr_id_commit[i].ex.tval[31:0]);
+            end else begin
+              $fwrite(f, "Exception Cause: %5d, DASM(%h) PC=%h\n",
+                      commit_instr_id_commit[i].ex.cause, commit_instr_id_commit[i].ex.tval[31:0],
+                      commit_instr_id_commit[i].pc);
+            end
+          end
+        end
+      end
+      cycles <= cycles + 1;
+    end
+  end
+
+  final begin
+    $fclose(f);  // close the trace file at end of simulation
+  end
+`endif  // VERILATOR
+  //pragma translate_on
+
+
+  if (IsRVFI) begin
+    // Collect the core signals listed below into the single rvfi_probes_o bundle.
+    cva6_rvfi_probes #(
+        .rvfi_probes_t(rvfi_probes_t),
+        .CVA6Cfg      (CVA6ExtendCfg)
+    ) i_cva6_rvfi_combi (
+        // bundled output
+        .rvfi_probes_o(rvfi_probes_o),
+
+        // flush inputs
+        .flush_i               (flush_ctrl_if),
+        .flush_unissued_instr_i(flush_unissued_instr_ctrl_id),
+
+        // fetch / decode / issue inputs
+        .fetch_entry_valid_i  (fetch_valid_if_id),
+        .instruction_i        (fetch_entry_if_id.instruction),
+        .is_compressed_i      (rvfi_is_compressed),
+        .issue_instr_ack_i    (issue_instr_issue_id),
+        .decoded_instr_valid_i(issue_entry_valid_id_issue),
+        .decoded_instr_ack_i  (issue_instr_issue_id),
+        .issue_pointer_i      (rvfi_issue_pointer),
+        .rs1_forwarding_i     (rs1_forwarding_id_ex),
+        .rs2_forwarding_i     (rs2_forwarding_id_ex),
+
+        // LSU and write data inputs
+        .lsu_ctrl_i (rvfi_lsu_ctrl),
+        .mem_paddr_i(rvfi_mem_paddr),
+        .wbdata_i   (wbdata_ex_id),
+        .wdata_i    (wdata_commit_id),
+
+        // commit inputs
+        .commit_pointer_i(rvfi_commit_pointer),
+        .commit_instr_i  (commit_instr_id_commit),
+        .commit_ack_i    (commit_ack),
+        .ex_commit_i     (ex_commit),
+        .priv_lvl_i      (priv_lvl),
+        .debug_mode_i    (debug_mode)
+    );
+  end  //IsRVFI
+
+endmodule  // ariane
diff --git a/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv b/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv
new file mode 100644
index 0000000..20ffb1e
--- /dev/null
+++ b/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv
@@ -0,0 +1,31 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Nils Wistoff <nwistoff@iis.ee.ethz.ch>
+
+// Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's
+// first pass decoder.
+
+module cva6_accel_first_pass_decoder
+  import ariane_pkg::*;
+(
+    input  logic [31:0]       instruction_i,           // instruction word (not used by the stub)
+    input  riscv::xs_t        fs_i,                    // FP extension status (not used by the stub)
+    input  riscv::xs_t        vs_i,                    // vector extension status (not used)
+    output logic              is_accel_o,              // constant 0
+    output scoreboard_entry_t instruction_o,           // constant all-zero entry
+    output logic              illegal_instr_o,         // constant 0
+    output logic              is_control_flow_instr_o  // constant 0
+);
+
+  // Module-level severity task: reports as soon as this placeholder is instantiated.
+  $error("cva6_accel_first_pass_decoder: instantiated non-functional module stub.\
+          Please replace this with your accelerator's first pass decoder \
+          (or unset ENABLE_ACCELERATOR).");
+
+  // All outputs are held at zero.
+  assign instruction_o = '0;
+  assign {is_accel_o, illegal_instr_o, is_control_flow_instr_o} = 3'b000;
+
+endmodule : cva6_accel_first_pass_decoder
diff --git a/test/type_param/core/cva6_rvfi.sv b/test/type_param/core/cva6_rvfi.sv
new file mode 100644
index 0000000..972a50a
--- /dev/null
+++ b/test/type_param/core/cva6_rvfi.sv
@@ -0,0 +1,294 @@
+// Copyright 2024 Thales DIS France SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Yannick Casamatta - Thales
+// Date: 09/01/2024
+
+
+module cva6_rvfi
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter type rvfi_instr_t = logic,
+    parameter type rvfi_probes_t = logic
+) (
+
+    input logic clk_i,
+    input logic rst_ni,
+
+    input rvfi_probes_t rvfi_probes_i,  // probe bundle; its fields are unpacked below
+    output rvfi_instr_t [CVA6Cfg.NrCommitPorts-1:0] rvfi_o  // one record per commit port
+
+);
+  // Builds the rvfi_o records from rvfi_probes_i; per-instruction side data is kept in mem_q.
+  // ------------------------------------------
+  // CVA6 configuration
+  // ------------------------------------------
+  // Extended config
+  localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn;
+  localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn;
+  localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8;
+  localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec;  // Are non-standard extensions present?
+  localparam int unsigned FLen = RVD ? 64 :  // D ext.
+  RVF ? 32 :  // F ext.
+  CVA6Cfg.XF16 ? 16 :  // Xf16 ext.
+  CVA6Cfg.XF16ALT ? 16 :  // Xf16alt ext.
+  CVA6Cfg.XF8 ? 8 :  // Xf8 ext.
+  1;  // Unused in case of no FP
+
+  // Transprecision floating-point extensions configuration
+  localparam bit RVFVec     = RVF             & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled
+  localparam bit XF16Vec    = CVA6Cfg.XF16    & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled
+  localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled
+  localparam bit XF8Vec     = CVA6Cfg.XF8     & CVA6Cfg.XFVec & FLen>8;  // FP8 vectors available if vectors and larger fmt enabled
+
+  localparam bit EnableAccelerator = CVA6Cfg.RVV;  // Currently only used by V extension (Ara)
+  localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4;
+
+  localparam NrRgprPorts = 2;
+
+  localparam bit NonIdemPotenceEn = CVA6Cfg.NrNonIdempotentRules && CVA6Cfg.NonIdempotentLength;  // Set when both the rule count and the region length are non-zero
+  // Positional literal: keep the entry order (presumably the field order of cva6_cfg_t).
+  localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = {
+    CVA6Cfg.NrCommitPorts,
+    CVA6Cfg.AxiAddrWidth,
+    CVA6Cfg.AxiDataWidth,
+    CVA6Cfg.AxiIdWidth,
+    CVA6Cfg.AxiUserWidth,
+    CVA6Cfg.NrLoadBufEntries,
+    CVA6Cfg.FpuEn,
+    CVA6Cfg.XF16,
+    CVA6Cfg.XF16ALT,
+    CVA6Cfg.XF8,
+    CVA6Cfg.RVA,
+    CVA6Cfg.RVB,
+    CVA6Cfg.RVV,
+    CVA6Cfg.RVC,
+    CVA6Cfg.RVZCB,
+    CVA6Cfg.XFVec,
+    CVA6Cfg.CvxifEn,
+    CVA6Cfg.ZiCondExtEn,
+    // Extended
+    bit'(RVF),
+    bit'(RVD),
+    bit'(FpPresent),
+    bit'(NSX),
+    unsigned'(FLen),
+    bit'(RVFVec),
+    bit'(XF16Vec),
+    bit'(XF16ALTVec),
+    bit'(XF8Vec),
+    unsigned'(NrRgprPorts),
+    unsigned'(NrWbPorts),
+    bit'(EnableAccelerator),
+    CVA6Cfg.RVS,
+    CVA6Cfg.RVU,
+    CVA6Cfg.HaltAddress,
+    CVA6Cfg.ExceptionAddress,
+    CVA6Cfg.RASDepth,
+    CVA6Cfg.BTBEntries,
+    CVA6Cfg.BHTEntries,
+    CVA6Cfg.DmBaseAddress,
+    CVA6Cfg.NrPMPEntries,
+    CVA6Cfg.PMPCfgRstVal,
+    CVA6Cfg.PMPAddrRstVal,
+    CVA6Cfg.PMPEntryReadOnly,
+    CVA6Cfg.NOCType,
+    CVA6Cfg.NrNonIdempotentRules,
+    CVA6Cfg.NonIdempotentAddrBase,
+    CVA6Cfg.NonIdempotentLength,
+    CVA6Cfg.NrExecuteRegionRules,
+    CVA6Cfg.ExecuteRegionAddrBase,
+    CVA6Cfg.ExecuteRegionLength,
+    CVA6Cfg.NrCachedRegionRules,
+    CVA6Cfg.CachedRegionAddrBase,
+    CVA6Cfg.CachedRegionLength,
+    CVA6Cfg.MaxOutstandingStores,
+    CVA6Cfg.DebugEn,
+    NonIdemPotenceEn,
+    CVA6Cfg.AxiBurstWriteEn
+  };
+  // Local copies of the rvfi_probes_i fields (driven by the assigns further down)
+  logic                                                                   flush;
+  logic                                                                   issue_instr_ack;
+  logic                                                                   fetch_entry_valid;
+  logic              [                           31:0]                    instruction;
+  logic                                                                   is_compressed;
+
+  logic              [              TRANS_ID_BITS-1:0]                    issue_pointer;
+  logic              [CVA6ExtendCfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer;
+
+  logic                                                                   flush_unissued_instr;
+  logic                                                                   decoded_instr_valid;
+  logic                                                                   decoded_instr_ack;
+
+  riscv::xlen_t                                                           rs1_forwarding;
+  riscv::xlen_t                                                           rs2_forwarding;
+
+  scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0]                    commit_instr;
+  exception_t                                                             ex_commit;
+  riscv::priv_lvl_t                                                       priv_lvl;
+
+  lsu_ctrl_t                                                              lsu_ctrl;
+  logic              [    CVA6ExtendCfg.NrWbPorts-1:0][  riscv::XLEN-1:0] wbdata;
+  logic              [CVA6ExtendCfg.NrCommitPorts-1:0]                    commit_ack;
+  logic              [                riscv::PLEN-1:0]                    mem_paddr;
+  logic                                                                   debug_mode;
+  logic              [CVA6ExtendCfg.NrCommitPorts-1:0][  riscv::XLEN-1:0] wdata;
+  // Values derived from lsu_ctrl (see the last group of assigns)
+  logic              [                riscv::VLEN-1:0]                    lsu_addr;
+  logic              [            (riscv::XLEN/8)-1:0]                    lsu_rmask;
+  logic              [            (riscv::XLEN/8)-1:0]                    lsu_wmask;
+  logic              [              TRANS_ID_BITS-1:0]                    lsu_addr_trans_id;
+
+  assign flush = rvfi_probes_i.flush;
+  assign issue_instr_ack = rvfi_probes_i.issue_instr_ack;
+  assign fetch_entry_valid = rvfi_probes_i.fetch_entry_valid;
+  assign instruction = rvfi_probes_i.instruction;
+  assign is_compressed = rvfi_probes_i.is_compressed;
+
+  assign issue_pointer = rvfi_probes_i.issue_pointer;
+  assign commit_pointer = rvfi_probes_i.commit_pointer;
+
+  assign flush_unissued_instr = rvfi_probes_i.flush_unissued_instr;
+  assign decoded_instr_valid = rvfi_probes_i.decoded_instr_valid;
+  assign decoded_instr_ack = rvfi_probes_i.decoded_instr_ack;
+
+  assign rs1_forwarding = rvfi_probes_i.rs1_forwarding;
+  assign rs2_forwarding = rvfi_probes_i.rs2_forwarding;
+
+  assign commit_instr = rvfi_probes_i.commit_instr;
+  assign ex_commit = rvfi_probes_i.ex_commit;
+  assign priv_lvl = rvfi_probes_i.priv_lvl;
+
+  assign lsu_ctrl = rvfi_probes_i.lsu_ctrl;
+  assign wbdata = rvfi_probes_i.wbdata;
+  assign commit_ack = rvfi_probes_i.commit_ack;
+  assign mem_paddr = rvfi_probes_i.mem_paddr;
+  assign debug_mode = rvfi_probes_i.debug_mode;
+  assign wdata = rvfi_probes_i.wdata;
+
+  assign lsu_addr = lsu_ctrl.vaddr;
+  assign lsu_rmask = lsu_ctrl.fu == LOAD ? lsu_ctrl.be : '0;  // byte enables count as read mask for LOAD only
+  assign lsu_wmask = lsu_ctrl.fu == STORE ? lsu_ctrl.be : '0;  // byte enables count as write mask for STORE only
+  assign lsu_addr_trans_id = lsu_ctrl.trans_id;
+
+
+  //ID STAGE: one-entry buffer (issue_q) holding the most recently captured fetch word
+
+  typedef struct packed {
+    logic        valid;
+    logic [31:0] instr;
+  } issue_struct_t;
+  issue_struct_t issue_n, issue_q;
+
+  always_comb begin
+    issue_n = issue_q;
+    // an acknowledged issue empties the buffer ...
+    if (issue_instr_ack) issue_n.valid = 1'b0;
+    // ... a valid fetch entry refills it when it is empty or being emptied in this cycle
+    if ((!issue_q.valid || issue_instr_ack) && fetch_entry_valid) begin
+      issue_n.valid = 1'b1;
+      issue_n.instr = (is_compressed) ? {{16{1'b0}}, instruction[15:0]} : instruction;
+    end
+    // flush is evaluated last, so it overrides both cases above
+    if (flush) issue_n.valid = 1'b0;
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      issue_q <= '0;
+    end else begin
+      issue_q <= issue_n;
+    end
+  end
+
+  //ISSUE STAGE: mem_q keeps one sb_mem_t entry per scoreboard slot (NR_SB_ENTRIES)
+
+  // this is the FIFO struct of the issue queue
+  typedef struct packed {
+    riscv::xlen_t rs1_rdata;
+    riscv::xlen_t rs2_rdata;
+    logic [riscv::VLEN-1:0] lsu_addr;
+    logic [(riscv::XLEN/8)-1:0] lsu_rmask;
+    logic [(riscv::XLEN/8)-1:0] lsu_wmask;
+    riscv::xlen_t lsu_wdata;
+    logic [31:0] instr;
+  } sb_mem_t;
+  sb_mem_t [NR_SB_ENTRIES-1:0] mem_q, mem_n;
+
+  always_comb begin : issue_fifo
+    mem_n = mem_q;
+    // (re)initialise the entry at issue_pointer when a decoded instruction is accepted and not flushed
+    if (decoded_instr_valid && decoded_instr_ack && !flush_unissued_instr) begin
+      mem_n[issue_pointer] = '{
+          rs1_rdata: rs1_forwarding,
+          rs2_rdata: rs2_forwarding,
+          lsu_addr: '0,
+          lsu_rmask: '0,
+          lsu_wmask: '0,
+          lsu_wdata: '0,
+          instr: issue_q.instr
+      };
+    end
+    // LSU data is written afterwards, so it wins if it targets the entry initialised above
+    if (lsu_rmask != 0) begin
+      mem_n[lsu_addr_trans_id].lsu_addr  = lsu_addr;
+      mem_n[lsu_addr_trans_id].lsu_rmask = lsu_rmask;
+    end else if (lsu_wmask != 0) begin
+      mem_n[lsu_addr_trans_id].lsu_addr  = lsu_addr;
+      mem_n[lsu_addr_trans_id].lsu_wmask = lsu_wmask;
+      mem_n[lsu_addr_trans_id].lsu_wdata = wbdata[1];  // NOTE(review): assumes port 1 carries store data - confirm
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : regs
+    if (!rst_ni) begin
+      mem_q <= '{default: sb_mem_t'(0)};
+    end else begin
+      mem_q <= mem_n;
+    end
+  end
+
+  //----------------------------------------------------------------------------------------------------------
+  // PACK: build one rvfi_o record per commit port from commit_instr and the mem_q entry at commit_pointer
+  //----------------------------------------------------------------------------------------------------------
+
+  always_comb begin
+    for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin
+      logic exception;  // this port holds a valid instruction while ex_commit is valid
+      exception = commit_instr[i].valid && ex_commit.valid;
+      rvfi_o[i].valid    = (commit_ack[i] && !ex_commit.valid) ||
+        (exception && (ex_commit.cause == riscv::ENV_CALL_MMODE ||
+                  ex_commit.cause == riscv::ENV_CALL_SMODE ||
+                  ex_commit.cause == riscv::ENV_CALL_UMODE));
+      rvfi_o[i].insn = mem_q[commit_pointer[i]].instr;
+      // when trap, the instruction is not executed
+      rvfi_o[i].trap = exception;
+      rvfi_o[i].cause = ex_commit.cause;
+      rvfi_o[i].mode = (CVA6ExtendCfg.DebugEn && debug_mode) ? 2'b10 : priv_lvl;
+      rvfi_o[i].ixl = riscv::XLEN == 64 ? 2 : 1;
+      rvfi_o[i].rs1_addr = commit_instr[i].rs1[4:0];
+      rvfi_o[i].rs2_addr = commit_instr[i].rs2[4:0];
+      rvfi_o[i].rd_addr = commit_instr[i].rd[4:0];
+      rvfi_o[i].rd_wdata = (CVA6ExtendCfg.FpPresent && is_rd_fpr(commit_instr[i].op)) ?
+          commit_instr[i].result : wdata[i];
+      rvfi_o[i].pc_rdata = commit_instr[i].pc;
+      rvfi_o[i].mem_addr = mem_q[commit_pointer[i]].lsu_addr;
+      // So far, only write paddr is reported. TODO: read paddr
+      rvfi_o[i].mem_paddr = mem_paddr;
+      rvfi_o[i].mem_wmask = mem_q[commit_pointer[i]].lsu_wmask;
+      rvfi_o[i].mem_wdata = mem_q[commit_pointer[i]].lsu_wdata;
+      rvfi_o[i].mem_rmask = mem_q[commit_pointer[i]].lsu_rmask;
+      rvfi_o[i].mem_rdata = commit_instr[i].result;
+      rvfi_o[i].rs1_rdata = mem_q[commit_pointer[i]].rs1_rdata;
+      rvfi_o[i].rs2_rdata = mem_q[commit_pointer[i]].rs2_rdata;
+    end
+  end
+
+
+endmodule
diff --git a/test/type_param/core/cva6_rvfi_probes.sv b/test/type_param/core/cva6_rvfi_probes.sv
new file mode 100644
index 0000000..81d2c5d
--- /dev/null
+++ b/test/type_param/core/cva6_rvfi_probes.sv
@@ -0,0 +1,81 @@
+// Copyright 2024 Thales DIS France SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Yannick Casamatta - Thales
+// Date: 09/01/2024
+
+
+module cva6_rvfi_probes
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter type rvfi_probes_t = logic
+) (
+    // Purely combinational: every input is copied into the like-named member of rvfi_probes_o.
+    input logic        flush_i,
+    input logic        issue_instr_ack_i,
+    input logic        fetch_entry_valid_i,
+    input logic [31:0] instruction_i,
+    input logic        is_compressed_i,
+    // pointers
+    input logic [TRANS_ID_BITS-1:0] issue_pointer_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer_i,
+    // decoded instruction handshake
+    input logic flush_unissued_instr_i,
+    input logic decoded_instr_valid_i,
+    input logic decoded_instr_ack_i,
+    // forwarded operands
+    input riscv::xlen_t rs1_forwarding_i,
+    input riscv::xlen_t rs2_forwarding_i,
+    // commit information
+    input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
+    input exception_t ex_commit_i,
+    input riscv::priv_lvl_t priv_lvl_i,
+    // LSU control, write data, and the bundled output
+    input  lsu_ctrl_t                                                 lsu_ctrl_i,
+    input  logic         [    CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i,
+    input  logic         [CVA6Cfg.NrCommitPorts-1:0]                  commit_ack_i,
+    input  logic         [          riscv::PLEN-1:0]                  mem_paddr_i,
+    input  logic                                                      debug_mode_i,
+    input  logic         [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i,
+    output rvfi_probes_t                                              rvfi_probes_o
+);
+
+  always_comb begin
+    // Zero default first; each member listed below then overrides its part of the bundle.
+    rvfi_probes_o                      = '0;
+
+    // commit side
+    rvfi_probes_o.commit_instr         = commit_instr_i;
+    rvfi_probes_o.commit_ack           = commit_ack_i;
+    rvfi_probes_o.commit_pointer       = commit_pointer_i;
+    rvfi_probes_o.ex_commit            = ex_commit_i;
+    rvfi_probes_o.priv_lvl             = priv_lvl_i;
+    rvfi_probes_o.debug_mode           = debug_mode_i;
+
+    // data values
+    rvfi_probes_o.lsu_ctrl             = lsu_ctrl_i;
+    rvfi_probes_o.mem_paddr            = mem_paddr_i;
+    rvfi_probes_o.wbdata               = wbdata_i;
+    rvfi_probes_o.wdata                = wdata_i;
+    rvfi_probes_o.rs1_forwarding       = rs1_forwarding_i;
+    rvfi_probes_o.rs2_forwarding       = rs2_forwarding_i;
+
+    // fetch / decode / issue side
+    rvfi_probes_o.flush                = flush_i;
+    rvfi_probes_o.flush_unissued_instr = flush_unissued_instr_i;
+    rvfi_probes_o.fetch_entry_valid    = fetch_entry_valid_i;
+    rvfi_probes_o.instruction          = instruction_i;
+    rvfi_probes_o.is_compressed        = is_compressed_i;
+    rvfi_probes_o.issue_instr_ack      = issue_instr_ack_i;
+    rvfi_probes_o.issue_pointer        = issue_pointer_i;
+    rvfi_probes_o.decoded_instr_valid  = decoded_instr_valid_i;
+    rvfi_probes_o.decoded_instr_ack    = decoded_instr_ack_i;
+  end
+
+
+endmodule
diff --git a/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv b/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv
new file mode 100644
index 0000000..08e801c
--- /dev/null
+++ b/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv
@@ -0,0 +1,155 @@
+// Copyright 2021 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
+// Example coprocessor adds rs1,rs2(,rs3) together and gives back the result to the CPU via the CoreV-X-Interface.
+// Coprocessor delays the sending of the result depending on result least significant bits.
+
+module cvxif_example_coprocessor
+  import cvxif_pkg::*;
+  import cvxif_instr_pkg::*;
+(
+    input  logic        clk_i,        // Clock
+    input  logic        rst_ni,       // Asynchronous reset active low
+    input  cvxif_req_t  cvxif_req_i,  // Request bundle, split into the x_*_i signals below
+    output cvxif_resp_t cvxif_resp_o  // Response bundle, assembled from the x_*_o signals below
+);
+  // Accepted issue requests are queued; the head's result is sent when counter c equals data[3:0].
+  //Compressed interface
+  logic               x_compressed_valid_i;
+  logic               x_compressed_ready_o;
+  x_compressed_req_t  x_compressed_req_i;
+  x_compressed_resp_t x_compressed_resp_o;
+  //Issue interface
+  logic               x_issue_valid_i;
+  logic               x_issue_ready_o;
+  x_issue_req_t       x_issue_req_i;
+  x_issue_resp_t      x_issue_resp_o;
+  //Commit interface
+  logic               x_commit_valid_i;
+  x_commit_t          x_commit_i;
+  //Memory interface
+  logic               x_mem_valid_o;
+  logic               x_mem_ready_i;
+  x_mem_req_t         x_mem_req_o;
+  x_mem_resp_t        x_mem_resp_i;
+  //Memory result interface
+  logic               x_mem_result_valid_i;
+  x_mem_result_t      x_mem_result_i;
+  //Result interface
+  logic               x_result_valid_o;
+  logic               x_result_ready_i;
+  x_result_t          x_result_o;
+
+  assign x_compressed_valid_i            = cvxif_req_i.x_compressed_valid;
+  assign x_compressed_req_i              = cvxif_req_i.x_compressed_req;
+  assign x_issue_valid_i                 = cvxif_req_i.x_issue_valid;
+  assign x_issue_req_i                   = cvxif_req_i.x_issue_req;
+  assign x_commit_valid_i                = cvxif_req_i.x_commit_valid;
+  assign x_commit_i                      = cvxif_req_i.x_commit;
+  assign x_mem_ready_i                   = cvxif_req_i.x_mem_ready;
+  assign x_mem_resp_i                    = cvxif_req_i.x_mem_resp;
+  assign x_mem_result_valid_i            = cvxif_req_i.x_mem_result_valid;
+  assign x_mem_result_i                  = cvxif_req_i.x_mem_result;
+  assign x_result_ready_i                = cvxif_req_i.x_result_ready;
+
+  assign cvxif_resp_o.x_compressed_ready = x_compressed_ready_o;
+  assign cvxif_resp_o.x_compressed_resp  = x_compressed_resp_o;
+  assign cvxif_resp_o.x_issue_ready      = x_issue_ready_o;
+  assign cvxif_resp_o.x_issue_resp       = x_issue_resp_o;
+  assign cvxif_resp_o.x_mem_valid        = x_mem_valid_o;
+  assign cvxif_resp_o.x_mem_req          = x_mem_req_o;
+  assign cvxif_resp_o.x_result_valid     = x_result_valid_o;
+  assign cvxif_resp_o.x_result           = x_result_o;
+
+  //Compressed interface: not implemented, outputs are tied to zero
+  assign x_compressed_ready_o            = '0;
+  assign x_compressed_resp_o.instr       = '0;
+  assign x_compressed_resp_o.accept      = '0;
+
+  instr_decoder #(
+      .NbInstr   (cvxif_instr_pkg::NbInstr),
+      .CoproInstr(cvxif_instr_pkg::CoproInstr)
+  ) instr_decoder_i (
+      .clk_i         (clk_i),
+      .x_issue_req_i (x_issue_req_i),
+      .x_issue_resp_o(x_issue_resp_o)
+  );
+
+  typedef struct packed {
+    x_issue_req_t  req;
+    x_issue_resp_t resp;
+  } x_issue_t;
+
+  logic fifo_full, fifo_empty;
+  logic x_issue_ready_q;
+  logic instr_push, instr_pop;
+  x_issue_t req_i;
+  x_issue_t req_o;
+  //Memory interface: nothing in this module drives it, so give its outputs a defined idle value
+  assign x_mem_valid_o = '0;
+  assign x_mem_req_o   = '0;
+  assign instr_push = x_issue_resp_o.accept ? 1 : 0;  // NOTE(review): not gated by x_issue_valid_i - confirm
+  assign instr_pop = (x_commit_i.x_commit_kill && x_commit_valid_i) || x_result_valid_o;
+  assign x_issue_ready_q = ~fifo_full; // ready is withdrawn only while the FIFO is full;
+                                       // x_issue_ready_o follows one clock cycle later (see regs)
+  assign req_i.req = x_issue_req_i;
+  assign req_i.resp = x_issue_resp_o;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : regs
+    if (!rst_ni) begin
+      x_issue_ready_o <= 1;
+    end else begin
+      x_issue_ready_o <= x_issue_ready_q;
+    end
+  end
+
+  fifo_v3 #(
+      .FALL_THROUGH(1),         //data_o ready and pop in the same cycle
+      .DATA_WIDTH  (64),
+      .DEPTH       (8),
+      .dtype       (x_issue_t)
+  ) fifo_commit_i (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (1'b0),
+      .testmode_i(1'b0),
+      .full_o    (fifo_full),
+      .empty_o   (fifo_empty),
+      .usage_o   (),
+      .data_i    (req_i),
+      .push_i    (instr_push),
+      .data_o    (req_o),
+      .pop_i     (instr_pop)
+  );
+  // Always-enabled 4-bit counter, cleared by every commit that is not a kill.
+  logic [3:0] c;
+  counter #(
+      .WIDTH(4)
+  ) counter_i (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .clear_i   (~x_commit_i.x_commit_kill && x_commit_valid_i),
+      .en_i      (1'b1),
+      .load_i    (1'b0),  // never loaded: tied low instead of left floating
+      .down_i    (1'b0),  // direction input tied low instead of left floating
+      .d_i       ('0),    // load value is irrelevant while load_i is low
+      .q_o       (c),
+      .overflow_o()
+  );
+  // Result = rs[0] + rs[1] (+ rs[2] if X_NUM_RS == 3); valid once c equals its low nibble.
+  always_comb begin
+    x_result_o.data    = req_o.req.rs[0] + req_o.req.rs[1] + (X_NUM_RS == 3 ? req_o.req.rs[2] : 0);
+    x_result_valid_o   = (c == x_result_o.data[3:0]) && ~fifo_empty ? 1 : 0;
+    x_result_o.id      = req_o.req.id;
+    x_result_o.rd      = req_o.req.instr[11:7];
+    x_result_o.we      = req_o.resp.writeback & x_result_valid_o;
+    x_result_o.exc     = 0;
+    x_result_o.exccode = 0;
+  end
+
+endmodule
diff --git a/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv b/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv
new file mode 100644
index 0000000..035cb04
--- /dev/null
+++ b/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv
@@ -0,0 +1,47 @@
+// Copyright 2021 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
+
+package cvxif_instr_pkg;
+  // Entry type and contents of the instruction table for the example coprocessor.
+  typedef struct packed {
+    logic [31:0]              instr;  // pattern the masked instruction is compared with (see instr_decoder)
+    logic [31:0]              mask;   // selects which instruction bits take part in that comparison
+    cvxif_pkg::x_issue_resp_t resp;   // issue response associated with this pattern
+  } copro_issue_resp_t;
+
+  // 2 Possible RISCV instructions for Coprocessor
+  parameter int unsigned NbInstr = 2;
+  parameter copro_issue_resp_t CoproInstr[NbInstr] = '{
+      '{  // entry 0: accepted, no register writeback
+          instr: 32'b00000_00_00000_00000_0_00_00000_0101011,  // custom1 opcode
+          mask: 32'b00000_00_00000_00000_0_00_00000_1111111,  // only bits [6:0] are compared
+          resp : '{
+              accept : 1'b1,
+              writeback : 1'b0,
+              dualwrite : 1'b0,
+              dualread : 1'b0,
+              loadstore : 1'b0,
+              exc : 1'b0
+          }
+      },
+      '{  // entry 1: accepted, with register writeback
+          instr: 32'b00000_00_00000_00000_0_00_00000_1011011,  // custom2 opcode
+          mask: 32'b00000_00_00000_00000_0_00_00000_1111111,  // only bits [6:0] are compared
+          resp : '{
+              accept : 1'b1,
+              writeback : 1'b1,
+              dualwrite : 1'b0,
+              dualread : 1'b0,
+              loadstore : 1'b0,
+              exc : 1'b0
+          }
+      }
+  };
+
+endpackage
diff --git a/test/type_param/core/cvxif_example/instr_decoder.sv b/test/type_param/core/cvxif_example/instr_decoder.sv
new file mode 100644
index 0000000..0cf1bdf
--- /dev/null
+++ b/test/type_param/core/cvxif_example/instr_decoder.sv
@@ -0,0 +1,49 @@
+// Copyright 2021 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
+
+module instr_decoder  // combinational lookup of the issue response for an offloaded instruction
+  import cvxif_pkg::*;
+#(
+    parameter int                                 NbInstr             = 1,  // number of entries in CoproInstr
+    parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0}  // match table: instr/mask pattern plus response
+) (
+    input  logic          clk_i,  // only used to clock the assertion at the end of the module
+    input  x_issue_req_t  x_issue_req_i,  // only the .instr field is read here
+    output x_issue_resp_t x_issue_resp_o  // response of the matching entry, all-zero when nothing matches
+);
+  // sel[i] is set when the instruction, masked with entry i's mask, equals entry i's pattern.
+  logic [NbInstr-1:0] sel;
+
+  for (genvar i = 0; i < NbInstr; i++) begin : gen_predecoder_selector
+    assign sel[i] = ((CoproInstr[i].mask & x_issue_req_i.instr) == CoproInstr[i].instr);
+  end
+  // Default to an all-zero response (accept = 0); a matching table entry overrides it.
+  always_comb begin
+    x_issue_resp_o.accept    = '0;
+    x_issue_resp_o.writeback = '0;
+    x_issue_resp_o.dualwrite = '0;
+    x_issue_resp_o.dualread  = '0;
+    x_issue_resp_o.loadstore = '0;
+    x_issue_resp_o.exc       = '0;
+    for (int unsigned i = 0; i < NbInstr; i++) begin  // ascending order: the highest matching index wins
+      if (sel[i]) begin
+        x_issue_resp_o.accept    = CoproInstr[i].resp.accept;
+        x_issue_resp_o.writeback = CoproInstr[i].resp.writeback;
+        x_issue_resp_o.dualwrite = CoproInstr[i].resp.dualwrite;
+        x_issue_resp_o.dualread  = CoproInstr[i].resp.dualread;
+        x_issue_resp_o.loadstore = CoproInstr[i].resp.loadstore;
+        x_issue_resp_o.exc       = CoproInstr[i].resp.exc;
+      end
+    end
+  end
+  // Checked on each rising clk_i edge: at most one entry may match; a violation only raises a warning.
+  assert property (@(posedge clk_i) $onehot0(sel))
+  else $warning("This offloaded instruction is valid for multiple coprocessor instructions !");
+
+endmodule
diff --git a/test/type_param/core/cvxif_fu.sv b/test/type_param/core/cvxif_fu.sv
new file mode 100644
index 0000000..fb0058b
--- /dev/null
+++ b/test/type_param/core/cvxif_fu.sv
@@ -0,0 +1,112 @@
+// Copyright 2021 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Guillaume CHAUVON (guillaume.chauvon@thalesgroup.com)
+
+// Functional Unit for the logic of the CoreV-X-Interface
+// Forwards an issued instruction to the coprocessor request port and passes coprocessor results
+// to writeback; an issue the coprocessor does not accept is reported as an illegal instruction.
+module cvxif_fu
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic                                       clk_i,
+    input  logic                                       rst_ni,
+    input  fu_data_t                                   fu_data_i,  // trans_id, operand_a/b and imm are forwarded
+    input  riscv::priv_lvl_t                           priv_lvl_i,  // forwarded as x_issue_req.mode
+    //from issue
+    input  logic                                       x_valid_i,
+    output logic                                       x_ready_o,  // mirrors cvxif_resp_i.x_issue_ready
+    input  logic                   [             31:0] x_off_instr_i,  // forwarded as x_issue_req.instr
+    //to writeback
+    output logic                   [TRANS_ID_BITS-1:0] x_trans_id_o,
+    output exception_t                                 x_exception_o,
+    output riscv::xlen_t                               x_result_o,
+    output logic                                       x_valid_o,
+    output logic                                       x_we_o,
+    //to coprocessor
+    output cvxif_pkg::cvxif_req_t                      cvxif_req_o,
+    input  cvxif_pkg::cvxif_resp_t                     cvxif_resp_i
+);
+  localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS;  // NOTE(review): sizes rs_valid, but the code below tests cvxif_pkg::X_NUM_RS - confirm both agree
+  // illegal_*: pending illegal-instruction report (flag, transaction id, instruction word); _n is next state, _q is registered.
+  logic illegal_n, illegal_q;
+  logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q;
+  logic [31:0] illegal_instr_n, illegal_instr_q;
+  logic [X_NUM_RS-1:0] rs_valid;
+  // Every source-operand valid bit is tied high (3 bits with a third operand, otherwise 2).
+  if (cvxif_pkg::X_NUM_RS == 3) begin : gen_third_operand
+    assign rs_valid = 3'b111;
+  end else begin : gen_no_third_operand
+    assign rs_valid = 2'b11;
+  end
+  // Request towards the coprocessor: all-zero except x_result_ready unless x_valid_i is set.
+  always_comb begin
+    cvxif_req_o = '0;
+    cvxif_req_o.x_result_ready = 1'b1;
+    x_ready_o = cvxif_resp_i.x_issue_ready;
+    if (x_valid_i) begin
+      cvxif_req_o.x_issue_valid     = x_valid_i;
+      cvxif_req_o.x_issue_req.instr = x_off_instr_i;
+      cvxif_req_o.x_issue_req.mode  = priv_lvl_i;
+      cvxif_req_o.x_issue_req.id    = fu_data_i.trans_id;
+      cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a;
+      cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b;
+      if (cvxif_pkg::X_NUM_RS == 3) begin
+        cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm;
+      end
+      cvxif_req_o.x_issue_req.rs_valid   = rs_valid;
+      cvxif_req_o.x_commit_valid         = x_valid_i;  // commit is signalled in the same cycle as the issue
+      cvxif_req_o.x_commit.id            = fu_data_i.trans_id;
+      cvxif_req_o.x_commit.x_commit_kill = 1'b0;  // commits are never killed here
+    end
+  end
+  // Writeback outputs, plus bookkeeping of a not-accepted issue that is reported once no result is valid.
+  always_comb begin
+    illegal_n       = illegal_q;
+    illegal_id_n    = illegal_id_q;
+    illegal_instr_n = illegal_instr_q;
+    if (~cvxif_resp_i.x_issue_resp.accept && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready && ~illegal_n) begin
+      illegal_n       = 1'b1;  // record the rejected issue unless a report is already pending
+      illegal_id_n    = cvxif_req_o.x_issue_req.id;
+      illegal_instr_n = cvxif_req_o.x_issue_req.instr;
+    end
+    x_valid_o = cvxif_resp_i.x_result_valid;  //Read result only when CVXIF is enabled
+    x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0;
+    x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0;
+    x_exception_o.cause   = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0;
+    x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0;
+    x_exception_o.tval = '0;
+    x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0;
+    if (illegal_n) begin  // a rejection is pending, possibly recorded in this very cycle
+      if (~x_valid_o) begin  // a valid coprocessor result takes priority over the report
+        x_trans_id_o = illegal_id_n;
+        x_result_o = '0;
+        x_valid_o = 1'b1;
+        x_exception_o.cause = riscv::ILLEGAL_INSTR;
+        x_exception_o.valid = 1'b1;
+        x_exception_o.tval = illegal_instr_n;
+        x_we_o = '0;
+        illegal_n             = '0; // Reset flag for illegal instr. illegal_id and illegal instr values are a don't care, no need to reset it.
+      end
+    end
+  end
+  // State registers of the pending report; cleared while rst_ni is low (negedge rst_ni is in the sensitivity list).
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (~rst_ni) begin
+      illegal_q       <= 1'b0;
+      illegal_id_q    <= '0;
+      illegal_instr_q <= '0;
+    end else begin
+      illegal_q       <= illegal_n;
+      illegal_id_q    <= illegal_id_n;
+      illegal_instr_q <= illegal_instr_n;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/decoder.sv b/test/type_param/core/decoder.sv
new file mode 100644
index 0000000..eead0c4
--- /dev/null
+++ b/test/type_param/core/decoder.sv
@@ -0,0 +1,1397 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// File:   decoder.sv
+// Author: Florian Zaruba <zarubaf@ethz.ch>
+// Date:   8.4.2017
+//
+// Copyright (C) 2017 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// Description: Decodes an instruction from IF into a scoreboard entry and
+//              flags whether the instruction changes the control flow
+//
+
+module decoder
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic debug_req_i,  // external debug request
+    input logic [riscv::VLEN-1:0] pc_i,  // PC from IF
+    input logic is_compressed_i,  // is a compressed instruction
+    input logic [15:0] compressed_instr_i,  // compressed form of instruction
+    input logic is_illegal_i,  // illegal compressed instruction
+    input logic [31:0] instruction_i,  // instruction from IF
+    input branchpredict_sbe_t branch_predict_i,
+    input exception_t ex_i,  // if an exception occured in if
+    input logic [1:0] irq_i,  // external interrupt
+    input irq_ctrl_t irq_ctrl_i,  // interrupt control and status information from CSRs
+    // From CSR
+    input riscv::priv_lvl_t priv_lvl_i,  // current privilege level
+    input logic debug_mode_i,  // we are in debug mode
+    input riscv::xs_t fs_i,  // floating point extension status
+    input logic [2:0] frm_i,  // floating-point dynamic rounding mode
+    input riscv::xs_t vs_i,  // vector extension status
+    input logic tvm_i,  // trap virtual memory
+    input logic tw_i,  // timeout wait
+    input logic tsr_i,  // trap sret
+    output scoreboard_entry_t instruction_o,  // scoreboard entry to scoreboard
+    output logic is_control_flow_instr_o  // this instruction will change the control flow
+);
+  logic illegal_instr;
+  logic illegal_instr_bm;
+  logic illegal_instr_zic;
+  logic illegal_instr_non_bm;
+  // this instruction is an environment call (ecall), it is handled like an exception
+  logic ecall;
+  // this instruction is a software break-point
+  logic ebreak;
+  // this instruction needs floating-point rounding-mode verification
+  logic check_fprm;
+  riscv::instruction_t instr;
+  assign instr = riscv::instruction_t'(instruction_i);
+  // --------------------
+  // Immediate select
+  // --------------------
+  enum logic         [3:0] {NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM, RS3} imm_select;
+
+  riscv::xlen_t                                                        imm_i_type;
+  riscv::xlen_t                                                        imm_s_type;
+  riscv::xlen_t                                                        imm_sb_type;
+  riscv::xlen_t                                                        imm_u_type;
+  riscv::xlen_t                                                        imm_uj_type;
+  riscv::xlen_t                                                        imm_bi_type;
+
+  // ---------------------------------------
+  // Accelerator instructions' first-pass decoder
+  // ---------------------------------------
+  logic                                                                is_accel;
+  scoreboard_entry_t                                                   acc_instruction;
+  logic                                                                acc_illegal_instr;
+  logic                                                                acc_is_control_flow_instr;
+
+  if (CVA6Cfg.EnableAccelerator) begin : gen_accel_decoder
+    // This module is responsible for a light-weight decoding of accelerator instructions,
+    // identifying them, but also whether they read/write scalar registers.
+    // Accelerators are supposed to define this module.
+    cva6_accel_first_pass_decoder i_accel_decoder (
+        .instruction_i(instruction_i),
+        .fs_i(fs_i),
+        .vs_i(vs_i),
+        .is_accel_o(is_accel),
+        .instruction_o(acc_instruction),
+        .illegal_instr_o(acc_illegal_instr),
+        .is_control_flow_instr_o(acc_is_control_flow_instr)
+    );
+  end : gen_accel_decoder
+  else begin
+    assign is_accel                  = 1'b0;
+    assign acc_instruction           = '0;
+    assign acc_illegal_instr         = 1'b1;  // this should never propagate
+    assign acc_is_control_flow_instr = 1'b0;
+  end
+
+  always_comb begin : decoder
+
+    imm_select                  = NOIMM;
+    is_control_flow_instr_o     = 1'b0;
+    illegal_instr               = 1'b0;
+    illegal_instr_non_bm        = 1'b0;
+    illegal_instr_bm            = 1'b0;
+    illegal_instr_zic           = 1'b0;
+    instruction_o.pc            = pc_i;
+    instruction_o.trans_id      = '0;
+    instruction_o.fu            = NONE;
+    instruction_o.op            = ariane_pkg::ADD;
+    instruction_o.rs1           = '0;
+    instruction_o.rs2           = '0;
+    instruction_o.rd            = '0;
+    instruction_o.use_pc        = 1'b0;
+    instruction_o.is_compressed = is_compressed_i;
+    instruction_o.use_zimm      = 1'b0;
+    instruction_o.bp            = branch_predict_i;
+    instruction_o.vfp           = 1'b0;
+    ecall                       = 1'b0;
+    ebreak                      = 1'b0;
+    check_fprm                  = 1'b0;
+
+    if (~ex_i.valid) begin
+      case (instr.rtype.opcode)
+        riscv::OpcodeSystem: begin
+          instruction_o.fu = CSR;
+          instruction_o.rs1[4:0] = instr.itype.rs1;
+          instruction_o.rs2[4:0] = instr.rtype.rs2;   //TODO: needs to be checked if better way is available
+          instruction_o.rd[4:0] = instr.itype.rd;
+
+          unique case (instr.itype.funct3)
+            3'b000: begin
+              // check if the RD and RS1 fields are zero, this may be reset for the SFENCE.VMA instruction
+              if (instr.itype.rs1 != '0 || instr.itype.rd != '0) illegal_instr = 1'b1;
+              // decode the immediate field
+              case (instr.itype.imm)
+                // ECALL -> inject exception
+                12'b0: ecall = 1'b1;
+                // EBREAK -> inject exception
+                12'b1: ebreak = 1'b1;
+                // SRET
+                12'b1_0000_0010: begin
+                  if (CVA6Cfg.RVS) begin
+                    instruction_o.op = ariane_pkg::SRET;
+                    // check privilege level, SRET can only be executed in S and M mode
+                    // we'll just decode an illegal instruction if we are in the wrong privilege level
+                    if (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U) begin
+                      illegal_instr = 1'b1;
+                      //  do not change privilege level if this is an illegal instruction
+                      instruction_o.op = ariane_pkg::ADD;
+                    end
+                    // if we are in S-Mode and Trap SRET (tsr) is set -> trap on illegal instruction
+                    if (priv_lvl_i == riscv::PRIV_LVL_S && tsr_i) begin
+                      illegal_instr = 1'b1;
+                      //  do not change privilege level if this is an illegal instruction
+                      instruction_o.op = ariane_pkg::ADD;
+                    end
+                  end else begin
+                    illegal_instr = 1'b1;
+                    instruction_o.op = ariane_pkg::ADD;
+                  end
+                end
+                // MRET
+                12'b11_0000_0010: begin
+                  instruction_o.op = ariane_pkg::MRET;
+                  // check privilege level, MRET can only be executed in M mode
+                  // otherwise we decode an illegal instruction
+                  if ((CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S) || (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U))
+                    illegal_instr = 1'b1;
+                end
+                // DRET
+                12'b111_1011_0010: begin
+                  instruction_o.op = ariane_pkg::DRET;
+                  if (CVA6Cfg.DebugEn) begin
+                    // check that we are in debug mode when executing this instruction
+                    illegal_instr = (!debug_mode_i) ? 1'b1 : illegal_instr;
+                  end else begin
+                    illegal_instr = 1'b1;
+                  end
+                end
+                // WFI
+                12'b1_0000_0101: begin
+                  instruction_o.op = ariane_pkg::WFI;
+                  // if timeout wait is set, trap on an illegal instruction in S Mode
+                  // (after 0 cycles timeout)
+                  if (CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S && tw_i) begin
+                    illegal_instr = 1'b1;
+                    instruction_o.op = ariane_pkg::ADD;
+                  end
+                  // we don't support U mode interrupts so WFI is illegal in this context
+                  if (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U) begin
+                    illegal_instr = 1'b1;
+                    instruction_o.op = ariane_pkg::ADD;
+                  end
+                end
+                // SFENCE.VMA
+                default: begin
+                  if (instr.instr[31:25] == 7'b1001) begin
+                    // check privilege level, SFENCE.VMA can only be executed in M/S mode
+                    // otherwise decode an illegal instruction
+                    illegal_instr    = (((CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S) || ((!CVA6Cfg.RVS && !CVA6Cfg.RVU) || priv_lvl_i == riscv::PRIV_LVL_M)) && instr.itype.rd == '0) ? 1'b0 : 1'b1;
+                    instruction_o.op = ariane_pkg::SFENCE_VMA;
+                    // check TVM flag and intercept SFENCE.VMA call if necessary
+                    if (CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S && tvm_i)
+                      illegal_instr = 1'b1;
+                  end else begin
+                    illegal_instr = 1'b1;
+                  end
+                end
+              endcase
+            end
+            // atomically swaps values in the CSR and integer register
+            3'b001: begin  // CSRRW
+              imm_select = IIMM;
+              instruction_o.op = ariane_pkg::CSR_WRITE;
+            end
+            // atomically set values in the CSR and write back to rd
+            3'b010: begin  // CSRRS
+              imm_select = IIMM;
+              // this is just a read
+              if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ;
+              else instruction_o.op = ariane_pkg::CSR_SET;
+            end
+            // atomically clear values in the CSR and write back to rd
+            3'b011: begin  // CSRRC
+              imm_select = IIMM;
+              // this is just a read
+              if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ;
+              else instruction_o.op = ariane_pkg::CSR_CLEAR;
+            end
+            // use zimm and iimm
+            3'b101: begin  // CSRRWI
+              instruction_o.rs1[4:0] = instr.itype.rs1;
+              imm_select = IIMM;
+              instruction_o.use_zimm = 1'b1;
+              instruction_o.op = ariane_pkg::CSR_WRITE;
+            end
+            3'b110: begin  // CSRRSI
+              instruction_o.rs1[4:0] = instr.itype.rs1;
+              imm_select = IIMM;
+              instruction_o.use_zimm = 1'b1;
+              // this is just a read
+              if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ;
+              else instruction_o.op = ariane_pkg::CSR_SET;
+            end
+            3'b111: begin  // CSRRCI
+              instruction_o.rs1[4:0] = instr.itype.rs1;
+              imm_select = IIMM;
+              instruction_o.use_zimm = 1'b1;
+              // this is just a read
+              if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ;
+              else instruction_o.op = ariane_pkg::CSR_CLEAR;
+            end
+            default: illegal_instr = 1'b1;
+          endcase
+        end
+        // Memory ordering instructions
+        riscv::OpcodeMiscMem: begin
+          instruction_o.fu  = CSR;
+          instruction_o.rs1 = '0;
+          instruction_o.rs2 = '0;
+          instruction_o.rd  = '0;
+
+          case (instr.stype.funct3)
+            // FENCE
+            // Currently implemented as a whole DCache flush boldly ignoring other things
+            3'b000: instruction_o.op = ariane_pkg::FENCE;
+            // FENCE.I
+            3'b001: instruction_o.op = ariane_pkg::FENCE_I;
+
+            default: illegal_instr = 1'b1;
+          endcase
+        end
+
+        // --------------------------
+        // Reg-Reg Operations
+        // --------------------------
+        riscv::OpcodeOp: begin
+          // --------------------------------------------
+          // Vectorial Floating-Point Reg-Reg Operations
+          // --------------------------------------------
+          if (instr.rvftype.funct2 == 2'b10) begin  // Prefix 10 for all Xfvec ops
+            // only generate decoder if FP extensions are enabled (static)
+            if (CVA6Cfg.FpPresent && CVA6Cfg.XFVec && fs_i != riscv::Off) begin
+              automatic logic allow_replication;  // control honoring of replication flag
+
+              instruction_o.fu       = FPU_VEC;  // Same unit, but sets 'vectorial' signal
+              instruction_o.rs1[4:0] = instr.rvftype.rs1;
+              instruction_o.rs2[4:0] = instr.rvftype.rs2;
+              instruction_o.rd[4:0]  = instr.rvftype.rd;
+              check_fprm             = 1'b1;
+              allow_replication      = 1'b1;
+              // decode vectorial FP instruction
+              unique case (instr.rvftype.vecfltop)
+                5'b00001: begin
+                  instruction_o.op       = ariane_pkg::FADD;  // vfadd.vfmt - Vectorial FP Addition
+                  instruction_o.rs1      = '0;  // Operand A is set to 0
+                  instruction_o.rs2[4:0] = instr.rvftype.rs1;  // Operand B is set to rs1
+                  imm_select             = IIMM;  // Operand C is set to rs2
+                end
+                5'b00010: begin
+                  instruction_o.op = ariane_pkg::FSUB;  // vfsub.vfmt - Vectorial FP Subtraction
+                  instruction_o.rs1 = '0;  // Operand A is set to 0
+                  instruction_o.rs2[4:0] = instr.rvftype.rs1;  // Operand B is set to rs1
+                  imm_select = IIMM;  // Operand C is set to rs2
+                end
+                5'b00011:
+                instruction_o.op = ariane_pkg::FMUL;  // vfmul.vfmt - Vectorial FP Multiplication
+                5'b00100:
+                instruction_o.op = ariane_pkg::FDIV;  // vfdiv.vfmt - Vectorial FP Division
+                5'b00101: begin
+                  instruction_o.op = ariane_pkg::VFMIN;  // vfmin.vfmt - Vectorial FP Minimum
+                  check_fprm       = 1'b0;  // rounding mode irrelevant
+                end
+                5'b00110: begin
+                  instruction_o.op = ariane_pkg::VFMAX;  // vfmax.vfmt - Vectorial FP Maximum
+                  check_fprm       = 1'b0;  // rounding mode irrelevant
+                end
+                5'b00111: begin
+                  instruction_o.op  = ariane_pkg::FSQRT;  // vfsqrt.vfmt - Vectorial FP Square Root
+                  allow_replication = 1'b0;  // only one operand
+                  if (instr.rvftype.rs2 != 5'b00000) illegal_instr = 1'b1;  // rs2 must be 0
+                end
+                5'b01000: begin
+                  instruction_o.op = ariane_pkg::FMADD; // vfmac.vfmt - Vectorial FP Multiply-Accumulate
+                  imm_select = SIMM;  // rd into result field (upper bits don't matter)
+                end
+                5'b01001: begin
+                  instruction_o.op = ariane_pkg::FMSUB; // vfmre.vfmt - Vectorial FP Multiply-Reduce
+                  imm_select = SIMM;  // rd into result field (upper bits don't matter)
+                end
+                5'b01100: begin
+                  unique case (instr.rvftype.rs2) inside // operation encoded in rs2, `inside` for matching ?
+                    5'b00000: begin
+                      instruction_o.rs2[4:0] = instr.rvftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit
+                      if (instr.rvftype.repl)
+                        instruction_o.op = ariane_pkg::FMV_X2F;  // vfmv.vfmt.x - GPR to FPR Move
+                      else instruction_o.op = ariane_pkg::FMV_F2X;  // vfmv.x.vfmt - FPR to GPR Move
+                      check_fprm = 1'b0;  // no rounding for moves
+                    end
+                    5'b00001: begin
+                      instruction_o.op  = ariane_pkg::FCLASS; // vfclass.vfmt - Vectorial FP Classify
+                      check_fprm = 1'b0;  // no rounding for classification
+                      allow_replication = 1'b0;  // R must not be set
+                    end
+                    5'b00010:
+                    instruction_o.op = ariane_pkg::FCVT_F2I; // vfcvt.x.vfmt - Vectorial FP to Int Conversion
+                    5'b00011:
+                    instruction_o.op = ariane_pkg::FCVT_I2F; // vfcvt.vfmt.x - Vectorial Int to FP Conversion
+                    5'b001??: begin
+                      instruction_o.op       = ariane_pkg::FCVT_F2F; // vfcvt.vfmt.vfmt - Vectorial FP to FP Conversion
+                      instruction_o.rs2[4:0] = instr.rvftype.rd; // set rs2 = rd as target vector for conversion
+                      imm_select = IIMM;  // rs2 holds part of the intruction
+                      // TODO CHECK R bit for valid fmt combinations
+                      // determine source format
+                      unique case (instr.rvftype.rs2[21:20])
+                        // Only process instruction if corresponding extension is active (static)
+                        2'b00:   if (~CVA6Cfg.RVFVec) illegal_instr = 1'b1;
+                        2'b01:   if (~CVA6Cfg.XF16ALTVec) illegal_instr = 1'b1;
+                        2'b10:   if (~CVA6Cfg.XF16Vec) illegal_instr = 1'b1;
+                        2'b11:   if (~CVA6Cfg.XF8Vec) illegal_instr = 1'b1;
+                        default: illegal_instr = 1'b1;
+                      endcase
+                    end
+                    default: illegal_instr = 1'b1;
+                  endcase
+                end
+                5'b01101: begin
+                  check_fprm = 1'b0;  // no rounding for sign-injection
+                  instruction_o.op = ariane_pkg::VFSGNJ; // vfsgnj.vfmt - Vectorial FP Sign Injection
+                end
+                5'b01110: begin
+                  check_fprm = 1'b0;  // no rounding for sign-injection
+                  instruction_o.op = ariane_pkg::VFSGNJN; // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection
+                end
+                5'b01111: begin
+                  check_fprm = 1'b0;  // no rounding for sign-injection
+                  instruction_o.op = ariane_pkg::VFSGNJX; // vfsgnjx.vfmt - Vectorial FP XORed Sign Injection
+                end
+                5'b10000: begin
+                  check_fprm       = 1'b0;  // no rounding for comparisons
+                  instruction_o.op = ariane_pkg::VFEQ;  // vfeq.vfmt - Vectorial FP Equality
+                end
+                5'b10001: begin
+                  check_fprm       = 1'b0;  // no rounding for comparisons
+                  instruction_o.op = ariane_pkg::VFNE;  // vfne.vfmt - Vectorial FP Non-Equality
+                end
+                5'b10010: begin
+                  check_fprm       = 1'b0;  // no rounding for comparisons
+                  instruction_o.op = ariane_pkg::VFLT;  // vfle.vfmt - Vectorial FP Less Than
+                end
+                5'b10011: begin
+                  check_fprm = 1'b0;  // no rounding for comparisons
+                  instruction_o.op = ariane_pkg::VFGE;  // vfge.vfmt - Vectorial FP Greater or Equal
+                end
+                5'b10100: begin
+                  check_fprm       = 1'b0;  // no rounding for comparisons
+                  instruction_o.op = ariane_pkg::VFLE;  // vfle.vfmt - Vectorial FP Less or Equal
+                end
+                5'b10101: begin
+                  check_fprm       = 1'b0;  // no rounding for comparisons
+                  instruction_o.op = ariane_pkg::VFGT;  // vfgt.vfmt - Vectorial FP Greater Than
+                end
+                5'b11000: begin
+                  instruction_o.op  = ariane_pkg::VFCPKAB_S; // vfcpka/b.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, lowest 4 entries
+                  imm_select = SIMM;  // rd into result field (upper bits don't matter)
+                  if (~CVA6Cfg.RVF)
+                    illegal_instr = 1'b1;  // if we don't support RVF, we can't cast from FP32
+                  // check destination format
+                  unique case (instr.rvftype.vfmt)
+                    // Only process instruction if corresponding extension is active and FLEN suffices (static)
+                    2'b00: begin
+                      if (~CVA6Cfg.RVFVec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                      if (instr.rvftype.repl)
+                        illegal_instr = 1'b1;  // no entries 2/3 in vector of 2 fp32
+                    end
+                    2'b01: begin
+                      if (~CVA6Cfg.XF16ALTVec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                    end
+                    2'b10: begin
+                      if (~CVA6Cfg.XF16Vec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                    end
+                    2'b11: begin
+                      if (~CVA6Cfg.XF8Vec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                    end
+                    default: illegal_instr = 1'b1;
+                  endcase
+                end
+                5'b11001: begin
+                  instruction_o.op  = ariane_pkg::VFCPKCD_S; // vfcpkc/d.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, second 4 entries
+                  imm_select = SIMM;  // rd into result field (upper bits don't matter)
+                  if (~CVA6Cfg.RVF)
+                    illegal_instr = 1'b1;  // if we don't support RVF, we can't cast from FP32
+                  // check destination format
+                  unique case (instr.rvftype.vfmt)
+                    // Only process instruction if corresponding extension is active and FLEN suffices (static)
+                    2'b00:   illegal_instr = 1'b1;  // no entries 4-7 in vector of 2 FP32
+                    2'b01:   illegal_instr = 1'b1;  // no entries 4-7 in vector of 4 FP16ALT
+                    2'b10:   illegal_instr = 1'b1;  // no entries 4-7 in vector of 4 FP16
+                    2'b11: begin
+                      if (~CVA6Cfg.XF8Vec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                    end
+                    default: illegal_instr = 1'b1;
+                  endcase
+                end
+                5'b11010: begin
+                  instruction_o.op  = ariane_pkg::VFCPKAB_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, lowest 4 entries
+                  imm_select = SIMM;  // rd into result field (upper bits don't matter)
+                  if (~CVA6Cfg.RVD)
+                    illegal_instr = 1'b1;  // if we don't support RVD, we can't cast from FP64
+                  // check destination format
+                  unique case (instr.rvftype.vfmt)
+                    // Only process instruction if corresponding extension is active and FLEN suffices (static)
+                    2'b00: begin
+                      if (~CVA6Cfg.RVFVec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                      if (instr.rvftype.repl)
+                        illegal_instr = 1'b1;  // no entries 2/3 in vector of 2 fp32
+                    end
+                    2'b01: begin
+                      if (~CVA6Cfg.XF16ALTVec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                    end
+                    2'b10: begin
+                      if (~CVA6Cfg.XF16Vec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                    end
+                    2'b11: begin
+                      if (~CVA6Cfg.XF8Vec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                    end
+                    default: illegal_instr = 1'b1;
+                  endcase
+                end
+                5'b11011: begin
+                  instruction_o.op  = ariane_pkg::VFCPKCD_D; // vfcpkc/d.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, second 4 entries
+                  imm_select = SIMM;  // rd into result field (upper bits don't matter)
+                  if (~CVA6Cfg.RVD)
+                    illegal_instr = 1'b1;  // if we don't support RVD, we can't cast from FP64
+                  // check destination format: only vfmt=11 can be legal for the second 4 entries
+                  unique case (instr.rvftype.vfmt)
+                    // Only process instruction if corresponding extension is active and FLEN suffices (static)
+                    2'b00:   illegal_instr = 1'b1;  // no entries 4-7 in vector of 2 FP32
+                    2'b01:   illegal_instr = 1'b1;  // no entries 4-7 in vector of 4 FP16ALT
+                    2'b10:   illegal_instr = 1'b1;  // no entries 4-7 in vector of 4 FP16
+                    2'b11: begin
+                      if (~CVA6Cfg.XF8Vec)
+                        illegal_instr = 1'b1;  // destination vector not supported
+                    end
+                    default: illegal_instr = 1'b1;
+                  endcase
+                end
+                default: illegal_instr = 1'b1;
+              endcase
+
+              // check format
+              unique case (instr.rvftype.vfmt)
+                // Only process instruction if corresponding extension is active (static)
+                2'b00:   if (~CVA6Cfg.RVFVec) illegal_instr = 1'b1;
+                2'b01:   if (~CVA6Cfg.XF16ALTVec) illegal_instr = 1'b1;
+                2'b10:   if (~CVA6Cfg.XF16Vec) illegal_instr = 1'b1;
+                2'b11:   if (~CVA6Cfg.XF8Vec) illegal_instr = 1'b1;
+                default: illegal_instr = 1'b1;
+              endcase
+
+              // check disallowed replication
+              if (~allow_replication & instr.rvftype.repl) illegal_instr = 1'b1;
+
+              // check rounding mode
+              if (check_fprm) begin
+                unique case (frm_i) inside  // actual rounding mode from frm csr
+                  [3'b000 : 3'b100]: ;  //legal rounding modes
+                  default: illegal_instr = 1'b1;
+                endcase
+              end
+
+            end else begin  // No vectorial FP enabled (static)
+              illegal_instr = 1'b1;
+            end
+
+            // ---------------------------
+            // Integer Reg-Reg Operations
+            // ---------------------------
+          end else begin
+            if (CVA6Cfg.RVB) begin
+              instruction_o.fu  = (instr.rtype.funct7 == 7'b000_0001 || ((instr.rtype.funct7 == 7'b000_0101) && !(instr.rtype.funct3[14]))) ? MULT : ALU;
+            end else begin
+              instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? MULT : ALU;
+            end
+            instruction_o.rs1[4:0] = instr.rtype.rs1;
+            instruction_o.rs2[4:0] = instr.rtype.rs2;
+            instruction_o.rd[4:0]  = instr.rtype.rd;
+
+            unique case ({
+              instr.rtype.funct7, instr.rtype.funct3
+            })
+              {7'b000_0000, 3'b000} : instruction_o.op = ariane_pkg::ADD;  // Add
+              {7'b010_0000, 3'b000} : instruction_o.op = ariane_pkg::SUB;  // Sub
+              {7'b000_0000, 3'b010} : instruction_o.op = ariane_pkg::SLTS;  // Set Lower Than
+              {
+                7'b000_0000, 3'b011
+              } :
+              instruction_o.op = ariane_pkg::SLTU;  // Set Lower Than Unsigned
+              {7'b000_0000, 3'b100} : instruction_o.op = ariane_pkg::XORL;  // Xor
+              {7'b000_0000, 3'b110} : instruction_o.op = ariane_pkg::ORL;  // Or
+              {7'b000_0000, 3'b111} : instruction_o.op = ariane_pkg::ANDL;  // And
+              {7'b000_0000, 3'b001} : instruction_o.op = ariane_pkg::SLL;  // Shift Left Logical
+              {7'b000_0000, 3'b101} : instruction_o.op = ariane_pkg::SRL;  // Shift Right Logical
+              {7'b010_0000, 3'b101} : instruction_o.op = ariane_pkg::SRA;  // Shift Right Arithmetic
+              // Multiplications
+              {7'b000_0001, 3'b000} : instruction_o.op = ariane_pkg::MUL;
+              {7'b000_0001, 3'b001} : instruction_o.op = ariane_pkg::MULH;
+              {7'b000_0001, 3'b010} : instruction_o.op = ariane_pkg::MULHSU;
+              {7'b000_0001, 3'b011} : instruction_o.op = ariane_pkg::MULHU;
+              {7'b000_0001, 3'b100} : instruction_o.op = ariane_pkg::DIV;
+              {7'b000_0001, 3'b101} : instruction_o.op = ariane_pkg::DIVU;
+              {7'b000_0001, 3'b110} : instruction_o.op = ariane_pkg::REM;
+              {7'b000_0001, 3'b111} : instruction_o.op = ariane_pkg::REMU;
+              default: begin
+                illegal_instr_non_bm = 1'b1;
+              end
+            endcase
+            if (CVA6Cfg.RVB) begin
+              unique case ({
+                instr.rtype.funct7, instr.rtype.funct3
+              })
+                //Logical with Negate
+                {7'b010_0000, 3'b111} : instruction_o.op = ariane_pkg::ANDN;  // Andn
+                {7'b010_0000, 3'b110} : instruction_o.op = ariane_pkg::ORN;  // Orn
+                {7'b010_0000, 3'b100} : instruction_o.op = ariane_pkg::XNOR;  // Xnor
+                //Shift and Add (Bitmanip)
+                {7'b001_0000, 3'b010} : instruction_o.op = ariane_pkg::SH1ADD;  // Sh1add
+                {7'b001_0000, 3'b100} : instruction_o.op = ariane_pkg::SH2ADD;  // Sh2add
+                {7'b001_0000, 3'b110} : instruction_o.op = ariane_pkg::SH3ADD;  // Sh3add
+                // Integer maximum/minimum
+                {7'b000_0101, 3'b110} : instruction_o.op = ariane_pkg::MAX;  // max
+                {7'b000_0101, 3'b111} : instruction_o.op = ariane_pkg::MAXU;  // maxu
+                {7'b000_0101, 3'b100} : instruction_o.op = ariane_pkg::MIN;  // min
+                {7'b000_0101, 3'b101} : instruction_o.op = ariane_pkg::MINU;  // minu
+                // Single bit instructions
+                {7'b010_0100, 3'b001} : instruction_o.op = ariane_pkg::BCLR;  // bclr
+                {7'b010_0100, 3'b101} : instruction_o.op = ariane_pkg::BEXT;  // bext
+                {7'b011_0100, 3'b001} : instruction_o.op = ariane_pkg::BINV;  // binv
+                {7'b001_0100, 3'b001} : instruction_o.op = ariane_pkg::BSET;  // bset
+                // Carry-Less-Multiplication (clmul, clmulh, clmulr)
+                {7'b000_0101, 3'b001} : instruction_o.op = ariane_pkg::CLMUL;  // clmul
+                {7'b000_0101, 3'b011} : instruction_o.op = ariane_pkg::CLMULH;  // clmulh
+                {7'b000_0101, 3'b010} : instruction_o.op = ariane_pkg::CLMULR;  // clmulr
+                // Bitwise Shifting
+                {7'b011_0000, 3'b001} : instruction_o.op = ariane_pkg::ROL;  // rol
+                {7'b011_0000, 3'b101} : instruction_o.op = ariane_pkg::ROR;  // ror
+                // Zero Extend Op
+                {7'b000_0100, 3'b100} : instruction_o.op = ariane_pkg::ZEXTH;
+                default: begin
+                  illegal_instr_bm = 1'b1;
+                end
+              endcase
+            end
+            if (CVA6Cfg.ZiCondExtEn) begin
+              unique case ({
+                instr.rtype.funct7, instr.rtype.funct3
+              })
+                //Conditional move
+                {7'b000_0111, 3'b101} : instruction_o.op = ariane_pkg::CZERO_EQZ;  // czero.eqz
+                {7'b000_0111, 3'b111} : instruction_o.op = ariane_pkg::CZERO_NEZ;  // czero.nez
+                default: begin
+                  illegal_instr_zic = 1'b1;
+                end
+              endcase
+            end
+            //VCS coverage on
+            unique case ({
+              CVA6Cfg.RVB, CVA6Cfg.ZiCondExtEn
+            })
+              2'b00: illegal_instr = illegal_instr_non_bm;
+              2'b01: illegal_instr = illegal_instr_non_bm & illegal_instr_zic;
+              2'b10: illegal_instr = illegal_instr_non_bm & illegal_instr_bm;
+              2'b11: illegal_instr = illegal_instr_non_bm & illegal_instr_bm & illegal_instr_zic;
+            endcase
+          end
+        end
+
+        // --------------------------
+        // 32bit Reg-Reg Operations
+        // --------------------------
+        riscv::OpcodeOp32: begin
+          instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? MULT : ALU;
+          instruction_o.rs1[4:0] = instr.rtype.rs1;
+          instruction_o.rs2[4:0] = instr.rtype.rs2;
+          instruction_o.rd[4:0] = instr.rtype.rd;
+          if (riscv::IS_XLEN64) begin
+            unique case ({
+              instr.rtype.funct7, instr.rtype.funct3
+            })
+              {7'b000_0000, 3'b000} : instruction_o.op = ariane_pkg::ADDW;  // addw
+              {7'b010_0000, 3'b000} : instruction_o.op = ariane_pkg::SUBW;  // subw
+              {7'b000_0000, 3'b001} : instruction_o.op = ariane_pkg::SLLW;  // sllw
+              {7'b000_0000, 3'b101} : instruction_o.op = ariane_pkg::SRLW;  // srlw
+              {7'b010_0000, 3'b101} : instruction_o.op = ariane_pkg::SRAW;  // sraw
+              // Multiplications
+              {7'b000_0001, 3'b000} : instruction_o.op = ariane_pkg::MULW;
+              {7'b000_0001, 3'b100} : instruction_o.op = ariane_pkg::DIVW;
+              {7'b000_0001, 3'b101} : instruction_o.op = ariane_pkg::DIVUW;
+              {7'b000_0001, 3'b110} : instruction_o.op = ariane_pkg::REMW;
+              {7'b000_0001, 3'b111} : instruction_o.op = ariane_pkg::REMUW;
+              default: illegal_instr_non_bm = 1'b1;
+            endcase
+            if (CVA6Cfg.RVB) begin
+              unique case ({
+                instr.rtype.funct7, instr.rtype.funct3
+              })
+                // Shift with Add (Unsigned Word)
+                {7'b001_0000, 3'b010}: instruction_o.op = ariane_pkg::SH1ADDUW; // sh1add.uw
+                {7'b001_0000, 3'b100}: instruction_o.op = ariane_pkg::SH2ADDUW; // sh2add.uw
+                {7'b001_0000, 3'b110}: instruction_o.op = ariane_pkg::SH3ADDUW; // sh3add.uw
+                // Unsigned word Op's
+                {7'b000_0100, 3'b000}: instruction_o.op = ariane_pkg::ADDUW;    // add.uw
+                // Bitwise Shifting
+                {7'b011_0000, 3'b001}: instruction_o.op = ariane_pkg::ROLW;     // rolw
+                {7'b011_0000, 3'b101}: instruction_o.op = ariane_pkg::RORW;     // rorw
+                default: illegal_instr_bm = 1'b1;
+              endcase
+              illegal_instr = illegal_instr_non_bm & illegal_instr_bm;
+            end else begin
+              illegal_instr = illegal_instr_non_bm;
+            end
+          end else illegal_instr = 1'b1;
+        end
+        // --------------------------------
+        // Reg-Immediate Operations
+        // --------------------------------
+        riscv::OpcodeOpImm: begin
+          instruction_o.fu = ALU;
+          imm_select = IIMM;
+          instruction_o.rs1[4:0] = instr.itype.rs1;
+          instruction_o.rd[4:0] = instr.itype.rd;
+          unique case (instr.itype.funct3)
+            3'b000: instruction_o.op = ariane_pkg::ADD;  // Add Immediate
+            3'b010: instruction_o.op = ariane_pkg::SLTS;  // Set to one if Lower Than Immediate
+            3'b011:
+            instruction_o.op = ariane_pkg::SLTU;  // Set to one if Lower Than Immediate Unsigned
+            3'b100: instruction_o.op = ariane_pkg::XORL;  // Exclusive Or with Immediate
+            3'b110: instruction_o.op = ariane_pkg::ORL;  // Or with Immediate
+            3'b111: instruction_o.op = ariane_pkg::ANDL;  // And with Immediate
+
+            3'b001: begin
+              instruction_o.op = ariane_pkg::SLL;  // Shift Left Logical by Immediate
+              if (instr.instr[31:26] != 6'b0) illegal_instr_non_bm = 1'b1;
+              if (instr.instr[25] != 1'b0 && riscv::XLEN == 32) illegal_instr_non_bm = 1'b1;
+            end
+
+            3'b101: begin
+              if (instr.instr[31:26] == 6'b0)
+                instruction_o.op = ariane_pkg::SRL;  // Shift Right Logical by Immediate
+              else if (instr.instr[31:26] == 6'b010_000)
+                instruction_o.op = ariane_pkg::SRA;  // Shift Right Arithmetically by Immediate
+              else illegal_instr_non_bm = 1'b1;
+              if (instr.instr[25] != 1'b0 && riscv::XLEN == 32) illegal_instr_non_bm = 1'b1;
+            end
+          endcase
+          if (CVA6Cfg.RVB) begin
+            unique case (instr.itype.funct3)
+              3'b001:  // funct7=0110000: unary op chosen by instr[22:20]; otherwise bclri/binvi/bseti
+              if (instr.instr[31:25] == 7'b0110000) begin  // NOTE(review): instr[24:23] are not compared
+                if (instr.instr[22:20] == 3'b100) instruction_o.op = ariane_pkg::SEXTB;
+                else if (instr.instr[22:20] == 3'b101) instruction_o.op = ariane_pkg::SEXTH;
+                else if (instr.instr[22:20] == 3'b010) instruction_o.op = ariane_pkg::CPOP;
+                else if (instr.instr[22:20] == 3'b000) instruction_o.op = ariane_pkg::CLZ;
+                else if (instr.instr[22:20] == 3'b001) instruction_o.op = ariane_pkg::CTZ;
+                else illegal_instr_bm = 1'b1;  // unassigned selector: reject, as the OpImm32 decode does
+              end else if (instr.instr[31:26] == 6'b010010) instruction_o.op = ariane_pkg::BCLRI;
+              else if (instr.instr[31:26] == 6'b011010) instruction_o.op = ariane_pkg::BINVI;
+              else if (instr.instr[31:26] == 6'b001010) instruction_o.op = ariane_pkg::BSETI;
+              else illegal_instr_bm = 1'b1;  // no bitmanip match; the base-ISA decode result decides
+              3'b101: begin  // funct3=101 bitmanip immediates: orc.b, rev8, bexti, rori
+                if (instr.instr[31:20] == 12'b001010000111) instruction_o.op = ariane_pkg::ORCB;
+                else if (riscv::IS_XLEN64 && instr.instr[31:20] == 12'b011010111000)
+                  instruction_o.op = ariane_pkg::REV8;  // rev8, 64-bit encoding
+                else if (!riscv::IS_XLEN64 && instr.instr[31:20] == 12'b011010011000)
+                  instruction_o.op = ariane_pkg::REV8;  // rev8, 32-bit encoding; not accepted when XLEN is 64
+                else if (instr.instr[31:26] == 6'b010_010) instruction_o.op = ariane_pkg::BEXTI;
+                else if (instr.instr[31:26] == 6'b011_000) instruction_o.op = ariane_pkg::RORI;
+                else illegal_instr_bm = 1'b1;  // no bitmanip match; the base-ISA decode result decides
+              end
+              default: illegal_instr_bm = 1'b1;
+            endcase
+            illegal_instr = illegal_instr_non_bm & illegal_instr_bm;
+          end else begin
+            illegal_instr = illegal_instr_non_bm;
+          end
+        end
+
+        // --------------------------------
+        // 32 bit Reg-Immediate Operations
+        // --------------------------------
+        riscv::OpcodeOpImm32: begin
+          instruction_o.fu = ALU;
+          imm_select = IIMM;
+          instruction_o.rs1[4:0] = instr.itype.rs1;
+          instruction_o.rd[4:0] = instr.itype.rd;
+          if (riscv::IS_XLEN64) begin
+            unique case (instr.itype.funct3)
+              3'b000:  instruction_o.op = ariane_pkg::ADDW;  // Add Immediate
+              3'b001: begin
+                instruction_o.op = ariane_pkg::SLLW;  // Shift Left Logical by Immediate
+                if (instr.instr[31:25] != 7'b0) illegal_instr_non_bm = 1'b1;
+              end
+              3'b101: begin
+                if (instr.instr[31:25] == 7'b0)
+                  instruction_o.op = ariane_pkg::SRLW;  // Shift Right Logical by Immediate
+                else if (instr.instr[31:25] == 7'b010_0000)
+                  instruction_o.op = ariane_pkg::SRAW;  // Shift Right Arithmetically by Immediate
+                else illegal_instr_non_bm = 1'b1;
+              end
+              default: illegal_instr_non_bm = 1'b1;
+            endcase
+            if (CVA6Cfg.RVB) begin
+              unique case (instr.itype.funct3)
+                3'b001: begin
+                  if (instr.instr[31:25] == 7'b0110000) begin
+                    if (instr.instr[21:20] == 2'b10) instruction_o.op = ariane_pkg::CPOPW;
+                    else if (instr.instr[21:20] == 2'b00) instruction_o.op = ariane_pkg::CLZW;
+                    else if (instr.instr[21:20] == 2'b01) instruction_o.op = ariane_pkg::CTZW;
+                    else illegal_instr_bm = 1'b1;
+                  end else if (instr.instr[31:26] == 6'b000010) begin
+                    instruction_o.op = ariane_pkg::SLLIUW; // Shift Left Logic by Immediate (Unsigned Word)
+                  end else illegal_instr_bm = 1'b1;
+                end
+                3'b101: begin
+                  if (instr.instr[31:25] == 7'b011_0000) instruction_o.op = ariane_pkg::RORIW;
+                  else illegal_instr_bm = 1'b1;
+                end
+                default: illegal_instr_bm = 1'b1;
+              endcase
+              illegal_instr = illegal_instr_non_bm & illegal_instr_bm;
+            end else begin
+              illegal_instr = illegal_instr_non_bm;
+            end
+
+          end else illegal_instr = 1'b1;
+        end
+        // --------------------------------
+        // LSU
+        // --------------------------------
+        riscv::OpcodeStore: begin
+          instruction_o.fu = STORE;
+          imm_select = SIMM;
+          instruction_o.rs1[4:0] = instr.stype.rs1;
+          instruction_o.rs2[4:0] = instr.stype.rs2;
+          // determine store size
+          unique case (instr.stype.funct3)
+            3'b000: instruction_o.op = ariane_pkg::SB;
+            3'b001: instruction_o.op = ariane_pkg::SH;
+            3'b010: instruction_o.op = ariane_pkg::SW;
+            3'b011:
+            if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::SD;
+            else illegal_instr = 1'b1;
+            default: illegal_instr = 1'b1;
+          endcase
+        end
+
+        riscv::OpcodeLoad: begin
+          instruction_o.fu = LOAD;
+          imm_select = IIMM;
+          instruction_o.rs1[4:0] = instr.itype.rs1;
+          instruction_o.rd[4:0] = instr.itype.rd;
+          // determine load size and signed type
+          unique case (instr.itype.funct3)
+            3'b000: instruction_o.op = ariane_pkg::LB;
+            3'b001: instruction_o.op = ariane_pkg::LH;
+            3'b010: instruction_o.op = ariane_pkg::LW;
+            3'b100: instruction_o.op = ariane_pkg::LBU;
+            3'b101: instruction_o.op = ariane_pkg::LHU;
+            3'b110:
+            if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::LWU;
+            else illegal_instr = 1'b1;
+            3'b011:
+            if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::LD;
+            else illegal_instr = 1'b1;
+            default: illegal_instr = 1'b1;
+          endcase
+        end
+
+        // --------------------------------
+        // Floating-Point Load/store
+        // --------------------------------
+        riscv::OpcodeStoreFp: begin
+          if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static)
+            instruction_o.fu = STORE;
+            imm_select = SIMM;
+            instruction_o.rs1[4:0] = instr.stype.rs1;
+            instruction_o.rs2[4:0] = instr.stype.rs2;
+            // determine store size
+            unique case (instr.stype.funct3)
+              // Only process instruction if corresponding extension is active (static)
+              3'b000:
+              if (CVA6Cfg.XF8) instruction_o.op = ariane_pkg::FSB;
+              else illegal_instr = 1'b1;
+              3'b001:
+              if (CVA6Cfg.XF16 | CVA6Cfg.XF16ALT) instruction_o.op = ariane_pkg::FSH;
+              else illegal_instr = 1'b1;
+              3'b010:
+              if (CVA6Cfg.RVF) instruction_o.op = ariane_pkg::FSW;
+              else illegal_instr = 1'b1;
+              3'b011:
+              if (CVA6Cfg.RVD) instruction_o.op = ariane_pkg::FSD;
+              else illegal_instr = 1'b1;
+              default: illegal_instr = 1'b1;
+            endcase
+          end else illegal_instr = 1'b1;
+        end
+
+        riscv::OpcodeLoadFp: begin
+          if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static)
+            instruction_o.fu = LOAD;
+            imm_select = IIMM;
+            instruction_o.rs1[4:0] = instr.itype.rs1;
+            instruction_o.rd[4:0] = instr.itype.rd;
+            // determine load size
+            unique case (instr.itype.funct3)
+              // Only process instruction if corresponding extension is active (static)
+              3'b000:
+              if (CVA6Cfg.XF8) instruction_o.op = ariane_pkg::FLB;
+              else illegal_instr = 1'b1;
+              3'b001:
+              if (CVA6Cfg.XF16 | CVA6Cfg.XF16ALT) instruction_o.op = ariane_pkg::FLH;
+              else illegal_instr = 1'b1;
+              3'b010:
+              if (CVA6Cfg.RVF) instruction_o.op = ariane_pkg::FLW;
+              else illegal_instr = 1'b1;
+              3'b011:
+              if (CVA6Cfg.RVD) instruction_o.op = ariane_pkg::FLD;
+              else illegal_instr = 1'b1;
+              default: illegal_instr = 1'b1;
+            endcase
+          end else illegal_instr = 1'b1;
+        end
+
+        // ----------------------------------
+        // Floating-Point Reg-Reg Operations
+        // ----------------------------------
+        riscv::OpcodeMadd, riscv::OpcodeMsub, riscv::OpcodeNmsub, riscv::OpcodeNmadd: begin
+          if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static)
+            instruction_o.fu       = FPU;
+            instruction_o.rs1[4:0] = instr.r4type.rs1;
+            instruction_o.rs2[4:0] = instr.r4type.rs2;
+            instruction_o.rd[4:0]  = instr.r4type.rd;
+            imm_select             = RS3;  // rs3 into result field
+            check_fprm             = 1'b1;
+            // select the correct fused operation
+            unique case (instr.r4type.opcode)
+              default: instruction_o.op = ariane_pkg::FMADD;  // fmadd.fmt - FP Fused multiply-add
+              riscv::OpcodeMsub:
+              instruction_o.op = ariane_pkg::FMSUB;  // fmsub.fmt - FP Fused multiply-subtract
+              riscv::OpcodeNmsub:
+              instruction_o.op = ariane_pkg::FNMSUB; // fnmsub.fmt - FP Negated fused multiply-subtract
+              riscv::OpcodeNmadd:
+              instruction_o.op = ariane_pkg::FNMADD;  // fnmadd.fmt - FP Negated fused multiply-add
+            endcase
+
+            // determine fp format
+            unique case (instr.r4type.funct2)
+              // Only process instruction if corresponding extension is active (static)
+              2'b00:   if (~CVA6Cfg.RVF) illegal_instr = 1'b1;
+              2'b01:   if (~CVA6Cfg.RVD) illegal_instr = 1'b1;
+              2'b10:   if (~CVA6Cfg.XF16 & ~CVA6Cfg.XF16ALT) illegal_instr = 1'b1;
+              2'b11:   if (~CVA6Cfg.XF8) illegal_instr = 1'b1;
+              default: illegal_instr = 1'b1;
+            endcase
+
+            // check rounding mode
+            if (check_fprm) begin
+              unique case (instr.rftype.rm) inside
+                [3'b000 : 3'b100]: ;  //legal rounding modes
+                3'b101: begin  // Alternative Half-Precision encoded as fmt=10 and rm=101
+                  if (~CVA6Cfg.XF16ALT || instr.rftype.fmt != 2'b10) illegal_instr = 1'b1;
+                  unique case (frm_i) inside  // actual rounding mode from frm csr
+                    [3'b000 : 3'b100]: ;  //legal rounding modes
+                    default: illegal_instr = 1'b1;  // any other frm value is rejected
+                  endcase
+                end
+                3'b111: begin
+                  // rounding mode from frm csr
+                  unique case (frm_i) inside
+                    [3'b000 : 3'b100]: ;  //legal rounding modes
+                    default: illegal_instr = 1'b1;
+                  endcase
+                end
+                default:           illegal_instr = 1'b1;
+              endcase
+            end
+          end else begin
+            illegal_instr = 1'b1;
+          end
+        end
+
+        riscv::OpcodeOpFp: begin
+          if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static)
+            instruction_o.fu       = FPU;
+            instruction_o.rs1[4:0] = instr.rftype.rs1;
+            instruction_o.rs2[4:0] = instr.rftype.rs2;
+            instruction_o.rd[4:0]  = instr.rftype.rd;
+            check_fprm             = 1'b1;
+            // decode FP instruction
+            unique case (instr.rftype.funct5)
+              5'b00000: begin
+                instruction_o.op       = ariane_pkg::FADD;  // fadd.fmt - FP Addition
+                instruction_o.rs1      = '0;  // Operand A is set to 0
+                instruction_o.rs2[4:0] = instr.rftype.rs1;  // Operand B is set to rs1
+                imm_select             = IIMM;  // Operand C is set to rs2
+              end
+              5'b00001: begin
+                instruction_o.op       = ariane_pkg::FSUB;  // fsub.fmt - FP Subtraction
+                instruction_o.rs1      = '0;  // Operand A is set to 0
+                instruction_o.rs2[4:0] = instr.rftype.rs1;  // Operand B is set to rs1
+                imm_select             = IIMM;  // Operand C is set to rs2
+              end
+              5'b00010: instruction_o.op = ariane_pkg::FMUL;  // fmul.fmt - FP Multiplication
+              5'b00011: instruction_o.op = ariane_pkg::FDIV;  // fdiv.fmt - FP Division
+              5'b01011: begin
+                instruction_o.op = ariane_pkg::FSQRT;  // fsqrt.fmt - FP Square Root
+                // rs2 must be zero
+                if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1;
+              end
+              5'b00100: begin
+                instruction_o.op = ariane_pkg::FSGNJ;  // fsgn{j[n]/jx}.fmt - FP Sign Injection
+                check_fprm       = 1'b0;  // instruction encoded in rm, do the check here
+                if (CVA6Cfg.XF16ALT) begin        // FP16ALT instructions encoded in rm separately (static)
+                  if (!(instr.rftype.rm inside {[3'b000 : 3'b010], [3'b100 : 3'b110]}))
+                    illegal_instr = 1'b1;
+                end else begin
+                  if (!(instr.rftype.rm inside {[3'b000 : 3'b010]})) illegal_instr = 1'b1;
+                end
+              end
+              5'b00101: begin
+                instruction_o.op = ariane_pkg::FMIN_MAX;  // fmin/fmax.fmt - FP Minimum / Maximum
+                check_fprm       = 1'b0;  // instruction encoded in rm, do the check here
+                if (CVA6Cfg.XF16ALT) begin           // FP16ALT instructions encoded in rm separately (static)
+                  if (!(instr.rftype.rm inside {[3'b000 : 3'b001], [3'b100 : 3'b101]}))
+                    illegal_instr = 1'b1;
+                end else begin
+                  if (!(instr.rftype.rm inside {[3'b000 : 3'b001]})) illegal_instr = 1'b1;
+                end
+              end
+              5'b01000: begin
+                instruction_o.op = ariane_pkg::FCVT_F2F;  // fcvt.fmt.fmt - FP to FP Conversion
+                instruction_o.rs2[4:0] = instr.rvftype.rs1; // tie rs2 to rs1 to be safe (vectors use rs2)
+                imm_select = IIMM;  // rs2 holds part of the instruction
+                if (|instr.rftype.rs2[24:23])
+                  illegal_instr = 1'b1;  // bits [22:20] used, other bits must be 0
+                // check source format (taken from bits [22:20] of the rs2 field)
+                unique case (instr.rftype.rs2[22:20])
+                  // Only process instruction if corresponding extension is active (static)
+                  3'b000:  if (~CVA6Cfg.RVF) illegal_instr = 1'b1;
+                  3'b001:  if (~CVA6Cfg.RVD) illegal_instr = 1'b1;
+                  3'b010:  if (~CVA6Cfg.XF16) illegal_instr = 1'b1;
+                  3'b110:  if (~CVA6Cfg.XF16ALT) illegal_instr = 1'b1;
+                  3'b011:  if (~CVA6Cfg.XF8) illegal_instr = 1'b1;
+                  default: illegal_instr = 1'b1;
+                endcase
+              end
+              5'b10100: begin
+                instruction_o.op = ariane_pkg::FCMP;  // feq/flt/fle.fmt - FP Comparisons
+                check_fprm       = 1'b0;  // instruction encoded in rm, do the check here
+                if (CVA6Cfg.XF16ALT) begin       // FP16ALT instructions encoded in rm separately (static)
+                  if (!(instr.rftype.rm inside {[3'b000 : 3'b010], [3'b100 : 3'b110]}))
+                    illegal_instr = 1'b1;
+                end else begin
+                  if (!(instr.rftype.rm inside {[3'b000 : 3'b010]})) illegal_instr = 1'b1;
+                end
+              end
+              5'b11000: begin
+                instruction_o.op = ariane_pkg::FCVT_F2I;  // fcvt.ifmt.fmt - FP to Int Conversion
+                imm_select       = IIMM;  // rs2 holds part of the instruction
+                if (|instr.rftype.rs2[24:22])
+                  illegal_instr = 1'b1;  // bits [21:20] used, other bits must be 0
+              end
+              5'b11010: begin
+                instruction_o.op = ariane_pkg::FCVT_I2F;  // fcvt.fmt.ifmt - Int to FP Conversion
+                imm_select       = IIMM;  // rs2 holds part of the instruction
+                if (|instr.rftype.rs2[24:22])
+                  illegal_instr = 1'b1;  // bits [21:20] used, other bits must be 0
+              end
+              5'b11100: begin
+                instruction_o.rs2[4:0] = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit
+                check_fprm = 1'b0;  // instruction encoded in rm, do the check here
+                if (instr.rftype.rm == 3'b000 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b100)) // FP16ALT has separate encoding
+                  instruction_o.op = ariane_pkg::FMV_F2X;  // fmv.ifmt.fmt - FPR to GPR Move
+                else if (instr.rftype.rm == 3'b001 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b101)) // FP16ALT has separate encoding
+                  instruction_o.op = ariane_pkg::FCLASS;  // fclass.fmt - FP Classify
+                else illegal_instr = 1'b1;
+                // rs2 must be zero
+                if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1;
+              end
+              5'b11110: begin
+                instruction_o.op = ariane_pkg::FMV_X2F;  // fmv.fmt.ifmt - GPR to FPR Move
+                instruction_o.rs2[4:0] = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit
+                check_fprm = 1'b0;  // instruction encoded in rm, do the check here
+                if (!(instr.rftype.rm == 3'b000 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b100)))
+                  illegal_instr = 1'b1;
+                // rs2 must be zero
+                if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1;
+              end
+              default:  illegal_instr = 1'b1;
+            endcase
+
+            // check format
+            unique case (instr.rftype.fmt)
+              // Only process instruction if corresponding extension is active (static)
+              2'b00:   if (~CVA6Cfg.RVF) illegal_instr = 1'b1;
+              2'b01:   if (~CVA6Cfg.RVD) illegal_instr = 1'b1;
+              2'b10:   if (~CVA6Cfg.XF16 & ~CVA6Cfg.XF16ALT) illegal_instr = 1'b1;
+              2'b11:   if (~CVA6Cfg.XF8) illegal_instr = 1'b1;
+              default: illegal_instr = 1'b1;
+            endcase
+
+            // check rounding mode
+            if (check_fprm) begin
+              unique case (instr.rftype.rm) inside
+                [3'b000 : 3'b100]: ;  //legal rounding modes
+                3'b101: begin  // Alternative Half-Precsision encded as fmt=10 and rm=101
+                  if (~CVA6Cfg.XF16ALT || instr.rftype.fmt != 2'b10) illegal_instr = 1'b1;
+                  unique case (frm_i) inside  // actual rounding mode from frm csr
+                    [3'b000 : 3'b100]: ;  //legal rounding modes
+                    default: illegal_instr = 1'b1;
+                  endcase
+                end
+                3'b111: begin
+                  // rounding mode from frm csr
+                  unique case (frm_i) inside
+                    [3'b000 : 3'b100]: ;  //legal rounding modes
+                    default: illegal_instr = 1'b1;
+                  endcase
+                end
+                default:           illegal_instr = 1'b1;
+              endcase
+            end
+          end else begin
+            illegal_instr = 1'b1;
+          end
+        end
+
+        // ----------------------------------
+        // Atomic Operations
+        // ----------------------------------
+        riscv::OpcodeAmo: begin
+          // we are going to use the load unit for AMOs
+          instruction_o.fu = STORE;
+          instruction_o.rs1[4:0] = instr.atype.rs1;
+          instruction_o.rs2[4:0] = instr.atype.rs2;
+          instruction_o.rd[4:0] = instr.atype.rd;
+          // TODO(zarubaf): Ordering
+          // words
+          if (CVA6Cfg.RVA && instr.stype.funct3 == 3'h2) begin
+            unique case (instr.instr[31:27])
+              5'h0: instruction_o.op = ariane_pkg::AMO_ADDW;
+              5'h1: instruction_o.op = ariane_pkg::AMO_SWAPW;
+              5'h2: begin
+                instruction_o.op = ariane_pkg::AMO_LRW;
+                if (instr.atype.rs2 != 0) illegal_instr = 1'b1;
+              end
+              5'h3: instruction_o.op = ariane_pkg::AMO_SCW;
+              5'h4: instruction_o.op = ariane_pkg::AMO_XORW;
+              5'h8: instruction_o.op = ariane_pkg::AMO_ORW;
+              5'hC: instruction_o.op = ariane_pkg::AMO_ANDW;
+              5'h10: instruction_o.op = ariane_pkg::AMO_MINW;
+              5'h14: instruction_o.op = ariane_pkg::AMO_MAXW;
+              5'h18: instruction_o.op = ariane_pkg::AMO_MINWU;
+              5'h1C: instruction_o.op = ariane_pkg::AMO_MAXWU;
+              default: illegal_instr = 1'b1;
+            endcase
+            // double words
+          end else if (riscv::IS_XLEN64 && CVA6Cfg.RVA && instr.stype.funct3 == 3'h3) begin
+            unique case (instr.instr[31:27])
+              5'h0: instruction_o.op = ariane_pkg::AMO_ADDD;
+              5'h1: instruction_o.op = ariane_pkg::AMO_SWAPD;
+              5'h2: begin
+                instruction_o.op = ariane_pkg::AMO_LRD;
+                if (instr.atype.rs2 != 0) illegal_instr = 1'b1;
+              end
+              5'h3: instruction_o.op = ariane_pkg::AMO_SCD;
+              5'h4: instruction_o.op = ariane_pkg::AMO_XORD;
+              5'h8: instruction_o.op = ariane_pkg::AMO_ORD;
+              5'hC: instruction_o.op = ariane_pkg::AMO_ANDD;
+              5'h10: instruction_o.op = ariane_pkg::AMO_MIND;
+              5'h14: instruction_o.op = ariane_pkg::AMO_MAXD;
+              5'h18: instruction_o.op = ariane_pkg::AMO_MINDU;
+              5'h1C: instruction_o.op = ariane_pkg::AMO_MAXDU;
+              default: illegal_instr = 1'b1;
+            endcase
+          end else begin
+            illegal_instr = 1'b1;
+          end
+        end
+
+        // --------------------------------
+        // Control Flow Instructions
+        // --------------------------------
+        riscv::OpcodeBranch: begin
+          imm_select              = SBIMM;
+          instruction_o.fu        = CTRL_FLOW;
+          instruction_o.rs1[4:0]  = instr.stype.rs1;
+          instruction_o.rs2[4:0]  = instr.stype.rs2;
+
+          is_control_flow_instr_o = 1'b1;
+
+          case (instr.stype.funct3)
+            3'b000: instruction_o.op = ariane_pkg::EQ;
+            3'b001: instruction_o.op = ariane_pkg::NE;
+            3'b100: instruction_o.op = ariane_pkg::LTS;
+            3'b101: instruction_o.op = ariane_pkg::GES;
+            3'b110: instruction_o.op = ariane_pkg::LTU;
+            3'b111: instruction_o.op = ariane_pkg::GEU;
+            default: begin
+              is_control_flow_instr_o = 1'b0;
+              illegal_instr           = 1'b1;
+            end
+          endcase
+        end
+        // Jump and link register
+        riscv::OpcodeJalr: begin
+          instruction_o.fu        = CTRL_FLOW;
+          instruction_o.op        = ariane_pkg::JALR;
+          instruction_o.rs1[4:0]  = instr.itype.rs1;
+          imm_select              = IIMM;
+          instruction_o.rd[4:0]   = instr.itype.rd;
+          is_control_flow_instr_o = 1'b1;
+          // invalid jump and link register -> reserved for vector encoding
+          if (instr.itype.funct3 != 3'b0) illegal_instr = 1'b1;
+        end
+        // Jump and link
+        riscv::OpcodeJal: begin
+          instruction_o.fu        = CTRL_FLOW;
+          imm_select              = JIMM;
+          instruction_o.rd[4:0]   = instr.utype.rd;
+          is_control_flow_instr_o = 1'b1;
+        end
+
+        riscv::OpcodeAuipc: begin
+          instruction_o.fu      = ALU;
+          imm_select            = UIMM;
+          instruction_o.use_pc  = 1'b1;
+          instruction_o.rd[4:0] = instr.utype.rd;
+        end
+
+        riscv::OpcodeLui: begin
+          imm_select            = UIMM;
+          instruction_o.fu      = ALU;
+          instruction_o.rd[4:0] = instr.utype.rd;
+        end
+
+        default: illegal_instr = 1'b1;
+      endcase
+    end
+    if (CVA6Cfg.CvxifEn) begin
+      if (is_illegal_i || illegal_instr) begin
+        instruction_o.fu       = CVXIF;
+        instruction_o.rs1[4:0] = instr.r4type.rs1;
+        instruction_o.rs2[4:0] = instr.r4type.rs2;
+        instruction_o.rd[4:0]  = instr.r4type.rd;
+        instruction_o.op       = ariane_pkg::OFFLOAD;
+        imm_select             = RS3;
+      end
+    end
+
+    // Accelerator instructions.
+    // These can overwrite the previous decoding entirely.
+    if (CVA6Cfg.EnableAccelerator) begin // only generate decoder if accelerators are enabled (static)
+      if (is_accel) begin
+        instruction_o.fu        = acc_instruction.fu;
+        instruction_o.vfp       = acc_instruction.vfp;
+        instruction_o.rs1       = acc_instruction.rs1;
+        instruction_o.rs2       = acc_instruction.rs2;
+        instruction_o.rd        = acc_instruction.rd;
+        instruction_o.op        = acc_instruction.op;
+        illegal_instr           = acc_illegal_instr;
+        is_control_flow_instr_o = acc_is_control_flow_instr;
+      end
+    end
+  end
+
+  // --------------------------------
+  // Sign extend immediate
+  // --------------------------------
+  always_comb begin : sign_extend
+    // Build every candidate immediate from the raw instruction word.
+    // Each one is widened to XLEN by replicating the sign bit named in its
+    // replication prefix.
+    // I-type: instruction_i[31:20]
+    imm_i_type = {{riscv::XLEN - 12{instruction_i[31]}}, instruction_i[31:20]};
+    // S-type: instruction_i[31:25] followed by instruction_i[11:7]
+    imm_s_type = {{riscv::XLEN - 12{instruction_i[31]}}, instruction_i[31:25], instruction_i[11:7]};
+    // SB-type: bit 0 is always zero
+    imm_sb_type = {
+      {riscv::XLEN - 13{instruction_i[31]}}, instruction_i[31], instruction_i[7],
+      instruction_i[30:25], instruction_i[11:8], 1'b0
+    };
+    // U-type: instruction_i[31:12] in the upper part, low 12 bits zero
+    imm_u_type = {{riscv::XLEN - 32{instruction_i[31]}}, instruction_i[31:12], 12'b0};
+    // UJ-type: bit 0 is always zero
+    imm_uj_type = {
+      {riscv::XLEN - 20{instruction_i[31]}}, instruction_i[19:12], instruction_i[20],
+      instruction_i[30:21], 1'b0
+    };
+    // 5-bit field instruction_i[24:20], extended with instruction_i[24]
+    imm_bi_type = {{riscv::XLEN - 5{instruction_i[24]}}, instruction_i[24:20]};
+
+    // Start from "no immediate" (zero result, use_imm low) and let the format
+    // chosen by imm_select override these defaults.
+    instruction_o.result  = {riscv::XLEN{1'b0}};
+    instruction_o.use_imm = 1'b0;
+    case (imm_select)
+      IIMM: begin
+        instruction_o.result  = imm_i_type;
+        instruction_o.use_imm = 1'b1;
+      end
+      SIMM: begin
+        instruction_o.result  = imm_s_type;
+        instruction_o.use_imm = 1'b1;
+      end
+      SBIMM: begin
+        instruction_o.result  = imm_sb_type;
+        instruction_o.use_imm = 1'b1;
+      end
+      UIMM: begin
+        instruction_o.result  = imm_u_type;
+        instruction_o.use_imm = 1'b1;
+      end
+      JIMM: begin
+        instruction_o.result  = imm_uj_type;
+        instruction_o.use_imm = 1'b1;
+      end
+      RS3: begin
+        // result carries the register address of fp operand rs3, zero-extended;
+        // this is not an immediate, so use_imm keeps its default of 0
+        instruction_o.result = {{riscv::XLEN - 5{1'b0}}, instr.r4type.rs3};
+      end
+      // every other selection keeps the "no immediate" defaults set above
+      default: ;
+    endcase
+
+    // A decoded accelerator instruction replaces both fields with the values
+    // supplied in acc_instruction.
+    if (CVA6Cfg.EnableAccelerator && is_accel) begin
+      instruction_o.result  = acc_instruction.result;
+      instruction_o.use_imm = acc_instruction.use_imm;
+    end
+  end
+
+  // ---------------------
+  // Exception handling
+  // ---------------------
+  riscv::xlen_t interrupt_cause;  // cause of the winning pending & enabled interrupt ('0 when none)
+
+  // an instruction that raises an exception is marked valid right away (see the note inside the process)
+  assign instruction_o.valid = instruction_o.ex.valid;
+
+  always_comb begin : exception_handling
+    interrupt_cause  = '0;
+    instruction_o.ex = ex_i;
+    // look if we didn't already get an exception in any previous
+    // stage - we should not overwrite it as we retain order regarding the exception
+    if (~ex_i.valid) begin
+      // if we didn't already get an exception save the instruction here as we may need it
+      // in the commit stage if we got an access exception to one of the CSR registers
+      instruction_o.ex.tval  = (is_compressed_i) ? {{riscv::XLEN-16{1'b0}}, compressed_instr_i} : {{riscv::XLEN-32{1'b0}}, instruction_i};
+      // instructions which will throw an exception are marked as valid
+      // e.g.: they can be committed anytime and do not need to wait for any functional unit
+      // check here if we decoded an invalid instruction or if the compressed decoder already decoded
+      // an invalid instruction
+      if (illegal_instr || is_illegal_i) begin
+        if (!CVA6Cfg.CvxifEn) instruction_o.ex.valid = 1'b1;  // with CvxifEn the exception is not raised here
+        // the illegal-instruction cause is recorded in both cases (the decoder process retargets these to CVXIF/OFFLOAD when CvxifEn)
+        instruction_o.ex.cause = riscv::ILLEGAL_INSTR;
+        // we got an ecall, set the correct cause depending on the current privilege level
+      end else if (ecall) begin
+        // this exception is valid
+        instruction_o.ex.valid = 1'b1;
+        // depending on the privilege mode, set the appropriate cause
+        if (priv_lvl_i == riscv::PRIV_LVL_S && CVA6Cfg.RVS) begin
+          instruction_o.ex.cause = riscv::ENV_CALL_SMODE;
+        end else if (priv_lvl_i == riscv::PRIV_LVL_U && CVA6Cfg.RVU) begin
+          instruction_o.ex.cause = riscv::ENV_CALL_UMODE;
+        end else if (priv_lvl_i == riscv::PRIV_LVL_M) begin
+          instruction_o.ex.cause = riscv::ENV_CALL_MMODE;
+        end  // if no branch matched, the cause copied from ex_i above is kept
+      end else if (ebreak) begin
+        // this exception is valid
+        instruction_o.ex.valid = 1'b1;
+        // set breakpoint cause
+        instruction_o.ex.cause = riscv::BREAKPOINT;
+      end
+      // -----------------
+      // Interrupt Control
+      // -----------------
+      // we decode an interrupt the same as an exception, hence it will be taken if the instruction did not
+      // throw any previous exception.
+      // we have three interrupt sources (external, software, timer) for two privilege levels (Supervisor, Machine).
+      // The checks below run from lowest to highest priority: each later `if` overwrites interrupt_cause.
+      // Supervisor Timer Interrupt
+      if (irq_ctrl_i.mie[riscv::IRQ_S_TIMER] && irq_ctrl_i.mip[riscv::IRQ_S_TIMER]) begin
+        interrupt_cause = riscv::S_TIMER_INTERRUPT;
+      end
+      // Supervisor Software Interrupt
+      if (irq_ctrl_i.mie[riscv::IRQ_S_SOFT] && irq_ctrl_i.mip[riscv::IRQ_S_SOFT]) begin
+        interrupt_cause = riscv::S_SW_INTERRUPT;
+      end
+      // Supervisor External Interrupt
+      // The logical-OR of the software-writable bit and the signal from the external interrupt controller is
+      // used to generate external interrupts to the supervisor
+      if (irq_ctrl_i.mie[riscv::IRQ_S_EXT] && (irq_ctrl_i.mip[riscv::IRQ_S_EXT] | irq_i[ariane_pkg::SupervisorIrq])) begin
+        interrupt_cause = riscv::S_EXT_INTERRUPT;
+      end
+      // Machine Timer Interrupt
+      if (irq_ctrl_i.mip[riscv::IRQ_M_TIMER] && irq_ctrl_i.mie[riscv::IRQ_M_TIMER]) begin
+        interrupt_cause = riscv::M_TIMER_INTERRUPT;
+      end
+      // Machine Mode Software Interrupt
+      if (irq_ctrl_i.mip[riscv::IRQ_M_SOFT] && irq_ctrl_i.mie[riscv::IRQ_M_SOFT]) begin
+        interrupt_cause = riscv::M_SW_INTERRUPT;
+      end
+      // Machine Mode External Interrupt
+      if (irq_ctrl_i.mip[riscv::IRQ_M_EXT] && irq_ctrl_i.mie[riscv::IRQ_M_EXT]) begin
+        interrupt_cause = riscv::M_EXT_INTERRUPT;
+      end
+
+      if (interrupt_cause[riscv::XLEN-1] && irq_ctrl_i.global_enable) begin  // NOTE(review): MSB presumably flags "cause is an interrupt" - confirm in riscv_pkg
+        // However, if bit i in mideleg is set, interrupts are considered to be globally enabled if the hart's current privilege
+        // mode equals the delegated privilege mode (S or U) and that mode's interrupt enable bit
+        // (SIE or UIE in mstatus) is set, or if the current privilege mode is less than the delegated privilege mode.
+        if (irq_ctrl_i.mideleg[interrupt_cause[$clog2(riscv::XLEN)-1:0]]) begin
+          if ((CVA6Cfg.RVS && irq_ctrl_i.sie && priv_lvl_i == riscv::PRIV_LVL_S) || (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U)) begin
+            instruction_o.ex.valid = 1'b1;
+            instruction_o.ex.cause = interrupt_cause;
+          end
+        end else begin
+          instruction_o.ex.valid = 1'b1;
+          instruction_o.ex.cause = interrupt_cause;
+        end
+      end
+    end
+
+    // a debug request has precedence over everything else (it also overrides an exception passed in on ex_i)
+    if (CVA6Cfg.DebugEn && debug_req_i && !debug_mode_i) begin
+      instruction_o.ex.valid = 1'b1;
+      instruction_o.ex.cause = riscv::DEBUG_REQUEST;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/ex_stage.sv b/test/type_param/core/ex_stage.sv
new file mode 100644
index 0000000..978e2a3
--- /dev/null
+++ b/test/type_param/core/ex_stage.sv
@@ -0,0 +1,413 @@
+
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.04.2017
+// Description: Instantiation of all functional units residing in the execute stage
+
+
+module ex_stage
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned ASID_WIDTH = 1
+) (
+    input logic clk_i,        // Clock
+    input logic rst_ni,       // Asynchronous reset active low
+    input logic flush_i,
+    input logic debug_mode_i,
+
+    input logic [riscv::VLEN-1:0] rs1_forwarding_i,
+    input logic [riscv::VLEN-1:0] rs2_forwarding_i,
+    input fu_data_t fu_data_i,
+    input logic [riscv::VLEN-1:0] pc_i,  // PC of current instruction
+    input logic is_compressed_instr_i,  // we need to know if this was a compressed instruction
+                                        // in order to calculate the next PC on a mis-predict
+    // Fixed latency unit(s)
+    output riscv::xlen_t flu_result_o,
+    output logic [TRANS_ID_BITS-1:0]               flu_trans_id_o,        // ID of scoreboard entry at which to write back
+    output exception_t flu_exception_o,
+    output logic flu_ready_o,  // FLU is ready
+    output logic flu_valid_o,  // FLU result is valid
+    // Branches and Jumps
+    // ALU 1
+    input logic alu_valid_i,  // fu_data_i carries a valid ALU operation
+    // Branch Unit
+    input logic branch_valid_i,  // we are using the branch unit
+    input branchpredict_sbe_t branch_predict_i,
+    output bp_resolve_t resolved_branch_o,  // the branch engine uses the write back from the ALU
+    output logic resolve_branch_o,  // to ID signaling that we resolved the branch
+    // CSR
+    input logic csr_valid_i,
+    output logic [11:0] csr_addr_o,
+    input logic csr_commit_i,
+    // MULT
+    input logic mult_valid_i,  // fu_data_i carries a valid multiplier/divider operation
+    // LSU
+    output logic lsu_ready_o,  // FU is ready
+    input logic lsu_valid_i,  // Input is valid
+
+    output logic                             load_valid_o,
+    output riscv::xlen_t                     load_result_o,
+    output logic         [TRANS_ID_BITS-1:0] load_trans_id_o,
+    output exception_t                       load_exception_o,
+    output logic                             store_valid_o,
+    output riscv::xlen_t                     store_result_o,
+    output logic         [TRANS_ID_BITS-1:0] store_trans_id_o,
+    output exception_t                       store_exception_o,
+
+    input logic lsu_commit_i,
+    output logic lsu_commit_ready_o,  // commit queue is ready to accept another commit request
+    input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
+    input logic stall_st_pending_i,
+    output logic no_st_pending_o,
+    input logic amo_valid_commit_i,
+    // FPU
+    output logic fpu_ready_o,  // FU is ready
+    input logic fpu_valid_i,  // fu_data_i carries a valid FPU operation
+    input logic [1:0] fpu_fmt_i,  // FP format
+    input logic [2:0] fpu_rm_i,  // FP rm
+    input logic [2:0] fpu_frm_i,  // FP frm csr
+    input logic [6:0] fpu_prec_i,  // FP precision control
+    output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
+    output riscv::xlen_t fpu_result_o,
+    output logic fpu_valid_o,
+    output exception_t fpu_exception_o,
+    // CoreV-X-Interface
+    input logic x_valid_i,
+    output logic x_ready_o,
+    input logic [31:0] x_off_instr_i,
+    output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
+    output exception_t x_exception_o,
+    output riscv::xlen_t x_result_o,
+    output logic x_valid_o,
+    output logic x_we_o,
+    output cvxif_pkg::cvxif_req_t cvxif_req_o,
+    input cvxif_pkg::cvxif_resp_t cvxif_resp_i,
+    input logic acc_valid_i,  // accelerator instruction valid (only feeds branch_unit's fu_valid_i)
+    // Memory Management
+    input logic enable_translation_i,
+    input logic en_ld_st_translation_i,
+    input logic flush_tlb_i,
+
+    input  riscv::priv_lvl_t                   priv_lvl_i,
+    input  riscv::priv_lvl_t                   ld_st_priv_lvl_i,
+    input  logic                               sum_i,
+    input  logic                               mxr_i,
+    input  logic             [riscv::PPNW-1:0] satp_ppn_i,
+    input  logic             [ ASID_WIDTH-1:0] asid_i,
+    // icache translation requests
+    input  icache_arsp_t                       icache_areq_i,
+    output icache_areq_t                       icache_areq_o,
+
+    // interface to dcache
+    input dcache_req_o_t [2:0] dcache_req_ports_i,
+    output dcache_req_i_t [2:0] dcache_req_ports_o,
+    input logic dcache_wbuffer_empty_i,
+    input logic dcache_wbuffer_not_ni_i,
+    output amo_req_t amo_req_o,  // request to cache subsystem
+    input amo_resp_t amo_resp_i,  // response from cache subsystem
+    // Performance counters
+    output logic itlb_miss_o,
+    output logic dtlb_miss_o,
+    // PMPs
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
+
+    // RVFI
+    output lsu_ctrl_t                   rvfi_lsu_ctrl_o,
+    output            [riscv::PLEN-1:0] rvfi_mem_paddr_o
+);
+
+  // -------------------------
+  // Fixed Latency Units
+  // -------------------------
+  // all fixed latency units share a single issue port and a single write
+  // port into the scoreboard. At the moment those are:
+  // 1. ALU - all operations are single cycle
+  // 2. Branch unit: operation is single cycle, the ALU is needed
+  //    for comparison
+  // 3. CSR: This is a small buffer which saves the address of the CSR.
+  //    The value is then re-fetched once the instruction retires. The buffer
+  //    is only a single entry deep, hence this operation will block all
+  //    other operations once this buffer is full. This should not be a major
+  //    concern though as CSRs are infrequent.
+  // 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle.
+  //                        The issue logic will take care of not issuing
+  //                        another instruction if it will collide on the
+  //                        output port. Divisions are arbitrary in length
+  //                        they will simply block the issue of all other
+  //                        instructions.
+
+  logic current_instruction_is_sfence_vma;
+  // These two registers store the rs1 and rs2 parameters in case of `SFENCE_VMA`
+  // instruction to be used for TLB flush in the next clock cycle.
+  logic [ASID_WIDTH-1:0] asid_to_be_flushed;
+  logic [riscv::VLEN-1:0] vaddr_to_be_flushed;
+
+  // from ALU to branch unit
+  logic alu_branch_res;  // branch comparison result
+  riscv::xlen_t alu_result, csr_result, mult_result;
+  logic [riscv::VLEN-1:0] branch_result;
+  logic csr_ready, mult_ready;
+  logic [TRANS_ID_BITS-1:0] mult_trans_id;
+  logic mult_valid;
+
+  // 1. ALU (combinatorial)
+  // data silence operation
+  fu_data_t alu_data;
+  assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0;
+
+  alu #(
+      .CVA6Cfg(CVA6Cfg)
+  ) alu_i (
+      .clk_i,
+      .rst_ni,
+      .fu_data_i       (alu_data),
+      .result_o        (alu_result),
+      .alu_branch_res_o(alu_branch_res)
+  );
+
+  // 2. Branch Unit (combinatorial)
+  // we don't silence the branch unit as this is already critical and we do
+  // not want to add another layer of logic
+  branch_unit #(
+      .CVA6Cfg(CVA6Cfg)
+  ) branch_unit_i (
+      .clk_i,
+      .rst_ni,
+      .debug_mode_i,
+      .fu_data_i,
+      .pc_i,
+      .is_compressed_instr_i,
+      // any functional unit is valid, check that there is no accidental mis-predict
+      .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) ,
+      .branch_valid_i,
+      .branch_comp_res_i(alu_branch_res),
+      .branch_result_o(branch_result),
+      .branch_predict_i,
+      .resolved_branch_o,
+      .resolve_branch_o,
+      .branch_exception_o(flu_exception_o)
+  );
+
+  // 3. CSR (sequential)
+  csr_buffer #(
+      .CVA6Cfg(CVA6Cfg)
+  ) csr_buffer_i (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .fu_data_i,
+      .csr_valid_i,
+      .csr_ready_o (csr_ready),
+      .csr_result_o(csr_result),
+      .csr_commit_i,
+      .csr_addr_o
+  );
+
+  assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid;
+
+  // result MUX
+  always_comb begin
+    // Branch result as default case
+    flu_result_o   = {{riscv::XLEN - riscv::VLEN{1'b0}}, branch_result};
+    flu_trans_id_o = fu_data_i.trans_id;
+    // ALU result
+    if (alu_valid_i) begin
+      flu_result_o = alu_result;
+      // CSR result
+    end else if (csr_valid_i) begin
+      flu_result_o = csr_result;
+    end else if (mult_valid) begin
+      flu_result_o   = mult_result;
+      flu_trans_id_o = mult_trans_id;
+    end
+  end
+
+  // ready flag for FLU: the CSR buffer and the multiplier must both report ready
+  assign flu_ready_o = csr_ready & mult_ready;
+
+  // 4. Multiplication (Sequential)
+  fu_data_t mult_data;
+  // input silencing of multiplier
+  assign mult_data = mult_valid_i ? fu_data_i : '0;
+
+  mult #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_mult (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .mult_valid_i,
+      .fu_data_i      (mult_data),
+      .result_o       (mult_result),
+      .mult_valid_o   (mult_valid),
+      .mult_ready_o   (mult_ready),
+      .mult_trans_id_o(mult_trans_id)
+  );
+
+  // ----------------
+  // FPU
+  // ----------------
+  generate
+    if (CVA6Cfg.FpPresent) begin : fpu_gen
+      fu_data_t fpu_data;
+      assign fpu_data = fpu_valid_i ? fu_data_i : '0;
+
+      fpu_wrap #(
+          .CVA6Cfg(CVA6Cfg)
+      ) fpu_i (
+          .clk_i,
+          .rst_ni,
+          .flush_i,
+          .fpu_valid_i,
+          .fpu_ready_o,
+          .fu_data_i(fpu_data),
+          .fpu_fmt_i,
+          .fpu_rm_i,
+          .fpu_frm_i,
+          .fpu_prec_i,
+          .fpu_trans_id_o,
+          .result_o (fpu_result_o),
+          .fpu_valid_o,
+          .fpu_exception_o
+      );
+    end else begin : no_fpu_gen
+      assign fpu_ready_o     = '0;
+      assign fpu_trans_id_o  = '0;
+      assign fpu_result_o    = '0;
+      assign fpu_valid_o     = '0;
+      assign fpu_exception_o = '0;
+    end
+  endgenerate
+
+  // ----------------
+  // Load-Store Unit
+  // ----------------
+  fu_data_t lsu_data;
+  assign lsu_data = lsu_valid_i ? fu_data_i : '0;  // input silencing of the LSU
+
+  load_store_unit #(
+      .CVA6Cfg   (CVA6Cfg),
+      .ASID_WIDTH(ASID_WIDTH)
+  ) lsu_i (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .stall_st_pending_i,
+      .no_st_pending_o,
+      .fu_data_i            (lsu_data),
+      .lsu_ready_o,
+      .lsu_valid_i,
+      .load_trans_id_o,
+      .load_result_o,
+      .load_valid_o,
+      .load_exception_o,
+      .store_trans_id_o,
+      .store_result_o,
+      .store_valid_o,
+      .store_exception_o,
+      .commit_i             (lsu_commit_i),
+      .commit_ready_o       (lsu_commit_ready_o),
+      .commit_tran_id_i,
+      .enable_translation_i,
+      .en_ld_st_translation_i,
+      .icache_areq_i,
+      .icache_areq_o,
+      .priv_lvl_i,
+      .ld_st_priv_lvl_i,
+      .sum_i,
+      .mxr_i,
+      .satp_ppn_i,
+      .asid_i,
+      .asid_to_be_flushed_i (asid_to_be_flushed),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed),
+      .flush_tlb_i,
+      .itlb_miss_o,
+      .dtlb_miss_o,
+      .dcache_req_ports_i,
+      .dcache_req_ports_o,
+      .dcache_wbuffer_empty_i,
+      .dcache_wbuffer_not_ni_i,
+      .amo_valid_commit_i,
+      .amo_req_o,
+      .amo_resp_i,
+      .pmpcfg_i,
+      .pmpaddr_i,
+      .rvfi_lsu_ctrl_o,
+      .rvfi_mem_paddr_o
+  );
+
+  if (CVA6Cfg.CvxifEn) begin : gen_cvxif
+    // input silencing of the CoreV-X-Interface unit, same scheme as ALU/MULT/FPU/LSU
+    fu_data_t cvxif_data;
+    assign cvxif_data = x_valid_i ? fu_data_i : '0;
+    cvxif_fu #(
+        .CVA6Cfg(CVA6Cfg)
+    ) cvxif_fu_i (
+        .clk_i,
+        .rst_ni,
+        .fu_data_i (cvxif_data),
+        .priv_lvl_i(ld_st_priv_lvl_i),
+        .x_valid_i,
+        .x_ready_o,
+        .x_off_instr_i,
+        .x_trans_id_o,
+        .x_exception_o,
+        .x_result_o,
+        .x_valid_o,
+        .x_we_o,
+        .cvxif_req_o,
+        .cvxif_resp_i
+    );
+  end else begin : gen_no_cvxif
+    // tie off every CoreV-X-Interface output so that none is left undriven
+    assign x_ready_o     = '0;
+    assign cvxif_req_o   = '0;
+    assign x_trans_id_o  = '0;
+    assign x_exception_o = '0;
+    assign x_result_o    = '0;
+    assign x_valid_o     = '0;
+    assign x_we_o        = '0;
+  end
+
+  if (CVA6Cfg.RVS) begin : gen_sfence_vma
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        current_instruction_is_sfence_vma <= 1'b0;
+      end else begin
+        if (flush_i) begin
+          current_instruction_is_sfence_vma <= 1'b0;
+        end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin
+          current_instruction_is_sfence_vma <= 1'b1;
+        end
+      end
+    end
+
+    // This process stores the rs1 and rs2 parameters of a SFENCE_VMA instruction.
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        asid_to_be_flushed  <= '0;
+        vaddr_to_be_flushed <= '0;
+        // if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen
+      end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin
+        vaddr_to_be_flushed <= rs1_forwarding_i;
+        asid_to_be_flushed  <= rs2_forwarding_i[ASID_WIDTH-1:0];
+      end
+    end
+  end else begin : gen_no_sfence_vma
+    assign current_instruction_is_sfence_vma = 1'b0;
+    assign asid_to_be_flushed                = '0;
+    assign vaddr_to_be_flushed               = '0;
+  end
+
+endmodule
diff --git a/test/type_param/core/fpu_wrap.sv b/test/type_param/core/fpu_wrap.sv
new file mode 100644
index 0000000..9219029
--- /dev/null
+++ b/test/type_param/core/fpu_wrap.sv
@@ -0,0 +1,568 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Stefan Mach, ETH Zurich
+// Date: 12.04.2018
+// Description: Wrapper for the floating-point unit
+//              Translates fu_data_i / fpu_*_i inputs into fpnew_top control signals and
+//              buffers one request in a hold register while fpnew_top is not ready.
+module fpu_wrap
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic     clk_i,  // clock for the hold register / FSM and fpnew_top
+    input  logic     rst_ni,  // asynchronous, active-low reset
+    input  logic     flush_i,  // forces the input FSM back to READY, also forwarded to fpnew_top
+    input  logic     fpu_valid_i,  // upstream request valid
+    output logic     fpu_ready_o,  // upstream ready ("token" given back to issue)
+    input  fu_data_t fu_data_i,  // operation, operands a/b, imm (used as operand c), trans_id, fu
+
+    input  logic       [              1:0] fpu_fmt_i,  // selects the FP destination format
+    input  logic       [              2:0] fpu_rm_i,  // rounding mode field; also carries op/AH/R encodings
+    input  logic       [              2:0] fpu_frm_i,  // rounding mode used when fpu_rm_i is out of 000..100 or op is vectorial
+    input  logic       [              6:0] fpu_prec_i,  // NOTE(review): not referenced anywhere in this module
+    output logic       [TRANS_ID_BITS-1:0] fpu_trans_id_o,  // tag returned by fpnew_top
+    output logic       [ CVA6Cfg.FLen-1:0] result_o,  // result returned by fpnew_top
+    output logic                           fpu_valid_o,  // result valid (fpnew_top out_valid)
+    output exception_t                     fpu_exception_o  // cause carries the status flags, valid is always 0
+);
+
+  // This is a workaround (presumably: keep every FLen-sized signal inside the generate block
+  // below), otherwise compilation might issue an error if FLEN=0
+  enum logic {
+    READY,
+    STALL
+  }
+      state_q, state_d;  // input-side handshake FSM state (registered / next)
+  if (CVA6Cfg.FpPresent) begin : fpu_gen  // outputs are only driven when FpPresent is set
+    logic [CVA6Cfg.FLen-1:0] operand_a_i;
+    logic [CVA6Cfg.FLen-1:0] operand_b_i;
+    logic [CVA6Cfg.FLen-1:0] operand_c_i;
+    assign operand_a_i = fu_data_i.operand_a[CVA6Cfg.FLen-1:0];
+    assign operand_b_i = fu_data_i.operand_b[CVA6Cfg.FLen-1:0];
+    assign operand_c_i = fu_data_i.imm[CVA6Cfg.FLen-1:0];  // third operand travels in the imm field
+
+    //-----------------------------------
+    // FPnew config from FPnew package
+    //-----------------------------------
+    localparam OPBITS = fpnew_pkg::OP_BITS;
+    localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
+    localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
+
+    // Features (enabled formats, vectors etc.)
+    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
+        Width: unsigned'(riscv::XLEN),  // parameterized using XLEN
+        EnableVectors: CVA6Cfg.XFVec,
+        EnableNanBox: 1'b1,
+        FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
+        IntFmtMask: {
+          CVA6Cfg.XFVec && CVA6Cfg.XF8,
+          CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
+          1'b1,
+          1'b1
+        }
+    };
+
+    // Implementation (number of registers etc)
+    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
+        PipeRegs: '{  // FP32, FP64, FP16, FP8, FP16alt
+            '{
+                unsigned'(LAT_COMP_FP32),
+                unsigned'(LAT_COMP_FP64),
+                unsigned'(LAT_COMP_FP16),
+                unsigned'(LAT_COMP_FP8),
+                unsigned'(LAT_COMP_FP16ALT)
+            },  // ADDMUL
+            '{default: unsigned'(LAT_DIVSQRT)},  // DIVSQRT
+            '{default: unsigned'(LAT_NONCOMP)},  // NONCOMP
+            '{default: unsigned'(LAT_CONV)}
+        },  // CONV
+        UnitTypes: '{
+            '{default: fpnew_pkg::PARALLEL},  // ADDMUL
+            '{default: fpnew_pkg::MERGED},  // DIVSQRT
+            '{default: fpnew_pkg::PARALLEL},  // NONCOMP
+            '{default: fpnew_pkg::MERGED}
+        },  // CONV
+        PipeConfig: fpnew_pkg::DISTRIBUTED
+    };
+
+    //-------------------------------------------------
+    // Inputs to the FPU and protocol inversion buffer
+    //-------------------------------------------------
+    logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
+    logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
+    logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
+    logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
+    logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
+    logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
+    logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
+    logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
+    logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
+    logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
+
+    logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;
+
+    logic fpu_in_ready, fpu_in_valid;
+    logic fpu_out_ready, fpu_out_valid;
+
+    logic [4:0] fpu_status;
+
+    // FSM to handle protocol inversion
+    logic hold_inputs;  // load the *_q hold registers with the translated *_d values
+    logic use_hold;  // feed fpnew_top from the *_q hold registers instead of *_d
+
+    //-----------------------------
+    // Translate inputs
+    //-----------------------------
+
+    always_comb begin : input_translation
+
+      automatic logic vec_replication;  // control honoring of replication flag
+      automatic logic replicate_c;  // replicate operand C instead of B (for ADD/SUB)
+      automatic logic check_ah;  // Decide for AH from RM field encoding
+
+      // Default Values
+      operand_a_d     = operand_a_i;
+      operand_b_d     = operand_b_i;  // immediates come through this port unless used as operand
+      operand_c_d     = operand_c_i;  // immediates come through this port unless used as operand
+      fpu_op_d        = fpnew_pkg::SGNJ;  // sign injection by default
+      fpu_op_mod_d    = 1'b0;
+      fpu_dstfmt_d    = fpnew_pkg::FP32;
+      fpu_ifmt_d      = fpnew_pkg::INT32;
+      fpu_rm_d        = fpu_rm_i;
+      fpu_vec_op_d    = fu_data_i.fu == FPU_VEC;
+      fpu_tag_d       = fu_data_i.trans_id;
+      vec_replication = fpu_rm_i[0];  // replication bit is sent via rm field
+      replicate_c     = 1'b0;
+      check_ah        = 1'b0;  // whether set scalar AH encoding from MSB of rm_i
+
+      // Scalar Rounding Modes - some ops encode inside RM but use smaller range
+      if (!(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i;
+
+      // Vectorial ops always consult FRM
+      if (fpu_vec_op_d) fpu_rm_d = fpu_frm_i;
+
+      // Formats
+      unique case (fpu_fmt_i)
+        // FP32
+        2'b00:   fpu_dstfmt_d = fpnew_pkg::FP32;
+        // FP64 or FP16ALT (vectorial)
+        2'b01:   fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
+        // FP16 or FP16ALT (scalar)
+        2'b10: begin
+          if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
+          else fpu_dstfmt_d = fpnew_pkg::FP16;
+        end
+        // FP8
+        default: fpu_dstfmt_d = fpnew_pkg::FP8;
+      endcase
+
+      // By default, set src=dst
+      fpu_srcfmt_d = fpu_dstfmt_d;
+
+      // Operations (this can modify the rounding mode field and format!)
+      unique case (fu_data_i.operation)
+        // Addition
+        FADD: begin
+          fpu_op_d    = fpnew_pkg::ADD;
+          replicate_c = 1'b1; // second operand is in C
+        end
+        // Subtraction is modified ADD
+        FSUB: begin
+          fpu_op_d     = fpnew_pkg::ADD;
+          fpu_op_mod_d = 1'b1;
+          replicate_c  = 1'b1;  // second operand is in C
+        end
+        // Multiplication
+        FMUL:    fpu_op_d = fpnew_pkg::MUL;
+        // Division
+        FDIV:    fpu_op_d = fpnew_pkg::DIV;
+        // Min/Max - OP is encoded in rm (000-001)
+        FMIN_MAX: begin
+          fpu_op_d = fpnew_pkg::MINMAX;
+          fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
+          check_ah = 1'b1;  // AH has RM MSB encoding
+        end
+        // Square Root
+        FSQRT:   fpu_op_d = fpnew_pkg::SQRT;
+        // Fused Multiply Add
+        FMADD:   fpu_op_d = fpnew_pkg::FMADD;
+        // Fused Multiply Subtract is modified FMADD
+        FMSUB: begin
+          fpu_op_d     = fpnew_pkg::FMADD;
+          fpu_op_mod_d = 1'b1;
+        end
+        // Fused Negated Multiply Subtract
+        FNMSUB:  fpu_op_d = fpnew_pkg::FNMSUB;
+        // Fused Negated Multiply Add is modified FNMSUB
+        FNMADD: begin
+          fpu_op_d     = fpnew_pkg::FNMSUB;
+          fpu_op_mod_d = 1'b1;
+        end
+        // Float to Int Cast - Op encoded in lowest two imm bits or rm
+        FCVT_F2I: begin
+          fpu_op_d = fpnew_pkg::F2I;
+          // Vectorial Ops encoded in R bit
+          if (fpu_vec_op_d) begin
+            fpu_op_mod_d    = fpu_rm_i[0];
+            vec_replication = 1'b0;  // no replication, R bit used for op
+            unique case (fpu_fmt_i)
+              2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
+              2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
+              2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
+            endcase
+            // Scalar casts encoded in imm
+          end else begin
+            fpu_op_mod_d = operand_c_i[0];
+            if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
+            else fpu_ifmt_d = fpnew_pkg::INT32;
+          end
+        end
+        // Int to Float Cast - Op encoded in lowest two imm bits or rm
+        FCVT_I2F: begin
+          fpu_op_d = fpnew_pkg::I2F;
+          // Vectorial Ops encoded in R bit
+          if (fpu_vec_op_d) begin
+            fpu_op_mod_d    = fpu_rm_i[0];
+            vec_replication = 1'b0;  // no replication, R bit used for op
+            unique case (fpu_fmt_i)
+              2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
+              2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
+              2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
+            endcase
+            // Scalar casts encoded in imm
+          end else begin
+            fpu_op_mod_d = operand_c_i[0];
+            if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
+            else fpu_ifmt_d = fpnew_pkg::INT32;
+          end
+        end
+        // Float to Float Cast - Source format encoded in lowest two/three imm bits
+        FCVT_F2F: begin
+          fpu_op_d = fpnew_pkg::F2F;
+          // Vectorial ops encoded in lowest two imm bits
+          if (fpu_vec_op_d) begin
+            vec_replication = 1'b0;  // no replication for casts (not needed)
+            unique case (operand_c_i[1:0])
+              2'b00: fpu_srcfmt_d = fpnew_pkg::FP32;
+              2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
+              2'b10: fpu_srcfmt_d = fpnew_pkg::FP16;
+              2'b11: fpu_srcfmt_d = fpnew_pkg::FP8;
+            endcase
+            // Scalar ops encoded in lowest three imm bits
+          end else begin
+            unique case (operand_c_i[2:0])
+              3'b000:  fpu_srcfmt_d = fpnew_pkg::FP32;
+              3'b001:  fpu_srcfmt_d = fpnew_pkg::FP64;
+              3'b010:  fpu_srcfmt_d = fpnew_pkg::FP16;
+              3'b110:  fpu_srcfmt_d = fpnew_pkg::FP16ALT;
+              3'b011:  fpu_srcfmt_d = fpnew_pkg::FP8;
+              default: ;  // Do nothing
+            endcase
+          end
+        end
+        // Scalar Sign Injection - op encoded in rm (000-010)
+        FSGNJ: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
+          check_ah = 1'b1;  // AH has RM MSB encoding
+        end
+        // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
+        FMV_F2X: begin
+          fpu_op_d        = fpnew_pkg::SGNJ;
+          fpu_rm_d        = 3'b011;  // passthrough without checking nan-box
+          fpu_op_mod_d    = 1'b1;  // no NaN-Boxing
+          check_ah        = 1'b1;  // AH has RM MSB encoding
+          vec_replication = 1'b0;  // no replication, we set second operand
+        end
+        // Move from GPR to FPR - mapped to SGNJ-passthrough (acts as a NOP) since no recoding
+        FMV_X2F: begin
+          fpu_op_d        = fpnew_pkg::SGNJ;
+          fpu_rm_d        = 3'b011;  // passthrough without checking nan-box
+          check_ah        = 1'b1;  // AH has RM MSB encoding
+          vec_replication = 1'b0;  // no replication, we set second operand
+        end
+        // Scalar Comparisons - op encoded in rm (000-010)
+        FCMP: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
+          check_ah = 1'b1;  // AH has RM MSB encoding
+        end
+        // Classification
+        FCLASS: begin
+          fpu_op_d = fpnew_pkg::CLASSIFY;
+          fpu_rm_d = {
+            1'b0, fpu_rm_i[1:0]
+          };  // mask out AH encoding bit - CLASS doesn't care anyways
+          check_ah = 1'b1;  // AH has RM MSB encoding
+        end
+        // Vectorial Minimum - set up scalar encoding in rm
+        VFMIN: begin
+          fpu_op_d = fpnew_pkg::MINMAX;
+          fpu_rm_d = 3'b000;  // min
+        end
+        // Vectorial Maximum - set up scalar encoding in rm
+        VFMAX: begin
+          fpu_op_d = fpnew_pkg::MINMAX;
+          fpu_rm_d = 3'b001;  // max
+        end
+        // Vectorial Sign Injection - set up scalar encoding in rm
+        VFSGNJ: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = 3'b000;  // sgnj
+        end
+        // Vectorial Negated Sign Injection - set up scalar encoding in rm
+        VFSGNJN: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = 3'b001;  // sgnjn
+        end
+        // Vectorial Xored Sign Injection - set up scalar encoding in rm
+        VFSGNJX: begin
+          fpu_op_d = fpnew_pkg::SGNJ;
+          fpu_rm_d = 3'b010;  // sgnjx
+        end
+        // Vectorial Equals - set up scalar encoding in rm
+        VFEQ: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_rm_d = 3'b010;  // eq
+        end
+        // Vectorial Not Equals - set up scalar encoding in rm
+        VFNE: begin
+          fpu_op_d     = fpnew_pkg::CMP;
+          fpu_op_mod_d = 1'b1;  // invert output
+          fpu_rm_d     = 3'b010;  // eq
+        end
+        // Vectorial Less Than - set up scalar encoding in rm
+        VFLT: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_rm_d = 3'b001;  // lt
+        end
+        // Vectorial Greater or Equal - set up scalar encoding in rm
+        VFGE: begin
+          fpu_op_d     = fpnew_pkg::CMP;
+          fpu_op_mod_d = 1'b1;  // invert output
+          fpu_rm_d     = 3'b001;  // lt
+        end
+        // Vectorial Less or Equal - set up scalar encoding in rm
+        VFLE: begin
+          fpu_op_d = fpnew_pkg::CMP;
+          fpu_rm_d = 3'b000;  // le
+        end
+        // Vectorial Greater Than - set up scalar encoding in rm
+        VFGT: begin
+          fpu_op_d     = fpnew_pkg::CMP;
+          fpu_op_mod_d = 1'b1;  // invert output
+          fpu_rm_d     = 3'b000;  // le
+        end
+        // Vectorial Convert-and-Pack from FP32, lower 4 entries
+        VFCPKAB_S: begin
+          fpu_op_d        = fpnew_pkg::CPKAB;
+          fpu_op_mod_d    = fpu_rm_i[0];  // A/B selection from R bit
+          vec_replication = 1'b0;  // no replication, R bit used for op
+          fpu_srcfmt_d    = fpnew_pkg::FP32;  // Cast from FP32
+        end
+        // Vectorial Convert-and-Pack from FP32, upper 4 entries
+        VFCPKCD_S: begin
+          fpu_op_d        = fpnew_pkg::CPKCD;
+          fpu_op_mod_d    = fpu_rm_i[0];  // C/D selection from R bit
+          vec_replication = 1'b0;  // no replication, R bit used for op
+          fpu_srcfmt_d    = fpnew_pkg::FP32;  // Cast from FP32
+        end
+        // Vectorial Convert-and-Pack from FP64, lower 4 entries
+        VFCPKAB_D: begin
+          fpu_op_d        = fpnew_pkg::CPKAB;
+          fpu_op_mod_d    = fpu_rm_i[0];  // A/B selection from R bit
+          vec_replication = 1'b0;  // no replication, R bit used for op
+          fpu_srcfmt_d    = fpnew_pkg::FP64;  // Cast from FP64
+        end
+        // Vectorial Convert-and-Pack from FP64, upper 4 entries
+        VFCPKCD_D: begin
+          fpu_op_d        = fpnew_pkg::CPKCD;
+          fpu_op_mod_d    = fpu_rm_i[0];  // C/D selection from R bit
+          vec_replication = 1'b0;  // no replication, R bit used for op
+          fpu_srcfmt_d    = fpnew_pkg::FP64;  // Cast from FP64
+        end
+        // No changes per default
+        default: ;  //nothing
+      endcase
+
+      // Scalar AH encoding: for ops that set check_ah, rm_i[2] selects FP16ALT as dst format (src format is left untouched)
+      if (!fpu_vec_op_d && check_ah) if (fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
+
+      // Replication: copy the lowest element of operand C (or B) across the whole operand
+      if (fpu_vec_op_d && vec_replication) begin
+        if (replicate_c) begin
+          unique case (fpu_dstfmt_d)
+            fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
+            fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
+            operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
+            fpnew_pkg::FP8:
+            operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
+            default: ;  // Do nothing
+          endcase  // fpu_dstfmt_d
+        end else begin
+          unique case (fpu_dstfmt_d)
+            fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
+            fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
+            operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
+            fpnew_pkg::FP8:
+            operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
+            default: ;  // Do nothing
+          endcase  // fpu_dstfmt_d
+        end
+      end
+    end
+
+
+    //---------------------------------------------------------
+    // Upstream protocol inversion: InValid depends on InReady
+    //---------------------------------------------------------
+
+    always_comb begin : p_inputFSM
+      // Default Values
+      fpu_ready_o  = 1'b0;
+      fpu_in_valid = 1'b0;
+      hold_inputs  = 1'b0;  // hold register disabled
+      use_hold     = 1'b0;  // inputs go directly to unit
+      state_d      = state_q;  // stay in the same state
+
+      // FSM
+      unique case (state_q)
+        // Default state, ready for instructions
+        READY: begin
+          fpu_ready_o  = 1'b1;  // Act as if FPU ready
+          fpu_in_valid = fpu_valid_i;  // Forward input valid to FPU
+          // There is a transaction but the FPU can't handle it
+          if (fpu_valid_i & ~fpu_in_ready) begin
+            fpu_ready_o = 1'b0;  // No token given to Issue
+            hold_inputs = 1'b1;  // save inputs to the holding register
+            state_d     = STALL;  // stall future incoming requests
+          end
+        end
+        // We're stalling the upstream (ready=0)
+        STALL: begin
+          fpu_in_valid = 1'b1;  // we have data for the FPU
+          use_hold     = 1'b1;  // the data comes from the hold reg
+          // Wait until it's consumed
+          if (fpu_in_ready) begin
+            fpu_ready_o = 1'b1;  // Give a token to issue
+            state_d     = READY;  // accept future requests
+          end
+        end
+        // Default: emit default values
+        default: ;
+      endcase
+
+      // Flushing will override issue and go back to idle
+      if (flush_i) begin
+        state_d = READY;
+      end
+
+    end
+
+    // Buffer register and FSM state holding
+    always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
+      if (~rst_ni) begin
+        state_q      <= READY;
+        operand_a_q  <= '0;
+        operand_b_q  <= '0;
+        operand_c_q  <= '0;
+        fpu_op_q     <= '0;
+        fpu_op_mod_q <= '0;
+        fpu_srcfmt_q <= '0;
+        fpu_dstfmt_q <= '0;
+        fpu_ifmt_q   <= '0;
+        fpu_rm_q     <= '0;
+        fpu_vec_op_q <= '0;
+        fpu_tag_q    <= '0;
+      end else begin
+        state_q <= state_d;
+        // Hold register is loaded only when the FSM asserts hold_inputs
+        if (hold_inputs) begin
+          operand_a_q  <= operand_a_d;
+          operand_b_q  <= operand_b_d;
+          operand_c_q  <= operand_c_d;
+          fpu_op_q     <= fpu_op_d;
+          fpu_op_mod_q <= fpu_op_mod_d;
+          fpu_srcfmt_q <= fpu_srcfmt_d;
+          fpu_dstfmt_q <= fpu_dstfmt_d;
+          fpu_ifmt_q   <= fpu_ifmt_d;
+          fpu_rm_q     <= fpu_rm_d;
+          fpu_vec_op_q <= fpu_vec_op_d;
+          fpu_tag_q    <= fpu_tag_d;
+        end
+      end
+    end
+
+    // Select FPU input data: from register if valid data in register, else directly from input
+    assign operand_a  = use_hold ? operand_a_q : operand_a_d;
+    assign operand_b  = use_hold ? operand_b_q : operand_b_d;
+    assign operand_c  = use_hold ? operand_c_q : operand_c_d;
+    assign fpu_op     = use_hold ? fpu_op_q : fpu_op_d;
+    assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
+    assign fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d;
+    assign fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d;
+    assign fpu_ifmt   = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
+    assign fpu_rm     = use_hold ? fpu_rm_q : fpu_rm_d;
+    assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
+    assign fpu_tag    = use_hold ? fpu_tag_q : fpu_tag_d;
+
+    // Consolidate operands
+    logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands;
+
+    assign fpu_operands[0] = operand_a;
+    assign fpu_operands[1] = operand_b;
+    assign fpu_operands[2] = operand_c;
+
+    //---------------
+    // FPU instance
+    //---------------
+
+    fpnew_top #(
+        .Features      (FPU_FEATURES),
+        .Implementation(FPU_IMPLEMENTATION),
+        .TagType       (logic [TRANS_ID_BITS-1:0])
+    ) i_fpnew_bulk (
+        .clk_i,
+        .rst_ni,
+        .operands_i    (fpu_operands),
+        .rnd_mode_i    (fpnew_pkg::roundmode_e'(fpu_rm)),
+        .op_i          (fpnew_pkg::operation_e'(fpu_op)),
+        .op_mod_i      (fpu_op_mod),
+        .src_fmt_i     (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
+        .dst_fmt_i     (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
+        .int_fmt_i     (fpnew_pkg::int_format_e'(fpu_ifmt)),
+        .vectorial_op_i(fpu_vec_op),
+        .tag_i         (fpu_tag),
+        .simd_mask_i   (1'b1),  // NOTE(review): 1-bit constant - confirm against the fpnew_top port width
+        .in_valid_i    (fpu_in_valid),
+        .in_ready_o    (fpu_in_ready),
+        .flush_i,
+        .result_o,
+        .status_o      (fpu_status),
+        .tag_o         (fpu_trans_id_o),
+        .out_valid_o   (fpu_out_valid),
+        .out_ready_i   (fpu_out_ready),
+        .busy_o        (  /* unused */)
+    );
+
+    // Pack status flag into exception cause, tval ignored in wb, exception is always invalid
+    assign fpu_exception_o.cause = {59'h0, fpu_status};  // NOTE(review): 59+5 = 64 bits - confirm cause width when XLEN != 64
+    assign fpu_exception_o.valid = 1'b0;
+
+    // Downstream write port is dedicated to FPU and always ready
+    assign fpu_out_ready = 1'b1;
+
+    // Downstream valid from unit
+    assign fpu_valid_o = fpu_out_valid;
+
+  end  // fpu_gen
+endmodule
diff --git a/test/type_param/core/frontend/bht.sv b/test/type_param/core/frontend/bht.sv
new file mode 100644
index 0000000..bcfb78c
--- /dev/null
+++ b/test/type_param/core/frontend/bht.sv
@@ -0,0 +1,215 @@
+// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
+// Copyright 2023 - Thales for additional contribution.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 2.0 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.02.2018
+// Migrated: Luis Vitorio Cargnini, IEEE
+// Date: 09.06.2018
+// FPGA optimization: Sebastien Jacq, Thales
+// Date: 2023-01-30
+
+// Branch history table (BHT): one valid bit plus a 2-bit saturating counter per entry.
+// Flip-flop array when !ariane_pkg::FPGA_EN, one AsyncThreePortRam per fetch slot otherwise.
+module bht #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned NR_ENTRIES = 1024
+) (
+    input  logic                                                          clk_i,
+    input  logic                                                          rst_ni,
+    input  logic                                                          flush_i,
+    input  logic                                                          debug_mode_i,
+    input  logic                        [                riscv::VLEN-1:0] vpc_i,
+    input  ariane_pkg::bht_update_t                                       bht_update_i,
+    // we potentially need INSTR_PER_FETCH predictions/cycle
+    output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o
+);
+  // the lowest OFFSET address bits are not used for indexing (1 bit with RVC, 2 bits without)
+  localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
+  // re-shape the branch history table
+  localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
+  // number of bits needed to index the row
+  localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
+  localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
+  // number of bits we should use for prediction
+  localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
+  // we are not interested in all bits of the address
+  unread i_unread (.d_i(|vpc_i));
+
+  struct packed {
+    logic       valid;
+    logic [1:0] saturation_counter;
+  }
+      bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
+      bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
+
+  logic [$clog2(NR_ROWS)-1:0] index, update_pc;  // row selected by the fetch PC / by the update PC
+  logic [ROW_INDEX_BITS-1:0] update_row_index;  // slot inside the row addressed by the update PC
+
+  assign index     = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
+  assign update_pc = bht_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
+  if (CVA6Cfg.RVC) begin : gen_update_row_index
+    assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
+  end else begin
+    assign update_row_index = '0;
+  end
+
+  if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht  // ASIC TARGET
+
+    logic [1:0] saturation_counter;
+    // prediction assignment: every slot of the indexed row is driven out
+    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output
+      assign bht_prediction_o[i].valid = bht_q[index][i].valid;
+      assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1;
+    end
+
+    always_comb begin : update_bht
+      bht_d = bht_q;
+      saturation_counter = bht_q[update_pc][update_row_index].saturation_counter;
+
+      if ((bht_update_i.valid && CVA6Cfg.DebugEn && !debug_mode_i) || (bht_update_i.valid && !CVA6Cfg.DebugEn)) begin
+        bht_d[update_pc][update_row_index].valid = 1'b1;
+
+        if (saturation_counter == 2'b11) begin
+          // we can safely decrease it
+          if (!bht_update_i.taken)
+            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
+          // then check if it saturated in the negative regime e.g.: branch not taken
+        end else if (saturation_counter == 2'b00) begin
+          // we can safely increase it
+          if (bht_update_i.taken)
+            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
+        end else begin  // otherwise we are not in any boundaries and can decrease or increase it
+          if (bht_update_i.taken)
+            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
+          else bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
+        end
+      end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        for (int unsigned i = 0; i < NR_ROWS; i++) begin
+          for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
+            bht_q[i][j] <= '0;
+          end
+        end
+      end else begin
+        // evict all entries: clear valid and preset the counter to 2'b10
+        if (flush_i) begin
+          for (int i = 0; i < NR_ROWS; i++) begin
+            for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
+              bht_q[i][j].valid <= 1'b0;
+              bht_q[i][j].saturation_counter <= 2'b10;
+            end
+          end
+        end else begin
+          bht_q <= bht_d;
+        end
+      end
+    end
+
+  end else begin : gen_fpga_bht  //FPGA TARGETS
+
+    // number of bits per word in the bram
+    localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t);
+    logic             [                             ROW_INDEX_BITS-1:0] row_index;
+    logic             [                ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we;
+    logic             [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0;
+    logic             [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1;
+    logic             [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address;
+    logic             [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata;
+    logic             [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0;
+    logic             [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1;
+
+    ariane_pkg::bht_t [                ariane_pkg::INSTR_PER_FETCH-1:0] bht;
+    ariane_pkg::bht_t [                ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated;
+
+    if (CVA6Cfg.RVC) begin : gen_row_index
+      assign row_index = vpc_i[ROW_ADDR_BITS+OFFSET-1:OFFSET];
+    end else begin
+      assign row_index = '0;
+    end
+
+    // -------------------------
+    // prediction assignment & update Branch History Table
+    // -------------------------
+    always_comb begin : prediction_update_bht
+      bht_ram_we = '0;
+      bht_ram_read_address_0 = '0;
+      bht_ram_read_address_1 = '0;
+      bht_ram_write_address = '0;
+      bht_ram_wdata = '0;
+      bht_updated = '0;
+      bht = '0;
+      bht_prediction_o = '0;  // default for slots != row_index; without it always_comb infers latches
+      for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
+        if (row_index == i) begin  // only the slot addressed by the fetch PC reads its RAM (port 0)
+          bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
+          bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2];
+          bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1];
+        end
+      end
+
+      if (bht_update_i.valid && !debug_mode_i) begin  // NOTE(review): unlike gen_asic_bht, CVA6Cfg.DebugEn is not consulted
+        for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
+          if (update_row_index == i) begin  // read-modify-write of the updated slot via read port 1
+            bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
+            bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS+:2];
+
+            if (bht[i].saturation_counter == 2'b11) begin
+              // we can safely decrease it
+              if (!bht_update_i.taken)
+                bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
+              else bht_updated[i].saturation_counter = 2'b11;
+              // then check if it saturated in the negative regime e.g.: branch not taken
+            end else if (bht[i].saturation_counter == 2'b00) begin
+              // we can safely increase it
+              if (bht_update_i.taken)
+                bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
+              else bht_updated[i].saturation_counter = 2'b00;
+            end else begin // otherwise we are not in any boundaries and can decrease or increase it
+              if (bht_update_i.taken)
+                bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
+              else bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
+            end
+
+            bht_updated[i].valid = 1'b1;
+            bht_ram_we[i] = 1'b1;
+            bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
+            //bht_ram_wdata[(i+1)*BRAM_WORD_BITS-1] =  1'b1; //valid
+            bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
+              bht_updated[i].valid, bht_updated[i].saturation_counter
+            };
+
+          end
+        end
+      end
+    end
+
+    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram
+      AsyncThreePortRam #(
+          .ADDR_WIDTH($clog2(NR_ROWS)),
+          .DATA_DEPTH(NR_ROWS),
+          .DATA_WIDTH(BRAM_WORD_BITS)
+      ) i_bht_ram (
+          .Clk_CI     (clk_i),
+          .WrEn_SI    (bht_ram_we[i]),
+          .WrAddr_DI  (bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
+          .WrData_DI  (bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
+          .RdAddr_DI_0(bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
+          .RdAddr_DI_1(bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
+          .RdData_DO_0(bht_ram_rdata_0[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
+          .RdData_DO_1(bht_ram_rdata_1[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
+      );
+    end
+
+  end
+endmodule
diff --git a/test/type_param/core/frontend/btb.sv b/test/type_param/core/frontend/btb.sv
new file mode 100644
index 0000000..9500f37
--- /dev/null
+++ b/test/type_param/core/frontend/btb.sv
@@ -0,0 +1,185 @@
+// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 2.0 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.02.2018
+// Migrated: Luis Vitorio Cargnini, IEEE
+// Date: 09.06.2018
+//
+// Additional contributions by:
+//         Sebastien Jacq, Thales - sjthales on github.com
+//         Date: 2022-12-01
+//
+// Description: This module is an adaptation of the BTB (Branch Target Buffer)
+//              module for both FPGA and ASIC targets.
+//              On FPGA the prediction target address is stored in BRAM, while
+//              in the original module it is stored in D flip-flops.
+//              For FPGA flushing is not supported because the frontend module
+//              flushing signal is not connected.
+//
+// branch target buffer
+module btb #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int NR_ENTRIES = 8  // total entries; organised as NR_ENTRIES / INSTR_PER_FETCH rows
+) (
+    input logic clk_i,        // Clock
+    input logic rst_ni,       // Asynchronous reset active low
+    input logic flush_i,      // flush the btb (only read by the ASIC flip-flop implementation)
+    input logic debug_mode_i, // while set, updates are suppressed
+
+    input logic [riscv::VLEN-1:0] vpc_i,  // virtual PC from IF stage
+    input ariane_pkg::btb_update_t btb_update_i,  // update btb with this information
+    output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb
+);
+  // the last bit is always zero, we don't need it for indexing
+  localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
+  // re-shape the branch target buffer into rows of INSTR_PER_FETCH entries
+  localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
+  // number of bits needed to index the row
+  localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
+  localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
+  // number of bits we should use for prediction
+  localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
+  // prevent aliasing to degrade performance (not referenced anywhere in this module)
+  localparam ANTIALIAS_BITS = 8;
+  // number of bits per word in the bram
+  localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);
+  // we are not interested in all bits of the address
+  unread i_unread (.d_i(|vpc_i));
+
+  // index: row selected for lookup; update_pc: row selected for an update
+  logic [$clog2(NR_ROWS)-1:0] index, update_pc;
+  logic [ROW_INDEX_BITS-1:0] update_row_index;
+
+  assign index     = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
+  assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
+  if (CVA6Cfg.RVC) begin : gen_update_row_index
+    assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
+  end else begin
+    assign update_row_index = '0;
+  end
+
+  if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb  //FPGA TARGETS
+    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
+    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
+    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
+    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
+    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;
+
+    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
+    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
+    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
+    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;
+
+    // output matching prediction; each lane drives only its own slice of the shared vectors
+    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
+      assign btb_ram_csel_prediction[i] = 1'b1;
+      assign btb_ram_we_prediction[i] = 1'b0;
+      assign btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = '0;
+      assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
+      assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS];
+    end
+
+    // -------------------------
+    // Update Branch Prediction
+    // -------------------------
+    // update on a mis-predict
+    always_comb begin : update_branch_predict
+      btb_ram_csel_update = '0;
+      btb_ram_we_update = '0;
+      btb_ram_addr_update = '0;
+      btb_ram_wdata_update = '0;
+
+      if (btb_update_i.valid && !debug_mode_i) begin
+        for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
+          if (update_row_index == i) begin
+            btb_ram_csel_update[i] = 1'b1;
+            btb_ram_we_update[i] = 1'b1;
+            btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
+            btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
+              1'b1, btb_update_i.target_address
+            };
+          end
+        end
+      end
+    end
+
+    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
+      SyncDpRam #(
+          .ADDR_WIDTH($clog2(NR_ROWS)),
+          .DATA_DEPTH(NR_ROWS),
+          .DATA_WIDTH(BRAM_WORD_BITS),
+          .OUT_REGS  (0),
+          .SIM_INIT  (1)
+      ) i_btb_ram (
+          .Clk_CI    (clk_i),
+          .Rst_RBI   (rst_ni),
+          //----------------------------
+          .CSelA_SI  (btb_ram_csel_update[i]),
+          .WrEnA_SI  (btb_ram_we_update[i]),
+          .AddrA_DI  (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
+          .WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
+          .RdDataA_DO(),
+          //-----------------------------
+          .CSelB_SI  (btb_ram_csel_prediction[i]),
+          .WrEnB_SI  (btb_ram_we_prediction[i]),
+          .AddrB_DI  (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
+          .WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
+          .RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
+      );
+    end
+
+  end else begin : gen_asic_btb  // ASIC TARGET
+
+    // typedef for all branch target entries
+    // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
+    ariane_pkg::btb_prediction_t
+        btb_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
+        btb_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
+
+    // output matching prediction
+    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
+      assign btb_prediction_o[i] = btb_q[index][i];  // workaround
+    end
+
+    // -------------------------
+    // Update Branch Prediction
+    // -------------------------
+    // update on a mis-predict
+    always_comb begin : update_branch_predict
+      btb_d = btb_q;
+
+      if (btb_update_i.valid && !debug_mode_i) begin
+        btb_d[update_pc][update_row_index].valid = 1'b1;
+        // the target address is simply updated
+        btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
+      end
+    end
+
+    // sequential process
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        // Clear every entry (all fields zero) on reset
+        for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0};
+      end else begin
+        // evict all entries
+        if (flush_i) begin
+          for (int i = 0; i < NR_ROWS; i++) begin
+            for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
+              btb_q[i][j].valid <= 1'b0;
+            end
+          end
+        end else begin
+          btb_q <= btb_d;
+        end
+      end
+    end
+  end
+endmodule
diff --git a/test/type_param/core/frontend/frontend.sv b/test/type_param/core/frontend/frontend.sv
new file mode 100644
index 0000000..8f2f50a
--- /dev/null
+++ b/test/type_param/core/frontend/frontend.sv
@@ -0,0 +1,516 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.02.2018
+// Description: Ariane Instruction Fetch Frontend
+//
+// This module interfaces with the instruction cache, handles control
+// change request from the back-end and does branch prediction.
+
+module frontend
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // flush request for PCGEN
+    input logic flush_bp_i,  // flush branch prediction
+    input logic halt_i,  // halt commit stage
+    input logic debug_mode_i,
+    // global input
+    input logic [riscv::VLEN-1:0] boot_addr_i,
+    // Set a new PC
+    // mispredict
+    input  bp_resolve_t        resolved_branch_i,  // from controller signaling a branch_predict -> update BTB
+    // from commit, when flushing the whole pipeline
+    input logic set_pc_commit_i,  // Take the PC from commit stage
+    input logic [riscv::VLEN-1:0] pc_commit_i,  // PC of instruction in commit stage
+    // CSR input
+    input logic [riscv::VLEN-1:0] epc_i,  // exception PC which we need to return to
+    input logic eret_i,  // return from exception
+    input logic [riscv::VLEN-1:0] trap_vector_base_i,  // base of trap vector
+    input logic ex_valid_i,  // exception is valid - from commit
+    input logic set_debug_pc_i,  // jump to debug address
+    // Instruction Fetch
+    output icache_dreq_t icache_dreq_o,
+    input icache_drsp_t icache_dreq_i,
+    // instruction output port -> to processor back-end
+    output fetch_entry_t       fetch_entry_o,       // fetch entry containing all relevant data for the ID stage
+    output logic fetch_entry_valid_o,  // instruction in IF is valid
+    input logic fetch_entry_ready_i  // ID acknowledged this instruction
+);
+  // Instruction Cache Registers, from I$
+  logic                            [                FETCH_WIDTH-1:0] icache_data_q;
+  logic                                                              icache_valid_q;
+  ariane_pkg::frontend_exception_t                                   icache_ex_valid_q;
+  logic                            [                riscv::VLEN-1:0] icache_vaddr_q;
+  logic                                                              instr_queue_ready;
+  logic                            [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed;
+  // upper-most branch-prediction from last cycle
+  btb_prediction_t                                                   btb_q;
+  bht_prediction_t                                                   bht_q;
+  // instruction fetch is ready
+  logic                                                              if_ready;
+  logic [riscv::VLEN-1:0] npc_d, npc_q;  // next PC
+
+  // indicates whether we come out of reset (then we need to load boot_addr_i)
+  logic                                           npc_rst_load_q;
+
+  logic                                           replay;
+  logic [                        riscv::VLEN-1:0] replay_addr;
+
+  // shift amount
+  logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
+  // address will always be 16 bit aligned, make this explicit here
+  if (CVA6Cfg.RVC) begin : gen_shamt
+    assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1];
+  end else begin
+    assign shamt = 1'b0;
+  end
+
+  // -----------------------
+  // Ctrl Flow Speculation
+  // -----------------------
+  // RVI ctrl flow prediction
+  logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump;
+  logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm;
+  // RVC branching
+  logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call;
+  logic            [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm;
+  // re-aligned instruction and address (coming from cache - combinationally)
+  logic            [INSTR_PER_FETCH-1:0][           31:0] instr;
+  logic            [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr;
+  logic            [INSTR_PER_FETCH-1:0]                  instruction_valid;
+  // BHT, BTB and RAS prediction
+  bht_prediction_t [INSTR_PER_FETCH-1:0]                  bht_prediction;
+  btb_prediction_t [INSTR_PER_FETCH-1:0]                  btb_prediction;
+  bht_prediction_t [INSTR_PER_FETCH-1:0]                  bht_prediction_shifted;
+  btb_prediction_t [INSTR_PER_FETCH-1:0]                  btb_prediction_shifted;
+  ras_t                                                   ras_predict;
+  logic            [    riscv::VLEN-1:0]                  vpc_btb;
+
+  // branch-predict update
+  logic                                                   is_mispredict;
+  logic ras_push, ras_pop;
+  logic [                riscv::VLEN-1:0] ras_update;
+
+  // Instruction FIFO
+  logic [                riscv::VLEN-1:0] predict_address;
+  cf_t  [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type;
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf;
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf;
+
+  logic                                   serving_unaligned;
+  // Re-align instructions
+  instr_realign #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_instr_realign (
+      .clk_i              (clk_i),
+      .rst_ni             (rst_ni),
+      .flush_i            (icache_dreq_o.kill_s2),
+      .valid_i            (icache_valid_q),
+      .serving_unaligned_o(serving_unaligned),
+      .address_i          (icache_vaddr_q),
+      .data_i             (icache_data_q),
+      .valid_o            (instruction_valid),
+      .addr_o             (addr),
+      .instr_o            (instr)
+  );
+
+  // --------------------
+  // Branch Prediction
+  // --------------------
+  // select the right branch prediction result
+  // in case we are serving an unaligned instruction in instr[0] we need to take
+  // the prediction we saved from the previous fetch
+  if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted
+    assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2(
+        INSTR_PER_FETCH
+    ):1]];
+    assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2(
+        INSTR_PER_FETCH
+    ):1]];
+
+    // for all other predictions we can use the generated address to index
+    // into the branch prediction data structures
+    for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address
+      assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
+      assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
+    end
+  end else begin
+    assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]];
+    assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][1]];
+  end
+  ;
+
+  // for the return address stack it doesn't matter as we have the
+  // address of the call/return already
+  logic bp_valid;
+
+  logic [INSTR_PER_FETCH-1:0] is_branch;
+  logic [INSTR_PER_FETCH-1:0] is_call;
+  logic [INSTR_PER_FETCH-1:0] is_jump;
+  logic [INSTR_PER_FETCH-1:0] is_return;
+  logic [INSTR_PER_FETCH-1:0] is_jalr;
+
+  for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
+    // branch history table -> BHT
+    assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]);
+    // function calls -> RAS
+    assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]);
+    // function return -> RAS
+    assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]);
+    // unconditional jumps with known target -> immediately resolved
+    assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
+    // unconditional jumps with unknown target -> BTB
+    assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
+  end
+
+  // taken/not taken
+  always_comb begin
+    taken_rvi_cf = '0;
+    taken_rvc_cf = '0;
+    predict_address = '0;
+
+    for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF;
+
+    ras_push = 1'b0;
+    ras_pop = 1'b0;
+    ras_update = '0;
+
+    // lower most prediction gets precedence
+    for (int i = INSTR_PER_FETCH - 1; i >= 0; i--) begin
+      unique case ({
+        is_branch[i], is_return[i], is_jump[i], is_jalr[i]
+      })
+        4'b0000: ;  // regular instruction e.g.: no branch
+        // unconditional jump to register, we need the BTB to resolve this
+        4'b0001: begin
+          ras_pop  = 1'b0;
+          ras_push = 1'b0;
+          if (CVA6Cfg.BTBEntries && btb_prediction_shifted[i].valid) begin
+            predict_address = btb_prediction_shifted[i].target_address;
+            cf_type[i] = ariane_pkg::JumpR;
+          end
+        end
+        // its an unconditional jump to an immediate
+        4'b0010: begin
+          ras_pop = 1'b0;
+          ras_push = 1'b0;
+          taken_rvi_cf[i] = rvi_jump[i];
+          taken_rvc_cf[i] = rvc_jump[i];
+          cf_type[i] = ariane_pkg::Jump;
+        end
+        // return
+        4'b0100: begin
+          // make sure to only alter the RAS if we actually consumed the instruction
+          ras_pop = ras_predict.valid & instr_queue_consumed[i];
+          ras_push = 1'b0;
+          predict_address = ras_predict.ra;
+          cf_type[i] = ariane_pkg::Return;
+        end
+        // branch prediction
+        4'b1000: begin
+          ras_pop  = 1'b0;
+          ras_push = 1'b0;
+          // if we have a valid dynamic prediction use it
+          if (bht_prediction_shifted[i].valid) begin
+            taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
+            taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken;
+            // otherwise default to static prediction
+          end else begin
+            // set if immediate is negative - static prediction
+            taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1];
+            taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1];
+          end
+          if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin
+            cf_type[i] = ariane_pkg::Branch;
+          end
+        end
+        default: ;
+        // default: $error("Decoded more than one control flow");
+      endcase
+      // if this instruction, in addition, is a call, save the resulting address
+      // but only if we actually consumed the address
+      if (is_call[i]) begin
+        ras_push   = instr_queue_consumed[i];
+        ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
+      end
+      // calculate the jump target address
+      if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
+        predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
+      end
+    end
+  end
+  // OR-reduce the per-instruction control-flow types into a single valid flag
+  always_comb begin
+    bp_valid = 1'b0;
+    // BP cannot be valid if we have a return instruction and the RAS is not giving a valid address
+    // Check that we encountered a control flow and that for a return the RAS
+    // contains a valid prediction.
+    for (int i = 0; i < INSTR_PER_FETCH; i++)
+    bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid));
+  end
+  assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
+
+  // Cache interface
+  assign icache_dreq_o.req = instr_queue_ready;
+  assign if_ready = icache_dreq_i.ready & instr_queue_ready;
+  // We need to flush the cache pipeline if:
+  // 1. We mispredicted
+  // 2. Want to flush the whole processor front-end
+  // 3. Need to replay an instruction because the fetch-fifo was full
+  assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay;
+  // if we have a valid branch-prediction we need to only kill the last cache request
+  // also if we killed the first stage we also need to kill the second stage (inclusive flush)
+  assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
+
+  // Update Control Flow Predictions
+  bht_update_t bht_update;
+  btb_update_t btb_update;
+
+  // assert on branch, deassert when resolved
+  logic speculative_q, speculative_d;
+  assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i;
+  assign icache_dreq_o.spec = speculative_d;
+
+  assign bht_update.valid = resolved_branch_i.valid
+                                & (resolved_branch_i.cf_type == ariane_pkg::Branch);
+  assign bht_update.pc = resolved_branch_i.pc;
+  assign bht_update.taken = resolved_branch_i.is_taken;
+  // only update mispredicted branches e.g. no returns from the RAS
+  assign btb_update.valid = resolved_branch_i.valid
+                                & resolved_branch_i.is_mispredict
+                                & (resolved_branch_i.cf_type == ariane_pkg::JumpR);
+  assign btb_update.pc = resolved_branch_i.pc;
+  assign btb_update.target_address = resolved_branch_i.target_address;
+
+  // -------------------
+  // Next PC
+  // -------------------
+  // next PC (NPC) sources, lowest priority first (later assignments in
+  // npc_select override earlier ones):
+  // 0. Branch prediction, 1. Default (word-aligned fetch address + 4), 2. Replay
+  // 3. Control flow change request (misprediction)
+  // 4. Return from environment call
+  // 5. Exception/Interrupt
+  // 6. Pipeline Flush because of CSR side effects, 7. Debug entry
+  // Mis-predict handling is a little bit different
+  // select PC a.k.a PC Gen
+  always_comb begin : npc_select
+    automatic logic [riscv::VLEN-1:0] fetch_address;
+    // check whether we come out of reset
+    // this is a workaround. some tools have issues
+    // having boot_addr_i in the asynchronous
+    // reset assignment to npc_q, even though
+    // boot_addr_i will be assigned a constant
+    // on the top-level.
+    if (npc_rst_load_q) begin
+      npc_d         = boot_addr_i;
+      fetch_address = boot_addr_i;
+    end else begin
+      fetch_address = npc_q;
+      // keep stable by default
+      npc_d         = npc_q;
+    end
+    // 0. Branch Prediction
+    if (bp_valid) begin
+      fetch_address = predict_address;
+      npc_d = predict_address;
+    end
+    // 1. Default assignment
+    if (if_ready) begin
+      npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4;
+    end
+    // 2. Replay instruction fetch
+    if (replay) begin
+      npc_d = replay_addr;
+    end
+    // 3. Control flow change request
+    if (is_mispredict) begin
+      npc_d = resolved_branch_i.target_address;
+    end
+    // 4. Return from environment call
+    if (eret_i) begin
+      npc_d = epc_i;
+    end
+    // 5. Exception/Interrupt
+    if (ex_valid_i) begin
+      npc_d = trap_vector_base_i;
+    end
+    // 6. Pipeline Flush because of CSR side effects
+    // On a pipeline flush start fetching from the next address
+    // of the instruction in the commit stage
+    // we either came here from a flush request of a CSR instruction or AMO,
+    // so as CSR or AMO instructions do not exist in a compressed form
+    // we can unconditionally do PC + 4 here
+    // or if the commit stage is halted, just take the current pc of the
+    // instruction in the commit stage
+    // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
+    if (set_pc_commit_i) begin
+      npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN - 3{1'b0}}, 3'b100});
+    end
+    // 7. Debug
+    // enter debug on a hard-coded base-address
+    if (CVA6Cfg.DebugEn && set_debug_pc_i)
+      npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0];
+    icache_dreq_o.vaddr = fetch_address;
+  end
+
+  logic [FETCH_WIDTH-1:0] icache_data;
+  // re-align the cache line
+  assign icache_data = icache_dreq_i.data >> {shamt, 4'b0};
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      npc_rst_load_q    <= 1'b1;
+      npc_q             <= '0;
+      speculative_q     <= '0;
+      icache_data_q     <= '0;
+      icache_valid_q    <= 1'b0;
+      icache_vaddr_q    <= 'b0;
+      icache_ex_valid_q <= ariane_pkg::FE_NONE;
+      btb_q             <= '0;
+      bht_q             <= '0;
+    end else begin
+      npc_rst_load_q <= 1'b0;
+      npc_q          <= npc_d;
+      speculative_q  <= speculative_d;
+      icache_valid_q <= icache_dreq_i.valid;
+      if (icache_dreq_i.valid) begin
+        icache_data_q  <= icache_data;
+        icache_vaddr_q <= icache_dreq_i.vaddr;
+        // Map the only three exceptions which can occur in the frontend to a two bit enum
+        if (ariane_pkg::MMU_PRESENT && icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin
+          icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT;
+        end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin
+          icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT;
+        end else begin
+          icache_ex_valid_q <= ariane_pkg::FE_NONE;
+        end
+        // save the uppermost prediction
+        btb_q <= btb_prediction[INSTR_PER_FETCH-1];
+        bht_q <= bht_prediction[INSTR_PER_FETCH-1];
+      end
+    end
+  end
+
+  if (CVA6Cfg.RASDepth == 0) begin
+    assign ras_predict = '0;
+  end else begin : ras_gen
+    ras #(
+        .CVA6Cfg(CVA6Cfg),
+        .DEPTH  (CVA6Cfg.RASDepth)
+    ) i_ras (
+        .clk_i,
+        .rst_ni,
+        .flush_i(flush_bp_i),
+        .push_i (ras_push),
+        .pop_i  (ras_pop),
+        .data_i (ras_update),
+        .data_o (ras_predict)
+    );
+  end
+
+  //For FPGA, BTB is implemented in read synchronous BRAM
+  //while for ASIC, BTB is implemented in D flip-flop
+  //and can be read at the same cycle.
+  assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
+
+  if (CVA6Cfg.BTBEntries == 0) begin
+    assign btb_prediction = '0;
+  end else begin : btb_gen
+    btb #(
+        .CVA6Cfg   (CVA6Cfg),
+        .NR_ENTRIES(CVA6Cfg.BTBEntries)
+    ) i_btb (
+        .clk_i,
+        .rst_ni,
+        .flush_i         (flush_bp_i),
+        .debug_mode_i,
+        .vpc_i           (vpc_btb),
+        .btb_update_i    (btb_update),
+        .btb_prediction_o(btb_prediction)
+    );
+  end
+
+  if (CVA6Cfg.BHTEntries == 0) begin
+    assign bht_prediction = '0;
+  end else begin : bht_gen
+    bht #(
+        .CVA6Cfg   (CVA6Cfg),
+        .NR_ENTRIES(CVA6Cfg.BHTEntries)
+    ) i_bht (
+        .clk_i,
+        .rst_ni,
+        .flush_i         (flush_bp_i),
+        .debug_mode_i,
+        .vpc_i           (icache_vaddr_q),
+        .bht_update_i    (bht_update),
+        .bht_prediction_o(bht_prediction)
+    );
+  end
+
+  // we need to inspect up to INSTR_PER_FETCH instructions for branches
+  // and jumps
+  for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
+    instr_scan #(
+        .CVA6Cfg(CVA6Cfg)
+    ) i_instr_scan (
+        .instr_i     (instr[i]),
+        .rvi_return_o(rvi_return[i]),
+        .rvi_call_o  (rvi_call[i]),
+        .rvi_branch_o(rvi_branch[i]),
+        .rvi_jalr_o  (rvi_jalr[i]),
+        .rvi_jump_o  (rvi_jump[i]),
+        .rvi_imm_o   (rvi_imm[i]),
+        .rvc_branch_o(rvc_branch[i]),
+        .rvc_jump_o  (rvc_jump[i]),
+        .rvc_jr_o    (rvc_jr[i]),
+        .rvc_return_o(rvc_return[i]),
+        .rvc_jalr_o  (rvc_jalr[i]),
+        .rvc_call_o  (rvc_call[i]),
+        .rvc_imm_o   (rvc_imm[i])
+    );
+  end
+
+  instr_queue #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_instr_queue (
+      .clk_i              (clk_i),
+      .rst_ni             (rst_ni),
+      .flush_i            (flush_i),
+      .instr_i            (instr),                 // from re-aligner
+      .addr_i             (addr),                  // from re-aligner
+      .exception_i        (icache_ex_valid_q),     // from I$
+      .exception_addr_i   (icache_vaddr_q),
+      .predict_address_i  (predict_address),
+      .cf_type_i          (cf_type),
+      .valid_i            (instruction_valid),     // from re-aligner
+      .consumed_o         (instr_queue_consumed),
+      .ready_o            (instr_queue_ready),
+      .replay_o           (replay),
+      .replay_addr_o      (replay_addr),
+      .fetch_entry_o      (fetch_entry_o),         // to back-end
+      .fetch_entry_valid_o(fetch_entry_valid_o),   // to back-end
+      .fetch_entry_ready_i(fetch_entry_ready_i)    // to back-end
+  );
+
+  // pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64)
+    else $fatal(1, "[frontend] fetch width not supported (must be 32 or 64)");
+  end
+`endif
+  // pragma translate_on
+endmodule
diff --git a/test/type_param/core/frontend/instr_queue.sv b/test/type_param/core/frontend/instr_queue.sv
new file mode 100644
index 0000000..3f95593
--- /dev/null
+++ b/test/type_param/core/frontend/instr_queue.sv
@@ -0,0 +1,459 @@
+// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 26.10.2018
+
+// Description: Instruction Queue, separates instruction front-end from processor
+//              back-end.
+//
+// This is an optimized instruction queue which supports the handling of
+// compressed instructions (16 bit instructions). Internally it is organized as
+// FETCH_ENTRY x 32 bit queues which are filled in a consecutive manner. Two pointers
+// point into (`idx_is_q` and `idx_ds_q`) the fill port and the read port. The read port
+// is designed so that it will easily allow for multiple issue implementation.
+// The input supports arbitrary power of two instruction fetch widths.
+//
+// The queue supports handling of branch prediction and will take care of
+// only saving a valid instruction stream.
+//
+// Furthermore it contains a replay interface in case the instruction queue
+// is already full. As instructions are in general easily replayed this should
+// increase the efficiency as I$ misses are potentially hidden. This stands in
+// contrast to pessimistic actions (early stalling) or credit based approaches.
+// Credit based systems might be difficult to implement with the current system
+// as we do not exactly know how much space we are going to need in the fifos
+// as each instruction can take either one or two slots.
+//
+// So the consumed/valid interface degenerates to an `information` interface. If the
+// upstream circuit keeps pushing, the queue will discard the information
+// and start replaying from the point where it could last manage to accept instructions.
+//
+// The instruction front-end will stop issuing instructions as soon as the
+// fifo is full. This will gate the logic if the processor is e.g.: halted
+//
+// TODO(zarubaf): The instruction queues can be reduced to 16 bit. Potentially
+// the replay mechanism gets more complicated as it can be that a 32 bit instruction
+// can not be pushed at once.
+
+module instr_queue
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,
+    input logic rst_ni,  // asynchronous reset, active low
+    input logic flush_i,  // flushes all FIFOs; the PC is re-seeded from the next valid fetch
+    input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i,  // one 32-bit word per fetch slot
+    input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i,  // address of each fetch slot
+    input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i,  // per-slot valid
+    output logic ready_o,  // low while any instruction FIFO or the address FIFO is full
+    output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o,  // slots actually pushed this cycle
+    // we've encountered an exception, at this point the only possible exceptions are page-table faults
+    input ariane_pkg::frontend_exception_t exception_i,
+    input logic [riscv::VLEN-1:0] exception_addr_i,
+    // branch predict
+    input logic [riscv::VLEN-1:0] predict_address_i,
+    input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i,
+    // replay instruction because one of the FIFO was already full
+    output logic replay_o,
+    output logic [riscv::VLEN-1:0] replay_addr_o,  // address at which to replay this instruction
+    // to processor backend
+    output ariane_pkg::fetch_entry_t fetch_entry_o,
+    output logic fetch_entry_valid_o,
+    input logic fetch_entry_ready_i
+);
+
+  typedef struct packed {
+    logic [31:0]                     instr;     // instruction word
+    ariane_pkg::cf_t                 cf;        // branch was taken
+    ariane_pkg::frontend_exception_t ex;        // exception happened
+    logic [riscv::VLEN-1:0]          ex_vaddr;  // lower VLEN bits of tval for exception
+  } instr_data_t;
+
+  logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index;
+  // instruction queues
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0][$clog2(
+ariane_pkg::FETCH_FIFO_DEPTH
+)-1:0] instr_queue_usage;
+  instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out;
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo;
+  logic [         ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr;
+  logic [         ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full;
+  logic [         ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty;
+  logic                                            instr_overflow;
+  // address queue
+  logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage;
+  logic [                         riscv::VLEN-1:0] address_out;
+  logic                                            pop_address;
+  logic                                            push_address;
+  logic                                            full_address;
+  logic                                            empty_address;
+  logic                                            address_overflow;
+  // input stream counter
+  logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q;
+  // Registers
+  // output FIFO select, one-hot
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q;
+  logic [riscv::VLEN-1:0] pc_d, pc_q;  // current PC
+  logic reset_address_d, reset_address_q;  // we need to re-set the address because of a flush
+
+  logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended;
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask;
+  logic branch_empty;
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken;
+  // shift amount, e.g.: instructions we want to retire
+  logic [ariane_pkg::LOG2_INSTR_PER_FETCH:0] popcount;
+  logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] shamt;
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid;
+  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] consumed_extended;
+  // FIFO mask
+  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] fifo_pos_extended;
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_pos;
+  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0][31:0] instr;
+  ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH*2-1:0] cf;
+  // replay interface
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_overflow_fifo;
+
+  assign ready_o = ~(|instr_queue_full) & ~full_address;
+
+  if (ariane_pkg::RVC) begin : gen_multiple_instr_per_fetch_with_C
+
+    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken
+      assign taken[i] = cf_type_i[i] != ariane_pkg::NoCF;
+    end
+
+    // calculate a branch mask, e.g.: get the first taken branch
+    lzc #(
+        .WIDTH(ariane_pkg::INSTR_PER_FETCH),
+        .MODE (0)                             // count trailing zeros
+    ) i_lzc_branch_index (
+        .in_i   (taken),         // we want to count trailing zeros
+        .cnt_o  (branch_index),  // first branch on branch_index
+        .empty_o(branch_empty)
+    );
+
+
+    // the first index is for sure valid
+    // for example (64 bit fetch):
+    // taken mask: 0 1 1 0
+    // trailing zero count = 1
+    // 0 0 0 1, 1 1 1 << 1 = 0 0 1 1, 1 1 0
+    // take the upper 4 bits: 0 0 1 1
+    assign branch_mask_extended = {{{ariane_pkg::INSTR_PER_FETCH-1}{1'b0}}, {{ariane_pkg::INSTR_PER_FETCH}{1'b1}}} << branch_index;
+    assign branch_mask = branch_mask_extended[ariane_pkg::INSTR_PER_FETCH * 2 - 2:ariane_pkg::INSTR_PER_FETCH - 1];
+
+    // mask with taken branches to get the actual amount of instructions we want to push
+    assign valid = valid_i & branch_mask;
+    // rotate right again
+    assign consumed_extended = {push_instr_fifo, push_instr_fifo} >> idx_is_q;
+    assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0];
+    // count the numbers of valid instructions we've pushed from this package
+    popcount #(
+        .INPUT_WIDTH(ariane_pkg::INSTR_PER_FETCH)
+    ) i_popcount (
+        .data_i    (push_instr_fifo),
+        .popcount_o(popcount)
+    );
+    assign shamt = popcount[$bits(shamt)-1:0];
+
+    // save the shift amount for next cycle
+    assign idx_is_d = idx_is_q + shamt;
+
+    // ----------------------
+    // Input interface
+    // ----------------------
+    // rotate left by the current position
+    assign fifo_pos_extended = {valid, valid} << idx_is_q;
+    // we just care about the upper bits
+    assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH];
+    // the fifo_position signal can directly be used to guide the push signal of each FIFO
+    // make sure it is not full
+    assign push_instr = fifo_pos & ~instr_queue_full;
+
+    // duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0
+    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input
+      assign instr[i] = instr_i[i];
+      assign instr[i+ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
+      assign cf[i] = cf_type_i[i];
+      assign cf[i+ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
+    end
+
+    // shift the inputs
+    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select
+      /* verilator lint_off WIDTH */
+      assign instr_data_in[i].instr = instr[i+idx_is_q];
+      assign instr_data_in[i].cf = cf[i+idx_is_q];
+      assign instr_data_in[i].ex = exception_i;  // exceptions hold for the whole fetch packet
+      assign instr_data_in[i].ex_vaddr = exception_addr_i;
+      /* verilator lint_on WIDTH */
+    end
+  end else begin : gen_multiple_instr_per_fetch_without_C
+
+    assign taken = '0;
+    assign branch_empty = '0;
+    assign branch_index = '0;
+    assign branch_mask_extended = '0;
+    assign branch_mask = '0;
+    assign consumed_extended = '0;
+    assign fifo_pos_extended = '0;
+    assign fifo_pos = '0;
+    assign instr = '0;
+    assign popcount = '0;
+    assign shamt = '0;
+    assign valid = '0;
+
+
+    assign consumed_o = push_instr_fifo[0];
+    // ----------------------
+    // Input interface
+    // ----------------------
+    assign push_instr = valid_i & ~instr_queue_full;
+
+    /* verilator lint_off WIDTH */
+    assign instr_data_in[0].instr = instr_i[0];
+    assign instr_data_in[0].cf = cf_type_i[0];
+    assign instr_data_in[0].ex = exception_i;  // exceptions hold for the whole fetch packet
+    assign instr_data_in[0].ex_vaddr = exception_addr_i;
+    /* verilator lint_on WIDTH */
+  end
+
+  // ----------------------
+  // Replay Logic
+  // ----------------------
+  // We need to replay an instruction fetch iff:
+  // 1. One of the instruction data FIFOs was full and we needed it
+  // (e.g.: we pushed and it was full)
+  // 2. The address/branch predict FIFO was full
+  // if one of the FIFOs was full we need to replay the faulting instruction
+  if (ariane_pkg::RVC == 1'b1) begin : gen_instr_overflow_fifo_with_C
+    assign instr_overflow_fifo = instr_queue_full & fifo_pos;
+  end else begin : gen_instr_overflow_fifo_without_C
+    assign instr_overflow_fifo = instr_queue_full & valid_i;
+  end
+  assign instr_overflow = |instr_overflow_fifo;  // at least one instruction overflowed
+  assign address_overflow = full_address & push_address;
+  assign replay_o = instr_overflow | address_overflow;
+
+  if (ariane_pkg::RVC) begin : gen_replay_addr_o_with_c
+    // select the address, in the case of an address fifo overflow just
+    // use the base of this package
+    // if we successfully pushed some instructions we can output the next instruction
+    // which we didn't manage to push
+    assign replay_addr_o = (address_overflow) ? addr_i[0] : addr_i[shamt];
+  end else begin : gen_replay_addr_o_without_C
+    assign replay_addr_o = addr_i[0];
+  end
+
+  // ----------------------
+  // Downstream interface
+  // ----------------------
+  // as long as at least one queue is non-empty we have a valid instruction
+  assign fetch_entry_valid_o = ~(&instr_queue_empty);
+
+  if (ariane_pkg::RVC) begin : gen_downstream_itf_with_c
+    always_comb begin
+      idx_ds_d = idx_ds_q;
+
+      pop_instr = '0;
+      // assemble fetch entry
+      fetch_entry_o.instruction = '0;
+      fetch_entry_o.address = pc_q;
+      fetch_entry_o.ex.valid = 1'b0;
+      fetch_entry_o.ex.cause = '0;
+
+      fetch_entry_o.ex.tval = '0;
+      fetch_entry_o.branch_predict.predict_address = address_out;
+      fetch_entry_o.branch_predict.cf = ariane_pkg::NoCF;
+      // output mux select
+      for (int unsigned i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
+        if (idx_ds_q[i]) begin
+          if (instr_data_out[i].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin
+            fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT;
+          end else begin
+            fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
+          end
+          fetch_entry_o.instruction = instr_data_out[i].instr;
+          fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE;
+          fetch_entry_o.ex.tval = {
+            {(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[i].ex_vaddr
+          };
+          fetch_entry_o.branch_predict.cf = instr_data_out[i].cf;
+          pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i;
+        end
+      end
+      // rotate the pointer left
+      if (fetch_entry_ready_i) begin
+        idx_ds_d = {
+          idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]
+        };
+      end
+    end
+  end else begin : gen_downstream_itf_without_c
+    always_comb begin
+      idx_ds_d = '0;
+      idx_is_d = '0;
+      fetch_entry_o.instruction = instr_data_out[0].instr;
+      fetch_entry_o.address = pc_q;
+
+      fetch_entry_o.ex.valid = instr_data_out[0].ex != ariane_pkg::FE_NONE;
+      if (instr_data_out[0].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin
+        fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT;
+      end else begin
+        fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
+      end
+      fetch_entry_o.ex.tval = {{(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[0].ex_vaddr};  // zero-extend to XLEN, same as the RVC branch
+
+      fetch_entry_o.branch_predict.predict_address = address_out;
+      fetch_entry_o.branch_predict.cf = instr_data_out[0].cf;
+
+      pop_instr[0] = fetch_entry_valid_o & fetch_entry_ready_i;
+    end
+  end
+
+  // TODO(zarubaf): This needs to change for dual-issue
+  // if the handshaking is successful and we had a prediction pop one address entry
+  assign pop_address = ((fetch_entry_o.branch_predict.cf != ariane_pkg::NoCF) & |pop_instr);
+
+  // ----------------------
+  // Calculate (Next) PC
+  // ----------------------
+  always_comb begin
+    pc_d = pc_q;
+    reset_address_d = flush_i ? 1'b1 : reset_address_q;
+
+    if (fetch_entry_ready_i) begin
+      // TODO(zarubaf): This needs to change for a dual issue implementation
+      // advance the PC
+      if (ariane_pkg::RVC == 1'b1) begin : gen_pc_with_c_extension
+        pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 'd2 : 'd4);
+      end else begin : gen_pc_without_c_extension
+        pc_d = pc_q + 'd4;
+      end
+    end
+
+    if (pop_address) pc_d = address_out;
+
+    // we previously flushed so we need to reset the address
+    if (valid_i[0] && reset_address_q) begin
+      // this is the base of the first instruction
+      pc_d = addr_i[0];
+      reset_address_d = 1'b0;
+    end
+  end
+
+  // FIFOs
+  for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_instr_fifo
+    // Make sure we don't save any instructions if we couldn't save the address
+    assign push_instr_fifo[i] = push_instr[i] & ~address_overflow;
+    fifo_v3 #(
+        .DEPTH(ariane_pkg::FETCH_FIFO_DEPTH),
+        .dtype(instr_data_t)
+    ) i_fifo_instr_data (
+        .clk_i     (clk_i),
+        .rst_ni    (rst_ni),
+        .flush_i   (flush_i),
+        .testmode_i(1'b0),
+        .full_o    (instr_queue_full[i]),
+        .empty_o   (instr_queue_empty[i]),
+        .usage_o   (instr_queue_usage[i]),
+        .data_i    (instr_data_in[i]),
+        .push_i    (push_instr_fifo[i]),
+        .data_o    (instr_data_out[i]),
+        .pop_i     (pop_instr[i])
+    );
+  end
+  // OR-reduce over all slots: are we pushing at least one control-flow instruction this cycle?
+  // (`push_instr` is already gated with the per-FIFO full flags)
+  always_comb begin
+    push_address = 1'b0;
+    // check if we are pushing a ctrl flow change, if so save the address
+    for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
+      push_address |= push_instr[i] & (instr_data_in[i].cf != ariane_pkg::NoCF);
+    end
+  end
+
+  fifo_v3 #(
+      .DEPTH     (ariane_pkg::FETCH_FIFO_DEPTH),  // TODO(zarubaf): Fork out to separate param
+      .DATA_WIDTH(riscv::VLEN)
+  ) i_fifo_address (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .flush_i   (flush_i),
+      .testmode_i(1'b0),
+      .full_o    (full_address),
+      .empty_o   (empty_address),
+      .usage_o   (address_queue_usage),
+      .data_i    (predict_address_i),
+      .push_i    (push_address & ~full_address),
+      .data_o    (address_out),
+      .pop_i     (pop_address)
+  );
+
+  unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage}));
+  unread i_unread_branch_mask (.d_i(|branch_mask_extended));
+  unread i_unread_lzc (.d_i(|{branch_empty}));
+  unread i_unread_fifo_pos (.d_i(|fifo_pos_extended));  // we don't care about the lower signals
+  unread i_unread_instr_fifo (.d_i(|instr_queue_usage));
+
+  if (ariane_pkg::RVC) begin : gen_pc_q_with_c
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        idx_ds_q        <= 'b1;
+        idx_is_q        <= '0;
+        pc_q            <= '0;
+        reset_address_q <= 1'b1;
+      end else begin
+        pc_q            <= pc_d;
+        reset_address_q <= reset_address_d;
+        if (flush_i) begin
+          // one-hot encoded
+          idx_ds_q        <= 'b1;
+          // binary encoded
+          idx_is_q        <= '0;
+          reset_address_q <= 1'b1;
+        end else begin
+          idx_ds_q <= idx_ds_d;
+          idx_is_q <= idx_is_d;
+        end
+      end
+    end
+  end else begin : gen_pc_q_without_C
+    assign idx_ds_q = '0;
+    assign idx_is_q = '0;
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        pc_q            <= '0;
+        reset_address_q <= 1'b1;
+      end else begin
+        pc_q            <= pc_d;
+        reset_address_q <= reset_address_d;
+        if (flush_i) begin
+          reset_address_q <= 1'b1;
+        end
+      end
+    end
+  end
+
+  // pragma translate_off
+`ifndef VERILATOR
+  replay_address_fifo :
+  assert property (@(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i)
+  else $fatal(1, "[instr_queue] Pushing address although replay asserted");
+
+  output_select_onehot :
+  assert property (@(posedge clk_i) $onehot0(idx_ds_q))
+  else begin
+    $error("Output select should be one-hot encoded");
+    $stop();
+  end
+`endif
+  // pragma translate_on
+endmodule
diff --git a/test/type_param/core/frontend/instr_scan.sv b/test/type_param/core/frontend/instr_scan.sv
new file mode 100644
index 0000000..592d5d3
--- /dev/null
+++ b/test/type_param/core/frontend/instr_scan.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 2.0 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.02.2018
+// Migrated: Luis Vitorio Cargnini, IEEE
+// Date: 09.06.2018
+
+// ------------------------------
+// Instruction Scanner
+// ------------------------------
+module instr_scan #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic [           31:0] instr_i,       // aligned instruction word, compressed or not
+    output logic                   rvi_return_o,
+    output logic                   rvi_call_o,
+    output logic                   rvi_branch_o,
+    output logic                   rvi_jalr_o,
+    output logic                   rvi_jump_o,
+    output logic [riscv::VLEN-1:0] rvi_imm_o,
+    output logic                   rvc_branch_o,
+    output logic                   rvc_jump_o,
+    output logic                   rvc_jr_o,
+    output logic                   rvc_return_o,
+    output logic                   rvc_jalr_o,
+    output logic                   rvc_call_o,
+    output logic [riscv::VLEN-1:0] rvc_imm_o
+);
+  // Field views of the instruction word that are shared by several decoders below.
+  logic [2:0] c_funct3;
+  logic [1:0] quadrant;
+  logic [4:0] rd, rs1;
+  assign c_funct3 = instr_i[15:13];
+  assign quadrant = instr_i[1:0];
+  assign rd       = instr_i[11:7];
+  assign rs1      = instr_i[19:15];
+
+  // every encoding whose two LSBs differ from 2'b11 is treated as compressed
+  logic is_rvc;
+  assign is_rvc = (quadrant != 2'b11);
+
+  // is_xret: bits [31:30] are zero and bits [28:0] match a fixed pattern (bit 29 is not compared)
+  logic is_xret;
+  assign is_xret = logic'(instr_i[31:30] == 2'b00) & logic'(instr_i[28:0] == 29'b10000001000000000000001110011);
+
+  // ---------------- uncompressed (RVI) control flow ----------------
+  assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch);
+  assign rvi_jalr_o   = (instr_i[6:0] == riscv::OpcodeJalr);
+  assign rvi_jump_o   = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret;
+  // call: opcode is JAL[R] and the destination register is either x1 or x5
+  assign rvi_call_o   = (rvi_jalr_o | rvi_jump_o) & ((rd == 5'd1) | (rd == 5'd5));
+  // return: JALR whose rs1 is either x1 or x5 while rd is not rs1
+  assign rvi_return_o = rvi_jalr_o & ((rs1 == 5'd1) | (rs1 == 5'd5)) & (rs1 != rd);
+  // is_xret forces a zero immediate; otherwise bit 3 selects uj_imm() over sb_imm()
+  assign rvi_imm_o    = is_xret ? '0 : (instr_i[3] ? ariane_pkg::uj_imm(instr_i) : ariane_pkg::sb_imm(instr_i));
+
+  // ----------------- compressed (RVC) control flow -----------------
+  // the OpcodeC1Jal encoding is only recognised when XLEN is 32
+  logic rv32_rvc_jal;
+  assign rv32_rvc_jal = (riscv::XLEN == 32) & ((c_funct3 == riscv::OpcodeC1Jal) & is_rvc & (quadrant == riscv::OpcodeC1));
+  assign rvc_jump_o   = rv32_rvc_jal | ((c_funct3 == riscv::OpcodeC1J) & is_rvc & (quadrant == riscv::OpcodeC1));
+  assign rvc_branch_o = is_rvc & (quadrant == riscv::OpcodeC1)
+                        & ((c_funct3 == riscv::OpcodeC1Beqz) | (c_funct3 == riscv::OpcodeC1Bnez));
+  // common pattern of the register jumps (bits [6:2] zero); bit 12 separates JR from JALR
+  logic is_jal_r;
+  assign is_jal_r     = is_rvc & (quadrant == riscv::OpcodeC2) & (instr_i[6:2] == 5'b00000)
+                        & (c_funct3 == riscv::OpcodeC2JalrMvAdd);
+  assign rvc_jr_o     = is_jal_r & ~instr_i[12];
+  assign rvc_jalr_o   = is_jal_r & instr_i[12];
+  assign rvc_call_o   = rv32_rvc_jal | rvc_jalr_o;
+  assign rvc_return_o = rvc_jr_o & ((rd == 5'd1) | (rd == 5'd5));  // JR through x1 or x5
+
+  // bit 14 selects between the two offset layouts; bit 12 is replicated as sign extension
+  assign rvc_imm_o    = (instr_i[14]) ? {{56+riscv::VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0}
+                                       : {{53+riscv::VLEN-64{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0};
+endmodule
diff --git a/test/type_param/core/frontend/ras.sv b/test/type_param/core/frontend/ras.sv
new file mode 100644
index 0000000..f092b50
--- /dev/null
+++ b/test/type_param/core/frontend/ras.sv
@@ -0,0 +1,71 @@
+//Copyright (C) 2018 to present,
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 2.0 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.02.2018
+// Migrated: Luis Vitorio Cargnini, IEEE
+// Date: 09.06.2018
+
+// return address stack
+module ras #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned DEPTH = 2
+) (
+    input  logic                               clk_i,
+    input  logic                               rst_ni,
+    input  logic                               flush_i,
+    input  logic                               push_i,
+    input  logic                               pop_i,
+    input  logic             [riscv::VLEN-1:0] data_i,
+    output ariane_pkg::ras_t                   data_o
+);
+
+  // Entry 0 is the top of the stack and is the one driven onto data_o.
+  ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q;
+  assign data_o = stack_q[0];
+
+  // Next-state logic as a single priority chain:
+  //   flush > simultaneous push and pop > push > pop > hold
+  always_comb begin
+    // default: keep the current contents
+    stack_d = stack_q;
+
+    if (flush_i) begin
+      // a flush clears every entry
+      stack_d = '0;
+    end else if (pop_i && push_i) begin
+      // the lower entries stay untouched, only the top entry is overwritten
+      stack_d[0].ra    = data_i;
+      stack_d[0].valid = 1'b1;
+    end else if (push_i) begin
+      // move the existing entries one position away from the top ...
+      stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0];
+      // ... and place the new return address, marked valid, on top
+      stack_d[0].ra    = data_i;
+      stack_d[0].valid = 1'b1;
+    end else if (pop_i) begin
+      // move the existing entries one position towards the top ...
+      stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1];
+      // ... and invalidate the entry at the end of the stack
+      stack_d[DEPTH-1].ra    = 'b0;
+      stack_d[DEPTH-1].valid = 1'b0;
+    end
+  end
+
+  // State register, cleared by the asynchronous active-low reset.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      stack_q <= '0;
+    end else begin
+      stack_q <= stack_d;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/id_stage.sv b/test/type_param/core/id_stage.sv
new file mode 100644
index 0000000..81d1640
--- /dev/null
+++ b/test/type_param/core/id_stage.sv
@@ -0,0 +1,143 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 15.04.2017
+// Description: Instruction decode, contains the logic for decode,
+//              issue and read operands.
+
+module id_stage #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,
+    input logic rst_ni,  // asynchronous reset, active low; clears the ID/issue register
+
+    input logic flush_i,  // invalidates the ID/issue pipeline register
+    input logic debug_req_i,  // forwarded to the decoder
+    // from IF
+    input ariane_pkg::fetch_entry_t fetch_entry_i,
+    input logic fetch_entry_valid_i,
+    output logic fetch_entry_ready_o,  // acknowledge the instruction (fetch entry)
+    // to issue stage
+    output ariane_pkg::scoreboard_entry_t issue_entry_o,  // a decoded instruction
+    output logic issue_entry_valid_o,  // issue entry is valid
+    output logic is_ctrl_flow_o,  // the instruction we issue is a ctrl flow instructions
+    input logic issue_instr_ack_i,  // issue stage acknowledged sampling of instructions
+    output logic rvfi_is_compressed_o,  // mirrors is_compressed of the instruction being decoded
+    // from CSR file
+    input riscv::priv_lvl_t priv_lvl_i,  // current privilege level
+    input riscv::xs_t fs_i,  // floating point extension status
+    input logic [2:0] frm_i,  // floating-point dynamic rounding mode
+    input riscv::xs_t vs_i,  // vector extension status
+    input logic [1:0] irq_i,  // forwarded to the decoder
+    input ariane_pkg::irq_ctrl_t irq_ctrl_i,  // forwarded to the decoder
+    input logic debug_mode_i,  // we are in debug mode
+    input logic tvm_i,  // forwarded to the decoder
+    input logic tw_i,  // forwarded to the decoder
+    input logic tsr_i  // forwarded to the decoder
+);
+  // ID/ISSUE register stage
+  typedef struct packed {
+    logic                          valid;
+    ariane_pkg::scoreboard_entry_t sbe;
+    logic                          is_ctrl_flow;
+  } issue_struct_t;
+  issue_struct_t issue_n, issue_q;
+
+  logic                                 is_control_flow_instr;
+  ariane_pkg::scoreboard_entry_t        decoded_instruction;
+
+  logic                                 is_illegal;
+  logic                          [31:0] instruction;
+  logic                                 is_compressed;
+
+  if (CVA6Cfg.RVC) begin
+    // ---------------------------------------------------------
+    // 1. Check if they are compressed and expand in case they are
+    // ---------------------------------------------------------
+    compressed_decoder #(
+        .CVA6Cfg(CVA6Cfg)
+    ) compressed_decoder_i (
+        .instr_i        (fetch_entry_i.instruction),
+        .instr_o        (instruction),
+        .illegal_instr_o(is_illegal),
+        .is_compressed_o(is_compressed)
+    );
+  end else begin
+    // without RVC the fetched word is passed through and never flagged illegal or compressed
+    assign instruction = fetch_entry_i.instruction;
+    assign is_illegal = '0;
+    assign is_compressed = '0;
+  end
+
+  assign rvfi_is_compressed_o = is_compressed;
+  // ---------------------------------------------------------
+  // 2. Decode and emit instruction to issue stage
+  // ---------------------------------------------------------
+  decoder #(
+      .CVA6Cfg(CVA6Cfg)
+  ) decoder_i (
+      .debug_req_i,
+      .irq_ctrl_i,
+      .irq_i,
+      .pc_i                   (fetch_entry_i.address),
+      .is_compressed_i        (is_compressed),
+      .is_illegal_i           (is_illegal),
+      .instruction_i          (instruction),
+      .compressed_instr_i     (fetch_entry_i.instruction[15:0]),
+      .branch_predict_i       (fetch_entry_i.branch_predict),
+      .ex_i                   (fetch_entry_i.ex),
+      .priv_lvl_i             (priv_lvl_i),
+      .debug_mode_i           (debug_mode_i),
+      .fs_i,
+      .frm_i,
+      .vs_i,
+      .tvm_i,
+      .tw_i,
+      .tsr_i,
+      .instruction_o          (decoded_instruction),
+      .is_control_flow_instr_o(is_control_flow_instr)
+  );
+
+  // ------------------
+  // Pipeline Register
+  // ------------------
+  assign issue_entry_o = issue_q.sbe;
+  assign issue_entry_valid_o = issue_q.valid;
+  assign is_ctrl_flow_o = issue_q.is_ctrl_flow;
+
+  // Next-state logic; later statements override earlier ones, so the order matters:
+  // acknowledge clears valid, a new fetch entry refills the register, a flush wins over both.
+  always_comb begin
+    issue_n             = issue_q;
+    fetch_entry_ready_o = 1'b0;
+
+    // Clear the valid flag if issue has acknowledged the instruction
+    if (issue_instr_ack_i) issue_n.valid = 1'b0;
+
+    // if we have a space in the register and the fetch is valid, go get it
+    // or the issue stage is currently acknowledging an instruction, which means that we will have space
+    // for a new instruction
+    if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin
+      fetch_entry_ready_o = 1'b1;
+      // positional struct assignment: {valid, sbe, is_ctrl_flow}
+      issue_n = '{1'b1, decoded_instruction, is_control_flow_instr};
+    end
+
+    // invalidate the pipeline register on a flush
+    if (flush_i) issue_n.valid = 1'b0;
+  end
+  // -------------------------
+  // Registers (ID <-> Issue)
+  // -------------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      issue_q <= '0;
+    end else begin
+      issue_q <= issue_n;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/include/acc_pkg.sv b/test/type_param/core/include/acc_pkg.sv
new file mode 100644
index 0000000..bcd3c70
--- /dev/null
+++ b/test/type_param/core/include/acc_pkg.sv
@@ -0,0 +1,47 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Authors: Matheus Cavalcante <matheusd@iis.ee.ethz.ch>
+//          Nils Wistoff <nwistoff@iis.ee.ethz.ch>
+
+// Package defining the accelerator interface as used by Ara + CVA6
+
+package acc_pkg;
+
+  // ----------------------
+  // Accelerator Interface
+  // ----------------------
+
+  typedef struct packed {  // request-side bundle; packed, so the first field is the MSB
+    logic                                 req_valid;  // NOTE(review): presumably pairs with req_ready in the response struct - confirm
+    logic                                 resp_ready;  // NOTE(review): presumably pairs with resp_valid in the response struct - confirm
+    riscv::instruction_t                  insn;
+    riscv::xlen_t                         rs1;
+    riscv::xlen_t                         rs2;
+    fpnew_pkg::roundmode_e                frm;
+    logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id;  // same width as the trans_id in the response struct
+    logic                                 store_pending;
+    // Invalidation interface
+    logic                                 acc_cons_en;
+    logic                                 inval_ready;
+  } accelerator_req_t;
+
+  typedef struct packed {  // response-side bundle; packed, so the first field is the MSB
+    logic                                 req_ready;
+    logic                                 resp_valid;
+    riscv::xlen_t                         result;
+    logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id;  // same width as the trans_id in the request struct
+    logic                                 error;
+    // Metadata
+    logic                                 store_pending;
+    logic                                 store_complete;
+    logic                                 load_complete;
+    logic [4:0]                           fflags;
+    logic                                 fflags_valid;  // NOTE(review): presumably qualifies fflags - confirm
+    // Invalidation interface
+    logic                                 inval_valid;
+    logic [63:0]                          inval_addr;  // NOTE(review): fixed 64 bits, not derived from any riscv:: width parameter - confirm intent
+  } accelerator_resp_t;
+
+endpackage
diff --git a/test/type_param/core/include/ariane_pkg.sv b/test/type_param/core/include/ariane_pkg.sv
new file mode 100644
index 0000000..1616faf
--- /dev/null
+++ b/test/type_param/core/include/ariane_pkg.sv
@@ -0,0 +1,994 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:   ariane_pkg.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   8.4.2017
+ *
+ * Description: Contains all the necessary defines for Ariane
+ *              in one package.
+ */
+
+// this is needed to propagate the
+// configuration in case Ariane is
+// instantiated in OpenPiton
+`ifdef PITON_ARIANE
+`include "l15.tmp.h"
+`endif
+
+/// This package contains `functions` and global defines for CVA6.
+/// *Note*: There are some parameters here as well which will eventually be
+/// moved out to favour a fully parameterizable core.
+package ariane_pkg;
+
+  // TODO: Slowly move those parameters to the new system.
+  localparam NR_SB_ENTRIES = cva6_config_pkg::CVA6ConfigNrScoreboardEntries; // number of scoreboard entries
+  localparam TRANS_ID_BITS = $clog2(
+      NR_SB_ENTRIES
+  );  // depending on the number of scoreboard entries we need that many bits
+      // to uniquely identify the entry in the scoreboard
+  localparam ASID_WIDTH = (riscv::XLEN == 64) ? 16 : 1;
+  localparam BITS_SATURATION_COUNTER = 2;
+
+  localparam ISSUE_WIDTH = 1;
+
+  // depth of store-buffers, this needs to be a power of two
+  localparam logic [2:0] DEPTH_SPEC = 'd4;
+
+  localparam int unsigned DCACHE_TYPE = int'(cva6_config_pkg::CVA6ConfigDcacheType);
+  // if DCACHE_TYPE = cva6_config_pkg::WT
+  // we can use a small commit queue since we have a write buffer in the dcache
+  // we could in principle do without the commit queue in this case, but the timing degrades if we do that due
+  // to longer paths into the commit stage
+  // if DCACHE_TYPE = cva6_config_pkg::WB
+  // allocate more space for the commit buffer to be on the safe side, this needs to be a power of two
+  localparam logic [2:0] DEPTH_COMMIT = 'd4;  // NOTE(review): constant 4 here; the value is not actually selected by DCACHE_TYPE
+
+  localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn;  // Is FPGA optimization of CV32A6
+
+  localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn;  // Is C extension configuration
+
+  // Transprecision float unit
+  localparam int unsigned LAT_COMP_FP32 = 'd2;
+  localparam int unsigned LAT_COMP_FP64 = 'd3;
+  localparam int unsigned LAT_COMP_FP16 = 'd1;
+  localparam int unsigned LAT_COMP_FP16ALT = 'd1;
+  localparam int unsigned LAT_COMP_FP8 = 'd1;
+  localparam int unsigned LAT_DIVSQRT = 'd2;
+  localparam int unsigned LAT_NONCOMP = 'd1;
+  localparam int unsigned LAT_CONV = 'd2;
+
+  localparam riscv::xlen_t OPENHWGROUP_MVENDORID = {{riscv::XLEN - 32{1'b0}}, 32'h0602};
+  localparam riscv::xlen_t ARIANE_MARCHID = {{riscv::XLEN - 32{1'b0}}, 32'd3};
+
+  // 32 registers
+  localparam REG_ADDR_SIZE = 5;
+
+  // Read ports for general purpose register files
+  localparam NR_RGPR_PORTS = 2;
+
+  // static debug hartinfo
+  // debug causes
+  localparam logic [2:0] CauseBreakpoint = 3'h1;
+  localparam logic [2:0] CauseTrigger = 3'h2;
+  localparam logic [2:0] CauseRequest = 3'h3;
+  localparam logic [2:0] CauseSingleStep = 3'h4;
+  // amount of data count registers implemented
+  localparam logic [3:0] DataCount = 4'h2;
+
+  // address where data0-15 is shadowed or if shadowed in a CSR
+  // address of the first CSR used for shadowing the data
+  localparam logic [11:0] DataAddr = 12'h380;  // we are aligned with Rocket here
+  typedef struct packed {  // 32 bits in total; the declared ranges match each field's position in the packed word
+    logic [31:24] zero1;
+    logic [23:20] nscratch;
+    logic [19:17] zero0;
+    logic         dataaccess;  // single bit, lands at bit 16 of the packed word
+    logic [15:12] datasize;
+    logic [11:0]  dataaddr;
+  } hartinfo_t;
+
+  localparam hartinfo_t DebugHartInfo = '{  // named assignment pattern, one entry per hartinfo_t field
+      zero1: '0,
+      nscratch: 2,  // Debug module needs at least two scratch regs
+      zero0: '0,
+      dataaccess: 1'b1,  // data registers are memory mapped in the debugger
+      datasize: DataCount,  // 4'h2, declared above
+      dataaddr: DataAddr  // 12'h380, declared above
+  };
+
+  // enables a commit log which matches spikes commit log format for easier trace comparison
+  localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1;
+
+  // ------------- Dangerous -------------
+  // if set to zero a flush will not invalidate the cache-lines, in a single core environment
+  // where coherence is not necessary this can improve performance. This needs to be switched on
+  // when more than one core is in a system
+  localparam logic INVALIDATE_ON_FLUSH = 1'b1;
+
+`ifdef SPIKE_TANDEM
+  // Spike still places 0 in TVAL for ENV_CALL_* exceptions.
+  // This may eventually go away when Spike starts to handle TVAL for *all* exceptions.
+  localparam bit ZERO_TVAL = 1'b1;
+`else
+  localparam bit ZERO_TVAL = 1'b0;
+`endif
+  // read mask for SSTATUS over MSTATUS
+  localparam logic [63:0] SMODE_STATUS_READ_MASK = riscv::SSTATUS_UIE
+                                                   | riscv::SSTATUS_SIE
+                                                   | riscv::SSTATUS_SPIE
+                                                   | riscv::SSTATUS_SPP
+                                                   | riscv::SSTATUS_FS
+                                                   | riscv::SSTATUS_XS
+                                                   | riscv::SSTATUS_SUM
+                                                   | riscv::SSTATUS_MXR
+                                                   | riscv::SSTATUS_UPIE
+                                                   | riscv::SSTATUS_SPIE  // NOTE(review): already OR-ed in above; the repeat is redundant but harmless
+                                                   | riscv::SSTATUS_UXL
+                                                   | riscv::SSTATUS_SD;
+
+  localparam logic [63:0] SMODE_STATUS_WRITE_MASK = riscv::SSTATUS_SIE  // write mask: a subset of the terms in the read mask
+                                                    | riscv::SSTATUS_SPIE
+                                                    | riscv::SSTATUS_SPP
+                                                    | riscv::SSTATUS_FS
+                                                    | riscv::SSTATUS_SUM
+                                                    | riscv::SSTATUS_MXR;
+  // ---------------
+  // AXI
+  // ---------------
+
+  localparam FETCH_USER_WIDTH = cva6_config_pkg::CVA6ConfigFetchUserWidth;
+  localparam DATA_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth;
+  localparam AXI_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn | cva6_config_pkg::CVA6ConfigFetchUserEn;
+  localparam AXI_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth;
+  localparam DATA_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn;
+  localparam FETCH_USER_EN = cva6_config_pkg::CVA6ConfigFetchUserEn;
+
+  typedef enum logic {
+    SINGLE_REQ,
+    CACHE_LINE_REQ
+  } ad_req_t;
+
+  // ---------------
+  // Fetch Stage
+  // ---------------
+
+  // leave as is (fails with >8 entries and wider fetch width)
+  localparam int unsigned FETCH_FIFO_DEPTH = 4;
+  localparam int unsigned FETCH_WIDTH = 32;
+  // maximum instructions we can fetch on one request (we support compressed instructions)
+  localparam int unsigned INSTR_PER_FETCH = RVC == 1'b1 ? (FETCH_WIDTH / 16) : 1;
+  localparam int unsigned LOG2_INSTR_PER_FETCH = RVC == 1'b1 ? $clog2(INSTR_PER_FETCH) : 1;
+
+  // Only use struct when signals have same direction
+  // exception
+  typedef struct packed {
+    riscv::xlen_t cause;  // cause of exception
+    riscv::xlen_t       tval;  // additional information on the cause, e.g. the instruction causing it
+    // or the address of a LD/ST fault (this line continues the tval description)
+    logic valid;  // NOTE(review): presumably marks cause/tval as meaningful - confirm against users
+  } exception_t;
+
+  typedef enum logic [2:0] {
+    NoCF,    // No control flow prediction
+    Branch,  // Branch
+    Jump,    // Jump to address from immediate
+    JumpR,   // Jump to address from registers
+    Return   // Return Address Prediction
+  } cf_t;
+
+  // branch-predict
+  // this is the struct we get back from ex stage and we will use it to update
+  // all the necessary data structures
+  // bp_resolve_t
+  typedef struct packed {
+    logic                   valid;           // prediction with all its values is valid
+    logic [riscv::VLEN-1:0] pc;              // PC of predict or mis-predict
+    logic [riscv::VLEN-1:0] target_address;  // target address at which to jump, or not
+    logic                   is_mispredict;   // set if this was a mis-predict
+    logic                   is_taken;        // branch is taken
+    cf_t                    cf_type;         // Type of control flow change
+  } bp_resolve_t;
+
+  // branchpredict scoreboard entry
+  // this is the struct which we will inject into the pipeline to guide the various
+  // units towards the correct branch decision and resolve
+  typedef struct packed {
+    cf_t                    cf;               // type of control flow prediction
+    logic [riscv::VLEN-1:0] predict_address;  // target address at which to jump, or not
+  } branchpredict_sbe_t;
+
+  typedef struct packed {
+    logic                   valid;
+    logic [riscv::VLEN-1:0] pc;              // update at PC
+    logic [riscv::VLEN-1:0] target_address;
+  } btb_update_t;
+
+  typedef struct packed {
+    logic                   valid;
+    logic [riscv::VLEN-1:0] target_address;
+  } btb_prediction_t;
+
+  typedef struct packed {
+    logic                   valid;
+    logic [riscv::VLEN-1:0] ra;
+  } ras_t;
+
+  typedef struct packed {
+    logic                   valid;
+    logic [riscv::VLEN-1:0] pc;     // update at PC
+    logic                   taken;
+  } bht_update_t;
+
+  typedef struct packed {
+    logic valid;
+    logic taken;
+  } bht_prediction_t;
+
+  typedef struct packed {
+    logic       valid;
+    logic [1:0] saturation_counter;
+  } bht_t;
+
+  typedef enum logic [3:0] {
+    NONE,       // 0
+    LOAD,       // 1
+    STORE,      // 2
+    ALU,        // 3
+    CTRL_FLOW,  // 4
+    MULT,       // 5
+    CSR,        // 6
+    FPU,        // 7
+    FPU_VEC,    // 8
+    CVXIF,      // 9
+    ACCEL       // 10
+  } fu_t;
+
+  localparam EXC_OFF_RST = 8'h80;
+
+  localparam SupervisorIrq = 1;
+  localparam MachineIrq = 0;
+
+  // All information needed to determine whether we need to associate an interrupt
+  // with the corresponding instruction or not.
+  typedef struct packed {
+    riscv::xlen_t mie;
+    riscv::xlen_t mip;
+    riscv::xlen_t mideleg;
+    logic         sie;
+    logic         global_enable;
+  } irq_ctrl_t;
+
+  // ---------------
+  // Cache config
+  // ---------------
+
+  // for usage in OpenPiton we have to propagate the openpiton L15 configuration from l15.h
+`ifdef PITON_ARIANE
+
+`ifndef CONFIG_L1I_CACHELINE_WIDTH
+  `define CONFIG_L1I_CACHELINE_WIDTH 128
+`endif
+
+`ifndef CONFIG_L1I_ASSOCIATIVITY
+  `define CONFIG_L1I_ASSOCIATIVITY 4
+`endif
+
+`ifndef CONFIG_L1I_SIZE
+  `define CONFIG_L1I_SIZE 16*1024
+`endif
+
+`ifndef CONFIG_L1D_CACHELINE_WIDTH
+  `define CONFIG_L1D_CACHELINE_WIDTH 128
+`endif
+
+`ifndef CONFIG_L1D_ASSOCIATIVITY
+  `define CONFIG_L1D_ASSOCIATIVITY 8
+`endif
+
+`ifndef CONFIG_L1D_SIZE
+  `define CONFIG_L1D_SIZE 32*1024
+`endif
+
+`ifndef L15_THREADID_WIDTH
+  `define L15_THREADID_WIDTH 3
+`endif
+
+  // I$
+  localparam int unsigned ICACHE_LINE_WIDTH = `CONFIG_L1I_CACHELINE_WIDTH;
+  localparam int unsigned ICACHE_SET_ASSOC = `CONFIG_L1I_ASSOCIATIVITY;
+  localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(`CONFIG_L1I_SIZE / ICACHE_SET_ASSOC);
+  localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH;
+  localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : 128;  // in bit
+  // D$
+  localparam int unsigned DCACHE_LINE_WIDTH = `CONFIG_L1D_CACHELINE_WIDTH;
+  localparam int unsigned DCACHE_SET_ASSOC = `CONFIG_L1D_ASSOCIATIVITY;
+  localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(`CONFIG_L1D_SIZE / DCACHE_SET_ASSOC);
+  localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH;
+  localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : 128;  // in bit
+  localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH;
+
+  localparam int unsigned MEM_TID_WIDTH = `L15_THREADID_WIDTH;
+`else
+  // I$
+  localparam int unsigned CONFIG_L1I_SIZE = cva6_config_pkg::CVA6ConfigIcacheByteSize;  // in byte
+  localparam int unsigned ICACHE_SET_ASSOC   = cva6_config_pkg::CVA6ConfigIcacheSetAssoc; // number of ways
+  localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(
+      CONFIG_L1I_SIZE / ICACHE_SET_ASSOC
+  );  // in bit, contains also offset width
+  localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH;  // in bit
+  localparam int unsigned ICACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigIcacheLineWidth;  // in bit
+  localparam int unsigned ICACHE_USER_LINE_WIDTH  = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit
+  // D$
+  localparam int unsigned CONFIG_L1D_SIZE = cva6_config_pkg::CVA6ConfigDcacheByteSize;  // in byte
+  localparam int unsigned DCACHE_SET_ASSOC   = cva6_config_pkg::CVA6ConfigDcacheSetAssoc; // number of ways
+  localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(
+      CONFIG_L1D_SIZE / DCACHE_SET_ASSOC
+  );  // in bit, contains also offset width
+  localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH;  // in bit
+  localparam int unsigned DCACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigDcacheLineWidth;  // in bit
+  localparam int unsigned DCACHE_USER_LINE_WIDTH  = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit
+  localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH;
+
+  localparam int unsigned MEM_TID_WIDTH = cva6_config_pkg::CVA6ConfigMemTidWidth;
+`endif
+
+  localparam int unsigned DCACHE_TID_WIDTH = cva6_config_pkg::CVA6ConfigDcacheIdWidth;
+
+  localparam int unsigned WT_DCACHE_WBUF_DEPTH = cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth;
+
+  // ---------------
+  // EX Stage
+  // ---------------
+
+  typedef enum logic [7:0] {  // basic ALU op
+    ADD,
+    SUB,
+    ADDW,
+    SUBW,
+    // logic operations
+    XORL,
+    ORL,
+    ANDL,
+    // shifts
+    SRA,
+    SRL,
+    SLL,
+    SRLW,
+    SLLW,
+    SRAW,
+    // comparisons
+    LTS,
+    LTU,
+    GES,
+    GEU,
+    EQ,
+    NE,
+    // jumps
+    JALR,
+    BRANCH,
+    // set lower than operations
+    SLTS,
+    SLTU,
+    // CSR functions
+    MRET,
+    SRET,
+    DRET,
+    ECALL,
+    WFI,
+    FENCE,
+    FENCE_I,
+    SFENCE_VMA,
+    CSR_WRITE,
+    CSR_READ,
+    CSR_SET,
+    CSR_CLEAR,
+    // LSU functions
+    LD,
+    SD,
+    LW,
+    LWU,
+    SW,
+    LH,
+    LHU,
+    SH,
+    LB,
+    SB,
+    LBU,
+    // Atomic Memory Operations
+    AMO_LRW,
+    AMO_LRD,
+    AMO_SCW,
+    AMO_SCD,
+    AMO_SWAPW,
+    AMO_ADDW,
+    AMO_ANDW,
+    AMO_ORW,
+    AMO_XORW,
+    AMO_MAXW,
+    AMO_MAXWU,
+    AMO_MINW,
+    AMO_MINWU,
+    AMO_SWAPD,
+    AMO_ADDD,
+    AMO_ANDD,
+    AMO_ORD,
+    AMO_XORD,
+    AMO_MAXD,
+    AMO_MAXDU,
+    AMO_MIND,
+    AMO_MINDU,
+    // Multiplications
+    MUL,
+    MULH,
+    MULHU,
+    MULHSU,
+    MULW,
+    // Divisions
+    DIV,
+    DIVU,
+    DIVW,
+    DIVUW,
+    REM,
+    REMU,
+    REMW,
+    REMUW,
+    // Floating-Point Load and Store Instructions
+    FLD,
+    FLW,
+    FLH,
+    FLB,
+    FSD,
+    FSW,
+    FSH,
+    FSB,
+    // Floating-Point Computational Instructions
+    FADD,
+    FSUB,
+    FMUL,
+    FDIV,
+    FMIN_MAX,
+    FSQRT,
+    FMADD,
+    FMSUB,
+    FNMSUB,
+    FNMADD,
+    // Floating-Point Conversion and Move Instructions
+    FCVT_F2I,
+    FCVT_I2F,
+    FCVT_F2F,
+    FSGNJ,
+    FMV_F2X,
+    FMV_X2F,
+    // Floating-Point Compare Instructions
+    FCMP,
+    // Floating-Point Classify Instruction
+    FCLASS,
+    // Vectorial Floating-Point Instructions that don't directly map onto the scalar ones
+    VFMIN,
+    VFMAX,
+    VFSGNJ,
+    VFSGNJN,
+    VFSGNJX,
+    VFEQ,
+    VFNE,
+    VFLT,
+    VFGE,
+    VFLE,
+    VFGT,
+    VFCPKAB_S,
+    VFCPKCD_S,
+    VFCPKAB_D,
+    VFCPKCD_D,
+    // Offload Instructions to be directed into cv_x_if
+    OFFLOAD,
+    // Or-Combine and REV8
+    ORCB,
+    REV8,
+    // Bitwise Rotation
+    ROL,
+    ROLW,
+    ROR,
+    RORI,
+    RORIW,
+    RORW,
+    // Sign and Zero Extend
+    SEXTB,
+    SEXTH,
+    ZEXTH,
+    // Count population
+    CPOP,
+    CPOPW,
+    // Count Leading/Trailing Zeros
+    CLZ,
+    CLZW,
+    CTZ,
+    CTZW,
+    // Carry less multiplication Op's
+    CLMUL,
+    CLMULH,
+    CLMULR,
+    // Single bit instructions Op's
+    BCLR,
+    BCLRI,
+    BEXT,
+    BEXTI,
+    BINV,
+    BINVI,
+    BSET,
+    BSETI,
+    // Integer minimum/maximum
+    MAX,
+    MAXU,
+    MIN,
+    MINU,
+    // Shift with Add Unsigned Word and Unsigned Word Op's (Bitmanip)
+    SH1ADDUW,
+    SH2ADDUW,
+    SH3ADDUW,
+    ADDUW,
+    SLLIUW,
+    // Shift with Add (Bitmanip)
+    SH1ADD,
+    SH2ADD,
+    SH3ADD,
+    // Bitmanip Logical with negate op (Bitmanip)
+    ANDN,
+    ORN,
+    XNOR,
+    // Accelerator operations
+    ACCEL_OP,
+    ACCEL_OP_FS1,
+    ACCEL_OP_FD,
+    ACCEL_OP_LOAD,
+    ACCEL_OP_STORE,
+    // Zicond instruction
+    CZERO_EQZ,
+    CZERO_NEZ
+  } fu_op;
+
+  typedef struct packed {
+    fu_t                      fu;
+    fu_op                     operation;
+    riscv::xlen_t             operand_a;
+    riscv::xlen_t             operand_b;
+    riscv::xlen_t             imm;
+    logic [TRANS_ID_BITS-1:0] trans_id;
+  } fu_data_t;
+
+  function automatic logic op_is_branch(input fu_op op);  // 1 for the six comparison ops below, 0 otherwise
+    unique case (op) inside
+      EQ, NE, LTS, GES, LTU, GEU: return 1'b1;  // the comparison operations of fu_op
+      default:                    return 1'b0;  // all other ops
+    endcase
+  endfunction
+
+  // -------------------------------
+  // Extract Src/Dst FP Reg from Op
+  // -------------------------------
+  // function used in instr_trace svh
+  // is_rs1_fpr function is kept to allow cva6 compilation with instr_trace feature
+  function automatic logic is_rs1_fpr(input fu_op op);  // 1 if op is in the set below, else 0
+    unique case (op) inside  // the [A : B] ranges rely on the declaration order of the fu_op enum
+      [FMUL : FNMADD],  // Computational Operations (except ADD/SUB)
+      FCVT_F2I,  // Float-Int Casts
+      FCVT_F2F,  // Float-Float Casts
+      FSGNJ,  // Sign Injections
+      FMV_F2X,  // FPR-GPR Moves
+      FCMP,  // Comparisons
+      FCLASS,  // Classifications
+      [VFMIN : VFCPKCD_D],  // Additional Vectorial FP ops
+      ACCEL_OP_FS1:  // Accelerator instructions
+      return 1'b1;  // shared result for every item in the list above
+      default: return 1'b0;  // all other ops
+    endcase
+  endfunction
+
+  // function used in instr_trace svh
+  // is_rs2_fpr function is kept to allow cva6 compilation with instr_trace feature
+  function automatic logic is_rs2_fpr(input fu_op op);  // 1 if op is in the set below, else 0
+    unique case (op) inside  // the [A : B] ranges rely on the declaration order of the fu_op enum
+      [FSD : FSB],  // FP Stores
+      [FADD : FMIN_MAX],  // Computational Operations (no sqrt)
+      [FMADD : FNMADD],  // Fused Computational Operations
+      FCVT_F2F,  // Vectorial F2F Conversions require target
+      [FSGNJ : FMV_F2X],  // Sign Injections and moves mapped to SGNJ
+      FCMP,  // Comparisons
+      [VFMIN : VFCPKCD_D]:  // Additional Vectorial FP ops
+      return 1'b1;  // shared result for every item in the list above
+      default: return 1'b0;  // all other ops
+    endcase
+  endfunction
+
+  // function used in instr_trace svh
+  // is_imm_fpr function is kept to allow cva6 compilation with instr_trace feature
+  // ternary operations encode the rs3 address in the imm field, also add/sub
+  function automatic logic is_imm_fpr(input fu_op op);  // 1 if op is in the set below, else 0
+    unique case (op) inside  // the [A : B] ranges rely on the declaration order of the fu_op enum
+      [FADD : FSUB],  // ADD/SUB need inputs as Operand B/C
+      [FMADD : FNMADD],  // Fused Computational Operations
+      [VFCPKAB_S : VFCPKCD_D]:  // Vectorial FP cast and pack ops
+      return 1'b1;  // shared result for every item in the list above
+      default: return 1'b0;  // all other ops
+    endcase
+  endfunction
+
+  // function used in instr_trace svh
+  // is_rd_fpr function is kept to allow cva6 compilation with instr_trace feature
+  function automatic logic is_rd_fpr(input fu_op op);  // 1 if op is in the set below, else 0
+    unique case (op) inside  // the [A : B] ranges rely on the declaration order of the fu_op enum
+      [FLD : FLB],  // FP Loads
+      [FADD : FNMADD],  // Computational Operations
+      FCVT_I2F,  // Int-Float Casts
+      FCVT_F2F,  // Float-Float Casts
+      FSGNJ,  // Sign Injections
+      FMV_X2F,  // GPR-FPR Moves
+      [VFMIN : VFSGNJX],  // Vectorial MIN/MAX and SGNJ
+      [VFCPKAB_S : VFCPKCD_D],  // Vectorial FP cast and pack ops
+      ACCEL_OP_FD:  // Accelerator instructions
+      return 1'b1;  // shared result for every item in the list above
+      default: return 1'b0;  // all other ops
+    endcase
+  endfunction
+
+  function automatic logic is_amo(fu_op op);  // 1 if op lies in the AMO_* span of fu_op, else 0
+    case (op) inside
+      [AMO_LRW : AMO_MINDU]: begin  // AMO_LRW through AMO_MINDU in fu_op declaration order (LR/SC included)
+        return 1'b1;
+      end
+      default: return 1'b0;  // all other ops
+    endcase
+  endfunction
+
+  typedef struct packed {
+    logic                       valid;
+    logic [riscv::VLEN-1:0]     vaddr;
+    logic                       overflow;
+    riscv::xlen_t               data;
+    logic [(riscv::XLEN/8)-1:0] be;
+    fu_t                        fu;
+    fu_op                       operation;
+    logic [TRANS_ID_BITS-1:0]   trans_id;
+  } lsu_ctrl_t;
+
+  // ---------------
+  // IF/ID Stage
+  // ---------------
+  // store the decompressed instruction
+  typedef struct packed {
+    logic [riscv::VLEN-1:0] address;  // the address of the instructions from below
+    logic [31:0] instruction;  // instruction word
+    branchpredict_sbe_t     branch_predict; // this field contains branch prediction information regarding the forward branch path
+    exception_t             ex;             // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions
+  } fetch_entry_t;
+
+  // ---------------
+  // ID/EX/WB Stage
+  // ---------------
+
+  localparam RVFI = cva6_config_pkg::CVA6ConfigRvfiTrace;
+
+  typedef struct packed {
+    logic [riscv::VLEN-1:0] pc;  // PC of instruction
+    logic [TRANS_ID_BITS-1:0] trans_id;      // this can potentially be simplified, we could index the scoreboard entry
+                                             // with the transaction id in any case make the width more generic
+    fu_t fu;  // functional unit to use
+    fu_op op;  // operation to perform in each functional unit
+    logic [REG_ADDR_SIZE-1:0] rs1;  // register source address 1
+    logic [REG_ADDR_SIZE-1:0] rs2;  // register source address 2
+    logic [REG_ADDR_SIZE-1:0] rd;  // register destination address
+    riscv::xlen_t result;  // for unfinished instructions this field also holds the immediate,
+                           // for unfinished floating-point that are partly encoded in rs2, this field also holds rs2
+                           // for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB)
+                           // this field holds the address of the third operand from the floating-point register file
+    logic valid;  // is the result valid
+    logic use_imm;  // should we use the immediate as operand b?
+    logic use_zimm;  // use zimm as operand a
+    logic use_pc;  // set if we need to use the PC as operand a, PC from exception
+    exception_t ex;  // exception has occurred
+    branchpredict_sbe_t bp;  // branch predict scoreboard data structure
+    logic                     is_compressed; // signals a compressed instructions, we need this information at the commit stage if
+                                             // we want jump accordingly e.g.: +4, +2
+    logic vfp;  // is this a vector floating-point instruction?
+  } scoreboard_entry_t;
+
+  // ---------------
+  // MMU instantiation
+  // ---------------
+  localparam bit MMU_PRESENT = cva6_config_pkg::CVA6ConfigMmuPresent;
+
+  localparam int unsigned INSTR_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigInstrTlbEntries;
+  localparam int unsigned DATA_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigDataTlbEntries;
+
+  // -------------------
+  // Performance counter
+  // -------------------
+  localparam bit PERF_COUNTER_EN = cva6_config_pkg::CVA6ConfigPerfCounterEn;
+  localparam int unsigned MHPMCounterNum = 6;
+
+  // --------------------
+  // Atomics
+  // --------------------
+  typedef enum logic [3:0] {
+    AMO_NONE = 4'b0000,
+    AMO_LR   = 4'b0001,
+    AMO_SC   = 4'b0010,
+    AMO_SWAP = 4'b0011,
+    AMO_ADD  = 4'b0100,
+    AMO_AND  = 4'b0101,
+    AMO_OR   = 4'b0110,
+    AMO_XOR  = 4'b0111,
+    AMO_MAX  = 4'b1000,
+    AMO_MAXU = 4'b1001,
+    AMO_MIN  = 4'b1010,
+    AMO_MINU = 4'b1011,
+    AMO_CAS1 = 4'b1100,  // unused, not part of riscv spec, but provided in OpenPiton
+    AMO_CAS2 = 4'b1101   // unused, not part of riscv spec, but provided in OpenPiton
+  } amo_t;
+
+  typedef struct packed {
+    logic                  valid;    // valid flag
+    logic                  is_2M;    //
+    logic                  is_1G;    //
+    logic [27-1:0]         vpn;      // VPN (39bits) = 27bits + 12bits offset
+    logic [ASID_WIDTH-1:0] asid;
+    riscv::pte_t           content;
+  } tlb_update_t;
+
+  // Bits required for representation of physical address space as 4K pages
+  // (e.g. 27*4K == 39bit address space).
+  localparam PPN4K_WIDTH = 38;
+
+  typedef struct packed {
+    logic             valid;    // valid flag
+    logic             is_4M;    //
+    logic [20-1:0]    vpn;      //VPN (32bits) = 20bits + 12bits offset
+    logic [9-1:0]     asid;     //ASID length = 9 for Sv32 mmu
+    riscv::pte_sv32_t content;
+  } tlb_update_sv32_t;
+
+  typedef enum logic [1:0] {
+    FE_NONE,
+    FE_INSTR_ACCESS_FAULT,
+    FE_INSTR_PAGE_FAULT
+  } frontend_exception_t;
+
+  // ----------------------
+  // cache request ports
+  // ----------------------
+  // I$ address translation requests
+  typedef struct packed {
+    logic                   fetch_valid;      // address translation valid
+    logic [riscv::PLEN-1:0] fetch_paddr;      // physical address in
+    exception_t             fetch_exception;  // exception occurred during fetch
+  } icache_areq_t;
+
+  typedef struct packed {
+    logic                   fetch_req;    // address translation request
+    logic [riscv::VLEN-1:0] fetch_vaddr;  // virtual address out
+  } icache_arsp_t;
+
+  // I$ data requests
+  typedef struct packed {
+    logic                   req;      // we request a new word
+    logic                   kill_s1;  // kill the current request
+    logic                   kill_s2;  // kill the last request
+    logic                   spec;     // request is speculative
+    logic [riscv::VLEN-1:0] vaddr;    // 1st cycle: 12 bit index is taken for lookup
+  } icache_dreq_t;
+
+  typedef struct packed {
+    logic                        ready;  // icache is ready
+    logic                        valid;  // signals a valid read
+    logic [FETCH_WIDTH-1:0]      data;   // 2+ cycle out: tag
+    logic [FETCH_USER_WIDTH-1:0] user;   // User bits
+    logic [riscv::VLEN-1:0]      vaddr;  // virtual address out
+    exception_t                  ex;     // we've encountered an exception
+  } icache_drsp_t;
+
+  // AMO request going to cache. this request is unconditionally valid as soon
+  // as request goes high.
+  // Furthermore, those signals are kept stable until the response indicates
+  // completion by asserting ack.
+  typedef struct packed {
+    logic        req;        // this request is valid
+    amo_t        amo_op;     // atomic memory operation to perform
+    logic [1:0]  size;       // 2'b10 --> word operation, 2'b11 --> double word operation
+    logic [63:0] operand_a;  // address (fixed 64 bits wide)
+    logic [63:0] operand_b;  // data as laid out in the register (fixed 64 bits wide)
+  } amo_req_t;
+
+  // AMO response coming from cache.
+  typedef struct packed {
+    logic        ack;     // response is valid
+    logic [63:0] result;  // sign-extended, result
+  } amo_resp_t;
+
+  // D$ data requests
+  typedef struct packed {
+    logic [DCACHE_INDEX_WIDTH-1:0] address_index;
+    logic [DCACHE_TAG_WIDTH-1:0]   address_tag;
+    riscv::xlen_t                  data_wdata;
+    logic [DCACHE_USER_WIDTH-1:0]  data_wuser;
+    logic                          data_req;
+    logic                          data_we;
+    logic [(riscv::XLEN/8)-1:0]    data_be;
+    logic [1:0]                    data_size;
+    logic [DCACHE_TID_WIDTH-1:0]   data_id;
+    logic                          kill_req;
+    logic                          tag_valid;
+  } dcache_req_i_t;
+
+  typedef struct packed {
+    logic                         data_gnt;
+    logic                         data_rvalid;
+    logic [DCACHE_TID_WIDTH-1:0]  data_rid;
+    riscv::xlen_t                 data_rdata;
+    logic [DCACHE_USER_WIDTH-1:0] data_ruser;
+  } dcache_req_o_t;
+
+  // ----------------------
+  // Arithmetic Functions
+  // ----------------------
+  function automatic riscv::xlen_t sext32(logic [31:0] operand);  // widen a 32-bit value to XLEN bits
+    return {{riscv::XLEN - 32{operand[31]}}, operand[31:0]};  // operand[31] is replicated XLEN-32 times above the original word
+  endfunction
+
+  // ----------------------
+  // Immediate functions
+  // ----------------------
+  function automatic logic [riscv::VLEN-1:0] uj_imm(logic [31:0] instruction_i);  // VLEN-wide immediate from the UJ field layout
+    return {
+      {44 + riscv::VLEN - 64{instruction_i[31]}},  // VLEN-20 copies of bit 31 (sign fill)
+      instruction_i[19:12],
+      instruction_i[20],
+      instruction_i[30:21],
+      1'b0  // lowest bit is always zero; the 20 bits below the fill bring the total to VLEN
+    };
+  endfunction
+
+  function automatic logic [riscv::VLEN-1:0] i_imm(logic [31:0] instruction_i);  // VLEN-wide immediate from instruction bits [31:20]
+    return {{52 + riscv::VLEN - 64{instruction_i[31]}}, instruction_i[31:20]};  // VLEN-12 copies of bit 31, then the 12-bit field
+  endfunction
+
+  function automatic logic [riscv::VLEN-1:0] sb_imm(logic [31:0] instruction_i);  // VLEN-wide immediate from the SB field layout
+    return {
+      {51 + riscv::VLEN - 64{instruction_i[31]}},  // VLEN-13 copies of bit 31 (sign fill)
+      instruction_i[31],
+      instruction_i[7],
+      instruction_i[30:25],
+      instruction_i[11:8],
+      1'b0  // lowest bit is always zero; the 13 bits below the fill bring the total to VLEN
+    };
+  endfunction
+
+  // ----------------------
+  // LSU Functions
+  // ----------------------
+  // align data to address: rotate the data left by addr_tmp bytes and return the low XLEN bits
+  function automatic riscv::xlen_t data_align(logic [2:0] addr, logic [63:0] data);
+    // Force addr[2] to 1'b0 unless riscv::IS_XLEN64 is set (32-bit configuration)
+    logic [ 2:0] addr_tmp = {(addr[2] && riscv::IS_XLEN64), addr[1:0]};
+    logic [63:0] data_tmp = {64{1'b0}};
+    case (addr_tmp)
+      3'b000: data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-1:0]};  // no rotation
+      3'b001:  // rotate left by 8 bits within the low XLEN bits
+      data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-9:0], data[riscv::XLEN-1:riscv::XLEN-8]};
+      3'b010:  // rotate left by 16 bits within the low XLEN bits
+      data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-17:0], data[riscv::XLEN-1:riscv::XLEN-16]};
+      3'b011:  // rotate left by 24 bits within the low XLEN bits
+      data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-25:0], data[riscv::XLEN-1:riscv::XLEN-24]};
+      3'b100: data_tmp = {data[31:0], data[63:32]};  // rotate left by 32 bits within 64 bits
+      3'b101: data_tmp = {data[23:0], data[63:24]};  // rotate left by 40 bits within 64 bits
+      3'b110: data_tmp = {data[15:0], data[63:16]};  // rotate left by 48 bits within 64 bits
+      3'b111: data_tmp = {data[7:0], data[63:8]};  // rotate left by 56 bits within 64 bits
+    endcase
+    return data_tmp[riscv::XLEN-1:0];  // upper bits of data_tmp (if any) are dropped
+  endfunction
+
+  // generate the 8-bit byte enable mask for an access of 2**size bytes starting at byte addr
+  function automatic logic [7:0] be_gen(logic [2:0] addr, logic [1:0] size);
+    case (size)
+      2'b11: begin  // 8 bytes: all enables set, addr is ignored
+        return 8'b1111_1111;
+      end
+      2'b10: begin  // 4 bytes: four consecutive enables starting at addr
+        case (addr[2:0])
+          3'b000:  return 8'b0000_1111;
+          3'b001:  return 8'b0001_1110;
+          3'b010:  return 8'b0011_1100;
+          3'b011:  return 8'b0111_1000;
+          3'b100:  return 8'b1111_0000;
+          default: ;  // addr 5..7 would not fit in 8 bytes: falls through to the final return 8'b0
+        endcase
+      end
+      2'b01: begin  // 2 bytes: two consecutive enables starting at addr
+        case (addr[2:0])
+          3'b000:  return 8'b0000_0011;
+          3'b001:  return 8'b0000_0110;
+          3'b010:  return 8'b0000_1100;
+          3'b011:  return 8'b0001_1000;
+          3'b100:  return 8'b0011_0000;
+          3'b101:  return 8'b0110_0000;
+          3'b110:  return 8'b1100_0000;
+          default: ;  // addr 7 would not fit in 8 bytes: falls through to the final return 8'b0
+        endcase
+      end
+      2'b00: begin  // 1 byte: single enable at position addr
+        case (addr[2:0])
+          3'b000: return 8'b0000_0001;
+          3'b001: return 8'b0000_0010;
+          3'b010: return 8'b0000_0100;
+          3'b011: return 8'b0000_1000;
+          3'b100: return 8'b0001_0000;
+          3'b101: return 8'b0010_0000;
+          3'b110: return 8'b0100_0000;
+          3'b111: return 8'b1000_0000;
+        endcase
+      end
+    endcase
+    return 8'b0;  // reached only from the empty default branches (or X/Z selectors)
+  endfunction
+
+  function automatic logic [3:0] be_gen_32(logic [1:0] addr, logic [1:0] size);  // 4-bit variant of be_gen
+    case (size)
+      2'b10: begin  // 4 bytes: all enables set, addr is ignored
+        return 4'b1111;
+      end
+      2'b01: begin  // 2 bytes: two consecutive enables starting at addr
+        case (addr[1:0])
+          2'b00:   return 4'b0011;
+          2'b01:   return 4'b0110;
+          2'b10:   return 4'b1100;
+          default: ;  // addr 3 would not fit in 4 bytes: falls through to the final return 4'b0
+        endcase
+      end
+      2'b00: begin  // 1 byte: single enable at position addr
+        case (addr[1:0])
+          2'b00: return 4'b0001;
+          2'b01: return 4'b0010;
+          2'b10: return 4'b0100;
+          2'b11: return 4'b1000;
+        endcase
+      end
+      default: return 4'b0;  // size 2'b11 (8 bytes) has no 4-bit mask
+    endcase
+    return 4'b0;  // reached only from the empty default branch (or X/Z selectors)
+  endfunction
+
+  // ----------------------
+  // Extract Bytes from Op
+  // ----------------------
+  function automatic logic [1:0] extract_transfer_size(fu_op op);  // map an operation to a 2-bit size code
+    case (op)
+      LD, SD, FLD, FSD,
+            AMO_LRD,   AMO_SCD,
+            AMO_SWAPD, AMO_ADDD,
+            AMO_ANDD,  AMO_ORD,
+            AMO_XORD,  AMO_MAXD,
+            AMO_MAXDU, AMO_MIND,
+            AMO_MINDU: begin
+        return 2'b11;  // be_gen() treats 2'b11 as an 8-byte access
+      end
+      LW, LWU, SW, FLW, FSW,
+            AMO_LRW,   AMO_SCW,
+            AMO_SWAPW, AMO_ADDW,
+            AMO_ANDW,  AMO_ORW,
+            AMO_XORW,  AMO_MAXW,
+            AMO_MAXWU, AMO_MINW,
+            AMO_MINWU: begin
+        return 2'b10;  // be_gen() treats 2'b10 as a 4-byte access
+      end
+      LH, LHU, SH, FLH, FSH: return 2'b01;  // 2-byte access
+      LB, LBU, SB, FLB, FSB: return 2'b00;  // 1-byte access
+      default:               return 2'b11;  // any other op gets the same code as the 8-byte group
+    endcase
+  endfunction
+endpackage
diff --git a/test/type_param/core/include/config_pkg.sv b/test/type_param/core/include/config_pkg.sv
new file mode 100644
index 0000000..90d6bfe
--- /dev/null
+++ b/test/type_param/core/include/config_pkg.sv
@@ -0,0 +1,181 @@
+// Copyright 2023 Thales DIS France SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Jean-Roch COULON - Thales
+
+package config_pkg;  // configuration struct (cva6_cfg_t), its enums, and address-region helper functions
+
+  // ---------------
+  // Global Config
+  // ---------------
+  localparam int unsigned ILEN = 32;
+  localparam int unsigned NRET = 1;
+
+  /// The NoC type is a top-level parameter, hence we need a bit more
+  /// information on what protocol those type parameters are supporting.
+  /// Currently the following values are supported:
+  typedef enum {
+    /// The "classic" AXI4 protocol.
+    NOC_TYPE_AXI4_ATOP,
+    /// In the OpenPiton setting the WT cache is connected to the L15.
+    NOC_TYPE_L15_BIG_ENDIAN,
+    NOC_TYPE_L15_LITTLE_ENDIAN
+  } noc_type_e;
+
+  /// Cache type parameter
+  typedef enum logic [1:0] {
+    WB = 0,
+    WT = 1,
+    HPDCACHE = 2
+  } cache_type_t;
+
+  localparam NrMaxRules = 16;  // number of entries in each *AddrBase / *Length array of cva6_cfg_t
+
+  typedef struct packed {
+    /// Number of commit ports, i.e., maximum number of instructions that the
+    /// core can retire per cycle. It can be beneficial to have more commit
+    /// ports than issue ports, for the scoreboard to empty out in case one
+    /// instruction stalls a little longer.
+    int unsigned                 NrCommitPorts;
+    /// AXI parameters.
+    int unsigned                 AxiAddrWidth;
+    int unsigned                 AxiDataWidth;
+    int unsigned                 AxiIdWidth;
+    int unsigned                 AxiUserWidth;
+    int unsigned                 NrLoadBufEntries;
+    bit                          FpuEn;
+    bit                          XF16;
+    bit                          XF16ALT;
+    bit                          XF8;
+    bit                          RVA;
+    bit                          RVB;
+    bit                          RVV;
+    bit                          RVC;
+    bit                          RVZCB;
+    bit                          XFVec;
+    bit                          CvxifEn;
+    bit                          ZiCondExtEn;
+    // Calculated
+    bit                          RVF;
+    bit                          RVD;
+    bit                          FpPresent;
+    bit                          NSX;
+    int unsigned                 FLen;
+    bit                          RVFVec;
+    bit                          XF16Vec;
+    bit                          XF16ALTVec;
+    bit                          XF8Vec;
+    int unsigned                 NrRgprPorts;
+    int unsigned                 NrWbPorts;
+    bit                          EnableAccelerator;
+    bit                          RVS;                    // Supervisor mode
+    bit                          RVU;                    // User mode
+    // Debug Module
+    // address to which a hart should jump when it was requested to halt
+    logic [63:0]                 HaltAddress;
+    logic [63:0]                 ExceptionAddress;
+    /// Return address stack depth, good values are around 2 to 4.
+    int unsigned                 RASDepth;
+    /// Branch target buffer entries.
+    int unsigned                 BTBEntries;
+    /// Branch history (2-bit saturation counter) size, to keep track of
+    /// branch outcomes.
+    int unsigned                 BHTEntries;
+    /// Offset of the debug module.
+    logic [63:0]                 DmBaseAddress;
+    /// Number of PMP entries.
+    int unsigned                 NrPMPEntries;
+    /// Physical Memory Protection (PMP) CSR reset values and read-only bits
+    logic [15:0][63:0]           PMPCfgRstVal;
+    logic [15:0][63:0]           PMPAddrRstVal;
+    bit [15:0]                   PMPEntryReadOnly;
+    /// Set to the bus type in use.
+    noc_type_e                   NOCType;
+    /// Physical Memory Attributes (PMAs)
+    /// Number of non idempotent rules.
+    int unsigned                 NrNonIdempotentRules;
+    /// Base which needs to match.
+    logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase;
+    /// Bit mask which bits to consider when matching the rule.
+    logic [NrMaxRules-1:0][63:0] NonIdempotentLength;
+    /// Number of regions which have execute property.
+    int unsigned                 NrExecuteRegionRules;
+    /// Base which needs to match.
+    logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase;
+    /// Bit mask which bits to consider when matching the rule.
+    logic [NrMaxRules-1:0][63:0] ExecuteRegionLength;
+    /// Number of regions which have cached property.
+    int unsigned                 NrCachedRegionRules;
+    /// Base which needs to match.
+    logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase;
+    /// Bit mask which bits to consider when matching the rule.
+    logic [NrMaxRules-1:0][63:0] CachedRegionLength;
+    /// Maximum number of outstanding stores.
+    int unsigned                 MaxOutstandingStores;
+    bit                          DebugEn;
+    bit                          NonIdemPotenceEn;
+    bit                          AxiBurstWriteEn;
+  } cva6_cfg_t;
+
+
+  /// Empty configuration to sanity check proper parameter passing. Whenever
+  /// you develop a module that resides within the core, assign this constant.
+  localparam cva6_cfg_t cva6_cfg_empty = '0;
+
+
+  /// Utility function being called to check parameters. Not all values make
+  /// sense for all parameters, here is the place to sanity check them.
+  function automatic void check_cfg(cva6_cfg_t Cfg);
+    // pragma translate_off
+`ifndef VERILATOR
+    assert (Cfg.RASDepth > 0);
+    assert (2 ** $clog2(Cfg.BTBEntries) == Cfg.BTBEntries);  // BTBEntries must be a power of two
+    assert (2 ** $clog2(Cfg.BHTEntries) == Cfg.BHTEntries);  // BHTEntries must be a power of two
+    assert (Cfg.NrNonIdempotentRules <= NrMaxRules);  // rule counts must fit the NrMaxRules-entry arrays
+    assert (Cfg.NrExecuteRegionRules <= NrMaxRules);
+    assert (Cfg.NrCachedRegionRules <= NrMaxRules);
+    assert (Cfg.NrPMPEntries <= 16);  // the PMP reset-value arrays have 16 entries
+`endif
+    // pragma translate_on
+  endfunction
+
+  function automatic logic range_check(logic [63:0] base, logic [63:0] len, logic [63:0] address);
+    // if len is a power of two, and base is properly aligned, this check could be simplified
+    // Returns 1 when base <= address < base + len; base is cast to 65 bits so base + len cannot wrap.
+    return (address >= base) && (({1'b0, address}) < (65'(base) + len));
+  endfunction : range_check
+
+
+  function automatic logic is_inside_nonidempotent_regions(cva6_cfg_t Cfg, logic [63:0] address);
+    logic [NrMaxRules-1:0] pass;  // one match bit per rule; bits beyond the rule count stay 0
+    pass = '0;
+    for (int unsigned k = 0; k < Cfg.NrNonIdempotentRules; k++) begin
+      pass[k] = range_check(Cfg.NonIdempotentAddrBase[k], Cfg.NonIdempotentLength[k], address);
+    end
+    return |pass;  // 1 when the address falls inside at least one rule
+  endfunction : is_inside_nonidempotent_regions
+
+  function automatic logic is_inside_execute_regions(cva6_cfg_t Cfg, logic [63:0] address);
+    // NOTE: with NrExecuteRegionRules == 0 the loop never runs and this returns 0 (nothing matches)
+    logic [NrMaxRules-1:0] pass;
+    pass = '0;
+    for (int unsigned k = 0; k < Cfg.NrExecuteRegionRules; k++) begin
+      pass[k] = range_check(Cfg.ExecuteRegionAddrBase[k], Cfg.ExecuteRegionLength[k], address);
+    end
+    return |pass;  // 1 when the address falls inside at least one rule
+  endfunction : is_inside_execute_regions
+
+  function automatic logic is_inside_cacheable_regions(cva6_cfg_t Cfg, logic [63:0] address);
+    automatic logic [NrMaxRules-1:0] pass;  // explicit lifetime keyword; the sibling functions omit it
+    pass = '0;
+    for (int unsigned k = 0; k < Cfg.NrCachedRegionRules; k++) begin
+      pass[k] = range_check(Cfg.CachedRegionAddrBase[k], Cfg.CachedRegionLength[k], address);
+    end
+    return |pass;  // 1 when the address falls inside at least one rule
+  endfunction : is_inside_cacheable_regions
+
+endpackage
diff --git a/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv b/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv
new file mode 100644
index 0000000..ec4db64
--- /dev/null
+++ b/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv
@@ -0,0 +1,150 @@
+// Copyright 2021 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Jean-Roch COULON - Thales
+
+
+package cva6_config_pkg;  // NOTE: package name differs from the file name (cv64a6_imafdc_sv39_config_pkg.sv)
+
+  localparam CVA6ConfigXlen = 64;
+
+  localparam CVA6ConfigFpuEn = 1;
+  localparam CVA6ConfigF16En = 0;
+  localparam CVA6ConfigF16AltEn = 0;
+  localparam CVA6ConfigF8En = 0;
+  localparam CVA6ConfigFVecEn = 0;
+
+  localparam CVA6ConfigCvxifEn = 1;
+  localparam CVA6ConfigCExtEn = 1;
+  localparam CVA6ConfigZcbExtEn = 1;
+  localparam CVA6ConfigAExtEn = 1;
+  localparam CVA6ConfigBExtEn = 1;
+  localparam CVA6ConfigVExtEn = 0;
+  localparam CVA6ConfigZiCondExtEn = 1;
+
+  localparam CVA6ConfigAxiIdWidth = 4;
+  localparam CVA6ConfigAxiAddrWidth = 64;
+  localparam CVA6ConfigAxiDataWidth = 64;
+  localparam CVA6ConfigFetchUserEn = 0;
+  localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
+  localparam CVA6ConfigDataUserEn = 0;
+  localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;  // also used below as cva6_cfg.AxiUserWidth
+
+  localparam CVA6ConfigIcacheByteSize = 16384;
+  localparam CVA6ConfigIcacheSetAssoc = 4;
+  localparam CVA6ConfigIcacheLineWidth = 128;
+  localparam CVA6ConfigDcacheByteSize = 32768;
+  localparam CVA6ConfigDcacheSetAssoc = 8;
+  localparam CVA6ConfigDcacheLineWidth = 128;
+
+  localparam CVA6ConfigDcacheIdWidth = 1;
+  localparam CVA6ConfigMemTidWidth = 2;
+
+  localparam CVA6ConfigWtDcacheWbufDepth = 8;
+
+  localparam CVA6ConfigNrCommitPorts = 2;
+  localparam CVA6ConfigNrScoreboardEntries = 8;
+
+  localparam CVA6ConfigFPGAEn = 0;
+
+  localparam CVA6ConfigNrLoadPipeRegs = 1;
+  localparam CVA6ConfigNrStorePipeRegs = 0;
+  localparam CVA6ConfigNrLoadBufEntries = 2;
+
+  localparam CVA6ConfigInstrTlbEntries = 16;
+  localparam CVA6ConfigDataTlbEntries = 16;
+
+  localparam CVA6ConfigRASDepth = 2;
+  localparam CVA6ConfigBTBEntries = 32;
+  localparam CVA6ConfigBHTEntries = 128;
+
+  localparam CVA6ConfigNrPMPEntries = 8;
+
+  localparam CVA6ConfigPerfCounterEn = 1;
+
+  localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
+
+  localparam CVA6ConfigMmuPresent = 1;
+
+  localparam CVA6ConfigRvfiTrace = 1;
+
+  localparam config_pkg::cva6_cfg_t cva6_cfg = '{  // named assignment pattern, one entry per struct field
+      NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
+      AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
+      AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
+      AxiIdWidth: unsigned'(CVA6ConfigAxiIdWidth),
+      AxiUserWidth: unsigned'(CVA6ConfigDataUserWidth),
+      NrLoadBufEntries: unsigned'(CVA6ConfigNrLoadBufEntries),
+      FpuEn: bit'(CVA6ConfigFpuEn),
+      XF16: bit'(CVA6ConfigF16En),
+      XF16ALT: bit'(CVA6ConfigF16AltEn),
+      XF8: bit'(CVA6ConfigF8En),
+      RVA: bit'(CVA6ConfigAExtEn),
+      RVB: bit'(CVA6ConfigBExtEn),
+      RVV: bit'(CVA6ConfigVExtEn),
+      RVC: bit'(CVA6ConfigCExtEn),
+      RVZCB: bit'(CVA6ConfigZcbExtEn),
+      XFVec: bit'(CVA6ConfigFVecEn),
+      CvxifEn: bit'(CVA6ConfigCvxifEn),
+      ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
+      // Extended: the fields listed under "Calculated" in config_pkg::cva6_cfg_t, all set to 0 here
+      RVF:
+      bit'(
+      0
+      ),
+      RVD: bit'(0),
+      FpPresent: bit'(0),
+      NSX: bit'(0),
+      FLen: unsigned'(0),
+      RVFVec: bit'(0),
+      XF16Vec: bit'(0),
+      XF16ALTVec: bit'(0),
+      XF8Vec: bit'(0),
+      NrRgprPorts: unsigned'(0),
+      NrWbPorts: unsigned'(0),
+      EnableAccelerator: bit'(0),
+      RVS: bit'(1),
+      RVU: bit'(1),
+      HaltAddress: 64'h800,
+      ExceptionAddress: 64'h808,
+      RASDepth: unsigned'(CVA6ConfigRASDepth),
+      BTBEntries: unsigned'(CVA6ConfigBTBEntries),
+      BHTEntries: unsigned'(CVA6ConfigBHTEntries),
+      DmBaseAddress: 64'h0,
+      NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
+      PMPCfgRstVal: {16{64'h0}},
+      PMPAddrRstVal: {16{64'h0}},
+      PMPEntryReadOnly: 16'd0,
+      NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
+      // non-idempotent regions: two rules, each with base 0 and length 0
+      NrNonIdempotentRules:
+      unsigned'(
+      2
+      ),
+      NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),  // 1024'() zero-pads to the 16 x 64-bit field
+      NonIdempotentLength: 1024'({64'b0, 64'b0}),
+      NrExecuteRegionRules: unsigned'(3),
+      //                      DRAM,          Boot ROM,   Debug Module  (rightmost element is entry [0])
+      ExecuteRegionAddrBase:
+      1024'(
+      {64'h8000_0000, 64'h1_0000, 64'h0}
+      ),
+      ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
+      // cached region
+      NrCachedRegionRules:
+      unsigned'(
+      1
+      ),
+      CachedRegionAddrBase: 1024'({64'h8000_0000}),  // same base/length as the DRAM execute region
+      CachedRegionLength: 1024'({64'h40000000}),
+      MaxOutstandingStores: unsigned'(7),
+      DebugEn: bit'(1),
+      NonIdemPotenceEn: bit'(0),
+      AxiBurstWriteEn: bit'(0)
+  };
+
+endpackage
diff --git a/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv b/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv
new file mode 100644
index 0000000..1abe537
--- /dev/null
+++ b/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv
@@ -0,0 +1,123 @@
+// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
+//                Alternatives (CEA)
+//
+// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Authors: Cesar Fuguet
+// Date: February, 2023
+// Description:
+//   Default package with parameters for the HPDcache in a CVA6 platform.
+//   Users can copy this file, rename it, and adapt the configuration values as
+//   needed.
+
+package hpdcache_params_pkg;  // PARAM_* constants derived from the cva6_config_pkg values
+  //  Imports from the CVA6 configuration package
+  //  {{{
+  import cva6_config_pkg::CVA6ConfigXlen;
+  import cva6_config_pkg::CVA6ConfigDcacheByteSize;
+  import cva6_config_pkg::CVA6ConfigDcacheSetAssoc;
+  import cva6_config_pkg::CVA6ConfigDcacheLineWidth;
+  import cva6_config_pkg::CVA6ConfigDcacheIdWidth;
+  import cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth;
+  import cva6_config_pkg::CVA6ConfigNrLoadBufEntries;
+  //  }}}
+
+  //  Definition of constants used only in this file
+  //  {{{
+  localparam int unsigned __BYTES_PER_WAY = CVA6ConfigDcacheByteSize / CVA6ConfigDcacheSetAssoc;
+
+  localparam int unsigned __BYTES_PER_CACHELINE = CVA6ConfigDcacheLineWidth / 8;
+  //  }}}
+
+  //  Definition of global constants for the HPDcache data and directory
+  //  {{{
+  //  HPDcache physical address width (in bits); riscv::PLEN is referenced by scope, not imported
+  localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN;
+
+  //  HPDcache number of sets
+  localparam int unsigned PARAM_SETS = __BYTES_PER_WAY / __BYTES_PER_CACHELINE;
+
+  //  HPDcache number of ways
+  localparam int unsigned PARAM_WAYS = CVA6ConfigDcacheSetAssoc;
+
+  //  HPDcache word width (bits)
+  localparam int unsigned PARAM_WORD_WIDTH = CVA6ConfigXlen;
+
+  //  HPDcache cache-line width (in words of PARAM_WORD_WIDTH bits)
+  localparam int unsigned PARAM_CL_WORDS = CVA6ConfigDcacheLineWidth / PARAM_WORD_WIDTH;
+
+  //  HPDcache number of words in the request data channels (request and response)
+  localparam int unsigned PARAM_REQ_WORDS = 1;
+
+  //  HPDcache request transaction ID width (bits)
+  localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = CVA6ConfigDcacheIdWidth;
+
+  //  HPDcache request source ID width (bits)
+  localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = 3;
+  //  }}}
+
+  //  Definition of constants and types for HPDcache data memory
+  //  {{{
+  localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = 128 / PARAM_WORD_WIDTH;  // 128 is hard-coded here
+  localparam int unsigned PARAM_DATA_SETS_PER_RAM = PARAM_SETS;
+
+  //  Whether the HPDcache DATA RAM macros implement:
+  //  -  Write byte enable (1'b1)
+  //  -  Write bit mask (1'b0)
+  localparam bit PARAM_DATA_RAM_WBYTEENABLE = 1'b1;
+
+  //  Define the number of memory contiguous words that can be accessed
+  //  simultaneously from the cache.
+  //  -  This limits the maximum width for the data channel from requesters
+  //  -  This impacts the refill latency (more ACCESS_WORDS -> less REFILL LATENCY)
+  localparam int unsigned PARAM_ACCESS_WORDS = PARAM_CL_WORDS / 2;
+  //  }}}
+
+  //  Definition of constants and types for the Miss Status Holding Register (MSHR)
+  //  {{{
+  //  HPDcache MSHR number of sets
+  localparam int unsigned PARAM_MSHR_SETS = 2;
+
+  //  HPDcache MSHR number of ways (4 when there are more than 4 load buffer entries, otherwise 2)
+  localparam int unsigned PARAM_MSHR_WAYS = (CVA6ConfigNrLoadBufEntries > 4) ? 4 : 2;
+
+  //  HPDcache MSHR number of ways in the same SRAM word
+  localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = (PARAM_MSHR_WAYS > 1) ? 2 : 1;
+
+  //  HPDcache MSHR number of sets in the same SRAM
+  localparam int unsigned PARAM_MSHR_SETS_PER_RAM = PARAM_MSHR_SETS;
+
+  //  Whether the HPDcache MSHR RAM implements:
+  //  -  Write byte enable (1'b1)
+  //  -  Write bit mask (1'b0)
+  localparam bit PARAM_MSHR_RAM_WBYTEENABLE = 1'b1;
+
+  //  Whether the HPDcache MSHR uses FFs (1'b1) or SRAM (1'b0): FFs when sets * ways <= 16
+  localparam bit PARAM_MSHR_USE_REGBANK = (PARAM_MSHR_SETS * PARAM_MSHR_WAYS) <= 16;
+  localparam bit PARAM_REFILL_CORE_RSP_FEEDTHROUGH = 1'b1;
+  //  }}}
+
+  //  Definition of constants and types for the Write Buffer (WBUF)
+  //  {{{
+  //  HPDcache Write-Buffer number of entries in the directory
+  localparam int unsigned PARAM_WBUF_DIR_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
+
+  //  HPDcache Write-Buffer number of entries in the data buffer
+  localparam int unsigned PARAM_WBUF_DATA_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
+
+  //  HPDcache Write-Buffer number of words per entry
+  localparam int unsigned PARAM_WBUF_WORDS = PARAM_REQ_WORDS;
+
+  //  HPDcache Write-Buffer threshold counter width (in bits)
+  localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = 3;
+  localparam bit PARAM_WBUF_SEND_FEEDTHROUGH = 1'b0;
+  //  }}}
+
+  //  Definition of constants and types for the Replay Table (RTAB)
+  //  {{{
+  localparam int PARAM_RTAB_ENTRIES = 4;  // plain int, unlike the int unsigned parameters above
+  //  }}}
+endpackage
diff --git a/test/type_param/core/include/cvxif_pkg.sv b/test/type_param/core/include/cvxif_pkg.sv
new file mode 100644
index 0000000..39e77b4
--- /dev/null
+++ b/test/type_param/core/include/cvxif_pkg.sv
@@ -0,0 +1,110 @@
+// Copyright 2021 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Guillaume CHAUVON (guillaume.chauvon@thalesgroup.com)
+
+// Package for the CoreV-X-Interface for the CVA6
+
+package cvxif_pkg;  // widths and packed request/response structs for the CoreV-X-Interface
+
+  localparam X_DATAWIDTH = riscv::XLEN;
+  localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS;  //2 or 3
+  localparam X_ID_WIDTH = ariane_pkg::TRANS_ID_BITS;
+  localparam X_MEM_WIDTH = 64;  // fixed at 64, independent of riscv::XLEN
+  localparam X_RFR_WIDTH = riscv::XLEN;
+  localparam X_RFW_WIDTH = riscv::XLEN;
+
+  typedef struct packed {  // compressed request payload
+    logic [15:0]           instr;  // 16-bit instruction word
+    logic [1:0]            mode;
+    logic [X_ID_WIDTH-1:0] id;
+  } x_compressed_req_t;
+
+  typedef struct packed {  // compressed response payload
+    logic [31:0] instr;  // 32-bit instruction word
+    logic        accept;
+  } x_compressed_resp_t;
+
+  typedef struct packed {  // issue request payload
+    logic [31:0]                          instr;
+    logic [1:0]                           mode;
+    logic [X_ID_WIDTH-1:0]                id;
+    logic [X_NUM_RS-1:0][X_RFR_WIDTH-1:0] rs;  // X_NUM_RS values of X_RFR_WIDTH bits each
+    logic [X_NUM_RS-1:0]                  rs_valid;  // one bit per entry of rs
+  } x_issue_req_t;
+
+  typedef struct packed {  // issue response payload: six single-bit flags
+    logic accept;
+    logic writeback;
+    logic dualwrite;
+    logic dualread;
+    logic loadstore;
+    logic exc;
+  } x_issue_resp_t;
+
+  typedef struct packed {  // commit payload
+    logic [X_ID_WIDTH-1:0] id;
+    logic                  x_commit_kill;
+  } x_commit_t;
+
+  typedef struct packed {  // memory request payload
+    logic [X_ID_WIDTH-1:0]  id;
+    logic [31:0]            addr;  // 32 bits wide regardless of riscv::XLEN
+    logic [1:0]             mode;
+    logic                   we;
+    logic [1:0]             size;
+    logic [X_MEM_WIDTH-1:0] wdata;
+    logic                   last;
+    logic                   spec;
+  } x_mem_req_t;
+
+  typedef struct packed {  // memory response payload
+    logic       exc;
+    logic [5:0] exccode;
+  } x_mem_resp_t;
+
+  typedef struct packed {  // memory result payload
+    logic [X_ID_WIDTH-1:0]  id;
+    logic [X_MEM_WIDTH-1:0] rdata;
+    logic                   err;
+  } x_mem_result_t;
+
+  typedef struct packed {  // result payload
+    logic [X_ID_WIDTH-1:0]  id;
+    logic [X_RFW_WIDTH-1:0] data;
+    logic [4:0]             rd;
+    logic                   we;
+    logic                   exc;
+    logic [5:0]             exccode;
+  } x_result_t;
+
+  typedef struct packed {  // bundles the request-side payloads above with their valid/ready bits
+    logic              x_compressed_valid;
+    x_compressed_req_t x_compressed_req;
+    logic              x_issue_valid;
+    x_issue_req_t      x_issue_req;
+    logic              x_commit_valid;
+    x_commit_t         x_commit;
+    logic              x_mem_ready;
+    x_mem_resp_t       x_mem_resp;
+    logic              x_mem_result_valid;
+    x_mem_result_t     x_mem_result;
+    logic              x_result_ready;
+  } cvxif_req_t;
+
+  typedef struct packed {  // bundles the response-side payloads above with their valid/ready bits
+    logic               x_compressed_ready;
+    x_compressed_resp_t x_compressed_resp;
+    logic               x_issue_ready;
+    x_issue_resp_t      x_issue_resp;
+    logic               x_mem_valid;
+    x_mem_req_t         x_mem_req;
+    logic               x_result_valid;
+    x_result_t          x_result;
+  } cvxif_resp_t;
+
+endpackage
diff --git a/test/type_param/core/include/instr_tracer_pkg.sv b/test/type_param/core/include/instr_tracer_pkg.sv
new file mode 100644
index 0000000..bd36f09
--- /dev/null
+++ b/test/type_param/core/include/instr_tracer_pkg.sv
@@ -0,0 +1,202 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Defines
+
+`ifndef VERILATOR
+package instr_tracer_pkg;
+
+  parameter INSTR_NOP = 32'h00_00_00_13;
+
+  parameter INSTR_LUI = {25'b?, riscv::OpcodeLui};
+  parameter INSTR_AUIPC = {25'b?, riscv::OpcodeAuipc};
+  parameter INSTR_JAL = {25'b?, riscv::OpcodeJal};
+  parameter INSTR_JALR = {17'b?, 3'b000, 5'b?, riscv::OpcodeJalr};
+  // BRANCH
+  parameter INSTR_BEQZ = {7'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BEQ = {7'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BNEZ = {7'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BNE = {7'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BLTZ = {7'b?, 5'b0, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BLT = {7'b?, 5'b?, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BGEZ = {7'b?, 5'b0, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BGE = {7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BLTU = {7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BGEU = {7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch};
+
+  // OP-IMM
+  parameter INSTR_LI = {12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_ADDI = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SLTI = {17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SLTIU = {17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_XORI = {17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_ORI = {17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_ANDI = {17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SLLI = {6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SRLI = {6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SRAI = {6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm};
+
+  // OP-IMM-32
+  parameter INSTR_ADDIW = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32};
+  parameter INSTR_SLLIW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32};
+  parameter INSTR_SRLIW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32};
+  parameter INSTR_SRAIW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32};
+
+  // OP
+  parameter INSTR_ADD = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SUB = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SLL = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SLT = {7'b0000000, 10'b?, 3'b010, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SLTU = {7'b0000000, 10'b?, 3'b011, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_XOR = {7'b0000000, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SRL = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SRA = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_OR = {7'b0000000, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_AND = {7'b0000000, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_MUL = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp};
+
+  // OP32
+  parameter INSTR_ADDW = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_SUBW = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_SLLW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_SRLW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_SRAW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_MULW = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32};
+
+  // MISC-MEM
+  parameter INSTR_FENCE = {4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem};
+  parameter INSTR_FENCEI = {17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem};
+
+  // SYSTEM
+  parameter INSTR_CSRW = {12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRW = {12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRR = {12'b?, 5'b0, 3'b010, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRRS = {12'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRS = {12'b?, 5'b?, 3'b010, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRC = {12'b?, 5'b?, 3'b011, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRC = {12'b?, 5'b?, 3'b011, 5'b0, riscv::OpcodeSystem};
+
+  parameter INSTR_CSRWI = {17'b?, 3'b101, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRWI = {17'b?, 3'b101, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRSI = {17'b?, 3'b110, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRSI = {17'b?, 3'b110, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRCI = {17'b?, 3'b111, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRCI = {17'b?, 3'b111, 5'b?, riscv::OpcodeSystem};
+
+  parameter INSTR_ECALL = {12'b000000000000, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_EBREAK = {12'b000000000001, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_MRET = {12'b001100000010, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_SRET = {12'b000100000010, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_DRET = {12'b011110110010, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_WFI = {12'b000100000101, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_SFENCE = {12'b0001001?????, 13'b?, riscv::OpcodeSystem};
+
+  // RV32M
+  parameter INSTR_PMUL = {7'b0000001, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_DIV = {7'b0000001, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_DIVU = {7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_REM = {7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_REMU = {7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp};
+
+  // RVFD
+  parameter INSTR_FMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd};
+  parameter INSTR_FMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub};
+  parameter INSTR_FNSMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub};
+  parameter INSTR_FNMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmadd};
+
+  parameter INSTR_FADD = {5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSUB = {5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMUL = {5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FDIV = {5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSQRT = {5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSGNJ = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSGNJN = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSGNJX = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMIN = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMAX = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FLE = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FLT = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FEQ = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
+
+  parameter INSTR_FCVT_F2F = {5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMV_F2X = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FCLASS = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMV_X2F = {5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FCVT_F2I = {5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FCVT_I2F = {5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+
+  // A
+  parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo};
+
+  // Load/Stores, followed by the compressed (C_*) instruction patterns
+  parameter [31:0] LB = 32'b?????????????????000?????0000011;
+  parameter [31:0] LH = 32'b?????????????????001?????0000011;
+  parameter [31:0] LW = 32'b?????????????????010?????0000011;
+  parameter [31:0] LD = 32'b?????????????????011?????0000011;
+  parameter [31:0] LBU = 32'b?????????????????100?????0000011;
+  parameter [31:0] LHU = 32'b?????????????????101?????0000011;
+  parameter [31:0] LWU = 32'b?????????????????110?????0000011;
+  parameter [31:0] FLW = 32'b?????????????????010?????0000111;
+  parameter [31:0] FLD = 32'b?????????????????011?????0000111;
+  parameter [31:0] FLQ = 32'b?????????????????100?????0000111;
+  parameter [31:0] SB = 32'b?????????????????000?????0100011;
+  parameter [31:0] SH = 32'b?????????????????001?????0100011;
+  parameter [31:0] SW = 32'b?????????????????010?????0100011;
+  parameter [31:0] SD = 32'b?????????????????011?????0100011;
+  parameter [31:0] FSW = 32'b?????????????????010?????0100111;
+  parameter [31:0] FSD = 32'b?????????????????011?????0100111;
+  parameter [31:0] FSQ = 32'b?????????????????100?????0100111;
+  parameter [31:0] C_ADDI4SPN = 32'b????????????????000???????????00;
+  parameter [31:0] C_FLD = 32'b????????????????001???????????00;
+  parameter [31:0] C_LW = 32'b????????????????010???????????00;
+  parameter [31:0] C_FLW = 32'b????????????????011???????????00;
+  parameter [31:0] C_FSD = 32'b????????????????101???????????00;
+  parameter [31:0] C_SW = 32'b????????????????110???????????00;
+  parameter [31:0] C_FSW = 32'b????????????????111???????????00;
+  parameter [31:0] C_ADDI = 32'b????????????????000???????????01;
+  parameter [31:0] C_JAL = 32'b????????????????001???????????01;
+  parameter [31:0] C_LI = 32'b????????????????010???????????01;
+  parameter [31:0] C_LUI = 32'b????????????????011???????????01;
+  parameter [31:0] C_SRLI = 32'b????????????????100?00????????01;
+  parameter [31:0] C_SRAI = 32'b????????????????100?01????????01;
+  parameter [31:0] C_ANDI = 32'b????????????????100?10????????01;
+  parameter [31:0] C_SUB = 32'b????????????????100011???00???01;
+  parameter [31:0] C_XOR = 32'b????????????????100011???01???01;
+  parameter [31:0] C_OR = 32'b????????????????100011???10???01;
+  parameter [31:0] C_AND = 32'b????????????????100011???11???01;
+  parameter [31:0] C_SUBW = 32'b????????????????100111???00???01;
+  parameter [31:0] C_ADDW = 32'b????????????????100111???01???01;
+  parameter [31:0] C_J = 32'b????????????????101???????????01;
+  parameter [31:0] C_BEQZ = 32'b????????????????110???????????01;
+  parameter [31:0] C_BNEZ = 32'b????????????????111???????????01;
+  parameter [31:0] C_SLLI = 32'b????????????????000???????????10;
+  parameter [31:0] C_FLDSP = 32'b????????????????001???????????10;
+  parameter [31:0] C_LWSP = 32'b????????????????010???????????10;
+  parameter [31:0] C_FLWSP = 32'b????????????????011???????????10;
+  parameter [31:0] C_MV = 32'b????????????????1000??????????10;
+  parameter [31:0] C_ADD = 32'b????????????????1001??????????10;
+  parameter [31:0] C_FSDSP = 32'b????????????????101???????????10;
+  parameter [31:0] C_SWSP = 32'b????????????????110???????????10;
+  parameter [31:0] C_FSWSP = 32'b????????????????111???????????10;
+  parameter [31:0] C_NOP = 32'b????????????????0000000000000001;
+  parameter [31:0] C_ADDI16SP = 32'b????????????????011?00010?????01;
+  parameter [31:0] C_JR = 32'b????????????????1000?????0000010;
+  parameter [31:0] C_JALR = 32'b????????????????1001?????0000010;
+  parameter [31:0] C_EBREAK = 32'b????????????????1001000000000010;
+  parameter [31:0] C_LD = 32'b????????????????011???????????00;
+  parameter [31:0] C_SD = 32'b????????????????111???????????00;
+  parameter [31:0] C_ADDIW = 32'b????????????????001???????????01;
+  parameter [31:0] C_LDSP = 32'b????????????????011???????????10;
+  parameter [31:0] C_SDSP = 32'b????????????????111???????????10;
+
+endpackage
+`endif
diff --git a/test/type_param/core/include/riscv_pkg.sv b/test/type_param/core/include/riscv_pkg.sv
new file mode 100644
index 0000000..18ae2cf
--- /dev/null
+++ b/test/type_param/core/include/riscv_pkg.sv
@@ -0,0 +1,851 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:   riscv_pkg.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   30.6.2017
+ *
+ * Description: Common RISC-V definitions.
+ */
+
+package riscv;
+
+  // ----------------------
+  // Import cva6 config from cva6_config_pkg
+  // ----------------------
+  localparam XLEN = cva6_config_pkg::CVA6ConfigXlen;
+  localparam FPU_EN = cva6_config_pkg::CVA6ConfigFpuEn;
+
+  // ----------------------
+  // Data and Address length
+  // ----------------------
+  typedef enum logic [3:0] {
+    ModeOff  = 0,
+    ModeSv32 = 1,
+    ModeSv39 = 8,
+    ModeSv48 = 9,
+    ModeSv57 = 10,
+    ModeSv64 = 11
+  } vm_mode_t;
+
+  // Warning: When using STD_CACHE, configuration must be PLEN=56 and VLEN=64
+  // Warning: VLEN must be greater than or equal to PLEN
+  localparam VLEN = (XLEN == 32) ? 32 : 64;  // virtual address length
+  localparam PLEN = (XLEN == 32) ? 34 : 56;  // physical address length
+
+  localparam IS_XLEN32 = (XLEN == 32) ? 1'b1 : 1'b0;
+  localparam IS_XLEN64 = (XLEN == 32) ? 1'b0 : 1'b1;
+  localparam ModeW = (XLEN == 32) ? 1 : 4;
+  localparam ASIDW = (XLEN == 32) ? 9 : 16;
+  localparam PPNW = (XLEN == 32) ? 22 : 44;
+  localparam vm_mode_t MODE_SV = (XLEN == 32) ? ModeSv32 : ModeSv39;
+  localparam SV = (MODE_SV == ModeSv32) ? 32 : 39;
+  localparam VPN2 = (VLEN - 31 < 8) ? VLEN - 31 : 8;
+  localparam XLEN_ALIGN_BYTES = $clog2(XLEN / 8);
+
+  typedef logic [XLEN-1:0] xlen_t;
+
+  // --------------------
+  // Privilege Spec
+  // --------------------
+  typedef enum logic [1:0] {
+    PRIV_LVL_M = 2'b11,
+    PRIV_LVL_S = 2'b01,
+    PRIV_LVL_U = 2'b00
+  } priv_lvl_t;
+
+  // type which holds xlen
+  typedef enum logic [1:0] {
+    XLEN_32  = 2'b01,
+    XLEN_64  = 2'b10,
+    XLEN_128 = 2'b11
+  } xlen_e;
+
+  typedef enum logic [1:0] {
+    Off     = 2'b00,
+    Initial = 2'b01,
+    Clean   = 2'b10,
+    Dirty   = 2'b11
+  } xs_t;
+
+  typedef struct packed {
+    logic sd;  // [63] signal dirty state - read-only
+    logic [62:34] wpri6;  // [62:34] writes preserved reads ignored
+    xlen_e uxl;  // [33:32] variable user mode xlen - hardwired to zero
+    logic [11:0] wpri5;  // [31:20] writes preserved reads ignored (12 bits so mxr lands on bit 19)
+    logic mxr;  // [19] make executable readable
+    logic sum;  // [18] permit supervisor user memory access
+    logic wpri4;  // [17] writes preserved reads ignored
+    xs_t xs;  // [16:15] extension register - hardwired to zero
+    xs_t fs;  // [14:13] floating point extension register
+    logic [1:0] wpri3;  // [12:11] writes preserved reads ignored
+    xs_t vs;  // [10:9] vector extension register
+    logic spp;  // [8] holds the previous privilege mode up to supervisor
+    logic wpri2;  // [7] writes preserved reads ignored
+    logic         ube;    // [6] UBE controls whether explicit load and store memory accesses made from U-mode are little-endian (UBE=0) or big-endian (UBE=1)
+    logic spie;  // [5] supervisor interrupts enable bit active prior to trap
+    logic [2:0] wpri1;  // [4:2] writes preserved reads ignored (3 bits: fills the gap between spie and sie)
+    logic sie;  // [1] supervisor interrupts enable
+    logic wpri0;  // [0] writes preserved reads ignored
+  } sstatus_rv_t;  // 64 bits; fields shared with mstatus_rv_t occupy the same bit positions
+
+  typedef struct packed {
+    logic sd;  // signal dirty state - read-only
+    logic [62:36] wpri4;  // writes preserved reads ignored
+    xlen_e sxl;  // variable supervisor mode xlen - hardwired to zero
+    xlen_e uxl;  // variable user mode xlen - hardwired to zero
+    logic [8:0] wpri3;  // writes preserved reads ignored
+    logic tsr;  // trap sret
+    logic tw;  // time wait
+    logic tvm;  // trap virtual memory
+    logic mxr;  // make executable readable
+    logic sum;  // permit supervisor user memory access
+    logic mprv;  // modify privilege - privilege level for ld/st
+    xs_t xs;  // extension register - hardwired to zero
+    xs_t fs;  // floating point extension register
+    priv_lvl_t mpp;  // holds the previous privilege mode up to machine
+    xs_t vs;  // vector extension register
+    logic spp;  // holds the previous privilege mode up to supervisor
+    logic mpie;  // machine interrupts enable bit active prior to trap
+    logic         ube;    // UBE controls whether explicit load and store memory accesses made from U-mode are little-endian (UBE=0) or big-endian (UBE=1)
+    logic spie;  // supervisor interrupts enable bit active prior to trap
+    logic wpri2;  // writes preserved reads ignored
+    logic mie;  // machine interrupts enable
+    logic wpri1;  // writes preserved reads ignored
+    logic sie;  // supervisor interrupts enable
+    logic wpri0;  // writes preserved reads ignored
+  } mstatus_rv_t;
+
+  typedef struct packed {
+    logic [ModeW-1:0] mode;  // ModeW bits: 1 when XLEN == 32, otherwise 4
+    logic [ASIDW-1:0] asid;  // ASIDW bits: 9 when XLEN == 32, otherwise 16
+    logic [PPNW-1:0]  ppn;  // PPNW bits: 22 when XLEN == 32, otherwise 44
+  } satp_t;  // packed MSB-first: 32 bits when XLEN == 32, otherwise 64
+
+  // --------------------
+  // Instruction Types
+  // --------------------
+  typedef struct packed {
+    logic [31:25] funct7;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7]  rd;
+    logic [6:0]   opcode;
+  } rtype_t;
+
+  typedef struct packed {
+    logic [31:27] rs3;
+    logic [26:25] funct2;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7]  rd;
+    logic [6:0]   opcode;
+  } r4type_t;
+
+  typedef struct packed {
+    logic [31:27] funct5;
+    logic [26:25] fmt;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] rm;
+    logic [11:7]  rd;
+    logic [6:0]   opcode;
+  } rftype_t;  // floating-point
+
+  typedef struct packed {
+    logic [31:30] funct2;
+    logic [29:25] vecfltop;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:14] repl;
+    logic [13:12] vfmt;
+    logic [11:7]  rd;
+    logic [6:0]   opcode;
+  } rvftype_t;  // vectorial floating-point
+
+  typedef struct packed {
+    logic [31:20] imm;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7]  rd;
+    logic [6:0]   opcode;
+  } itype_t;
+
+  typedef struct packed {
+    logic [31:25] imm;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7]  imm0;
+    logic [6:0]   opcode;
+  } stype_t;
+
+  typedef struct packed {
+    logic [31:12] imm;
+    logic [11:7]  rd;
+    logic [6:0]   opcode;
+  } utype_t;
+
+  // atomic instructions
+  typedef struct packed {
+    logic [31:27] funct5;
+    logic         aq;
+    logic         rl;
+    logic [24:20] rs2;
+    logic [19:15] rs1;
+    logic [14:12] funct3;
+    logic [11:7]  rd;
+    logic [6:0]   opcode;
+  } atype_t;
+
+  typedef union packed {
+    logic [31:0] instr;
+    rtype_t      rtype;
+    r4type_t     r4type;
+    rftype_t     rftype;
+    rvftype_t    rvftype;
+    itype_t      itype;
+    stype_t      stype;
+    utype_t      utype;
+    atype_t      atype;
+  } instruction_t;
+
+  // --------------------
+  // Opcodes
+  // --------------------
+  // RV32/64G listings:
+  // Quadrant 0
+  localparam OpcodeLoad = 7'b00_000_11;
+  localparam OpcodeLoadFp = 7'b00_001_11;
+  localparam OpcodeCustom0 = 7'b00_010_11;
+  localparam OpcodeMiscMem = 7'b00_011_11;
+  localparam OpcodeOpImm = 7'b00_100_11;
+  localparam OpcodeAuipc = 7'b00_101_11;
+  localparam OpcodeOpImm32 = 7'b00_110_11;
+  // Quadrant 1
+  localparam OpcodeStore = 7'b01_000_11;
+  localparam OpcodeStoreFp = 7'b01_001_11;
+  localparam OpcodeCustom1 = 7'b01_010_11;
+  localparam OpcodeAmo = 7'b01_011_11;
+  localparam OpcodeOp = 7'b01_100_11;
+  localparam OpcodeLui = 7'b01_101_11;
+  localparam OpcodeOp32 = 7'b01_110_11;
+  // Quadrant 2
+  localparam OpcodeMadd = 7'b10_000_11;
+  localparam OpcodeMsub = 7'b10_001_11;
+  localparam OpcodeNmsub = 7'b10_010_11;
+  localparam OpcodeNmadd = 7'b10_011_11;
+  localparam OpcodeOpFp = 7'b10_100_11;
+  localparam OpcodeVec = 7'b10_101_11;
+  localparam OpcodeCustom2 = 7'b10_110_11;
+  // Quadrant 3
+  localparam OpcodeBranch = 7'b11_000_11;
+  localparam OpcodeJalr = 7'b11_001_11;
+  localparam OpcodeRsrvd2 = 7'b11_010_11;
+  localparam OpcodeJal = 7'b11_011_11;
+  localparam OpcodeSystem = 7'b11_100_11;
+  localparam OpcodeRsrvd3 = 7'b11_101_11;
+  localparam OpcodeCustom3 = 7'b11_110_11;
+
+  // RV64C/RV32C listings:
+  // Quadrant 0
+  localparam OpcodeC0 = 2'b00;
+  localparam OpcodeC0Addi4spn = 3'b000;
+  localparam OpcodeC0Fld = 3'b001;
+  localparam OpcodeC0Lw = 3'b010;
+  localparam OpcodeC0Ld = 3'b011;
+  localparam OpcodeC0Zcb = 3'b100;
+  localparam OpcodeC0Fsd = 3'b101;
+  localparam OpcodeC0Sw = 3'b110;
+  localparam OpcodeC0Sd = 3'b111;
+  // Quadrant 1
+  localparam OpcodeC1 = 2'b01;
+  localparam OpcodeC1Addi = 3'b000;
+  localparam OpcodeC1Addiw = 3'b001;  //for RV64I only
+  localparam OpcodeC1Jal = 3'b001;  //for RV32I only
+  localparam OpcodeC1Li = 3'b010;
+  localparam OpcodeC1LuiAddi16sp = 3'b011;
+  localparam OpcodeC1MiscAlu = 3'b100;
+  localparam OpcodeC1J = 3'b101;
+  localparam OpcodeC1Beqz = 3'b110;
+  localparam OpcodeC1Bnez = 3'b111;
+  // Quadrant 2
+  localparam OpcodeC2 = 2'b10;
+  localparam OpcodeC2Slli = 3'b000;
+  localparam OpcodeC2Fldsp = 3'b001;
+  localparam OpcodeC2Lwsp = 3'b010;
+  localparam OpcodeC2Ldsp = 3'b011;
+  localparam OpcodeC2JalrMvAdd = 3'b100;
+  localparam OpcodeC2Fsdsp = 3'b101;
+  localparam OpcodeC2Swsp = 3'b110;
+  localparam OpcodeC2Sdsp = 3'b111;
+
+  // ----------------------
+  // Virtual Memory
+  // ----------------------
+  // memory management, pte for sv39
+  typedef struct packed {
+    logic [9:0] reserved;
+    logic [44-1:0] ppn;  // PPN length for sv39 (44 bits, same value as PPNW when XLEN != 32)
+    logic [1:0] rsw;
+    logic d;
+    logic a;
+    logic g;
+    logic u;
+    logic x;
+    logic w;
+    logic r;
+    logic v;
+  } pte_t;  // 64 bits in total
+
+  // memory management, pte for sv32
+  typedef struct packed {
+    logic [22-1:0] ppn;  // PPN length for sv32 (22 bits, same value as PPNW when XLEN == 32)
+    logic [1:0] rsw;
+    logic d;
+    logic a;
+    logic g;
+    logic u;
+    logic x;
+    logic w;
+    logic r;
+    logic v;
+  } pte_sv32_t;  // 32 bits in total
+
+  // ----------------------
+  // Exception Cause Codes
+  // ----------------------
+  localparam logic [XLEN-1:0] INSTR_ADDR_MISALIGNED = 0;
+  localparam logic [XLEN-1:0] INSTR_ACCESS_FAULT    = 1;  // Illegal access as governed by PMPs and PMAs
+  localparam logic [XLEN-1:0] ILLEGAL_INSTR = 2;
+  localparam logic [XLEN-1:0] BREAKPOINT = 3;
+  localparam logic [XLEN-1:0] LD_ADDR_MISALIGNED = 4;
+  localparam logic [XLEN-1:0] LD_ACCESS_FAULT = 5;  // Illegal access as governed by PMPs and PMAs
+  localparam logic [XLEN-1:0] ST_ADDR_MISALIGNED = 6;
+  localparam logic [XLEN-1:0] ST_ACCESS_FAULT = 7;  // Illegal access as governed by PMPs and PMAs
+  localparam logic [XLEN-1:0] ENV_CALL_UMODE = 8;  // environment call from user mode
+  localparam logic [XLEN-1:0] ENV_CALL_SMODE = 9;  // environment call from supervisor mode
+  localparam logic [XLEN-1:0] ENV_CALL_MMODE = 11;  // environment call from machine mode
+  localparam logic [XLEN-1:0] INSTR_PAGE_FAULT = 12;  // Instruction page fault
+  localparam logic [XLEN-1:0] LOAD_PAGE_FAULT = 13;  // Load page fault
+  localparam logic [XLEN-1:0] STORE_PAGE_FAULT = 15;  // Store page fault
+  localparam logic [XLEN-1:0] DEBUG_REQUEST = 24;  // Debug request
+
+  localparam int unsigned IRQ_S_SOFT = 1;
+  localparam int unsigned IRQ_M_SOFT = 3;
+  localparam int unsigned IRQ_S_TIMER = 5;
+  localparam int unsigned IRQ_M_TIMER = 7;
+  localparam int unsigned IRQ_S_EXT = 9;
+  localparam int unsigned IRQ_M_EXT = 11;
+
+  localparam logic [XLEN-1:0] MIP_SSIP = 1 << IRQ_S_SOFT;
+  localparam logic [XLEN-1:0] MIP_MSIP = 1 << IRQ_M_SOFT;
+  localparam logic [XLEN-1:0] MIP_STIP = 1 << IRQ_S_TIMER;
+  localparam logic [XLEN-1:0] MIP_MTIP = 1 << IRQ_M_TIMER;
+  localparam logic [XLEN-1:0] MIP_SEIP = 1 << IRQ_S_EXT;
+  localparam logic [XLEN-1:0] MIP_MEIP = 1 << IRQ_M_EXT;
+
+  localparam logic [XLEN-1:0] S_SW_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_SOFT);
+  localparam logic [XLEN-1:0] M_SW_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_SOFT);
+  localparam logic [XLEN-1:0] S_TIMER_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_TIMER);
+  localparam logic [XLEN-1:0] M_TIMER_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_TIMER);
+  localparam logic [XLEN-1:0] S_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_EXT);
+  localparam logic [XLEN-1:0] M_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_EXT);
+
+  // -----
+  // CSRs
+  // -----
+  typedef enum logic [11:0] {
+    // Floating-Point CSRs
+    CSR_FFLAGS           = 12'h001,
+    CSR_FRM              = 12'h002,
+    CSR_FCSR             = 12'h003,
+    CSR_FTRAN            = 12'h800,
+    // Vector CSRs
+    CSR_VSTART           = 12'h008,
+    CSR_VXSAT            = 12'h009,
+    CSR_VXRM             = 12'h00A,
+    CSR_VCSR             = 12'h00F,
+    CSR_VL               = 12'hC20,
+    CSR_VTYPE            = 12'hC21,
+    CSR_VLENB            = 12'hC22,
+    // Supervisor Mode CSRs
+    CSR_SSTATUS          = 12'h100,
+    CSR_SIE              = 12'h104,
+    CSR_STVEC            = 12'h105,
+    CSR_SCOUNTEREN       = 12'h106,
+    CSR_SSCRATCH         = 12'h140,
+    CSR_SEPC             = 12'h141,
+    CSR_SCAUSE           = 12'h142,
+    CSR_STVAL            = 12'h143,
+    CSR_SIP              = 12'h144,
+    CSR_SATP             = 12'h180,
+    // Machine Mode CSRs
+    CSR_MSTATUS          = 12'h300,
+    CSR_MISA             = 12'h301,
+    CSR_MEDELEG          = 12'h302,
+    CSR_MIDELEG          = 12'h303,
+    CSR_MIE              = 12'h304,
+    CSR_MTVEC            = 12'h305,
+    CSR_MCOUNTEREN       = 12'h306,
+    CSR_MSTATUSH         = 12'h310,
+    CSR_MCOUNTINHIBIT    = 12'h320,
+    CSR_MHPM_EVENT_3     = 12'h323,  //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_4     = 12'h324,  //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_5     = 12'h325,  //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_6     = 12'h326,  //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_7     = 12'h327,  //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_8     = 12'h328,  //Machine performance monitoring Event Selector
+    CSR_MHPM_EVENT_9     = 12'h329,  //Reserved
+    CSR_MHPM_EVENT_10    = 12'h32A,  //Reserved
+    CSR_MHPM_EVENT_11    = 12'h32B,  //Reserved
+    CSR_MHPM_EVENT_12    = 12'h32C,  //Reserved
+    CSR_MHPM_EVENT_13    = 12'h32D,  //Reserved
+    CSR_MHPM_EVENT_14    = 12'h32E,  //Reserved
+    CSR_MHPM_EVENT_15    = 12'h32F,  //Reserved
+    CSR_MHPM_EVENT_16    = 12'h330,  //Reserved
+    CSR_MHPM_EVENT_17    = 12'h331,  //Reserved
+    CSR_MHPM_EVENT_18    = 12'h332,  //Reserved
+    CSR_MHPM_EVENT_19    = 12'h333,  //Reserved
+    CSR_MHPM_EVENT_20    = 12'h334,  //Reserved
+    CSR_MHPM_EVENT_21    = 12'h335,  //Reserved
+    CSR_MHPM_EVENT_22    = 12'h336,  //Reserved
+    CSR_MHPM_EVENT_23    = 12'h337,  //Reserved
+    CSR_MHPM_EVENT_24    = 12'h338,  //Reserved
+    CSR_MHPM_EVENT_25    = 12'h339,  //Reserved
+    CSR_MHPM_EVENT_26    = 12'h33A,  //Reserved
+    CSR_MHPM_EVENT_27    = 12'h33B,  //Reserved
+    CSR_MHPM_EVENT_28    = 12'h33C,  //Reserved
+    CSR_MHPM_EVENT_29    = 12'h33D,  //Reserved
+    CSR_MHPM_EVENT_30    = 12'h33E,  //Reserved
+    CSR_MHPM_EVENT_31    = 12'h33F,  //Reserved
+    CSR_MSCRATCH         = 12'h340,
+    CSR_MEPC             = 12'h341,
+    CSR_MCAUSE           = 12'h342,
+    CSR_MTVAL            = 12'h343,
+    CSR_MIP              = 12'h344,
+    CSR_MENVCFG          = 12'h30A,
+    CSR_MENVCFGH         = 12'h31A,
+    CSR_PMPCFG0          = 12'h3A0,
+    CSR_PMPCFG1          = 12'h3A1,
+    CSR_PMPCFG2          = 12'h3A2,
+    CSR_PMPCFG3          = 12'h3A3,
+    CSR_PMPADDR0         = 12'h3B0,
+    CSR_PMPADDR1         = 12'h3B1,
+    CSR_PMPADDR2         = 12'h3B2,
+    CSR_PMPADDR3         = 12'h3B3,
+    CSR_PMPADDR4         = 12'h3B4,
+    CSR_PMPADDR5         = 12'h3B5,
+    CSR_PMPADDR6         = 12'h3B6,
+    CSR_PMPADDR7         = 12'h3B7,
+    CSR_PMPADDR8         = 12'h3B8,
+    CSR_PMPADDR9         = 12'h3B9,
+    CSR_PMPADDR10        = 12'h3BA,
+    CSR_PMPADDR11        = 12'h3BB,
+    CSR_PMPADDR12        = 12'h3BC,
+    CSR_PMPADDR13        = 12'h3BD,
+    CSR_PMPADDR14        = 12'h3BE,
+    CSR_PMPADDR15        = 12'h3BF,
+    CSR_MVENDORID        = 12'hF11,
+    CSR_MARCHID          = 12'hF12,
+    CSR_MIMPID           = 12'hF13,
+    CSR_MHARTID          = 12'hF14,
+    CSR_MCONFIGPTR       = 12'hF15,
+    CSR_MCYCLE           = 12'hB00,
+    CSR_MCYCLEH          = 12'hB80,
+    CSR_MINSTRET         = 12'hB02,
+    CSR_MINSTRETH        = 12'hB82,
+    //Performance Counters
+    CSR_MHPM_COUNTER_3   = 12'hB03,
+    CSR_MHPM_COUNTER_4   = 12'hB04,
+    CSR_MHPM_COUNTER_5   = 12'hB05,
+    CSR_MHPM_COUNTER_6   = 12'hB06,
+    CSR_MHPM_COUNTER_7   = 12'hB07,
+    CSR_MHPM_COUNTER_8   = 12'hB08,
+    CSR_MHPM_COUNTER_9   = 12'hB09,  // reserved
+    CSR_MHPM_COUNTER_10  = 12'hB0A,  // reserved
+    CSR_MHPM_COUNTER_11  = 12'hB0B,  // reserved
+    CSR_MHPM_COUNTER_12  = 12'hB0C,  // reserved
+    CSR_MHPM_COUNTER_13  = 12'hB0D,  // reserved
+    CSR_MHPM_COUNTER_14  = 12'hB0E,  // reserved
+    CSR_MHPM_COUNTER_15  = 12'hB0F,  // reserved
+    CSR_MHPM_COUNTER_16  = 12'hB10,  // reserved
+    CSR_MHPM_COUNTER_17  = 12'hB11,  // reserved
+    CSR_MHPM_COUNTER_18  = 12'hB12,  // reserved
+    CSR_MHPM_COUNTER_19  = 12'hB13,  // reserved
+    CSR_MHPM_COUNTER_20  = 12'hB14,  // reserved
+    CSR_MHPM_COUNTER_21  = 12'hB15,  // reserved
+    CSR_MHPM_COUNTER_22  = 12'hB16,  // reserved
+    CSR_MHPM_COUNTER_23  = 12'hB17,  // reserved
+    CSR_MHPM_COUNTER_24  = 12'hB18,  // reserved
+    CSR_MHPM_COUNTER_25  = 12'hB19,  // reserved
+    CSR_MHPM_COUNTER_26  = 12'hB1A,  // reserved
+    CSR_MHPM_COUNTER_27  = 12'hB1B,  // reserved
+    CSR_MHPM_COUNTER_28  = 12'hB1C,  // reserved
+    CSR_MHPM_COUNTER_29  = 12'hB1D,  // reserved
+    CSR_MHPM_COUNTER_30  = 12'hB1E,  // reserved
+    CSR_MHPM_COUNTER_31  = 12'hB1F,  // reserved
+    CSR_MHPM_COUNTER_3H  = 12'hB83,
+    CSR_MHPM_COUNTER_4H  = 12'hB84,
+    CSR_MHPM_COUNTER_5H  = 12'hB85,
+    CSR_MHPM_COUNTER_6H  = 12'hB86,
+    CSR_MHPM_COUNTER_7H  = 12'hB87,
+    CSR_MHPM_COUNTER_8H  = 12'hB88,
+    CSR_MHPM_COUNTER_9H  = 12'hB89,  // reserved
+    CSR_MHPM_COUNTER_10H = 12'hB8A,  // reserved
+    CSR_MHPM_COUNTER_11H = 12'hB8B,  // reserved
+    CSR_MHPM_COUNTER_12H = 12'hB8C,  // reserved
+    CSR_MHPM_COUNTER_13H = 12'hB8D,  // reserved
+    CSR_MHPM_COUNTER_14H = 12'hB8E,  // reserved
+    CSR_MHPM_COUNTER_15H = 12'hB8F,  // reserved
+    CSR_MHPM_COUNTER_16H = 12'hB90,  // reserved
+    CSR_MHPM_COUNTER_17H = 12'hB91,  // reserved
+    CSR_MHPM_COUNTER_18H = 12'hB92,  // reserved
+    CSR_MHPM_COUNTER_19H = 12'hB93,  // reserved
+    CSR_MHPM_COUNTER_20H = 12'hB94,  // reserved
+    CSR_MHPM_COUNTER_21H = 12'hB95,  // reserved
+    CSR_MHPM_COUNTER_22H = 12'hB96,  // reserved
+    CSR_MHPM_COUNTER_23H = 12'hB97,  // reserved
+    CSR_MHPM_COUNTER_24H = 12'hB98,  // reserved
+    CSR_MHPM_COUNTER_25H = 12'hB99,  // reserved
+    CSR_MHPM_COUNTER_26H = 12'hB9A,  // reserved
+    CSR_MHPM_COUNTER_27H = 12'hB9B,  // reserved
+    CSR_MHPM_COUNTER_28H = 12'hB9C,  // reserved
+    CSR_MHPM_COUNTER_29H = 12'hB9D,  // reserved
+    CSR_MHPM_COUNTER_30H = 12'hB9E,  // reserved
+    CSR_MHPM_COUNTER_31H = 12'hB9F,  // reserved
+    // Cache Control (platform specific)
+    CSR_DCACHE           = 12'h7C1,
+    CSR_ICACHE           = 12'h7C0,
+    // Accelerator memory consistency (platform specific)
+    CSR_ACC_CONS         = 12'h7C2,
+    // Triggers
+    CSR_TSELECT          = 12'h7A0,
+    CSR_TDATA1           = 12'h7A1,
+    CSR_TDATA2           = 12'h7A2,
+    CSR_TDATA3           = 12'h7A3,
+    CSR_TINFO            = 12'h7A4,
+    // Debug CSR
+    CSR_DCSR             = 12'h7b0,
+    CSR_DPC              = 12'h7b1,
+    CSR_DSCRATCH0        = 12'h7b2,  // optional
+    CSR_DSCRATCH1        = 12'h7b3,  // optional
+    // Counters and Timers (User Mode - R/O Shadows)
+    CSR_CYCLE            = 12'hC00,
+    CSR_CYCLEH           = 12'hC80,
+    CSR_TIME             = 12'hC01,
+    CSR_TIMEH            = 12'hC81,
+    CSR_INSTRET          = 12'hC02,
+    CSR_INSTRETH         = 12'hC82,
+    // Performance counters (User Mode - R/O Shadows)
+    CSR_HPM_COUNTER_3    = 12'hC03,
+    CSR_HPM_COUNTER_4    = 12'hC04,
+    CSR_HPM_COUNTER_5    = 12'hC05,
+    CSR_HPM_COUNTER_6    = 12'hC06,
+    CSR_HPM_COUNTER_7    = 12'hC07,
+    CSR_HPM_COUNTER_8    = 12'hC08,
+    CSR_HPM_COUNTER_9    = 12'hC09,  // reserved
+    CSR_HPM_COUNTER_10   = 12'hC0A,  // reserved
+    CSR_HPM_COUNTER_11   = 12'hC0B,  // reserved
+    CSR_HPM_COUNTER_12   = 12'hC0C,  // reserved
+    CSR_HPM_COUNTER_13   = 12'hC0D,  // reserved
+    CSR_HPM_COUNTER_14   = 12'hC0E,  // reserved
+    CSR_HPM_COUNTER_15   = 12'hC0F,  // reserved
+    CSR_HPM_COUNTER_16   = 12'hC10,  // reserved
+    CSR_HPM_COUNTER_17   = 12'hC11,  // reserved
+    CSR_HPM_COUNTER_18   = 12'hC12,  // reserved
+    CSR_HPM_COUNTER_19   = 12'hC13,  // reserved
+    CSR_HPM_COUNTER_20   = 12'hC14,  // reserved
+    CSR_HPM_COUNTER_21   = 12'hC15,  // reserved
+    CSR_HPM_COUNTER_22   = 12'hC16,  // reserved
+    CSR_HPM_COUNTER_23   = 12'hC17,  // reserved
+    CSR_HPM_COUNTER_24   = 12'hC18,  // reserved
+    CSR_HPM_COUNTER_25   = 12'hC19,  // reserved
+    CSR_HPM_COUNTER_26   = 12'hC1A,  // reserved
+    CSR_HPM_COUNTER_27   = 12'hC1B,  // reserved
+    CSR_HPM_COUNTER_28   = 12'hC1C,  // reserved
+    CSR_HPM_COUNTER_29   = 12'hC1D,  // reserved
+    CSR_HPM_COUNTER_30   = 12'hC1E,  // reserved
+    CSR_HPM_COUNTER_31   = 12'hC1F,  // reserved
+    CSR_HPM_COUNTER_3H   = 12'hC83,
+    CSR_HPM_COUNTER_4H   = 12'hC84,
+    CSR_HPM_COUNTER_5H   = 12'hC85,
+    CSR_HPM_COUNTER_6H   = 12'hC86,
+    CSR_HPM_COUNTER_7H   = 12'hC87,
+    CSR_HPM_COUNTER_8H   = 12'hC88,
+    CSR_HPM_COUNTER_9H   = 12'hC89,  // reserved
+    CSR_HPM_COUNTER_10H  = 12'hC8A,  // reserved
+    CSR_HPM_COUNTER_11H  = 12'hC8B,  // reserved
+    CSR_HPM_COUNTER_12H  = 12'hC8C,  // reserved
+    CSR_HPM_COUNTER_13H  = 12'hC8D,  // reserved
+    CSR_HPM_COUNTER_14H  = 12'hC8E,  // reserved
+    CSR_HPM_COUNTER_15H  = 12'hC8F,  // reserved
+    CSR_HPM_COUNTER_16H  = 12'hC90,  // reserved
+    CSR_HPM_COUNTER_17H  = 12'hC91,  // reserved
+    CSR_HPM_COUNTER_18H  = 12'hC92,  // reserved
+    CSR_HPM_COUNTER_19H  = 12'hC93,  // reserved
+    CSR_HPM_COUNTER_20H  = 12'hC94,  // reserved
+    CSR_HPM_COUNTER_21H  = 12'hC95,  // reserved
+    CSR_HPM_COUNTER_22H  = 12'hC96,  // reserved
+    CSR_HPM_COUNTER_23H  = 12'hC97,  // reserved
+    CSR_HPM_COUNTER_24H  = 12'hC98,  // reserved
+    CSR_HPM_COUNTER_25H  = 12'hC99,  // reserved
+    CSR_HPM_COUNTER_26H  = 12'hC9A,  // reserved
+    CSR_HPM_COUNTER_27H  = 12'hC9B,  // reserved
+    CSR_HPM_COUNTER_28H  = 12'hC9C,  // reserved
+    CSR_HPM_COUNTER_29H  = 12'hC9D,  // reserved
+    CSR_HPM_COUNTER_30H  = 12'hC9E,  // reserved
+    CSR_HPM_COUNTER_31H  = 12'hC9F   // reserved
+  } csr_reg_t;
+
+  localparam logic [63:0] SSTATUS_UIE = 'h00000001;  // SSTATUS_* are 64-bit bit masks; this one selects bit 0
+  localparam logic [63:0] SSTATUS_SIE = 'h00000002;  // bit 1
+  localparam logic [63:0] SSTATUS_SPIE = 'h00000020;  // bit 5
+  localparam logic [63:0] SSTATUS_SPP = 'h00000100;  // bit 8
+  localparam logic [63:0] SSTATUS_FS = 'h00006000;  // bits 14:13
+  localparam logic [63:0] SSTATUS_XS = 'h00018000;  // bits 16:15
+  localparam logic [63:0] SSTATUS_SUM = 'h00040000;  // bit 18
+  localparam logic [63:0] SSTATUS_MXR = 'h00080000;  // bit 19
+  localparam logic [63:0] SSTATUS_UPIE = 'h00000010;  // bit 4 (listed out of bit order)
+  localparam logic [63:0] SSTATUS_UXL = 64'h0000000300000000;  // bits 33:32, unconditionally (MSTATUS_UXL below is gated by IS_XLEN64 instead)
+  localparam logic [63:0] SSTATUS_SD = {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000};  // bit 63 when IS_XLEN64 is 1, else bit 31; NOTE(review): assumes IS_XLEN64 is 1 bit wide (declared outside this view)
+
+  localparam logic [63:0] MSTATUS_UIE = 'h00000001;  // MSTATUS_* are 64-bit bit masks; this one selects bit 0
+  localparam logic [63:0] MSTATUS_SIE = 'h00000002;  // bit 1
+  localparam logic [63:0] MSTATUS_HIE = 'h00000004;  // bit 2
+  localparam logic [63:0] MSTATUS_MIE = 'h00000008;  // bit 3
+  localparam logic [63:0] MSTATUS_UPIE = 'h00000010;  // bit 4
+  localparam logic [63:0] MSTATUS_SPIE = 'h00000020;  // bit 5
+  localparam logic [63:0] MSTATUS_HPIE = 'h00000040;  // bit 6
+  localparam logic [63:0] MSTATUS_MPIE = 'h00000080;  // bit 7
+  localparam logic [63:0] MSTATUS_SPP = 'h00000100;  // bit 8
+  localparam logic [63:0] MSTATUS_HPP = 'h00000600;  // bits 10:9
+  localparam logic [63:0] MSTATUS_MPP = 'h00001800;  // bits 12:11
+  localparam logic [63:0] MSTATUS_FS = 'h00006000;  // bits 14:13
+  localparam logic [63:0] MSTATUS_XS = 'h00018000;  // bits 16:15
+  localparam logic [63:0] MSTATUS_MPRV = 'h00020000;  // bit 17
+  localparam logic [63:0] MSTATUS_SUM = 'h00040000;  // bit 18
+  localparam logic [63:0] MSTATUS_MXR = 'h00080000;  // bit 19
+  localparam logic [63:0] MSTATUS_TVM = 'h00100000;  // bit 20
+  localparam logic [63:0] MSTATUS_TW = 'h00200000;  // bit 21
+  localparam logic [63:0] MSTATUS_TSR = 'h00400000;  // bit 22
+  localparam logic [63:0] MSTATUS_UXL = {30'h0000000, IS_XLEN64, IS_XLEN64, 32'h00000000};  // bits 33:32, both equal to IS_XLEN64 (mask is zero when IS_XLEN64 is 0)
+  localparam logic [63:0] MSTATUS_SXL = {28'h0000000, IS_XLEN64, IS_XLEN64, 34'h00000000};  // bits 35:34, both equal to IS_XLEN64
+  localparam logic [63:0] MSTATUS_SD = {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000};  // bit 63 when IS_XLEN64 is 1, else bit 31; NOTE(review): assumes IS_XLEN64 is 1 bit wide (declared outside this view)
+
+  typedef enum logic [2:0] {  // 3-bit CSR operation code; values 3'h0 and 3'h4 are not enumerated
+    CSRRW  = 3'h1,  // same value csrw() places in the 3-bit field between rs1 and rd
+    CSRRS  = 3'h2,  // same value csrr() places in the 3-bit field between rs1 and rd
+    CSRRC  = 3'h3,
+    CSRRWI = 3'h5,  // each *I member equals its non-I counterpart with bit 2 set
+    CSRRSI = 3'h6,
+    CSRRCI = 3'h7
+  } csr_op_t;
+
+  // decoded CSR address: field-wise view of a CSR number (packed struct, first member is most significant)
+  typedef struct packed {
+    logic [1:0] rw;  // most significant two bits
+    priv_lvl_t  priv_lvl;  // next field down; width is that of priv_lvl_t (declared outside this view)
+    logic [7:0] address;  // least significant eight bits
+  } csr_addr_t;
+
+  typedef union packed {  // two overlaid views of the same bits
+    csr_reg_t  address;  // view as an enumerated CSR name
+    csr_addr_t csr_decode;  // view as rw / priv_lvl / address fields
+  } csr_t;
+
+  // Floating-Point control and status register (32-bit!): 17 + 7 + 3 + 5 bits, most significant field first
+  typedef struct packed {
+    logic [31:15] reserved;  // reserved for L extension, return 0 otherwise
+    logic [6:0]   fprec;     // div/sqrt precision control (bits 14:8 of the packed value)
+    logic [2:0]   frm;       // float rounding mode (bits 7:5)
+    logic [4:0]   fflags;    // float exception flags (bits 4:0)
+  } fcsr_t;
+
+  // PMP address-matching mode: 2-bit encoding used by the addr_mode field of pmpcfg_t
+  typedef enum logic [1:0] {
+    OFF   = 2'b00,
+    TOR   = 2'b01,
+    NA4   = 2'b10,
+    NAPOT = 2'b11
+  } pmp_addr_mode_t;
+
+  // PMP Access Type: 3-bit code, one bit set per access kind (combined values are not enumerated)
+  typedef enum logic [2:0] {
+    ACCESS_NONE  = 3'b000,  // no bit set
+    ACCESS_READ  = 3'b001,  // bit 0 (same position as pmpcfg_access_t.r)
+    ACCESS_WRITE = 3'b010,  // bit 1 (same position as pmpcfg_access_t.w)
+    ACCESS_EXEC  = 3'b100   // bit 2 (same position as pmpcfg_access_t.x)
+  } pmp_access_t;
+
+  typedef struct packed {  // 3-bit permission field of a PMP configuration (used as pmpcfg_t.access_type)
+    logic x;  // bit 2 of the packed value
+    logic w;  // bit 1
+    logic r;  // bit 0
+  } pmpcfg_access_t;
+
+  // packed struct of a PMP configuration register (8bit): 1 + 2 + 2 + 3 bits, most significant field first
+  typedef struct packed {
+    logic           locked;       // lock this configuration (bit 7)
+    logic [1:0]     reserved;     // bits 6:5
+    pmp_addr_mode_t addr_mode;    // Off, TOR, NA4, NAPOT (bits 4:3)
+    pmpcfg_access_t access_type;  // x / w / r permission bits (bits 2:0)
+  } pmpcfg_t;
+
+  // -----
+  // Debug
+  // -----
+  typedef struct packed {  // field layout, most significant first; bit positions below assume priv_lvl_t is 2 bits wide — TODO confirm (declared outside this view)
+    logic [31:28] xdebugver;
+    logic [27:16] zero2;
+    logic         ebreakm;    // bit 15
+    logic         zero1;      // bit 14
+    logic         ebreaks;    // bit 13
+    logic         ebreaku;    // bit 12
+    logic         stepie;     // bit 11
+    logic         stopcount;  // bit 10
+    logic         stoptime;   // bit 9
+    logic [8:6]   cause;
+    logic         zero0;      // bit 5
+    logic         mprven;     // bit 4
+    logic         nmip;       // bit 3
+    logic         step;       // bit 2
+    priv_lvl_t    prv;        // least significant field
+  } dcsr_t;
+
+  // Instruction Generation *incomplete*
+  function automatic logic [31:0] jal(logic [4:0] rd, logic [20:0] imm);
+    // OpCode Jal (7'h6f): word[31:12] = {imm[20], imm[10:1], imm[11], imm[19:12]}, word[11:7] = rd; imm[0] is not encoded
+    return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h6f};
+  endfunction
+
+  function automatic logic [31:0] jalr(logic [4:0] rd, logic [4:0] rs1, logic [11:0] offset);
+    // OpCode Jalr (7'h67): word = {offset[11:0], rs1, 3'b0, rd, opcode}
+    return {offset[11:0], rs1, 3'b0, rd, 7'h67};
+  endfunction
+
+  function automatic logic [31:0] andi(logic [4:0] rd, logic [4:0] rs1, logic [11:0] imm);
+    // OpCode andi: opcode 7'h13 with 3'h7 in the 3-bit field between rs1 and rd; imm fills word[31:20]
+    return {imm[11:0], rs1, 3'h7, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] slli(logic [4:0] rd, logic [4:0] rs1, logic [5:0] shamt);
+    // OpCode slli: opcode 7'h13 with 3'h1 between rs1 and rd; word[31:26] is zero and the 6-bit shamt fills word[25:20]
+    return {6'b0, shamt[5:0], rs1, 3'h1, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] srli(logic [4:0] rd, logic [4:0] rs1, logic [5:0] shamt);
+    // OpCode srli: opcode 7'h13 with 3'h5 between rs1 and rd; word[31:26] is zero and the 6-bit shamt fills word[25:20]
+    return {6'b0, shamt[5:0], rs1, 3'h5, rd, 7'h13};
+  endfunction
+
+  function automatic logic [31:0] load(logic [2:0] size, logic [4:0] dest, logic [4:0] base,
+                                       logic [11:0] offset);
+    // OpCode Load (7'h03): word = {offset, base, size, dest, opcode}; size is passed through unmodified as the 3-bit field
+    return {offset[11:0], base, size, dest, 7'h03};
+  endfunction
+
+  function automatic logic [31:0] auipc(logic [4:0] rd, logic [20:0] imm);
+    // OpCode Auipc (7'h17). NOTE(review): the immediate is reordered exactly as in jal() above rather than placed contiguously in word[31:12], and imm[0] is dropped — the result only matches a plain upper-immediate layout for imm = 0; confirm against callers
+    return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h17};
+  endfunction
+
+  function automatic logic [31:0] store(logic [2:0] size, logic [4:0] src, logic [4:0] base,
+                                        logic [11:0] offset);
+    // OpCode Store (7'h23): the offset is split, offset[11:5] into word[31:25] and offset[4:0] into word[11:7]
+    return {offset[11:5], src, base, size, offset[4:0], 7'h23};
+  endfunction
+
+  function automatic logic [31:0] float_load(logic [2:0] size, logic [4:0] dest, logic [4:0] base,
+                                             logic [11:0] offset);
+    // Same field layout as load(), but with opcode 7'b00_001_11 (= 7'h07) instead of 7'h03
+    return {offset[11:0], base, size, dest, 7'b00_001_11};
+  endfunction
+
+  function automatic logic [31:0] float_store(logic [2:0] size, logic [4:0] src, logic [4:0] base,
+                                              logic [11:0] offset);
+    // Same field layout as store(), but with opcode 7'b01_001_11 (= 7'h27) instead of 7'h23
+    return {offset[11:5], src, base, size, offset[4:0], 7'b01_001_11};
+  endfunction
+
+  function automatic logic [31:0] csrw(csr_reg_t csr, logic [4:0] rs1);
+    // word = {csr, rs1, CSRRW code 3'h1, rd = 5'h0, OpCode System 7'h73}; assumes csr_reg_t is 12 bits wide so the total is 32 — its enum header is outside this view
+    return {csr, rs1, 3'h1, 5'h0, 7'h73};
+  endfunction
+
+  function automatic logic [31:0] csrr(csr_reg_t csr, logic [4:0] dest);
+    // word = {csr, rs1 = 5'h0, CSRRS code 3'h2, rd = dest, OpCode System 7'h73}; assumes csr_reg_t is 12 bits wide so the total is 32 — its enum header is outside this view
+    return {csr, 5'h0, 3'h2, dest, 7'h73};
+  endfunction
+
+  function automatic logic [31:0] branch(logic [4:0] src2, logic [4:0] src1, logic [2:0] funct3,
+                                         logic [11:0] offset);
+    // OpCode Branch (7'b11_000_11): offset[11] -> word[31], offset[9:4] -> word[30:25], offset[3:0] -> word[11:8], offset[10] -> word[7]; presumably offset carries byte-offset bits [12:1] — TODO confirm with callers
+    return {offset[11], offset[9:4], src2, src1, funct3, offset[3:0], offset[10], 7'b11_000_11};
+  endfunction
+
+  function automatic logic [31:0] ebreak();  // returns a fixed 32-bit instruction word
+    return 32'h00100073;  // opcode 7'h73 (same as csrw/csrr) with word[31:20] = 12'h001, all other fields zero
+  endfunction
+
+  function automatic logic [31:0] wfi();  // returns a fixed 32-bit instruction word
+    return 32'h10500073;  // opcode 7'h73 (same as csrw/csrr) with word[31:20] = 12'h105, all other fields zero
+  endfunction
+
+  function automatic logic [31:0] nop();  // returns a fixed 32-bit instruction word
+    return 32'h00000013;  // opcode 7'h13 (same as andi/slli/srli) with rd, rs1, the 3-bit field and the immediate all zero
+  endfunction
+
+  function automatic logic [31:0] illegal();  // returns the all-zero 32-bit word
+    return 32'h00000000;
+  endfunction
+
+
+  // trace log compatible with Spike's commit log feature: returns one formatted, newline-terminated line per call
+  // pragma translate_off
+  function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr,
+                                 logic [4:0] rd, logic [63:0] result, logic rd_fpr);
+    string rd_s;  // formatted destination register name
+    string instr_word;  // formatted instruction bits, in parentheses
+
+    automatic string rf_s = rd_fpr ? "f" : "x";  // register-name prefix selected by rd_fpr
+
+    if (instr[1:0] != 2'b11) begin  // low two bits other than 2'b11: print only the low 16 bits of instr
+      instr_word = $sformatf("(0x%h)", instr[15:0]);
+    end else begin  // otherwise print all 32 bits
+      instr_word = $sformatf("(0x%h)", instr);
+    end
+
+    if (rd < 10) rd_s = $sformatf("%s %0d", rf_s, rd);  // single-digit index: a space after the prefix keeps the name three characters wide
+    else rd_s = $sformatf("%s%0d", rf_s, rd);
+
+    if (rd_fpr || rd != 0) begin  // register and result are printed unless the destination is index 0 with rd_fpr clear
+      // 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000
+      return $sformatf("%d 0x%h %s %s 0x%h\n", priv_lvl, pc, instr_word, rd_s, result);
+    end else begin
+      // 0 0x000000008000019c (0x0040006f)
+      return $sformatf("%d 0x%h %s\n", priv_lvl, pc, instr_word);
+    end
+  endfunction
+
+  typedef struct {  // unpacked struct of 2-state C-like types, declared inside the translate_off region; presumably one commit-log record — confirm against its users
+    byte priv;
+    longint unsigned pc;  // 64-bit unsigned
+    byte is_fp;
+    byte rd;
+    longint unsigned data;  // 64-bit unsigned
+    int unsigned instr;  // 32-bit unsigned
+    byte was_exception;
+  } commit_log_t;
+  // pragma translate_on
+
+endpackage
diff --git a/test/type_param/core/include/std_cache_pkg.sv b/test/type_param/core/include/std_cache_pkg.sv
new file mode 100644
index 0000000..ae812c9
--- /dev/null
+++ b/test/type_param/core/include/std_cache_pkg.sv
@@ -0,0 +1,98 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba    <zarubaf@iis.ee.ethz.ch>, ETH Zurich
+//         Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 15.08.2018
+
+// ******* WIP *******
+// Description: package for the standard Ariane cache subsystem.
+
+package std_cache_pkg;
+
+  // Calculated parameters, all derived from ariane_pkg cache settings
+  localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8);  // log2 of the cache-line size in bytes
+  localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_BYTE_OFFSET);  // 2^(index bits above the byte offset)
+  localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC * 2;  // two bits per way
+  localparam DCACHE_SET_ASSOC_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC);  // bits needed to number a way
+  // localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not
+
+  typedef struct packed {
+    logic [1:0]      id;     // id for which we handle the miss
+    logic            valid;
+    logic            we;
+    logic [55:0]     addr;
+    logic [7:0][7:0] wdata;  // eight bytes, packed as a 2-D array
+    logic [7:0]      be;     // one bit per wdata byte
+  } mshr_t;
+
+  typedef struct packed {
+    logic        valid;
+    logic [63:0] addr;
+    logic [7:0]  be;
+    logic [1:0]  size;
+    logic        we;
+    logic [63:0] wdata;
+    logic        bypass;
+  } miss_req_t;
+
+  typedef struct packed {
+    logic                req;
+    ariane_pkg::ad_req_t reqtype;
+    ariane_pkg::amo_t    amo;
+    logic [3:0]          id;
+    logic [63:0]         addr;
+    logic [63:0]         wdata;
+    logic                we;
+    logic [7:0]          be;
+    logic [1:0]          size;
+  } bypass_req_t;
+
+  typedef struct packed {
+    logic        gnt;
+    logic        valid;
+    logic [63:0] rdata;
+  } bypass_rsp_t;
+
+  typedef struct packed {
+    logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0]  tag;    // tag array
+    logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data;   // data array
+    logic                                     valid;  // state array
+    logic                                     dirty;  // state array
+  } cache_line_t;
+
+  // cache line byte enable (tag and data widths are rounded up to whole bytes)
+  typedef struct packed {
+    logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag;  // byte enable into tag array
+    logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data;  // byte enable into data array
+    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0]        vldrty; // bit enable into state array (valid for a pair of dirty/valid bits)
+  } cl_be_t;
+
+  // convert one hot to bin -> needed for cache replacement; returns the index of the lowest set bit of `in`
+  function automatic logic [DCACHE_SET_ASSOC_WIDTH-1:0] one_hot_to_bin(
+      input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] in);
+    for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
+      if (in[i]) return i;  // i is truncated to the return width
+    end  // NOTE(review): no return executes when `in` has no bit set, so the result is never assigned in that case — confirm callers never pass all-zero
+  endfunction
+  // get the first (lowest-index) bit set, returns one hot value
+  function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] get_victim_cl(
+      input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid_dirty);
+    // one-hot return vector
+    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] oh = '0;
+    for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
+      if (valid_dirty[i]) begin
+        oh[i] = 1'b1;
+        return oh;
+      end
+    end  // NOTE(review): `oh` is never returned when valid_dirty is all-zero, so the result is never assigned in that case — confirm callers never pass all-zero
+  endfunction
+endpackage : std_cache_pkg
+
diff --git a/test/type_param/core/include/wt_cache_pkg.sv b/test/type_param/core/include/wt_cache_pkg.sv
new file mode 100644
index 0000000..9a8c0ce
--- /dev/null
+++ b/test/type_param/core/include/wt_cache_pkg.sv
@@ -0,0 +1,344 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 15.08.2018
+// Description: Package for OpenPiton compatible L1 cache subsystem
+
+// this is needed to propagate the
+// configuration in case Ariane is
+// instantiated in OpenPiton
+`ifdef PITON_ARIANE
+`include "l15.tmp.h"
+`include "define.tmp.h"
+`endif
+
+package wt_cache_pkg;
+
+  // these parameters need to coincide with the
+  // L1.5 parameterization, do not change
+`ifdef PITON_ARIANE
+
+`ifndef CONFIG_L15_ASSOCIATIVITY
+  `define CONFIG_L15_ASSOCIATIVITY 4
+`endif
+
+`ifndef TLB_CSM_WIDTH
+  `define TLB_CSM_WIDTH 33
+`endif
+
+  localparam L15_SET_ASSOC = `CONFIG_L15_ASSOCIATIVITY;  // taken from the macro; falls back to 4 via the `ifndef above
+  localparam L15_TLB_CSM_WIDTH = `TLB_CSM_WIDTH;  // taken from the macro; falls back to 33 via the `ifndef above
+`else
+  localparam L15_SET_ASSOC           = ariane_pkg::DCACHE_SET_ASSOC;// align with dcache for compatibility with the standard Ariane setup
+  localparam L15_TLB_CSM_WIDTH = 33;
+`endif
+  localparam L15_TID_WIDTH = ariane_pkg::MEM_TID_WIDTH;
+  localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC);  // bits needed to number an L1.5 way
+  localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC);  // bits needed to number an icache way
+  localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC);  // bits needed to number a dcache way
+
+  // FIFO depths of L15 adapter
+  localparam ADAPTER_REQ_FIFO_DEPTH = 2;
+  localparam ADAPTER_RTRN_FIFO_DEPTH = 2;
+
+
+  // Calculated parameter
+  localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);  // log2 of the icache line size in bytes
+  localparam ICACHE_NUM_WORDS = 2 ** (ariane_pkg::ICACHE_INDEX_WIDTH - ICACHE_OFFSET_WIDTH);  // 2^(index bits above the byte offset)
+  localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS);  // excluding byte offset
+
+  localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8);  // log2 of the dcache line size in bytes
+  localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_OFFSET_WIDTH);  // 2^(index bits above the byte offset)
+  localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS);  // excluding byte offset
+
+  localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH / riscv::XLEN;  // number of XLEN-wide words per dcache line
+  localparam DCACHE_NUM_BANKS_WIDTH = $clog2(DCACHE_NUM_BANKS);
+
+  // write buffer parameterization
+  localparam DCACHE_WBUF_DEPTH = ariane_pkg::WT_DCACHE_WBUF_DEPTH;
+  localparam DCACHE_MAX_TX = 2 ** L15_TID_WIDTH;  // number of distinct transaction IDs
+  localparam CACHE_ID_WIDTH = L15_TID_WIDTH;
+
+
+  typedef struct packed {  // one write-buffer entry, holding a single XLEN-wide word
+    logic [ariane_pkg::DCACHE_TAG_WIDTH+(ariane_pkg::DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES)-1:0] wtag;  // width = tag bits + index bits above XLEN_ALIGN_BYTES
+    riscv::xlen_t data;
+    logic [ariane_pkg::DCACHE_USER_WIDTH-1:0] user;
+    logic [(riscv::XLEN/8)-1:0] dirty;  // byte is dirty
+    logic [(riscv::XLEN/8)-1:0] valid;  // byte is valid
+    logic [(riscv::XLEN/8)-1:0] txblock;  // byte is part of transaction in-flight
+    logic checked;  // if cache state of this word has been checked
+    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh;  // valid way in the cache
+  } wbuffer_t;
+
+  // TX status registers are indexed with the transaction ID
+  // they basically store which bytes from which buffer entry are part
+  // of that transaction
+
+  typedef struct packed {
+    logic                                 vld;
+    logic [(riscv::XLEN/8)-1:0]           be;   // one bit per byte of an XLEN-wide word
+    logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] ptr;  // wide enough to index DCACHE_WBUF_DEPTH entries
+  } tx_stat_t;
+
+  // local interfaces between caches and L15 adapter: request kinds sent by the dcache (implicit encodings 0..3 in declaration order)
+  typedef enum logic [1:0] {
+    DCACHE_STORE_REQ,
+    DCACHE_LOAD_REQ,
+    DCACHE_ATOMIC_REQ,
+    DCACHE_INT_REQ
+  } dcache_out_t;
+
+  typedef enum logic [2:0] {  // implicit encodings 0..4 in declaration order; 5..7 are not enumerated
+    DCACHE_INV_REQ,  // no ack from the core required
+    DCACHE_STORE_ACK,  // note: this may contain an invalidation vector, too
+    DCACHE_LOAD_ACK,
+    DCACHE_ATOMIC_ACK,
+    DCACHE_INT_ACK
+  } dcache_in_t;
+
+  typedef enum logic [0:0] {  // single-bit enum: implicit encodings 0 and 1 in declaration order
+    ICACHE_INV_REQ,   // no ack from the core required
+    ICACHE_IFILL_ACK
+  } icache_in_t;
+
+  // icache interface: invalidation vector (embedded in icache_rtrn_t as `inv`)
+  typedef struct packed {
+    logic                                      vld;  // invalidate only affected way
+    logic                                      all;  // invalidate all ways
+    logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] idx;  // physical address to invalidate
+    logic [L1I_WAY_WIDTH-1:0]                  way;  // way to invalidate
+  } icache_inval_t;
+
+  typedef struct packed {  // request issued by the icache
+    logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way;  // way to replace
+    logic [riscv::PLEN-1:0] paddr;  // physical address
+    logic nc;  // noncacheable
+    logic [CACHE_ID_WIDTH-1:0] tid;  // thread id (used as transaction id in Ariane)
+  } icache_req_t;
+
+  typedef struct packed {  // return packet delivered to the icache
+    icache_in_t rtype;  // see definitions above
+    logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data;  // full cache line width
+    logic [ariane_pkg::ICACHE_USER_LINE_WIDTH-1:0] user;  // user bits
+    icache_inval_t inv;  // invalidation vector
+    logic [CACHE_ID_WIDTH-1:0] tid;  // thread id (used as transaction id in Ariane)
+  } icache_rtrn_t;
+
+  // dcache interface: invalidation vector (embedded in dcache_rtrn_t as `inv`)
+  typedef struct packed {
+    logic                                      vld;  // invalidate only affected way
+    logic                                      all;  // invalidate all ways
+    logic [ariane_pkg::DCACHE_INDEX_WIDTH-1:0] idx;  // physical address to invalidate
+    logic [L15_WAY_WIDTH-1:0]                  way;  // way to invalidate (sized by L15_WAY_WIDTH, whereas dcache_req_t.way uses L1D_WAY_WIDTH)
+  } dcache_inval_t;
+
+  typedef struct packed {  // request issued by the dcache
+    dcache_out_t rtype;  // see definitions above
+    logic [2:0]                                      size;        // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
+    logic [L1D_WAY_WIDTH-1:0] way;  // way to replace
+    logic [riscv::PLEN-1:0] paddr;  // physical address
+    riscv::xlen_t data;  // word width of processor (no block stores at the moment)
+    logic [ariane_pkg::DATA_USER_WIDTH-1:0]          user;        // user width of processor (no block stores at the moment)
+    logic nc;  // noncacheable
+    logic [CACHE_ID_WIDTH-1:0] tid;  // thread id (used as transaction id in Ariane)
+    ariane_pkg::amo_t amo_op;  // amo opcode
+  } dcache_req_t;
+
+  typedef struct packed {  // return packet delivered to the dcache
+    dcache_in_t rtype;  // see definitions above
+    logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data;  // full cache line width
+    logic [ariane_pkg::DCACHE_USER_LINE_WIDTH-1:0] user;  // user bits
+    dcache_inval_t inv;  // invalidation vector
+    logic [CACHE_ID_WIDTH-1:0] tid;  // thread id (used as transaction id in Ariane)
+  } dcache_rtrn_t;
+
+
+  // taken from iop.h in openpiton
+  // to l1.5 (only marked subset is used); 5-bit request type codes
+  typedef enum logic [4:0] {
+    L15_LOAD_RQ    = 5'b00000,  // load request
+    L15_IMISS_RQ   = 5'b10000,  // instruction fill request
+    L15_STORE_RQ   = 5'b00001,  // store request
+    L15_ATOMIC_RQ  = 5'b00110,  // atomic op (same code as the commented-out L15_SWAP_RQ below)
+    //L15_CAS1_RQ     = 5'b00010, // compare and swap1 packet (OpenSparc atomics)
+    //L15_CAS2_RQ     = 5'b00011, // compare and swap2 packet (OpenSparc atomics)
+    //L15_SWAP_RQ     = 5'b00110, // swap packet (OpenSparc atomics)
+    L15_STRLOAD_RQ = 5'b00100,  // unused
+    L15_STRST_RQ   = 5'b00101,  // unused
+    L15_STQ_RQ     = 5'b00111,  // unused
+    L15_INT_RQ     = 5'b01001,  // interrupt request
+    L15_FWD_RQ     = 5'b01101,  // unused
+    L15_FWD_RPY    = 5'b01110,  // unused
+    L15_RSVD_RQ    = 5'b11111   // unused
+  } l15_reqtypes_t;
+
+  // from l1.5 (only marked subset is used); 4-bit return type codes
+  typedef enum logic [3:0] {
+    L15_LOAD_RET               = 4'b0000,  // load packet
+    // L15_INV_RET                = 4'b0011, // invalidate packet, not unique... (same code as L15_EVICT_REQ)
+    L15_ST_ACK                 = 4'b0100,  // store ack packet
+    //L15_AT_ACK                 = 4'b0011, // unused, not unique... (same code as L15_EVICT_REQ)
+    L15_INT_RET                = 4'b0111,  // interrupt packet
+    L15_TEST_RET               = 4'b0101,  // unused
+    L15_FP_RET                 = 4'b1000,  // unused
+    L15_IFILL_RET              = 4'b0001,  // instruction fill packet
+    L15_EVICT_REQ              = 4'b0011,  // eviction request
+    L15_ERR_RET                = 4'b1100,  // unused
+    L15_STRLOAD_RET            = 4'b0010,  // unused
+    L15_STRST_ACK              = 4'b0110,  // unused
+    L15_FWD_RQ_RET             = 4'b1010,  // unused
+    L15_FWD_RPY_RET            = 4'b1011,  // unused
+    L15_RSVD_RET               = 4'b1111,  // unused
+    L15_CPX_RESTYPE_ATOMIC_RES = 4'b1110   // custom type for atomic responses
+  } l15_rtrntypes_t;
+
+
+  typedef struct packed {  // request bundle sent to the L1.5
+    logic l15_val;  // valid signal, asserted with request
+    logic l15_req_ack;  // ack for response
+    l15_reqtypes_t l15_rqtype;  // see l15_reqtypes_t above for encoding
+    logic l15_nc;  // non-cacheable bit
+    logic [2:0]                        l15_size;                  // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
+    logic [L15_TID_WIDTH-1:0] l15_threadid;  // currently 0 or 1
+    logic l15_prefetch;  // unused in openpiton
+    logic l15_invalidate_cacheline;  // unused by Ariane as L1 has no ECC at the moment
+    logic l15_blockstore;  // unused in openpiton
+    logic l15_blockinitstore;  // unused in openpiton
+    logic [L15_WAY_WIDTH-1:0] l15_l1rplway;  // way to replace
+    logic [39:0] l15_address;  // physical address
+    logic [63:0] l15_data;  // word to write
+    logic [63:0] l15_data_next_entry;  // unused in Ariane (only used for CAS atomic requests)
+    logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data;  // unused in Ariane
+    logic [3:0] l15_amo_op;  // atomic operation type
+  } l15_req_t;
+
+  typedef struct packed {  // return bundle received from the L1.5
+    logic l15_ack;  // ack for request struct
+    logic l15_header_ack;  // ack for request struct
+    logic l15_val;  // valid signal for return struct
+    l15_rtrntypes_t l15_returntype;  // see l15_rtrntypes_t above for encoding
+    logic l15_l2miss;  // unused in Ariane
+    logic [1:0] l15_error;  // unused in openpiton
+    logic l15_noncacheable;  // non-cacheable bit
+    logic l15_atomic;  // asserted in load return and store ack packets of atomic tx
+    logic [L15_TID_WIDTH-1:0] l15_threadid;  // used as transaction ID
+    logic l15_prefetch;  // unused in openpiton
+    logic l15_f4b;  // 4byte instruction fill from I/O space (nc).
+    logic [63:0] l15_data_0;  // used for both caches
+    logic [63:0] l15_data_1;  // used for both caches
+    logic [63:0] l15_data_2;  // currently only used for I$
+    logic [63:0] l15_data_3;  // currently only used for I$
+    logic l15_inval_icache_all_way;  // invalidate all ways
+    logic l15_inval_dcache_all_way;  // unused in openpiton
+    logic [15:4] l15_inval_address_15_4;  // invalidate selected cacheline
+    logic l15_cross_invalidate;  // unused in openpiton
+    logic [L15_WAY_WIDTH-1:0] l15_cross_invalidate_way;  // unused in openpiton
+    logic l15_inval_dcache_inval;  // invalidate selected cacheline and way
+    logic l15_inval_icache_inval;  // unused in openpiton
+    logic [L15_WAY_WIDTH-1:0] l15_inval_way;  // way to invalidate
+    logic l15_blockinitstore;  // unused in openpiton
+  } l15_rtrn_t;
+
+  // swap endianness in a 64bit word: returns `in` with its eight bytes in reversed order
+  function automatic logic [63:0] swendian64(input logic [63:0] in);
+    automatic logic [63:0] out;
+    for (int k = 0; k < 64; k += 8) begin  // k is the low bit of each output byte
+      out[k+:8] = in[63-k-:8];  // e.g. k = 0: out[7:0] = in[63:56]
+    end
+    return out;
+  endfunction
+
+  function automatic logic [5:0] popcnt64(input logic [63:0] in);  // sums the bits of `in` into a 6-bit count
+    logic [5:0] cnt = 0;
+    foreach (in[k]) begin
+      cnt += 6'(in[k]);  // each bit is widened to 6 bits before the add
+    end
+    return cnt;  // NOTE(review): a 6-bit count holds at most 63, so an all-ones input (64 set bits) wraps to 0 — confirm callers cannot hit that case
+  endfunction : popcnt64
+
+  function automatic logic [(riscv::XLEN/8)-1:0] to_byte_enable8(  // builds a byte-enable mask from a byte offset and a 2-bit size code
+      input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size);
+    logic [(riscv::XLEN/8)-1:0] be;
+    be = '0;  // start with no byte enabled
+    unique case (size)
+      2'b00:   be[offset] = '1;  // byte
+      2'b01:   be[offset+:2] = '1;  // hword
+      2'b10:   be[offset+:4] = '1;  // word
+      default: be = '1;  // dword (size 2'b11: every byte enabled, offset ignored)
+    endcase  // size
+    return be;
+  endfunction : to_byte_enable8
+
+  function automatic logic [(riscv::XLEN/8)-1:0] to_byte_enable4(  // 4-byte variant of to_byte_enable8: the mask is built in a 4-bit local
+      input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size);
+    logic [3:0] be;  // fixed 4 bits, independent of the XLEN/8 return width
+    be = '0;  // start with no byte enabled
+    unique case (size)
+      2'b00:   be[offset] = '1;  // byte
+      2'b01:   be[offset+:2] = '1;  // hword
+      default: be = '1;  // word (sizes 2'b10 and 2'b11: all four bytes enabled, offset ignored)
+    endcase  // size
+    return be;  // NOTE(review): presumably intended for XLEN = 32, where the return width is also 4 bits — confirm
+  endfunction : to_byte_enable4
+
+  // openpiton requires the data to be replicated in case of smaller sizes than dwords
+  function automatic riscv::xlen_t repData64(input riscv::xlen_t data,
+                                             input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset,
+                                             input logic [1:0] size);
+    riscv::xlen_t out;  // the loops below fill bits 63:0; assumes xlen_t is 64 bits wide — TODO confirm
+    unique case (size)
+      2'b00:   for (int k = 0; k < 8; k++) out[k*8+:8] = data[offset*8+:8];  // byte
+      2'b01:   for (int k = 0; k < 4; k++) out[k*16+:16] = data[offset*8+:16];  // hword
+      2'b10:   for (int k = 0; k < 2; k++) out[k*32+:32] = data[offset*8+:32];  // word
+      default: out = data;  // dword
+    endcase  // size
+    return out;
+  endfunction : repData64
+
+  function automatic riscv::xlen_t repData32(input riscv::xlen_t data,  // 32-bit variant of repData64: replicates a byte or halfword across bits 31:0
+                                             input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset,
+                                             input logic [1:0] size);
+    riscv::xlen_t out;  // NOTE(review): the two loops only assign bits 31:0, so any wider xlen_t leaves the upper bits unassigned — presumably only used when XLEN = 32; confirm
+    unique case (size)
+      2'b00:   for (int k = 0; k < 4; k++) out[k*8+:8] = data[offset*8+:8];  // byte
+      2'b01:   for (int k = 0; k < 2; k++) out[k*16+:16] = data[offset*8+:16];  // hword
+      default: out = data;  // word
+    endcase  // size
+    return out;
+  endfunction : repData32
+
+  // note: this is openpiton specific. cannot transmit unaligned words.
+  // hence we default to individual bytes in that case, and they have to be transmitted
+  // one after the other. Maps an 8-bit byte-enable pattern to a 2-bit size code.
+  function automatic logic [1:0] toSize64(input logic [7:0] be);
+    logic [1:0] size;
+    unique case (be)
+      8'b1111_1111:                                           size = 2'b11;  // dword
+      8'b0000_1111, 8'b1111_0000:                             size = 2'b10;  // word
+      8'b1100_0000, 8'b0011_0000, 8'b0000_1100, 8'b0000_0011: size = 2'b01;  // hword
+      default:                                                size = 2'b00;  // individual bytes (every pattern not listed above)
+    endcase  // be
+    return size;
+  endfunction : toSize64
+
+
+  function automatic logic [1:0] toSize32(input logic [3:0] be);  // 4-bit counterpart of toSize64: byte-enable pattern to 2-bit size code
+    logic [1:0] size;
+    unique case (be)
+      4'b1111:          size = 2'b10;  // word
+      4'b1100, 4'b0011: size = 2'b01;  // hword
+      default:          size = 2'b00;  // individual bytes (every pattern not listed above)
+    endcase  // be
+    return size;
+  endfunction : toSize32
+
+endpackage
diff --git a/test/type_param/core/instr_realign.sv b/test/type_param/core/instr_realign.sv
new file mode 100644
index 0000000..043a131
--- /dev/null
+++ b/test/type_param/core/instr_realign.sv
@@ -0,0 +1,361 @@
+// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// Description: Instruction Re-aligner
+//
+// This module takes 32-bit aligned cache blocks and extracts the instructions.
+// As we support the compressed instruction set extension, a 32-bit instruction word can
+// contain up to 2 compressed instructions.
+// Furthermore those instructions can be arbitrarily interleaved which makes it possible to fetch
+// only the lower part of a 32 bit instruction.
+// Furthermore we need to handle the case if we want to start fetching from an unaligned
+// instruction e.g. a branch.
+
+
+module instr_realign
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // clock, state registers update on the rising edge
+    input logic rst_ni,  // asynchronous reset, active low
+    input logic flush_i,  // clears the "last instruction was unaligned" flag
+    input logic valid_i,  // qualifies data_i/address_i; gates valid_o and the state registers
+    output logic serving_unaligned_o,  // we have an unaligned instruction in [0]
+    input logic [riscv::VLEN-1:0] address_i,
+    input logic [FETCH_WIDTH-1:0] data_i,
+    output logic [INSTR_PER_FETCH-1:0] valid_o,
+    output logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_o,
+    output logic [INSTR_PER_FETCH-1:0][31:0] instr_o
+);
+  // as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions
+  logic [3:0] instr_is_compressed;
+
+  for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
+    // a 16-bit slot holds a compressed instruction if its two LSBs are != 2'b11
+    assign instr_is_compressed[i] = ~&data_i[i*16+:2];
+  end
+
+  // save the unaligned part of the instruction to this ff
+  logic [15:0] unaligned_instr_d, unaligned_instr_q;
+  // the last instruction was unaligned
+  logic unaligned_d, unaligned_q;
+  // register to save the unaligned address
+  logic [riscv::VLEN-1:0] unaligned_address_d, unaligned_address_q;
+  // we have an unaligned instruction
+  assign serving_unaligned_o = unaligned_q;
+
+  // Instruction re-alignment
+  if (FETCH_WIDTH == 32) begin : realign_bp_32
+    always_comb begin : re_align
+      unaligned_d = unaligned_q;
+      unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
+      unaligned_instr_d = data_i[31:16];
+
+      valid_o[0] = valid_i;
+      instr_o[0] = (unaligned_q) ? {data_i[15:0], unaligned_instr_q} : data_i[31:0];
+      addr_o[0] = (unaligned_q) ? unaligned_address_q : address_i;
+
+      valid_o[1] = 1'b0;
+      instr_o[1] = '0;
+      addr_o[1] = {address_i[riscv::VLEN-1:2], 2'b10};
+
+      // this instruction is compressed or the last instruction was unaligned
+      if (instr_is_compressed[0] || unaligned_q) begin
+        // check if the upper half is still unaligned, i.e. it is not compressed;
+        // if it is compressed, reset the unaligned flag
+        // for 32 bit we can simply check the next instruction and whether it is compressed or not
+        // if it is compressed the next fetch will contain an aligned instruction
+        // is instruction 1 also compressed
+        // yes? -> no problem, no -> we've got an unaligned instruction
+        if (instr_is_compressed[1]) begin
+          unaligned_d = 1'b0;
+          valid_o[1]  = valid_i;
+          instr_o[1]  = {16'b0, data_i[31:16]};
+        end else begin
+          // save the upper bits for next cycle
+          unaligned_d = 1'b1;
+          unaligned_instr_d = data_i[31:16];
+          unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
+        end
+      end  // else -> normal fetch
+
+      // we started to fetch on an unaligned boundary with a whole instruction -> wait until we've
+      // received the next instruction
+      if (valid_i && address_i[1]) begin
+        // the instruction is not compressed so we can't do anything in this cycle
+        if (!instr_is_compressed[0]) begin
+          valid_o = '0;
+          unaligned_d = 1'b1;
+          unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
+          unaligned_instr_d = data_i[15:0];
+          // (above) the instruction isn't compressed and only its lower half is ready
+        end else begin
+          valid_o = {{INSTR_PER_FETCH - 1{1'b0}}, 1'b1};
+        end
+      end
+    end
+    // TODO(zarubaf): Fix 64 bit FETCH_WIDTH, maybe generalize to arbitrary fetch width
+  end else if (FETCH_WIDTH == 64) begin : realign_bp_64
+    initial begin
+      $error("Not propperly implemented");
+    end
+    always_comb begin : re_align
+      unaligned_d = unaligned_q;
+      unaligned_address_d = unaligned_address_q;
+      unaligned_instr_d = unaligned_instr_q;
+
+      valid_o    = '0;
+      valid_o[0] = valid_i;
+
+      instr_o[0] = data_i[31:0];
+      addr_o[0]  = address_i;
+
+      instr_o[1] = '0;
+      addr_o[1]  = {address_i[riscv::VLEN-1:3], 3'b010};
+
+      instr_o[2] = {16'b0, data_i[47:32]};
+      addr_o[2]  = {address_i[riscv::VLEN-1:3], 3'b100};
+
+      instr_o[3] = {16'b0, data_i[63:48]};
+      addr_o[3]  = {address_i[riscv::VLEN-1:3], 3'b110};
+
+      // last instruction was unaligned
+      if (unaligned_q) begin
+        instr_o[0] = {data_i[15:0], unaligned_instr_q};
+        addr_o[0]  = unaligned_address_q;
+        // for 64 bit there exist the following options:
+        //     64      32      0
+        //     | 3 | 2 | 1 | 0 | <- instruction slot
+        // |   I   |   I   |   U   | -> again unaligned
+        // | * | C |   I   |   U   | -> aligned
+        // | * |   I   | C |   U   | -> aligned
+        // |   I   | C | C |   U   | -> again unaligned
+        // | * | C | C | C |   U   | -> aligned
+        // Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half
+        //         * = don't care
+        if (instr_is_compressed[1]) begin
+          instr_o[1] = {16'b0, data_i[31:16]};
+          valid_o[1] = valid_i;
+
+          if (instr_is_compressed[2]) begin
+            if (instr_is_compressed[3]) begin
+              unaligned_d = 1'b0;
+              valid_o[3]  = valid_i;
+            end else begin
+              // continues to be unaligned
+            end
+          end else begin
+            unaligned_d = 1'b0;
+            instr_o[2]  = data_i[63:32];
+            valid_o[2]  = valid_i;
+          end
+          // instruction 1 is not compressed
+        end else begin
+          instr_o[1] = data_i[47:16];
+          valid_o[1] = valid_i;
+          addr_o[2]  = {address_i[riscv::VLEN-1:3], 3'b110};
+          if (instr_is_compressed[2]) begin
+            unaligned_d = 1'b0;
+            instr_o[2]  = {16'b0, data_i[63:48]};
+            valid_o[2]  = valid_i;
+          end else begin
+            // continues to be unaligned
+          end
+        end
+      end else if (instr_is_compressed[0]) begin  // instruction zero is RVC
+        //     64     32       0
+        //     | 3 | 2 | 1 | 0 | <- instruction slot
+        // |   I   |   I   | C | -> again unaligned
+        // | * | C |   I   | C | -> aligned
+        // | * |   I   | C | C | -> aligned
+        // |   I   | C | C | C | -> again unaligned
+        // | * | C | C | C | C | -> aligned
+        if (instr_is_compressed[1]) begin
+          instr_o[1] = {16'b0, data_i[31:16]};
+          valid_o[1] = valid_i;
+
+          if (instr_is_compressed[2]) begin
+            valid_o[2] = valid_i;
+            if (instr_is_compressed[3]) begin
+              valid_o[3] = valid_i;
+            end else begin
+              // this instruction is unaligned
+              unaligned_d = 1'b1;
+              unaligned_instr_d = data_i[63:48];
+              unaligned_address_d = addr_o[3];
+            end
+          end else begin
+            instr_o[2] = data_i[63:32];
+            valid_o[2] = valid_i;
+          end
+          // instruction 1 is not compressed -> check slot 3
+        end else begin
+          instr_o[1] = data_i[47:16];
+          valid_o[1] = valid_i;
+          addr_o[2]  = {address_i[riscv::VLEN-1:3], 3'b110};
+          if (instr_is_compressed[3]) begin
+            instr_o[2] = data_i[63:48];
+            valid_o[2] = valid_i;
+          end else begin
+            unaligned_d = 1'b1;
+            unaligned_instr_d = data_i[63:48];
+            unaligned_address_d = addr_o[2];
+          end
+        end
+
+        // Full instruction in slot zero
+        //     64     32       0
+        //     | 3 | 2 | 1 | 0 | <- instruction slot
+        // |   I   | C |   I   |
+        // | * | C | C |   I   |
+        // | * |   I   |   I   |
+      end else begin
+        addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};
+
+        if (instr_is_compressed[2]) begin
+          instr_o[1] = {16'b0, data_i[47:32]};
+          valid_o[1] = valid_i;
+          addr_o[2]  = {address_i[riscv::VLEN-1:3], 3'b110};
+          if (instr_is_compressed[3]) begin
+            // | * | C | C |   I   |
+            valid_o[2] = valid_i;
+            instr_o[2] = {16'b0, data_i[63:48]};  // RVC from slot 3; addr_o[2] was set above
+          end else begin
+            // this instruction is unaligned
+            unaligned_d = 1'b1;
+            unaligned_instr_d = data_i[63:48];
+            unaligned_address_d = addr_o[2];
+          end
+        end else begin
+          // two regular instructions back-to-back
+          instr_o[1] = data_i[63:32];
+          valid_o[1] = valid_i;
+        end
+      end
+
+      // --------------------------
+      // Unaligned fetch
+      // --------------------------
+      // Address was not 64 bit aligned
+      case (address_i[2:1])
+        // this means the previous instruction was either compressed or unaligned
+        // in any case we don't care
+        2'b01: begin
+          //     64     32       0
+          //     | 3 | 2 | 1 | 0 | <- instruction slot
+          // |   I   |   I   | x  -> again unaligned
+          // | * | C |   I   | x  -> aligned
+          // | * |   I   | C | x  -> aligned
+          // |   I   | C | C | x  -> again unaligned
+          // | * | C | C | C | x  -> aligned
+          addr_o[0] = {address_i[riscv::VLEN-1:3], 3'b010};
+
+          if (instr_is_compressed[1]) begin
+            instr_o[0] = {16'b0, data_i[31:16]};
+            valid_o[0] = valid_i;
+
+            if (instr_is_compressed[2]) begin
+              valid_o[1] = valid_i;
+              instr_o[1] = {16'b0, data_i[47:32]};
+              addr_o[1]  = {address_i[riscv::VLEN-1:3], 3'b100};
+              if (instr_is_compressed[3]) begin
+                instr_o[2] = {16'b0, data_i[63:48]};
+                addr_o[2]  = {address_i[riscv::VLEN-1:3], 3'b110};
+                valid_o[2] = valid_i;
+              end else begin
+                // this instruction is unaligned
+                unaligned_d = 1'b1;
+                unaligned_instr_d = data_i[63:48];
+                unaligned_address_d = addr_o[3];
+              end
+            end else begin
+              instr_o[1] = data_i[63:32];
+              addr_o[1]  = {address_i[riscv::VLEN-1:3], 3'b100};
+              valid_o[1] = valid_i;
+            end
+            // instruction 1 is not compressed -> check slot 3
+          end else begin
+            instr_o[0] = data_i[47:16];
+            valid_o[0] = valid_i;
+            addr_o[1]  = {address_i[riscv::VLEN-1:3], 3'b110};
+            if (instr_is_compressed[3]) begin
+              instr_o[1] = data_i[63:48];
+              valid_o[1] = valid_i;
+            end else begin
+              unaligned_d = 1'b1;
+              unaligned_instr_d = data_i[63:48];
+              unaligned_address_d = addr_o[1];
+            end
+          end
+        end
+        2'b10: begin
+          valid_o = '0;
+          //     64     32       0
+          //     | 3 | 2 | 1 | 0 | <- instruction slot
+          // |   I   | C |   *   | <- unaligned
+          //    | C  | C |   *   | <- aligned
+          //    |    I   |   *   | <- aligned
+          if (instr_is_compressed[2]) begin
+            valid_o[0] = valid_i;
+            instr_o[0] = data_i[47:32];
+            // second instruction is also compressed
+            if (instr_is_compressed[3]) begin
+              valid_o[1] = valid_i;
+              instr_o[1] = data_i[63:48];
+              // regular instruction -> unaligned
+            end else begin
+              unaligned_d = 1'b1;
+              unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110};
+              unaligned_instr_d = data_i[63:48];
+            end
+            // instruction is a regular instruction
+          end else begin
+            valid_o[0] = valid_i;
+            instr_o[0] = data_i[63:32];
+            addr_o[0]  = address_i;
+          end
+        end
+        // we started to fetch on an unaligned boundary with a whole instruction -> wait until we've
+        // received the next instruction
+        2'b11: begin
+          valid_o = '0;
+          if (!instr_is_compressed[3]) begin
+            unaligned_d = 1'b1;
+            unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110};
+            unaligned_instr_d = data_i[63:48];
+          end else begin
+            valid_o[3] = valid_i;
+          end
+        end
+      endcase
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      unaligned_q         <= 1'b0;
+      unaligned_address_q <= '0;
+      unaligned_instr_q   <= '0;
+    end else begin
+      if (valid_i) begin
+        unaligned_address_q <= unaligned_address_d;
+        unaligned_instr_q   <= unaligned_instr_d;
+      end
+
+      if (flush_i) begin
+        unaligned_q <= 1'b0;
+      end else if (valid_i) begin
+        unaligned_q <= unaligned_d;
+      end
+    end
+  end
+endmodule
diff --git a/test/type_param/core/issue_read_operands.sv b/test/type_param/core/issue_read_operands.sv
new file mode 100644
index 0000000..2e32486
--- /dev/null
+++ b/test/type_param/core/issue_read_operands.sv
@@ -0,0 +1,604 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.04.2017
+// Description: Issues instruction from the scoreboard and fetches the operands
+//              This also includes all the forwarding logic
+
+
+module issue_read_operands
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter type rs3_len_t = logic
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    // flush
+    input logic flush_i,
+    // stall
+    input logic stall_i,
+    // coming from decoder
+    input scoreboard_entry_t issue_instr_i,
+    input logic issue_instr_valid_i,
+    output logic issue_ack_o,
+    // lookup rd in scoreboard
+    output logic [REG_ADDR_SIZE-1:0] rs1_o,
+    input riscv::xlen_t rs1_i,
+    input logic rs1_valid_i,
+    output logic [REG_ADDR_SIZE-1:0] rs2_o,
+    input riscv::xlen_t rs2_i,
+    input logic rs2_valid_i,
+    output logic [REG_ADDR_SIZE-1:0] rs3_o,
+    input rs3_len_t rs3_i,
+    input logic rs3_valid_i,
+    // get clobber input
+    input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_i,
+    input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_i,
+    // To FU, just single issue for now
+    output fu_data_t fu_data_o,
+    output riscv::xlen_t rs1_forwarding_o,  // unregistered version of fu_data_o.operanda
+    output riscv::xlen_t rs2_forwarding_o,  // unregistered version of fu_data_o.operandb
+    output logic [riscv::VLEN-1:0] pc_o,
+    output logic is_compressed_instr_o,
+    // ALU 1
+    input logic flu_ready_i,  // Fixed latency unit ready to accept a new request
+    output logic alu_valid_o,  // Output is valid
+    // Branches and Jumps
+    output logic branch_valid_o,  // this is a valid branch instruction
+    output branchpredict_sbe_t branch_predict_o,
+    // LSU
+    input logic lsu_ready_i,  // FU is ready
+    output logic lsu_valid_o,  // Output is valid
+    // MULT
+    output logic mult_valid_o,  // Output is valid
+    // FPU
+    input logic fpu_ready_i,  // FU is ready
+    output logic fpu_valid_o,  // Output is valid
+    output logic [1:0] fpu_fmt_o,  // FP fmt field from instr.
+    output logic [2:0] fpu_rm_o,  // FP rm field from instr.
+    // CSR
+    output logic csr_valid_o,  // Output is valid
+    // CVXIF
+    output logic cvxif_valid_o,
+    input logic cvxif_ready_i,
+    output logic [31:0] cvxif_off_instr_o,
+    // commit port
+    input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i,
+
+    output logic stall_issue_o  // stall signal, we do not want to fetch any more entries
+    // committing instruction instruction
+    // from scoreboard
+    // input  scoreboard_entry     commit_instr_i,
+    // output logic                commit_ack_o
+);
+  logic stall;
+  logic fu_busy;  // functional unit is busy
+  riscv::xlen_t operand_a_regfile, operand_b_regfile;  // operands coming from regfile
+  rs3_len_t
+      operand_c_regfile,
+      operand_c_fpr,
+      operand_c_gpr;  // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3
+  // output flipflop (ID <-> EX)
+  riscv::xlen_t operand_a_n, operand_a_q, operand_b_n, operand_b_q, imm_n, imm_q, imm_forward_rs3;
+
+  logic        alu_valid_q;
+  logic        mult_valid_q;
+  logic        fpu_valid_q;
+  logic [ 1:0] fpu_fmt_q;
+  logic [ 2:0] fpu_rm_q;
+  logic        lsu_valid_q;
+  logic        csr_valid_q;
+  logic        branch_valid_q;
+  logic        cvxif_valid_q;
+  logic [31:0] cvxif_off_instr_q;
+
+  logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
+  fu_op operator_n, operator_q;  // operation to perform
+  fu_t fu_n, fu_q;  // functional unit to use
+
+  // forwarding signals
+  logic forward_rs1, forward_rs2, forward_rs3;
+
+  // original instruction stored in tval
+  riscv::instruction_t orig_instr;
+  assign orig_instr          = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]);
+
+  // ID <-> EX registers
+
+  assign rs1_forwarding_o    = operand_a_n[riscv::VLEN-1:0];  //forwarding or unregistered rs1 value
+  assign rs2_forwarding_o    = operand_b_n[riscv::VLEN-1:0];  //forwarding or unregistered rs2 value
+
+  assign fu_data_o.operand_a = operand_a_q;
+  assign fu_data_o.operand_b = operand_b_q;
+  assign fu_data_o.fu        = fu_q;
+  assign fu_data_o.operation = operator_q;
+  assign fu_data_o.trans_id  = trans_id_q;
+  assign fu_data_o.imm       = imm_q;
+  assign alu_valid_o         = alu_valid_q;
+  assign branch_valid_o      = branch_valid_q;
+  assign lsu_valid_o         = lsu_valid_q;
+  assign csr_valid_o         = csr_valid_q;
+  assign mult_valid_o        = mult_valid_q;
+  assign fpu_valid_o         = fpu_valid_q;
+  assign fpu_fmt_o           = fpu_fmt_q;
+  assign fpu_rm_o            = fpu_rm_q;
+  assign cvxif_valid_o       = CVA6Cfg.CvxifEn ? cvxif_valid_q : '0;
+  assign cvxif_off_instr_o   = CVA6Cfg.CvxifEn ? cvxif_off_instr_q : '0;
+  assign stall_issue_o       = stall;
+  // ---------------
+  // Issue Stage
+  // ---------------
+
+  // select the right busy signal
+  // this obviously depends on the functional unit we need
+  always_comb begin : unit_busy
+    unique case (issue_instr_i.fu)
+      NONE: fu_busy = 1'b0;
+      ALU, CTRL_FLOW, CSR, MULT: fu_busy = ~flu_ready_i;
+      LOAD, STORE: fu_busy = ~lsu_ready_i;
+      CVXIF: fu_busy = ~cvxif_ready_i;
+      default: begin
+        if (CVA6Cfg.FpPresent && (issue_instr_i.fu == FPU || issue_instr_i.fu == FPU_VEC)) begin
+          fu_busy = ~fpu_ready_i;
+        end else begin
+          fu_busy = 1'b0;
+        end
+      end
+    endcase
+  end
+
+  // ---------------
+  // Register stage
+  // ---------------
+  // check that all operands are available, otherwise stall
+  // forward corresponding register
+  always_comb begin : operands_available
+    stall = stall_i;
+    // operand forwarding signals
+    forward_rs1 = 1'b0;
+    forward_rs2 = 1'b0;
+    forward_rs3 = 1'b0;  // FPR only
+    // poll the scoreboard for those values
+    rs1_o = issue_instr_i.rs1;
+    rs2_o = issue_instr_i.rs2;
+    rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0];  // rs3 is encoded in imm field
+
+    // 0. check that we are not using the zimm type in RS1
+    //    as this is an immediate we do not have to wait on anything here
+    // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr)
+    // 2. poll the scoreboard
+    if (!issue_instr_i.use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr(
+            issue_instr_i.op
+        )) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE :
+            rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin
+      // check if the clobbering instruction is not a CSR instruction, CSR instructions can only
+      // be fetched through the register file since they can't be forwarded
+      // if the operand is available, forward it. CSRs don't write to/from FPR
+      if (rs1_valid_i && (CVA6Cfg.FpPresent && is_rs1_fpr(
+              issue_instr_i.op
+          ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs1] != CSR) ||
+                      (CVA6Cfg.RVS && issue_instr_i.op == SFENCE_VMA)))) begin
+        forward_rs1 = 1'b1;
+      end else begin  // the operand is not available -> stall
+        stall = 1'b1;
+      end
+    end
+
+    if ((CVA6Cfg.FpPresent && is_rs2_fpr(
+            issue_instr_i.op
+        )) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE :
+            rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin
+      // if the operand is available, forward it. CSRs don't write to/from FPR
+      if (rs2_valid_i && (CVA6Cfg.FpPresent && is_rs2_fpr(
+              issue_instr_i.op
+          ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs2] != CSR) ||
+                      (CVA6Cfg.RVS && issue_instr_i.op == SFENCE_VMA)))) begin
+        forward_rs2 = 1'b1;
+      end else begin  // the operand is not available -> stall
+        stall = 1'b1;
+      end
+    end
+
+    // Only check clobbered gpr for OFFLOADED instruction
+    if ((CVA6Cfg.FpPresent && is_imm_fpr(
+            issue_instr_i.op
+        )) ? rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE :
+            issue_instr_i.op == OFFLOAD && CVA6Cfg.NrRgprPorts == 3 ?
+            rd_clobber_gpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE : 0) begin
+      // if the operand is available, forward it. CSRs don't write to/from FPR so no need to check
+      if (rs3_valid_i) begin
+        forward_rs3 = 1'b1;
+      end else begin  // the operand is not available -> stall
+        stall = 1'b1;
+      end
+    end
+  end
+
+  // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3
+  if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_rs3
+    assign imm_forward_rs3 = rs3_i;
+  end else begin : gen_fp_rs3
+    assign imm_forward_rs3 = {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, rs3_i};
+  end
+
+  // Forwarding/Output MUX
+  always_comb begin : forwarding_operand_select
+    // default is regfiles (gpr or fpr)
+    operand_a_n = operand_a_regfile;
+    operand_b_n = operand_b_regfile;
+    // immediates are the third operands in the store case
+    // for FP operations, the imm field can also be the third operand from the regfile
+    if (CVA6Cfg.NrRgprPorts == 3) begin
+      imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ?
+          {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} :
+          issue_instr_i.op == OFFLOAD ? operand_c_regfile : issue_instr_i.result;
+    end else begin
+      imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ?
+          {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : issue_instr_i.result;
+    end
+    trans_id_n = issue_instr_i.trans_id;
+    fu_n       = issue_instr_i.fu;
+    operator_n = issue_instr_i.op;
+    // or should we forward
+    if (forward_rs1) begin
+      operand_a_n = rs1_i;
+    end
+
+    if (forward_rs2) begin
+      operand_b_n = rs2_i;
+    end
+
+    if (CVA6Cfg.FpPresent && forward_rs3) begin
+      imm_n = imm_forward_rs3;
+    end
+
+    // use the PC as operand a
+    if (issue_instr_i.use_pc) begin
+      operand_a_n = {
+        {riscv::XLEN - riscv::VLEN{issue_instr_i.pc[riscv::VLEN-1]}}, issue_instr_i.pc
+      };
+    end
+
+    // use the zimm as operand a
+    if (issue_instr_i.use_zimm) begin
+      // zero extend operand a
+      operand_a_n = {{riscv::XLEN - 5{1'b0}}, issue_instr_i.rs1[4:0]};
+    end
+    // or is it an immediate (including PC), this is not the case for a store, control flow, and accelerator instructions
+    // also make sure operand B is not already used as an FP operand
+    if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && (issue_instr_i.fu != ACCEL) && !(CVA6Cfg.FpPresent && is_rs2_fpr(
+            issue_instr_i.op
+        ))) begin
+      operand_b_n = issue_instr_i.result;
+    end
+  end
+
+  // FU select, assert the correct valid out signal (in the next cycle)
+  // This needs to be like this to make verilator happy. I know its ugly.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      alu_valid_q    <= 1'b0;
+      lsu_valid_q    <= 1'b0;
+      mult_valid_q   <= 1'b0;
+      fpu_valid_q    <= 1'b0;
+      fpu_fmt_q      <= 2'b0;
+      fpu_rm_q       <= 3'b0;
+      csr_valid_q    <= 1'b0;
+      branch_valid_q <= 1'b0;
+    end else begin
+      alu_valid_q    <= 1'b0;
+      lsu_valid_q    <= 1'b0;
+      mult_valid_q   <= 1'b0;
+      fpu_valid_q    <= 1'b0;
+      fpu_fmt_q      <= 2'b0;
+      fpu_rm_q       <= 3'b0;
+      csr_valid_q    <= 1'b0;
+      branch_valid_q <= 1'b0;
+      // Exception pass through:
+      // If an exception has occurred simply pass it through
+      // we do not want to issue this instruction
+      if (!issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin
+        case (issue_instr_i.fu)
+          ALU: begin
+            alu_valid_q <= 1'b1;
+          end
+          CTRL_FLOW: begin
+            branch_valid_q <= 1'b1;
+          end
+          MULT: begin
+            mult_valid_q <= 1'b1;
+          end
+          LOAD, STORE: begin
+            lsu_valid_q <= 1'b1;
+          end
+          CSR: begin
+            csr_valid_q <= 1'b1;
+          end
+          default: begin
+            if (issue_instr_i.fu == FPU && CVA6Cfg.FpPresent) begin
+              fpu_valid_q <= 1'b1;
+              fpu_fmt_q   <= orig_instr.rftype.fmt;  // fmt bits from instruction
+              fpu_rm_q    <= orig_instr.rftype.rm;  // rm bits from instruction
+            end else if (issue_instr_i.fu == FPU_VEC && CVA6Cfg.FpPresent) begin
+              fpu_valid_q <= 1'b1;
+              fpu_fmt_q   <= orig_instr.rvftype.vfmt;  // vfmt bits from instruction
+              fpu_rm_q    <= {2'b0, orig_instr.rvftype.repl};  // repl bit from instruction
+            end
+          end
+        endcase
+      end
+      // if we got a flush request, de-assert the valid flag, otherwise we will start this
+      // functional unit with the wrong inputs
+      if (flush_i) begin
+        alu_valid_q    <= 1'b0;
+        lsu_valid_q    <= 1'b0;
+        mult_valid_q   <= 1'b0;
+        fpu_valid_q    <= 1'b0;
+        csr_valid_q    <= 1'b0;
+        branch_valid_q <= 1'b0;
+      end
+    end
+  end
+
+  if (CVA6Cfg.CvxifEn) begin
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        cvxif_valid_q <= 1'b0;
+        cvxif_off_instr_q <= 32'b0;
+      end else begin
+        cvxif_valid_q <= 1'b0;
+        cvxif_off_instr_q <= 32'b0;
+        if (!issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin
+          case (issue_instr_i.fu)
+            CVXIF: begin
+              cvxif_valid_q     <= 1'b1;
+              cvxif_off_instr_q <= orig_instr;
+            end
+            default: ;
+          endcase
+        end
+        if (flush_i) begin
+          cvxif_valid_q <= 1'b0;
+          cvxif_off_instr_q <= 32'b0;
+        end
+      end
+    end
+  end
+
+  // We can issue an instruction if we do not detect that any other instruction is writing the same
+  // destination register.
+  // We also need to check if there is an unresolved branch in the scoreboard.
+  always_comb begin : issue_scoreboard
+    // default assignment
+    issue_ack_o = 1'b0;
+    // check that we didn't stall, that the instruction we got is valid
+    // and that the functional unit we need is not busy
+    if (issue_instr_valid_i) begin
+      // check that the corresponding functional unit is not busy
+      if (!stall && !fu_busy) begin
+        // -----------------------------------------
+        // WAW - Write After Write Dependency Check
+        // -----------------------------------------
+        // no other instruction has the same destination register -> issue the instruction
+        if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
+                issue_instr_i.op
+            )) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE) :
+                (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin
+          issue_ack_o = 1'b1;
+        end
+        // or check that the target destination register will be written in this cycle by the
+        // commit stage
+        for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++)
+        if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
+                issue_instr_i.op
+            )) ? (we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd[4:0]) :
+                (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd[4:0])) begin
+          issue_ack_o = 1'b1;
+        end
+
+      end
+      // we can also issue the instruction under the following two circumstances:
+      // we can do this even if we are stalled or no functional unit is ready (as we don't need one)
+      // the decoder needs to make sure that the instruction is marked as valid when it does not
+      // need any functional unit or if an exception occurred previous to the execute stage.
+      // 1. we already got an exception
+      if (issue_instr_i.ex.valid) begin
+        issue_ack_o = 1'b1;
+      end
+      // 2. it is an instruction which does not need any functional unit
+      if (issue_instr_i.fu == NONE) begin
+        issue_ack_o = 1'b1;
+      end
+    end
+    // after a multiplication was issued we can only issue another multiplication
+    // otherwise we will get contentions on the fixed latency bus
+    if (mult_valid_q && issue_instr_i.fu inside {ALU, CTRL_FLOW, CSR}) begin
+      issue_ack_o = 1'b0;
+    end
+  end
+
+  // ----------------------
+  // Integer Register File
+  // ----------------------
+  logic [  CVA6Cfg.NrRgprPorts-1:0][riscv::XLEN-1:0] rdata;
+  logic [  CVA6Cfg.NrRgprPorts-1:0][            4:0] raddr_pack;
+
+  // pack signals
+  logic [CVA6Cfg.NrCommitPorts-1:0][            4:0] waddr_pack;
+  logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_pack;
+  logic [CVA6Cfg.NrCommitPorts-1:0]                  we_pack;
+
+  if (CVA6Cfg.NrRgprPorts == 3) begin : gen_rs3
+    assign raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
+  end else begin : gen_no_rs3
+    assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
+  end
+
+  for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_write_back_port
+    assign waddr_pack[i] = waddr_i[i];
+    assign wdata_pack[i] = wdata_i[i];
+    assign we_pack[i]    = we_gpr_i[i];
+  end
+  if (ariane_pkg::FPGA_EN) begin : gen_fpga_regfile
+    ariane_regfile_fpga #(
+        .CVA6Cfg      (CVA6Cfg),
+        .DATA_WIDTH   (riscv::XLEN),
+        .NR_READ_PORTS(CVA6Cfg.NrRgprPorts),
+        .ZERO_REG_ZERO(1)
+    ) i_ariane_regfile_fpga (
+        .test_en_i(1'b0),
+        .raddr_i  (raddr_pack),
+        .rdata_o  (rdata),
+        .waddr_i  (waddr_pack),
+        .wdata_i  (wdata_pack),
+        .we_i     (we_pack),
+        .*
+    );
+  end else begin : gen_asic_regfile
+    ariane_regfile #(
+        .CVA6Cfg      (CVA6Cfg),
+        .DATA_WIDTH   (riscv::XLEN),
+        .NR_READ_PORTS(CVA6Cfg.NrRgprPorts),
+        .ZERO_REG_ZERO(1)
+    ) i_ariane_regfile (
+        .test_en_i(1'b0),
+        .raddr_i  (raddr_pack),
+        .rdata_o  (rdata),
+        .waddr_i  (waddr_pack),
+        .wdata_i  (wdata_pack),
+        .we_i     (we_pack),
+        .*
+    );
+  end
+
+  // -----------------------------
+  // Floating-Point Register File
+  // -----------------------------
+  logic [2:0][CVA6Cfg.FLen-1:0] fprdata;
+
+  // pack signals
+  logic [2:0][4:0] fp_raddr_pack;
+  logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] fp_wdata_pack;
+
+  generate
+    if (CVA6Cfg.FpPresent) begin : float_regfile_gen
+      assign fp_raddr_pack = {
+        issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]
+      };
+      for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_fp_wdata_pack
+        assign fp_wdata_pack[i] = {wdata_i[i][CVA6Cfg.FLen-1:0]};
+      end
+      if (ariane_pkg::FPGA_EN) begin : gen_fpga_fp_regfile
+        ariane_regfile_fpga #(
+            .CVA6Cfg      (CVA6Cfg),
+            .DATA_WIDTH   (CVA6Cfg.FLen),
+            .NR_READ_PORTS(3),
+            .ZERO_REG_ZERO(0)
+        ) i_ariane_fp_regfile_fpga (
+            .test_en_i(1'b0),
+            .raddr_i  (fp_raddr_pack),
+            .rdata_o  (fprdata),
+            .waddr_i  (waddr_pack),
+            .wdata_i  (fp_wdata_pack),
+            .we_i     (we_fpr_i),
+            .*
+        );
+      end else begin : gen_asic_fp_regfile
+        ariane_regfile #(
+            .CVA6Cfg      (CVA6Cfg),
+            .DATA_WIDTH   (CVA6Cfg.FLen),
+            .NR_READ_PORTS(3),
+            .ZERO_REG_ZERO(0)
+        ) i_ariane_fp_regfile (
+            .test_en_i(1'b0),
+            .raddr_i  (fp_raddr_pack),
+            .rdata_o  (fprdata),
+            .waddr_i  (waddr_pack),
+            .wdata_i  (fp_wdata_pack),
+            .we_i     (we_fpr_i),
+            .*
+        );
+      end
+    end else begin : no_fpr_gen
+      assign fprdata = '{default: '0};
+    end
+  endgenerate
+
+  if (CVA6Cfg.NrRgprPorts == 3) begin : gen_operand_c
+    assign operand_c_fpr = {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[2]};
+    assign operand_c_gpr = rdata[2];
+  end else begin
+    assign operand_c_fpr = fprdata[2];
+  end
+
+  assign operand_a_regfile = (CVA6Cfg.FpPresent && is_rs1_fpr(
+      issue_instr_i.op
+  )) ? {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[0]} : rdata[0];
+  assign operand_b_regfile = (CVA6Cfg.FpPresent && is_rs2_fpr(
+      issue_instr_i.op
+  )) ? {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[1]} : rdata[1];
+  assign operand_c_regfile = (CVA6Cfg.NrRgprPorts == 3) ? ((CVA6Cfg.FpPresent && is_imm_fpr(
+      issue_instr_i.op
+  )) ? operand_c_fpr : operand_c_gpr) : operand_c_fpr;
+
+
+  // ----------------------
+  // Registers (ID <-> EX)
+  // ----------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      operand_a_q           <= '{default: 0};
+      operand_b_q           <= '{default: 0};
+      imm_q                 <= '0;
+      fu_q                  <= NONE;
+      operator_q            <= ADD;
+      trans_id_q            <= '0;
+      pc_o                  <= '0;
+      is_compressed_instr_o <= 1'b0;
+      branch_predict_o      <= {cf_t'(0), {riscv::VLEN{1'b0}}};
+    end else begin
+      operand_a_q           <= operand_a_n;
+      operand_b_q           <= operand_b_n;
+      imm_q                 <= imm_n;
+      fu_q                  <= fu_n;
+      operator_q            <= operator_n;
+      trans_id_q            <= trans_id_n;
+      pc_o                  <= issue_instr_i.pc;
+      is_compressed_instr_o <= issue_instr_i.is_compressed;
+      branch_predict_o      <= issue_instr_i.bp;
+    end
+  end
+
+  //pragma translate_off
+  initial begin
+    assert (CVA6Cfg.NrRgprPorts == 2 || (CVA6Cfg.NrRgprPorts == 3 && CVA6Cfg.CvxifEn))
+    else
+      $fatal(
+          1,
+          "If CVXIF is enable, ariane regfile can have either 2 or 3 read ports. Else it has 2 read ports."
+      );
+  end
+
+  assert property (@(posedge clk_i) (branch_valid_q) |-> (!$isunknown(
+      operand_a_q
+  ) && !$isunknown(
+      operand_b_q
+  )))
+  else $warning("Got unknown value in one of the operands");
+
+  //pragma translate_on
+endmodule
+
+
diff --git a/test/type_param/core/issue_stage.sv b/test/type_param/core/issue_stage.sv
new file mode 100644
index 0000000..64b8cb5
--- /dev/null
+++ b/test/type_param/core/issue_stage.sv
@@ -0,0 +1,199 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 21.05.2017
+// Description: Issue stage dispatches instructions to the FUs and keeps track of them
+//              in a scoreboard like data-structure.
+
+
+module issue_stage  // top level of the issue stage: instantiates the scoreboard and issue_read_operands
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni, // Asynchronous reset active low
+
+    output logic sb_full_o,  // driven by i_scoreboard
+    input logic flush_unissued_instr_i,  // wired to i_issue_read_operands.flush_i
+    input logic flush_i,  // not connected by name below; presumably consumed through .* - confirm
+    input logic stall_i,  // Stall issue stage
+    // decoded instruction handshake with the scoreboard (original note: "from ISSUE")
+    input scoreboard_entry_t decoded_instr_i,
+    input logic decoded_instr_valid_i,
+    input logic is_ctrl_flow_i,
+    output logic decoded_instr_ack_o,
+    // to EX
+    output [riscv::VLEN-1:0] rs1_forwarding_o,  // unregistered version of fu_data_o.operanda, low VLEN bits
+    output [riscv::VLEN-1:0] rs2_forwarding_o,  // unregistered version of fu_data_o.operandb, low VLEN bits
+    output fu_data_t fu_data_o,
+    output logic [riscv::VLEN-1:0] pc_o,
+    output logic is_compressed_instr_o,
+    input logic flu_ready_i,
+    output logic alu_valid_o,
+    // ex just resolved our predicted branch, we are ready to accept new requests
+    input logic resolve_branch_i,
+
+    input  logic               lsu_ready_i,
+    output logic               lsu_valid_o,
+    // branch prediction
+    output logic               branch_valid_o,   // use branch prediction unit
+    output branchpredict_sbe_t branch_predict_o, // Branch predict Out
+
+    output logic mult_valid_o,
+
+    input  logic       fpu_ready_i,
+    output logic       fpu_valid_o,
+    output logic [1:0] fpu_fmt_o,    // FP fmt field from instr.
+    output logic [2:0] fpu_rm_o,     // FP rm field from instr.
+
+    output logic csr_valid_o,
+
+    // CVXIF
+    //Issue interface
+    output logic        x_issue_valid_o,  // from i_issue_read_operands.cvxif_valid_o
+    input  logic        x_issue_ready_i,  // to i_issue_read_operands.cvxif_ready_i
+    output logic [31:0] x_off_instr_o,  // from i_issue_read_operands.cvxif_off_instr_o
+
+    // to accelerator dispatcher
+    output scoreboard_entry_t issue_instr_o,  // copy of the scoreboard's issue_instr_o
+    output logic              issue_instr_hs_o,  // issue valid AND issue acknowledge
+
+    // write back port
+    input logic [CVA6Cfg.NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_i,
+    input bp_resolve_t resolved_branch_i,
+    input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i,
+    input exception_t [CVA6Cfg.NrWbPorts-1:0]              ex_ex_i, // exception from execute stage or CVXIF offloaded instruction
+    input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i,
+    input logic x_we_i,
+
+    // commit port
+    input logic [CVA6Cfg.NrCommitPorts-1:0][            4:0] waddr_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0]                  we_gpr_i,
+    input logic [CVA6Cfg.NrCommitPorts-1:0]                  we_fpr_i,
+
+    output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
+    input  logic              [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
+
+    output logic stall_issue_o,  // Used in Performance Counters
+
+    //RVFI
+    output logic [TRANS_ID_BITS-1:0] rvfi_issue_pointer_o,
+    output logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] rvfi_commit_pointer_o
+);
+  // ---------------------------------------------------
+  // Scoreboard (SB) <-> Issue and Read Operands (IRO)
+  // ---------------------------------------------------
+  typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? riscv::XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t;  // rs3 width: XLEN with 3 GPR read ports, else FLen
+
+  fu_t               [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro;
+  fu_t               [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro;
+
+  logic              [   REG_ADDR_SIZE-1:0] rs1_iro_sb;
+  riscv::xlen_t                             rs1_sb_iro;
+  logic                                     rs1_valid_sb_iro;
+
+  logic              [   REG_ADDR_SIZE-1:0] rs2_iro_sb;
+  riscv::xlen_t                             rs2_sb_iro;
+  logic                                     rs2_valid_iro_sb;  // NOTE(review): driven by the scoreboard despite the _iro_sb suffix
+
+  logic              [   REG_ADDR_SIZE-1:0] rs3_iro_sb;
+  rs3_len_t                                 rs3_sb_iro;
+  logic                                     rs3_valid_iro_sb;  // NOTE(review): driven by the scoreboard despite the _iro_sb suffix
+
+  scoreboard_entry_t                        issue_instr_sb_iro;
+  logic                                     issue_instr_valid_sb_iro;
+  logic                                     issue_ack_iro_sb;
+
+  riscv::xlen_t                             rs1_forwarding_xlen;
+  riscv::xlen_t                             rs2_forwarding_xlen;
+
+  assign rs1_forwarding_o = rs1_forwarding_xlen[riscv::VLEN-1:0];  // keep only the low VLEN bits
+  assign rs2_forwarding_o = rs2_forwarding_xlen[riscv::VLEN-1:0];  // keep only the low VLEN bits
+
+  assign issue_instr_o    = issue_instr_sb_iro;
+  assign issue_instr_hs_o = issue_instr_valid_sb_iro & issue_ack_iro_sb;  // handshake completes when valid and ack coincide
+
+
+  // ---------------------------------------------------------
+  // 2. Manage instructions in a scoreboard
+  // ---------------------------------------------------------
+  scoreboard #(
+      .CVA6Cfg  (CVA6Cfg),
+      .rs3_len_t(rs3_len_t)
+  ) i_scoreboard (
+      .sb_full_o          (sb_full_o),
+      .unresolved_branch_i(1'b0),  // tied off
+      .rd_clobber_gpr_o   (rd_clobber_gpr_sb_iro),
+      .rd_clobber_fpr_o   (rd_clobber_fpr_sb_iro),
+      .rs1_i              (rs1_iro_sb),
+      .rs1_o              (rs1_sb_iro),
+      .rs1_valid_o        (rs1_valid_sb_iro),
+      .rs2_i              (rs2_iro_sb),
+      .rs2_o              (rs2_sb_iro),
+      .rs2_valid_o        (rs2_valid_iro_sb),
+      .rs3_i              (rs3_iro_sb),
+      .rs3_o              (rs3_sb_iro),
+      .rs3_valid_o        (rs3_valid_iro_sb),
+
+      .decoded_instr_i      (decoded_instr_i),
+      .decoded_instr_valid_i(decoded_instr_valid_i),
+      .decoded_instr_ack_o  (decoded_instr_ack_o),
+      .issue_instr_o        (issue_instr_sb_iro),
+      .issue_instr_valid_o  (issue_instr_valid_sb_iro),
+      .issue_ack_i          (issue_ack_iro_sb),
+
+      .resolved_branch_i(resolved_branch_i),
+      .trans_id_i       (trans_id_i),
+      .wbdata_i         (wbdata_i),
+      .ex_i             (ex_ex_i),
+      .*  // remaining ports bind implicitly to same-named signals of this module
+  );
+
+  // ---------------------------------------------------------
+  // 3. Issue instruction and read operand, also commit
+  // ---------------------------------------------------------
+  issue_read_operands #(
+      .CVA6Cfg  (CVA6Cfg),
+      .rs3_len_t(rs3_len_t)
+  ) i_issue_read_operands (
+      .flush_i            (flush_unissued_instr_i),  // this unit's flush_i is the unissued-instruction flush, not the module's flush_i
+      .issue_instr_i      (issue_instr_sb_iro),
+      .issue_instr_valid_i(issue_instr_valid_sb_iro),
+      .issue_ack_o        (issue_ack_iro_sb),
+      .fu_data_o          (fu_data_o),
+      .flu_ready_i        (flu_ready_i),
+      .rs1_o              (rs1_iro_sb),
+      .rs1_i              (rs1_sb_iro),
+      .rs1_valid_i        (rs1_valid_sb_iro),
+      .rs2_o              (rs2_iro_sb),
+      .rs2_i              (rs2_sb_iro),
+      .rs2_valid_i        (rs2_valid_iro_sb),
+      .rs3_o              (rs3_iro_sb),
+      .rs3_i              (rs3_sb_iro),
+      .rs3_valid_i        (rs3_valid_iro_sb),
+      .rd_clobber_gpr_i   (rd_clobber_gpr_sb_iro),
+      .rd_clobber_fpr_i   (rd_clobber_fpr_sb_iro),
+      .alu_valid_o        (alu_valid_o),
+      .branch_valid_o     (branch_valid_o),
+      .csr_valid_o        (csr_valid_o),
+      .cvxif_valid_o      (x_issue_valid_o),
+      .cvxif_ready_i      (x_issue_ready_i),
+      .cvxif_off_instr_o  (x_off_instr_o),
+      .mult_valid_o       (mult_valid_o),
+      .rs1_forwarding_o   (rs1_forwarding_xlen),
+      .rs2_forwarding_o   (rs2_forwarding_xlen),
+      .stall_issue_o      (stall_issue_o),
+      .*  // remaining ports bind implicitly to same-named signals of this module
+  );
+
+endmodule
diff --git a/test/type_param/core/load_store_unit.sv b/test/type_param/core/load_store_unit.sv
new file mode 100644
index 0000000..14a281f
--- /dev/null
+++ b/test/type_param/core/load_store_unit.sv
@@ -0,0 +1,493 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.04.2017
+// Description: Load Store Unit, handles address calculation and memory interface signals
+
+
+module load_store_unit  // address generation, MMU (or bypass), store/load units and the LSU bypass register
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned ASID_WIDTH = 1  // width of asid_i / asid_to_be_flushed_i; forwarded to the MMU
+) (
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic flush_i,
+    input  logic stall_st_pending_i,  // forwarded to the store unit
+    output logic no_st_pending_o,  // driven by the store unit
+    input  logic amo_valid_commit_i,  // forwarded to the store unit
+
+    input  fu_data_t fu_data_i,
+    output logic     lsu_ready_o,  // FU is ready e.g. not busy
+    input  logic     lsu_valid_i,  // Input is valid
+
+    output logic [TRANS_ID_BITS-1:0] load_trans_id_o,          // ID of scoreboard entry at which to write back
+    output riscv::xlen_t load_result_o,
+    output logic load_valid_o,
+    output exception_t load_exception_o,  // to WB, signal exception status LD exception
+
+    output logic [TRANS_ID_BITS-1:0] store_trans_id_o,         // ID of scoreboard entry at which to write back
+    output riscv::xlen_t store_result_o,
+    output logic store_valid_o,
+    output exception_t store_exception_o,  // to WB, signal exception status ST exception
+
+    input logic commit_i,  // commit the pending store
+    output logic commit_ready_o,  // commit queue is ready to accept another commit request
+    input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,  // forwarded to the load unit
+
+    input logic enable_translation_i,   // enable virtual memory translation
+    input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
+
+    // icache translation requests
+    input  icache_arsp_t icache_areq_i,
+    output icache_areq_t icache_areq_o,
+
+    input  riscv::priv_lvl_t                   priv_lvl_i,             // From CSR register file
+    input  riscv::priv_lvl_t                   ld_st_priv_lvl_i,       // From CSR register file
+    input  logic                               sum_i,                  // From CSR register file
+    input  logic                               mxr_i,                  // From CSR register file
+    input  logic             [riscv::PPNW-1:0] satp_ppn_i,             // From CSR register file
+    input  logic             [ ASID_WIDTH-1:0] asid_i,                 // From CSR register file
+    input  logic             [ ASID_WIDTH-1:0] asid_to_be_flushed_i,
+    input  logic             [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
+    input  logic                               flush_tlb_i,
+    // Performance counters
+    output logic                               itlb_miss_o,
+    output logic                               dtlb_miss_o,
+
+    // interface to dcache (port 0: MMU/PTW, port 1: load unit, port 2: store unit)
+    input  dcache_req_o_t  [ 2:0]                  dcache_req_ports_i,
+    output dcache_req_i_t  [ 2:0]                  dcache_req_ports_o,
+    input  logic                                   dcache_wbuffer_empty_i,
+    input  logic                                   dcache_wbuffer_not_ni_i,
+    // AMO interface
+    output amo_req_t                               amo_req_o,
+    input  amo_resp_t                              amo_resp_i,
+    // PMP
+    input  riscv::pmpcfg_t [15:0]                  pmpcfg_i,
+    input  logic           [15:0][riscv::PLEN-3:0] pmpaddr_i,
+
+    //RVFI
+    output lsu_ctrl_t                   rvfi_lsu_ctrl_o,
+    output            [riscv::PLEN-1:0] rvfi_mem_paddr_o
+);
+  // data is misaligned
+  logic                               data_misaligned;
+  // --------------------------------------
+  // 1st register stage - (stall registers)
+  // --------------------------------------
+  // those are the signals which are always correct
+  // e.g.: they keep the value in the stall case
+  lsu_ctrl_t                          lsu_ctrl;
+
+  logic                               pop_st;
+  logic                               pop_ld;
+
+  // ------------------------------
+  // Address Generation Unit (AGU)
+  // ------------------------------
+  // virtual address as calculated by the AGU in the first cycle
+  logic         [    riscv::VLEN-1:0] vaddr_i;
+  riscv::xlen_t                       vaddr_xlen;
+  logic                               overflow;
+  logic         [(riscv::XLEN/8)-1:0] be_i;
+  // effective address: signed sum of immediate and operand A, reinterpreted as unsigned
+  assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a));
+  assign vaddr_i = vaddr_xlen[riscv::VLEN-1:0];
+  // overflow (64-bit only): bits [XLEN-1:SV-1] are neither all ones nor all zeros; the VM-enable check happens where the flag is consumed
+  assign overflow = (riscv::IS_XLEN64 && (!((&vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b1 || (|vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b0)));
+
+  logic                   st_valid_i;
+  logic                   ld_valid_i;
+  logic                   ld_translation_req;
+  logic                   st_translation_req;
+  logic [riscv::VLEN-1:0] ld_vaddr;
+  logic [riscv::VLEN-1:0] st_vaddr;
+  logic                   translation_req;
+  logic                   translation_valid;
+  logic [riscv::VLEN-1:0] mmu_vaddr;
+  logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen;
+  exception_t                       mmu_exception;
+  logic                             dtlb_hit;
+  logic         [  riscv::PPNW-1:0] dtlb_ppn;
+
+  logic                             ld_valid;
+  logic         [TRANS_ID_BITS-1:0] ld_trans_id;
+  riscv::xlen_t                     ld_result;
+  logic                             st_valid;
+  logic         [TRANS_ID_BITS-1:0] st_trans_id;
+  riscv::xlen_t                     st_result;
+
+  logic         [             11:0] page_offset;
+  logic                             page_offset_matches;
+
+  exception_t                       misaligned_exception;
+  exception_t                       ld_ex;
+  exception_t                       st_ex;
+
+  // -------------------
+  // MMU e.g.: TLBs/PTW
+  // -------------------
+  if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39
+    mmu #(
+        .CVA6Cfg          (CVA6Cfg),
+        .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES),
+        .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES),
+        .ASID_WIDTH       (ASID_WIDTH)
+    ) i_cva6_mmu (
+        // misaligned bypass
+        .misaligned_ex_i(misaligned_exception),
+        .lsu_is_store_i (st_translation_req),
+        .lsu_req_i      (translation_req),
+        .lsu_vaddr_i    (mmu_vaddr),
+        .lsu_valid_o    (translation_valid),
+        .lsu_paddr_o    (mmu_paddr),
+        .lsu_exception_o(mmu_exception),
+        .lsu_dtlb_hit_o (dtlb_hit),               // send in the same cycle as the request
+        .lsu_dtlb_ppn_o (dtlb_ppn),               // send in the same cycle as the request
+        // connecting PTW to D$ IF
+        .req_port_i     (dcache_req_ports_i[0]),
+        .req_port_o     (dcache_req_ports_o[0]),
+        // icache address translation requests
+        .icache_areq_i  (icache_areq_i),
+        .asid_to_be_flushed_i,
+        .vaddr_to_be_flushed_i,
+        .icache_areq_o  (icache_areq_o),
+        .pmpcfg_i,
+        .pmpaddr_i,
+        .*
+    );
+  end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32
+    cva6_mmu_sv32 #(
+        .CVA6Cfg          (CVA6Cfg),
+        .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES),
+        .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES),
+        .ASID_WIDTH       (ASID_WIDTH)
+    ) i_cva6_mmu (
+        // misaligned bypass
+        .misaligned_ex_i(misaligned_exception),
+        .lsu_is_store_i (st_translation_req),
+        .lsu_req_i      (translation_req),
+        .lsu_vaddr_i    (mmu_vaddr),
+        .lsu_valid_o    (translation_valid),
+        .lsu_paddr_o    (mmu_paddr),
+        .lsu_exception_o(mmu_exception),
+        .lsu_dtlb_hit_o (dtlb_hit),               // send in the same cycle as the request
+        .lsu_dtlb_ppn_o (dtlb_ppn),               // send in the same cycle as the request
+        // connecting PTW to D$ IF
+        .req_port_i     (dcache_req_ports_i[0]),
+        .req_port_o     (dcache_req_ports_o[0]),
+        // icache address translation requests
+        .icache_areq_i  (icache_areq_i),
+        .asid_to_be_flushed_i,
+        .vaddr_to_be_flushed_i,
+        .icache_areq_o  (icache_areq_o),
+        .pmpcfg_i,
+        .pmpaddr_i,
+        .*
+    );
+  end else begin : gen_no_mmu
+    // no MMU: the physical address is the virtual address truncated or zero-extended to PLEN bits
+    if (riscv::VLEN > riscv::PLEN) begin
+      assign mmu_vaddr_plen   = mmu_vaddr[riscv::PLEN-1:0];
+      assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];
+    end else begin
+      assign mmu_vaddr_plen   = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, mmu_vaddr};
+      assign fetch_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, icache_areq_i.fetch_vaddr};
+    end
+
+    assign icache_areq_o.fetch_valid           = icache_areq_i.fetch_req;
+    assign icache_areq_o.fetch_paddr           = fetch_vaddr_plen;
+    assign icache_areq_o.fetch_exception       = '0;
+    // dcache port 0 (the PTW port in the MMU branches above) is tied off: it never raises data_req
+    assign dcache_req_ports_o[0].address_index = '0;
+    assign dcache_req_ports_o[0].address_tag   = '0;
+    assign dcache_req_ports_o[0].data_wdata    = '0;
+    assign dcache_req_ports_o[0].data_req      = 1'b0;
+    assign dcache_req_ports_o[0].data_be       = '1;
+    assign dcache_req_ports_o[0].data_size     = 2'b11;
+    assign dcache_req_ports_o[0].data_we       = 1'b0;
+    assign dcache_req_ports_o[0].kill_req      = '0;
+    assign dcache_req_ports_o[0].tag_valid     = 1'b0;
+
+    assign itlb_miss_o                         = 1'b0;
+    assign dtlb_miss_o                         = 1'b0;
+    assign dtlb_ppn                            = mmu_vaddr_plen[riscv::PLEN-1:12];
+    assign dtlb_hit                            = 1'b1;
+    // registered pass-through: paddr, valid and exception follow the request by one clock
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        mmu_paddr         <= '0;
+        translation_valid <= '0;
+        mmu_exception     <= '0;
+      end else begin
+        mmu_paddr         <= mmu_vaddr_plen;
+        translation_valid <= translation_req;
+        mmu_exception     <= misaligned_exception;
+      end
+    end
+  end
+
+
+  logic store_buffer_empty;  // store unit -> load unit
+  // ------------------
+  // Store Unit
+  // ------------------
+  store_unit #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_store_unit (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .stall_st_pending_i,
+      .no_st_pending_o,
+      .store_buffer_empty_o(store_buffer_empty),
+
+      .valid_i   (st_valid_i),
+      .lsu_ctrl_i(lsu_ctrl),
+      .pop_st_o  (pop_st),
+      .commit_i,
+      .commit_ready_o,
+      .amo_valid_commit_i,
+
+      .valid_o              (st_valid),
+      .trans_id_o           (st_trans_id),
+      .result_o             (st_result),
+      .ex_o                 (st_ex),
+      // MMU port
+      .translation_req_o    (st_translation_req),
+      .vaddr_o              (st_vaddr),
+      .rvfi_mem_paddr_o     (rvfi_mem_paddr_o),
+      .paddr_i              (mmu_paddr),
+      .ex_i                 (mmu_exception),
+      .dtlb_hit_i           (dtlb_hit),
+      // Load Unit
+      .page_offset_i        (page_offset),
+      .page_offset_matches_o(page_offset_matches),
+      // AMOs
+      .amo_req_o,
+      .amo_resp_i,
+      // to memory arbiter
+      .req_port_i           (dcache_req_ports_i[2]),
+      .req_port_o           (dcache_req_ports_o[2])
+  );
+
+  // ------------------
+  // Load Unit
+  // ------------------
+  load_unit #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_load_unit (
+      .valid_i   (ld_valid_i),
+      .lsu_ctrl_i(lsu_ctrl),
+      .pop_ld_o  (pop_ld),
+
+      .valid_o              (ld_valid),
+      .trans_id_o           (ld_trans_id),
+      .result_o             (ld_result),
+      .ex_o                 (ld_ex),
+      // MMU port
+      .translation_req_o    (ld_translation_req),
+      .vaddr_o              (ld_vaddr),
+      .paddr_i              (mmu_paddr),
+      .ex_i                 (mmu_exception),
+      .dtlb_hit_i           (dtlb_hit),
+      .dtlb_ppn_i           (dtlb_ppn),
+      // to store unit
+      .page_offset_o        (page_offset),
+      .page_offset_matches_i(page_offset_matches),
+      .store_buffer_empty_i (store_buffer_empty),
+      // to memory arbiter
+      .req_port_i           (dcache_req_ports_i[1]),
+      .req_port_o           (dcache_req_ports_o[1]),
+      .dcache_wbuffer_not_ni_i,
+      .commit_tran_id_i,
+      .*  // remaining ports bind implicitly to same-named signals of this module
+  );
+
+  // ----------------------------
+  // Output Pipeline Register
+  // ----------------------------
+
+  // amount of pipeline registers inserted for load/store return path
+  // can be tuned to trade-off IPC vs. cycle time
+  // valid, transaction ID, result and exception travel together as one flat vector
+  shift_reg #(
+      .dtype(logic [$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1:0]),
+      .Depth(cva6_config_pkg::CVA6ConfigNrLoadPipeRegs)
+  ) i_pipe_reg_load (
+      .clk_i,
+      .rst_ni,
+      .d_i({ld_valid, ld_trans_id, ld_result, ld_ex}),
+      .d_o({load_valid_o, load_trans_id_o, load_result_o, load_exception_o})
+  );
+
+  shift_reg #(
+      .dtype(logic [$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1:0]),
+      .Depth(cva6_config_pkg::CVA6ConfigNrStorePipeRegs)
+  ) i_pipe_reg_store (
+      .clk_i,
+      .rst_ni,
+      .d_i({st_valid, st_trans_id, st_result, st_ex}),
+      .d_o({store_valid_o, store_trans_id_o, store_result_o, store_exception_o})
+  );
+
+  // determine whether this is a load or store
+  always_comb begin : which_op
+    // defaults: neither unit selected, no translation request, zero address to the MMU
+    ld_valid_i      = 1'b0;
+    st_valid_i      = 1'b0;
+
+    translation_req = 1'b0;
+    mmu_vaddr       = {riscv::VLEN{1'b0}};
+
+    // check the operation to activate the right functional unit accordingly
+    unique case (lsu_ctrl.fu)
+      // all loads go here
+      LOAD: begin
+        ld_valid_i      = lsu_ctrl.valid;
+        translation_req = ld_translation_req;
+        mmu_vaddr       = ld_vaddr;
+      end
+      // all stores go here
+      STORE: begin
+        st_valid_i      = lsu_ctrl.valid;
+        translation_req = st_translation_req;
+        mmu_vaddr       = st_vaddr;
+      end
+      // not relevant for the LSU
+      default: ;
+    endcase
+  end
+
+
+  // ---------------
+  // Byte Enable
+  // ---------------
+  // we can generate the byte enable from the virtual address since the lowest
+  // 12 bits are the same anyway
+  // and we can always generate the byte enable from the address at hand
+
+  if (riscv::IS_XLEN64) begin : gen_8b_be
+    assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operation));
+  end else begin : gen_4b_be
+    assign be_i = be_gen_32(vaddr_i[1:0], extract_transfer_size(fu_data_i.operation));
+  end
+
+  // ------------------------
+  // Misaligned Exception
+  // ------------------------
+  // we can detect a misaligned exception immediately
+  // the misaligned exception is passed to the functional unit via the MMU, which can
+  // augment it if other memory-related exceptions, like a page fault or access error, occur
+  always_comb begin : data_misaligned_detection
+    // default: all-zero exception (fields presumably {cause, tval, valid} - confirm against exception_t)
+    misaligned_exception = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0};
+
+    data_misaligned = 1'b0;
+
+    if (lsu_ctrl.valid) begin
+      case (lsu_ctrl.operation)
+        // double word
+        LD, SD, FLD, FSD,
+                AMO_LRD, AMO_SCD,
+                AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD,
+                AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND,
+                AMO_MINDU: begin
+          if (riscv::IS_XLEN64 && lsu_ctrl.vaddr[2:0] != 3'b000) begin
+            data_misaligned = 1'b1;
+          end
+        end
+        // word
+        LW, LWU, SW, FLW, FSW,
+                AMO_LRW, AMO_SCW,
+                AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW,
+                AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW,
+                AMO_MINWU: begin
+          if (lsu_ctrl.vaddr[1:0] != 2'b00) begin
+            data_misaligned = 1'b1;
+          end
+        end
+        // half word
+        LH, LHU, SH, FLH, FSH: begin
+          if (lsu_ctrl.vaddr[0] != 1'b0) begin
+            data_misaligned = 1'b1;
+          end
+        end
+        // byte -> is always aligned
+        default: ;
+      endcase
+    end
+
+    if (data_misaligned) begin
+
+      if (lsu_ctrl.fu == LOAD) begin
+        misaligned_exception = {
+          riscv::LD_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
+        };
+
+      end else if (lsu_ctrl.fu == STORE) begin
+        misaligned_exception = {
+          riscv::ST_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
+        };
+      end
+    end
+    // assigned last, so an address overflow under translation overrides any misaligned exception set above
+    if (ariane_pkg::MMU_PRESENT && en_ld_st_translation_i && lsu_ctrl.overflow) begin
+
+      if (lsu_ctrl.fu == LOAD) begin
+        misaligned_exception = {
+          riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
+        };
+
+      end else if (lsu_ctrl.fu == STORE) begin
+        misaligned_exception = {
+          riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
+        };
+      end
+    end
+  end
+
+  // ------------------
+  // LSU Control
+  // ------------------
+  // new data arrives here
+  lsu_ctrl_t lsu_req_i;
+  // positional assignment: the concatenation order must follow the lsu_ctrl_t field layout (declared elsewhere)
+  assign lsu_req_i = {
+    lsu_valid_i,
+    vaddr_i,
+    overflow,
+    fu_data_i.operand_b,
+    be_i,
+    fu_data_i.fu,
+    fu_data_i.operation,
+    fu_data_i.trans_id
+  };
+
+  lsu_bypass #(
+      .CVA6Cfg(CVA6Cfg)
+  ) lsu_bypass_i (
+      .lsu_req_i      (lsu_req_i),
+      .lsu_req_valid_i(lsu_valid_i),
+      .pop_ld_i       (pop_ld),
+      .pop_st_i       (pop_st),
+
+      .lsu_ctrl_o(lsu_ctrl),
+      .ready_o   (lsu_ready_o),
+      .*  // remaining ports bind implicitly to same-named signals of this module
+  );
+
+  assign rvfi_lsu_ctrl_o = lsu_ctrl;  // expose the current LSU control word for RVFI tracing
+
+endmodule
+
diff --git a/test/type_param/core/load_unit.sv b/test/type_param/core/load_unit.sv
new file mode 100644
index 0000000..512b498
--- /dev/null
+++ b/test/type_param/core/load_unit.sv
@@ -0,0 +1,534 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba    <zarubaf@iis.ee.ethz.ch>, ETH Zurich
+//         Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 15.08.2018
+// Description: Load Unit, takes care of all load requests
+//
+// Contributor: Cesar Fuguet <cesar.fuguettortolero@cea.fr>, CEA List
+// Date: August 29, 2023
+// Modification: add support for multiple outstanding load operations
+//               to the data cache
+
+module load_unit
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // marks all load buffer slots as flushed and sends the FSM to WAIT_FLUSH
+    // load unit input port
+    input logic valid_i,
+    input lsu_ctrl_t lsu_ctrl_i,
+    output logic pop_ld_o,  // set on the transition to SEND_TAG, or on an exception in WAIT_TRANSLATION without rvalid
+    // load unit output port
+    output logic valid_o,
+    output logic [TRANS_ID_BITS-1:0] trans_id_o,
+    output riscv::xlen_t result_o,
+    output exception_t ex_o,
+    // MMU -> Address Translation
+    output logic translation_req_o,  // request address translation
+    output logic [riscv::VLEN-1:0] vaddr_o,  // virtual address out
+    input logic [riscv::PLEN-1:0] paddr_i,  // physical address in
+    input  exception_t               ex_i,                // exception which may have happened earlier, for example a misaligned exception
+    input logic dtlb_hit_i,  // hit on the dtlb, sent in the same cycle as the request
+    input  logic [riscv::PPNW-1:0]   dtlb_ppn_i,          // ppn on the dtlb, sent in the same cycle as the request
+    // address checker
+    output logic [11:0] page_offset_o,  // vaddr[11:0] of the current request
+    input logic page_offset_matches_i,  // while set, the FSM does not issue the load to the D$
+    input logic store_buffer_empty_i,  // the entire store-buffer is empty
+    input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,  // compared with lsu_ctrl_i.trans_id for the non-idempotent stall
+    // D$ interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+    input logic dcache_wbuffer_not_ni_i  // when low, treated as an in-flight store by the non-idempotent stall
+);
+  enum logic [3:0] {
+    IDLE,  // a new load can be accepted
+    WAIT_GNT,  // data_req is held until the D$ returns data_gnt
+    SEND_TAG,  // tag_valid is driven; a following load can already be accepted
+    WAIT_PAGE_OFFSET,  // stalled while page_offset_matches_i is set
+    ABORT_TRANSACTION,  // kill the granted request after a DTLB miss
+    ABORT_TRANSACTION_NI,  // kill the granted request because stall_ni was set
+    WAIT_TRANSLATION,  // keep translation_req_o up until dtlb_hit_i or ex_i.valid
+    WAIT_FLUSH,  // entered on flush_i, kills the request and returns to IDLE
+    WAIT_WB_EMPTY  // wait for dcache_wbuffer_not_ni_i
+  }
+      state_d, state_q;
+
+  // in order to decouple the response interface from the request interface,
+  // we need a buffer which can hold all inflight memory load requests
+  typedef struct packed {
+    logic [TRANS_ID_BITS-1:0]           trans_id;        // scoreboard identifier
+    logic [riscv::XLEN_ALIGN_BYTES-1:0] address_offset;  // least significant bits of the address
+    fu_op                               operation;       // type of load
+  } ldbuf_t;
+
+
+  // to support a throughput of one load per cycle, if the number of entries
+  // of the load buffer is 1, implement a fall-through mode. This however
+  // adds a combinational path between the request and response interfaces
+  // towards the cache.
+  localparam logic LDBUF_FALLTHROUGH = (CVA6Cfg.NrLoadBufEntries == 1);
+  localparam int unsigned REQ_ID_BITS = CVA6Cfg.NrLoadBufEntries > 1 ? $clog2(
+      CVA6Cfg.NrLoadBufEntries
+  ) : 1;
+
+  typedef logic [REQ_ID_BITS-1:0] ldbuf_id_t;
+
+  logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_valid_q, ldbuf_valid_d;
+  logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_flushed_q, ldbuf_flushed_d;
+  ldbuf_t [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_q;
+  logic ldbuf_empty, ldbuf_full;
+  ldbuf_id_t ldbuf_free_index;
+  logic      ldbuf_w;  // write strobe: a request was granted this cycle
+  ldbuf_t    ldbuf_wdata;
+  ldbuf_id_t ldbuf_windex;
+  logic      ldbuf_r;  // read strobe: follows req_port_i.data_rvalid
+  ldbuf_t    ldbuf_rdata;
+  ldbuf_id_t ldbuf_rindex;
+  ldbuf_id_t ldbuf_last_id_q;  // slot index of the most recent write
+
+  assign ldbuf_full = &ldbuf_valid_q;
+
+  //  buffer of outstanding loads
+  //  NOTE(review): ldbuf_empty is only driven in the multi-entry branch below and is not read in this module
+
+  //  write in the first available slot
+  generate
+    if (CVA6Cfg.NrLoadBufEntries > 1) begin : ldbuf_free_index_multi_gen
+      lzc #(
+          .WIDTH(CVA6Cfg.NrLoadBufEntries),
+          .MODE (1'b0)                       // Count leading zeros (NOTE(review): common_cells lzc documents MODE=0 as trailing-zero count - confirm)
+      ) lzc_windex_i (
+          .in_i   (~ldbuf_valid_q),
+          .cnt_o  (ldbuf_free_index),
+          .empty_o(ldbuf_empty)
+      );
+    end else begin : ldbuf_free_index_single_gen
+      assign ldbuf_free_index = 1'b0;
+    end
+  endgenerate
+
+  assign ldbuf_windex = (LDBUF_FALLTHROUGH && ldbuf_r) ? ldbuf_rindex : ldbuf_free_index;
+
+  always_comb begin : ldbuf_comb
+    ldbuf_flushed_d = ldbuf_flushed_q;
+    ldbuf_valid_d   = ldbuf_valid_q;
+
+    //  In case of flush, raise the flushed flag in all slots.
+    if (flush_i) begin
+      ldbuf_flushed_d = '1;
+    end
+    //  Free read entry (in the case of fall-through mode, free the entry
+    //  only if no new load is written in the same cycle)
+    if (ldbuf_r && (!LDBUF_FALLTHROUGH || !ldbuf_w)) begin
+      ldbuf_valid_d[ldbuf_rindex] = 1'b0;
+    end
+    //  Track a new outstanding operation in the load buffer (evaluated last, so it overrides the flush and the free above)
+    if (ldbuf_w) begin
+      ldbuf_flushed_d[ldbuf_windex] = 1'b0;
+      ldbuf_valid_d[ldbuf_windex]   = 1'b1;
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ldbuf_ff
+    if (!rst_ni) begin
+      ldbuf_flushed_q <= '0;
+      ldbuf_valid_q   <= '0;
+      ldbuf_last_id_q <= '0;
+      ldbuf_q         <= '0;
+    end else begin
+      ldbuf_flushed_q <= ldbuf_flushed_d;
+      ldbuf_valid_q   <= ldbuf_valid_d;
+      if (ldbuf_w) begin  // payload and last written index only change on a write
+        ldbuf_last_id_q       <= ldbuf_windex;
+        ldbuf_q[ldbuf_windex] <= ldbuf_wdata;
+      end
+    end
+  end
+
+  // page offset is defined as the lower 12 bits, feed through for address checker
+  assign page_offset_o = lsu_ctrl_i.vaddr[11:0];
+  // feed-through the virtual address for VA translation
+  assign vaddr_o = lsu_ctrl_i.vaddr;
+  // this is a read-only interface so set the write enable to 0
+  assign req_port_o.data_we = 1'b0;
+  assign req_port_o.data_wdata = '0;
+  // compose the load buffer write data, control is handled in the FSM
+  assign ldbuf_wdata = {
+    lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[riscv::XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation
+  };
+  // output address
+  // the lower DCACHE_INDEX_WIDTH bits of the virtual address are output as the index to the cache
+  assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
+  // translation from last cycle, again: control is handled in the FSM
+  assign req_port_o.address_tag   = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH     +
+                                              ariane_pkg::DCACHE_INDEX_WIDTH-1 :
+                                              ariane_pkg::DCACHE_INDEX_WIDTH];
+  // request id = index of the load buffer's entry
+  assign req_port_o.data_id = ldbuf_windex;
+  // directly forward exception fields (the valid bit is driven in the rvalid_output process)
+  assign ex_o.cause = ex_i.cause;
+  assign ex_o.tval = ex_i.tval;
+
+  // Check that NI (non-idempotent) operations follow the necessary conditions
+  logic paddr_ni;
+  logic not_commit_time;
+  logic inflight_stores;
+  logic stall_ni;
+  assign paddr_ni = config_pkg::is_inside_nonidempotent_regions(
+      CVA6Cfg, {{52 - riscv::PPNW{1'b0}}, dtlb_ppn_i, 12'd0}
+  );
+  assign not_commit_time = commit_tran_id_i != lsu_ctrl_i.trans_id;
+  assign inflight_stores = (!dcache_wbuffer_not_ni_i || !store_buffer_empty_i);
+  assign stall_ni = (inflight_stores || not_commit_time) && (paddr_ni && CVA6Cfg.NonIdemPotenceEn);
+
+  // ---------------
+  // Load Control
+  // ---------------
+  always_comb begin : load_control
+    automatic logic accept_req;
+
+    // default assignments
+    state_d              = state_q;
+    translation_req_o    = 1'b0;
+    req_port_o.data_req  = 1'b0;
+    // tag control
+    req_port_o.kill_req  = 1'b0;
+    req_port_o.tag_valid = 1'b0;
+    req_port_o.data_be   = lsu_ctrl_i.be;
+    req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operation);
+    pop_ld_o             = 1'b0;
+
+    // In IDLE and SEND_TAG states, this unit can accept a new load request
+    // when the load buffer is not full or if there is a response and the
+    // load buffer is in fall-through mode
+    accept_req           = (valid_i && (!ldbuf_full || (LDBUF_FALLTHROUGH && ldbuf_r)));
+
+    case (state_q)
+      IDLE: begin
+        if (accept_req) begin
+          // start the translation process even though we do not know if the addresses match
+          // this should ease timing
+          translation_req_o = 1'b1;
+          // check if the page offset matches with a store, if it does then stall and wait
+          if (!page_offset_matches_i) begin
+            // make a load request to memory
+            req_port_o.data_req = 1'b1;
+            // we got no data grant so wait for the grant before sending the tag
+            if (!req_port_i.data_gnt) begin
+              state_d = WAIT_GNT;
+            end else begin
+              if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
+                state_d = ABORT_TRANSACTION;
+              end else begin
+                if (!stall_ni) begin
+                  // we got a grant and a hit on the DTLB so we can send the tag in the next cycle
+                  state_d  = SEND_TAG;
+                  pop_ld_o = 1'b1;
+                  // translation valid but the load is stalled by the non-idempotent checks (stall_ni)
+                end else if (CVA6Cfg.NonIdemPotenceEn) begin
+                  state_d = ABORT_TRANSACTION_NI;
+                end
+              end
+            end
+          end else begin
+            // wait for the store buffer to drain and the page offset to not match anymore
+            state_d = WAIT_PAGE_OFFSET;
+          end
+        end
+      end
+
+      // wait here for the page offset to not match anymore
+      WAIT_PAGE_OFFSET: begin
+        // we make a new request as soon as the page offset does not match anymore
+        if (!page_offset_matches_i) begin
+          state_d = WAIT_GNT;
+        end
+      end
+
+      WAIT_GNT: begin
+        // keep the translation request up
+        translation_req_o   = 1'b1;
+        // keep the request up
+        req_port_o.data_req = 1'b1;
+        // we finally got a data grant
+        if (req_port_i.data_gnt) begin
+          // so we send the tag in the next cycle
+          if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
+            state_d = ABORT_TRANSACTION;
+          end else begin
+            if (!stall_ni) begin
+              // we got a grant and a hit on the DTLB so we can send the tag in the next cycle
+              state_d  = SEND_TAG;
+              pop_ld_o = 1'b1;
+              // translation valid but the load is stalled by the non-idempotent checks (stall_ni)
+            end else if (CVA6Cfg.NonIdemPotenceEn) begin
+              state_d = ABORT_TRANSACTION_NI;
+            end
+          end
+
+        end
+        // otherwise we keep waiting on our grant
+      end
+      // we know for sure that the tag we want to send is valid
+      SEND_TAG: begin
+        req_port_o.tag_valid = 1'b1;
+        state_d = IDLE;
+
+        if (accept_req) begin
+          // start the translation process even though we do not know if the addresses match
+          // this should ease timing
+          translation_req_o = 1'b1;
+          // check if the page offset matches with a store, if it does stall and wait
+          if (!page_offset_matches_i) begin
+            // make a load request to memory
+            req_port_o.data_req = 1'b1;
+            // we got no data grant so wait for the grant before sending the tag
+            if (!req_port_i.data_gnt) begin
+              state_d = WAIT_GNT;
+            end else begin
+              // we got a grant so we can send the tag in the next cycle
+              if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
+                state_d = ABORT_TRANSACTION;
+              end else begin
+                if (!stall_ni) begin
+                  // we got a grant and a hit on the DTLB so we can send the tag in the next cycle
+                  state_d  = SEND_TAG;
+                  pop_ld_o = 1'b1;
+                  // translation valid but the load is stalled by the non-idempotent checks (stall_ni)
+                end else if (CVA6Cfg.NonIdemPotenceEn) begin
+                  state_d = ABORT_TRANSACTION_NI;
+                end
+              end
+            end
+          end else begin
+            // wait for the store buffer to drain and the page offset to not match anymore
+            state_d = WAIT_PAGE_OFFSET;
+          end
+        end
+        // ----------
+        // Exception
+        // ----------
+        // if we got an exception we need to kill the request immediately
+        if (ex_i.valid) begin
+          req_port_o.kill_req = 1'b1;
+        end
+      end
+
+      WAIT_FLUSH: begin
+        // the D$ arbiter will take care of presenting this to the memory only in case we
+        // have an outstanding request
+        req_port_o.kill_req = 1'b1;
+        req_port_o.tag_valid = 1'b1;
+        // we've killed the current request so we can go back to idle
+        state_d = IDLE;
+      end
+
+      default: begin
+        // abort the previous request - free the D$ arbiter
+        // we are here because of a TLB miss, we need to abort the current request and give way for the
+        // PTW walker to satisfy the TLB miss
+        if (state_q == ABORT_TRANSACTION && ariane_pkg::MMU_PRESENT) begin
+          req_port_o.kill_req = 1'b1;
+          req_port_o.tag_valid = 1'b1;
+          // redo the translation
+          state_d = WAIT_TRANSLATION;
+        end else if (state_q == ABORT_TRANSACTION_NI && CVA6Cfg.NonIdemPotenceEn) begin
+          req_port_o.kill_req = 1'b1;
+          req_port_o.tag_valid = 1'b1;
+          // wait until dcache_wbuffer_not_ni_i is set before redoing the request
+          state_d = WAIT_WB_EMPTY;
+        end else if (state_q == WAIT_WB_EMPTY && CVA6Cfg.NonIdemPotenceEn && dcache_wbuffer_not_ni_i) begin
+          // Wait until the write-back buffer is empty in the data cache.
+          // the write buffer is empty, so lets go and re-do the translation.
+          state_d = WAIT_TRANSLATION;
+        end else if(state_q == WAIT_TRANSLATION && (ariane_pkg::MMU_PRESENT || CVA6Cfg.NonIdemPotenceEn)) begin
+          translation_req_o = 1'b1;
+          // we've got a hit and we can continue with the request process
+          if (dtlb_hit_i) state_d = WAIT_GNT;
+
+          // we got an exception
+          if (ex_i.valid) begin
+            // the next state will be the idle state
+            state_d  = IDLE;
+            // pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction
+            pop_ld_o = ~req_port_i.data_rvalid;
+          end
+        end else begin
+          state_d = IDLE;
+        end
+      end
+    endcase
+
+    // a flush always sends the FSM to WAIT_FLUSH, overriding the next state chosen by the case statement above
+    if (flush_i) begin
+      state_d = WAIT_FLUSH;
+    end
+  end
+
+  // track the load data for later usage: write the load buffer when a request is granted
+  assign ldbuf_w = req_port_o.data_req & req_port_i.data_gnt;
+
+  // ---------------
+  // Retire Load
+  // ---------------
+  assign ldbuf_rindex = (CVA6Cfg.NrLoadBufEntries > 1) ? ldbuf_id_t'(req_port_i.data_rid) : 1'b0,
+      ldbuf_rdata = ldbuf_q[ldbuf_rindex];
+
+  // decoupled rvalid process
+  always_comb begin : rvalid_output
+    //  read the pending load buffer
+    ldbuf_r    = req_port_i.data_rvalid;
+    trans_id_o = ldbuf_q[ldbuf_rindex].trans_id;
+    valid_o    = 1'b0;
+    ex_o.valid = 1'b0;
+
+    // we got an rvalid and its corresponding request was not flushed
+    if (req_port_i.data_rvalid && !ldbuf_flushed_q[ldbuf_rindex]) begin
+      // if the response corresponds to the last request, check that we are not killing it
+      if ((ldbuf_last_id_q != ldbuf_rindex) || !req_port_o.kill_req) valid_o = 1'b1;
+      // the output is also valid if we got an exception. An exception arrives one cycle after
+      // dtlb_hit_i is asserted, i.e. when we are in SEND_TAG. Otherwise, the exception
+      // corresponds to the next request that is already being translated (see below).
+      if (ex_i.valid && (state_q == SEND_TAG)) begin
+        valid_o    = 1'b1;
+        ex_o.valid = 1'b1;
+      end
+    end
+
+    // an exception occurred during translation
+    // exceptions can retire out-of-order -> but we need to give priority to non-excepting load and stores
+    // so we simply check if we got an rvalid if so we prioritize it by not retiring the exception - we simply go for another
+    // round in the load FSM
+    if ((ariane_pkg::MMU_PRESENT || CVA6Cfg.NonIdemPotenceEn) && (state_q == WAIT_TRANSLATION) && !req_port_i.data_rvalid && ex_i.valid && valid_i) begin
+      trans_id_o = lsu_ctrl_i.trans_id;
+      valid_o = 1'b1;
+      ex_o.valid = 1'b1;
+    end
+  end
+
+
+  // FSM state register (only state_q is registered here; reset value is IDLE)
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      state_q <= IDLE;
+    end else begin
+      state_q <= state_d;
+    end
+  end
+
+  // ---------------
+  // Sign Extend
+  // ---------------
+  riscv::xlen_t shifted_data;
+
+  // realign as needed: shift right by address_offset bytes (address_offset * 8 bits)
+  assign shifted_data = req_port_i.data_rdata >> {ldbuf_rdata.address_offset, 3'b000};
+
+  /*  // result mux (leaner code, but more logic stages.
+    // can be used instead of the code below (in between //result mux fast) if timing is not so critical)
+    always_comb begin
+        unique case (ldbuf_rdata.operation)
+            LWU:        result_o = shifted_data[31:0];
+            LHU:        result_o = shifted_data[15:0];
+            LBU:        result_o = shifted_data[7:0];
+            LW:         result_o = 64'(signed'(shifted_data[31:0]));
+            LH:         result_o = 64'(signed'(shifted_data[15:0]));
+            LB:         result_o = 64'(signed'(shifted_data[ 7:0]));
+            default:    result_o = shifted_data;
+        endcase
+    end  */
+
+  // result mux fast
+  logic [        (riscv::XLEN/8)-1:0] rdata_sign_bits;
+  logic [riscv::XLEN_ALIGN_BYTES-1:0] rdata_offset;
+  logic rdata_sign_bit, rdata_is_signed, rdata_is_fp_signed;
+
+
+  // decode the load type and the byte holding the sign bit (+3 for words when IS_XLEN64, +1 for half words)
+  assign rdata_is_signed    =   ldbuf_rdata.operation inside {ariane_pkg::LW,  ariane_pkg::LH,  ariane_pkg::LB};
+  assign rdata_is_fp_signed =   ldbuf_rdata.operation inside {ariane_pkg::FLW, ariane_pkg::FLH, ariane_pkg::FLB};
+  assign rdata_offset       = ((ldbuf_rdata.operation inside {ariane_pkg::LW,  ariane_pkg::FLW}) & riscv::IS_XLEN64) ? ldbuf_rdata.address_offset + 3 :
+                                ( ldbuf_rdata.operation inside {ariane_pkg::LH,  ariane_pkg::FLH})                     ? ldbuf_rdata.address_offset + 1 :
+                                                                                                                         ldbuf_rdata.address_offset;
+
+  for (genvar i = 0; i < (riscv::XLEN / 8); i++) begin : gen_sign_bits
+    assign rdata_sign_bits[i] = req_port_i.data_rdata[(i+1)*8-1];
+  end
+
+
+  // select correct sign bit in parallel to result shifter above
+  // pull to 0 if unsigned; forced to 1 for FLW/FLH/FLB (presumably NaN-boxing - confirm)
+  assign rdata_sign_bit = rdata_is_signed & rdata_sign_bits[rdata_offset] | rdata_is_fp_signed;
+
+  // result mux: the upper bits of result_o are filled with rdata_sign_bit
+  always_comb begin
+    unique case (ldbuf_rdata.operation)
+      ariane_pkg::LW, ariane_pkg::LWU:
+      result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]};
+      ariane_pkg::LH, ariane_pkg::LHU:
+      result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]};
+      ariane_pkg::LB, ariane_pkg::LBU:
+      result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]};
+      default: begin
+        // FLW, FLH and FLB have been defined here in default case to improve Code Coverage
+        if (CVA6Cfg.FpPresent) begin
+          unique case (ldbuf_rdata.operation)
+            ariane_pkg::FLW: begin
+              result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]};
+            end
+            ariane_pkg::FLH: begin
+              result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]};
+            end
+            ariane_pkg::FLB: begin
+              result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]};
+            end
+            default: begin
+              result_o = shifted_data[riscv::XLEN-1:0];
+            end
+          endcase
+        end else begin
+          result_o = shifted_data[riscv::XLEN-1:0];
+        end
+      end
+    endcase
+  end
+  // end result mux fast
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+`ifndef VERILATOR
+  initial
+    assert (ariane_pkg::DCACHE_TID_WIDTH >= REQ_ID_BITS)
+    else $fatal(1, "CVA6ConfigDcacheIdWidth parameter is not wide enough to encode pending loads");
+  // check invalid offsets on load buffer writes; NOTE(review): the original comment said these only warn, but the else branches call $fatal - confirm intent
+  addr_offset0 :
+  assert property (@(posedge clk_i) disable iff (~rst_ni)
+        ldbuf_w |->  (ldbuf_wdata.operation inside {ariane_pkg::LW, ariane_pkg::LWU}) |-> ldbuf_wdata.address_offset < 5)
+  else $fatal(1, "invalid address offset used with {LW, LWU}");
+  addr_offset1 :
+  assert property (@(posedge clk_i) disable iff (~rst_ni)
+        ldbuf_w |->  (ldbuf_wdata.operation inside {ariane_pkg::LH, ariane_pkg::LHU}) |-> ldbuf_wdata.address_offset < 7)
+  else $fatal(1, "invalid address offset used with {LH, LHU}");
+  addr_offset2 :
+  assert property (@(posedge clk_i) disable iff (~rst_ni)
+        ldbuf_w |->  (ldbuf_wdata.operation inside {ariane_pkg::LB, ariane_pkg::LBU}) |-> ldbuf_wdata.address_offset < 8)
+  else $fatal(1, "invalid address offset used with {LB, LBU}");
+`endif
+  //pragma translate_on
+
+endmodule
diff --git a/test/type_param/core/lsu_bypass.sv b/test/type_param/core/lsu_bypass.sv
new file mode 100644
index 0000000..96f6d50
--- /dev/null
+++ b/test/type_param/core/lsu_bypass.sv
@@ -0,0 +1,122 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.04.2017
+// Description: Load Store Unit, handles address calculation and memory interface signals
+
+
+// ------------------
+// LSU Control
+// ------------------
+// The LSU consists of two independent blocks which share a common address translation block.
+// One block is the load unit, the other one is the store unit. They will signal their readiness
+// with separate signals. If they are not ready the LSU control should keep the last applied signals stable.
+// Furthermore it can be the case that another request for one of the two units arrives in which case
+// the LSU control should sample it and store it for later application to the units. It does so, by storing it in a
+// two element FIFO. This is necessary as we only know very late in the cycle whether the load/store will succeed (address check,
+// TLB hit mainly). So we better unconditionally allow another request to arrive and store this request in case we need to.
+module lsu_bypass
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,  // clears the counter, both pointers and both stored entries
+
+    input lsu_ctrl_t lsu_req_i,
+    input logic      lsu_req_valid_i,  // store lsu_req_i at the write pointer
+    input logic      pop_ld_i,  // advance the read pointer
+    input logic      pop_st_i,  // advance the read pointer
+
+    output lsu_ctrl_t lsu_ctrl_o,
+    output logic      ready_o  // high only while no entry is stored
+);
+
+  lsu_ctrl_t [1:0] mem_n, mem_q;
+  logic read_pointer_n, read_pointer_q;
+  logic write_pointer_n, write_pointer_q;
+  logic [1:0] status_cnt_n, status_cnt_q;
+
+  logic empty;
+  assign empty   = (status_cnt_q == 0);
+  assign ready_o = empty;
+
+  always_comb begin
+    automatic logic [1:0] status_cnt;
+    automatic logic write_pointer;
+    automatic logic read_pointer;
+
+    status_cnt = status_cnt_q;
+    write_pointer = write_pointer_q;
+    read_pointer = read_pointer_q;
+
+    mem_n = mem_q;
+    // we've got a valid LSU request
+    if (lsu_req_valid_i) begin
+      mem_n[write_pointer_q] = lsu_req_i;
+      write_pointer++;
+      status_cnt++;
+    end
+
+    if (pop_ld_i) begin
+      // invalidate the result
+      mem_n[read_pointer_q].valid = 1'b0;
+      read_pointer++;
+      status_cnt--;
+    end
+
+    if (pop_st_i) begin
+      // invalidate the result
+      mem_n[read_pointer_q].valid = 1'b0;
+      read_pointer++;
+      status_cnt--;
+    end
+
+    if (pop_st_i && pop_ld_i) mem_n = '0;  // both pops in the same cycle: clear both entries
+
+    if (flush_i) begin
+      status_cnt = '0;
+      write_pointer = '0;
+      read_pointer = '0;
+      mem_n = '0;
+    end
+    // hand the updated local copies to the register inputs
+    read_pointer_n  = read_pointer;
+    write_pointer_n = write_pointer;
+    status_cnt_n    = status_cnt;
+  end
+
+  // output assignment: pass lsu_req_i straight through while empty, otherwise present the stored entry
+  always_comb begin : output_assignments
+    if (empty) begin
+      lsu_ctrl_o = lsu_req_i;
+    end else begin
+      lsu_ctrl_o = mem_q[read_pointer_q];
+    end
+  end
+
+  // registers
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      mem_q           <= '0;
+      status_cnt_q    <= '0;
+      write_pointer_q <= '0;
+      read_pointer_q  <= '0;
+    end else begin
+      mem_q           <= mem_n;
+      status_cnt_q    <= status_cnt_n;
+      write_pointer_q <= write_pointer_n;
+      read_pointer_q  <= read_pointer_n;
+    end
+  end
+endmodule
+
diff --git a/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv b/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv
new file mode 100644
index 0000000..d194306
--- /dev/null
+++ b/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv
@@ -0,0 +1,565 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq Thales Research & Technology
+// Date: 17/07/2021
+//
+// Additional contributions by:
+//         Sebastien Jacq - sjthales on github.com
+//
+// Description: Memory Management Unit for CV32A6, contains TLB and
+//              address translation unit. Sv32 as defined in RISC-V
+//              privilege specification 1.11-WIP.
+//              This module is an adaptation of the MMU Sv39 developed
+//              by Florian Zaruba to the Sv32 standard.
+//
+// =========================================================================== //
+// Revisions  :
+// Date        Version  Author       Description
+// 2020-02-17  0.1      S.Jacq       MMU Sv32 for CV32A6
+// =========================================================================== //
+
+module cva6_mmu_sv32
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg           = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           INSTR_TLB_ENTRIES = 2,
+    parameter int unsigned           DATA_TLB_ENTRIES  = 2,
+    parameter int unsigned           ASID_WIDTH        = 1
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,
+    input logic enable_translation_i,
+    input logic en_ld_st_translation_i,  // enable virtual memory translation for load/stores
+    // IF interface
+    input icache_arsp_t icache_areq_i,
+    output icache_areq_t icache_areq_o,
+    // LSU interface
+    // this is a more minimalistic interface because the actual addressing logic is handled
+    // in the LSU as we distinguish load and stores, what we do here is simple address translation
+    input exception_t misaligned_ex_i,
+    input logic lsu_req_i,  // request address translation
+    input logic [riscv::VLEN-1:0] lsu_vaddr_i,  // virtual address in
+    input logic lsu_is_store_i,  // the translation is requested by a store
+    // if we need to walk the page table we can't grant in the same cycle
+    // Cycle 0
+    output logic                            lsu_dtlb_hit_o,   // sent in the same cycle as the request if translation hits in the DTLB
+    output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o,  // ppn (send same cycle as hit)
+    // Cycle 1
+    output logic lsu_valid_o,  // translation is valid
+    output logic [riscv::PLEN-1:0] lsu_paddr_o,  // translated address
+    output exception_t lsu_exception_o,  // address translation threw an exception
+    // General control signals
+    input riscv::priv_lvl_t priv_lvl_i,
+    input riscv::priv_lvl_t ld_st_priv_lvl_i,
+    input logic sum_i,
+    input logic mxr_i,
+    // input logic flag_mprv_i,
+    input logic [riscv::PPNW-1:0] satp_ppn_i,
+    input logic [ASID_WIDTH-1:0] asid_i,
+    input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
+    input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
+    input logic flush_tlb_i,
+    // Performance counters
+    output logic itlb_miss_o,
+    output logic dtlb_miss_o,
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+    // PMP
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
+);
+
+  logic                   iaccess_err;  // insufficient privilege to access this instruction page
+  logic                   daccess_err;  // insufficient privilege to access this data page
+  logic                   ptw_active;  // PTW is currently walking a page table
+  logic                   walking_instr;  // PTW is walking because of an ITLB miss
+  logic                   ptw_error;  // PTW threw an exception
+  logic                   ptw_access_exception;  // PTW threw an access exception (PMPs)
+  logic [riscv::PLEN-1:0] ptw_bad_paddr;  // PTW PMP exception bad physical addr
+
+  logic [riscv::VLEN-1:0] update_vaddr;
+  tlb_update_sv32_t update_itlb, update_dtlb, update_shared_tlb;
+
+  logic                               itlb_lu_access;
+  riscv::pte_sv32_t                   itlb_content;
+  logic                               itlb_is_4M;
+  logic                               itlb_lu_hit;
+
+  logic                               dtlb_lu_access;
+  riscv::pte_sv32_t                   dtlb_content;
+  logic                               dtlb_is_4M;
+  logic                               dtlb_lu_hit;
+
+  logic                               shared_tlb_access;
+  logic             [riscv::VLEN-1:0] shared_tlb_vaddr;
+  logic                               shared_tlb_hit;
+
+  logic                               itlb_req;
+
+
+  // Assignments
+  assign itlb_lu_access = icache_areq_i.fetch_req;
+  assign dtlb_lu_access = lsu_req_i;
+
+
+  cva6_tlb_sv32 #(  // instruction-side TLB, looked up with the fetch virtual address
+      .CVA6Cfg    (CVA6Cfg),
+      .TLB_ENTRIES(INSTR_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_itlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),  // flushed by the TLB flush request, not by the module-level flush_i
+
+      .update_i(update_itlb),  // refill information produced by i_shared_tlb
+
+      .lu_access_i          (itlb_lu_access),  // equals icache_areq_i.fetch_req
+      .lu_asid_i            (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i           (icache_areq_i.fetch_vaddr),
+      .lu_content_o         (itlb_content),  // PTE used by instr_interface for ppn and the U bit
+
+      .lu_is_4M_o(itlb_is_4M),  // selects the mega-page address composition in instr_interface
+      .lu_hit_o  (itlb_lu_hit)
+  );
+
+  cva6_tlb_sv32 #(  // data-side TLB, looked up with the load/store virtual address
+      .CVA6Cfg    (CVA6Cfg),
+      .TLB_ENTRIES(DATA_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_dtlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),  // flushed by the TLB flush request, not by the module-level flush_i
+
+      .update_i(update_dtlb),  // refill information produced by i_shared_tlb
+
+      .lu_access_i          (dtlb_lu_access),  // equals lsu_req_i
+      .lu_asid_i            (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i           (lsu_vaddr_i),
+      .lu_content_o         (dtlb_content),  // PTE registered into dtlb_pte_q by data_interface
+
+      .lu_is_4M_o(dtlb_is_4M),  // registered into dtlb_is_4M_q by data_interface
+      .lu_hit_o  (dtlb_lu_hit)  // also drives lsu_dtlb_hit_o when load/store translation is enabled
+  );
+
+  cva6_shared_tlb_sv32 #(  // shared TLB sitting between the ITLB/DTLB and the page table walker
+      .CVA6Cfg         (CVA6Cfg),
+      .SHARED_TLB_DEPTH(64),  // fixed here, not exposed as a parameter of this module
+      .SHARED_TLB_WAYS (2),  // fixed here, not exposed as a parameter of this module
+      .ASID_WIDTH      (ASID_WIDTH)
+  ) i_shared_tlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .enable_translation_i  (enable_translation_i),
+      .en_ld_st_translation_i(en_ld_st_translation_i),
+
+      .asid_i       (asid_i),
+      // from TLBs
+      // did we miss?
+      .itlb_access_i(itlb_lu_access),
+      .itlb_hit_i   (itlb_lu_hit),
+      .itlb_vaddr_i (icache_areq_i.fetch_vaddr),
+
+      .dtlb_access_i(dtlb_lu_access),
+      .dtlb_hit_i   (dtlb_lu_hit),
+      .dtlb_vaddr_i (lsu_vaddr_i),
+
+      // to TLBs, update logic
+      .itlb_update_o(update_itlb),
+      .dtlb_update_o(update_dtlb),
+
+      // Performance counters (forwarded directly to the module outputs)
+      .itlb_miss_o(itlb_miss_o),
+      .dtlb_miss_o(dtlb_miss_o),
+
+      .shared_tlb_access_o(shared_tlb_access),  // these three signals feed i_ptw
+      .shared_tlb_hit_o   (shared_tlb_hit),
+      .shared_tlb_vaddr_o (shared_tlb_vaddr),
+
+      .itlb_req_o         (itlb_req),  // forwarded to i_ptw.itlb_req_i
+      // to update shared tlb (driven by i_ptw)
+      .shared_tlb_update_i(update_shared_tlb)
+  );
+
+  cva6_ptw_sv32 #(  // hardware page table walker, started from the shared TLB miss signals
+      .CVA6Cfg   (CVA6Cfg),
+      .ASID_WIDTH(ASID_WIDTH)
+  ) i_ptw (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_i),  // note: module-level flush_i here, whereas the TLBs use flush_tlb_i
+
+      .ptw_active_o          (ptw_active),
+      .walking_instr_o       (walking_instr),
+      .ptw_error_o           (ptw_error),
+      .ptw_access_exception_o(ptw_access_exception),
+
+      .lsu_is_store_i(lsu_is_store_i),
+      // PTW memory interface
+      .req_port_i    (req_port_i),
+      .req_port_o    (req_port_o),
+
+      // to Shared TLB, update logic
+      .shared_tlb_update_o(update_shared_tlb),
+
+      .update_vaddr_o(update_vaddr),  // used as tval for PTW page faults in both interfaces
+
+      .asid_i(asid_i),
+
+      // from shared TLB
+      // did we miss?
+      .shared_tlb_access_i(shared_tlb_access),
+      .shared_tlb_hit_i   (shared_tlb_hit),
+      .shared_tlb_vaddr_i (shared_tlb_vaddr),
+
+      .itlb_req_i(itlb_req),
+
+      // from CSR file
+      .satp_ppn_i(satp_ppn_i),  // ppn from satp
+      .mxr_i     (mxr_i),
+
+      // Performance counters
+      .shared_tlb_miss_o(),  // intentionally left unconnected for now
+
+      // PMP
+      .pmpcfg_i   (pmpcfg_i),
+      .pmpaddr_i  (pmpaddr_i),
+      .bad_paddr_o(ptw_bad_paddr)  // physical address reported with PTW access faults
+
+  );
+
+  // ila_1 i_ila_1 (
+  //     .clk(clk_i), // input wire clk
+  //     .probe0({req_port_o.address_tag, req_port_o.address_index}),
+  //     .probe1(req_port_o.data_req), // input wire [63:0]  probe1
+  //     .probe2(req_port_i.data_gnt), // input wire [0:0]  probe2
+  //     .probe3(req_port_i.data_rdata), // input wire [0:0]  probe3
+  //     .probe4(req_port_i.data_rvalid), // input wire [0:0]  probe4
+  //     .probe5(ptw_error), // input wire [1:0]  probe5
+  //     .probe6(update_vaddr), // input wire [0:0]  probe6
+  //     .probe7(update_itlb.valid), // input wire [0:0]  probe7
+  //     .probe8(update_dtlb.valid), // input wire [0:0]  probe8
+  //     .probe9(dtlb_lu_access), // input wire [0:0]  probe9
+  //     .probe10(lsu_vaddr_i), // input wire [0:0]  probe10
+  //     .probe11(dtlb_lu_hit), // input wire [0:0]  probe11
+  //     .probe12(itlb_lu_access), // input wire [0:0]  probe12
+  //     .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0]  probe13
+  //     .probe14(itlb_lu_hit) // input wire [0:0]  probe13
+  // );
+
+  //-----------------------
+  // Instruction Interface
+  //-----------------------
+  logic match_any_execute_region;
+  logic pmp_instr_allow;
+
+  // The instruction interface is a simple request response interface
+  always_comb begin : instr_interface
+    // Defaults (MMU disabled): the request is answered immediately, paddr = zero-extended vaddr
+    icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+    if (riscv::PLEN > riscv::VLEN)
+      icache_areq_o.fetch_paddr = {
+        {riscv::PLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr
+      };  // play through in case we disabled address translation
+    else
+      icache_areq_o.fetch_paddr = {
+        2'b00, icache_areq_i.fetch_vaddr[riscv::VLEN-1:0]
+      };  // play through in case we disabled address translation
+    // two potential exception sources:
+    // 1. HPTW threw an exception -> signal with a page fault exception
+    // 2. We got an access error because of insufficient permissions -> throw an access exception
+    icache_areq_o.fetch_exception = '0;
+    // Privilege check on the ITLB entry: U-mode needs a user page, S-mode must not fetch from one
+    iaccess_err   = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
+                                                 || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
+
+    // MMU enabled: address from TLB, request delayed until hit. Error when TLB
+    // hit and no access right or TLB hit and translated address not valid (e.g.
+    // AXI decode error), or when PTW performs walk due to ITLB miss and raises
+    // an error.
+    if (enable_translation_i) begin
+      // we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
+      if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
+        icache_areq_o.fetch_exception = {
+          riscv::INSTR_ACCESS_FAULT,
+          {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+          1'b1
+        };
+      end
+
+      icache_areq_o.fetch_valid = 1'b0;  // hold the response until an ITLB hit or a PTW error
+
+      // 4K page: physical page number from the ITLB entry, page offset from the vaddr
+      icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
+      // Mega page: paddr[21:12] is part of the page offset and comes from the vaddr
+      if (itlb_is_4M) begin
+        icache_areq_o.fetch_paddr[21:12] = icache_areq_i.fetch_vaddr[21:12];
+      end
+
+
+      // ---------
+      // ITLB Hit
+      // --------
+      // if we hit the ITLB output the request signal immediately
+      if (itlb_lu_hit) begin
+        icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+        // we got an access error
+        if (iaccess_err) begin
+          // throw a page fault
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT,
+            {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+            1'b1
+          };  //to check on wave --> not connected
+        end else if (!pmp_instr_allow) begin
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1
+          };  // tval zero-extended to XLEN, same as the page-fault case above
+        end
+      end else
+      // ---------
+      // ITLB Miss
+      // ---------
+      // watch out for exceptions happening during walking the page table
+      if (ptw_active && walking_instr) begin
+        icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
+        if (ptw_error)
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
+          };  //to check on wave
+        // TODO(moschn,zarubaf): What should the value of tval be in this case?
+        else
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1
+          };  // assigned whenever ptw_error is low; fetch_valid above is set only on a PTW fault
+      end
+    end
+    // if it didn't match any execute region throw an `Instruction Access Fault`
+    // or: if we are not translating, check PMPs immediately on the paddr
+    if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin
+      icache_areq_o.fetch_exception = {
+        riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1
+      };  // NOTE(review): placed last, so it overrides any exception selected above
+    end
+  end
+
+  // check for execute flag on memory
+  assign match_any_execute_region = config_pkg::is_inside_execute_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
+  );
+
+  // Instruction fetch
+  pmp #(  // NOTE(review): i_pmp_ptw in cva6_ptw_sv32 also passes .CVA6Cfg; confirm which form pmp expects
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_if (
+      .addr_i       (icache_areq_o.fetch_paddr),  // checked on the physical address from instr_interface
+      .priv_lvl_i,  // implicit named connection to this module's priv_lvl_i input
+      // we will always execute on the instruction fetch port
+      .access_type_i(riscv::ACCESS_EXEC),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (pmp_instr_allow)  // consumed by instr_interface to raise INSTR_ACCESS_FAULT
+  );
+
+  //-----------------------
+  // Data Interface
+  //-----------------------
+  logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
+  riscv::pte_sv32_t dtlb_pte_n, dtlb_pte_q;
+  exception_t misaligned_ex_n, misaligned_ex_q;
+  logic lsu_req_n, lsu_req_q;
+  logic lsu_is_store_n, lsu_is_store_q;
+  logic dtlb_hit_n, dtlb_hit_q;
+  logic dtlb_is_4M_n, dtlb_is_4M_q;
+
+  // check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
+  assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
+
+  // Wires to PMP checks
+  riscv::pmp_access_t pmp_access_type;
+  logic               pmp_data_allow;
+  localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;
+  // The data interface is simpler and only consists of a request/response interface
+  always_comb begin : data_interface
+    // save request and DTLB response (cycle 0); the *_q copies are evaluated one cycle later
+    lsu_vaddr_n     = lsu_vaddr_i;
+    lsu_req_n       = lsu_req_i;
+    misaligned_ex_n = misaligned_ex_i;
+    dtlb_pte_n      = dtlb_content;
+    dtlb_hit_n      = dtlb_lu_hit;
+    lsu_is_store_n  = lsu_is_store_i;
+    dtlb_is_4M_n    = dtlb_is_4M;
+
+    if (riscv::PLEN > riscv::VLEN) begin
+      lsu_paddr_o    = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_q};
+      lsu_dtlb_ppn_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n[riscv::VLEN-1:12]};
+    end else begin
+      lsu_paddr_o    = {2'b00, lsu_vaddr_q[riscv::VLEN-1:0]};
+      lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PPNW-1:0];
+    end
+    lsu_valid_o = lsu_req_q;
+    lsu_exception_o = misaligned_ex_q;
+    pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
+
+    // mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
+    misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
+
+    // Check if the User flag is set, then we may only access it in supervisor mode
+    // if SUM is enabled
+    daccess_err = (ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
+    (ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u);            // this is not a user page but we are in user mode and trying to access it
+    // translation is enabled and no misaligned exception occurred
+    if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
+      lsu_valid_o = 1'b0;
+      // 4K page
+      lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
+      lsu_dtlb_ppn_o = dtlb_content.ppn;
+      // Mega page: paddr[21:12] is part of the page offset and comes from the vaddr
+      if (dtlb_is_4M_q) lsu_paddr_o[21:12] = lsu_vaddr_q[21:12];
+      // The cycle-0 PPN belongs to the current lookup, so it is qualified by dtlb_is_4M
+      // (not the registered copy); paddr[21:12] corresponds to ppn[9:0]
+      if (dtlb_is_4M) lsu_dtlb_ppn_o[9:0] = lsu_vaddr_n[21:12];
+      // ---------
+      // DTLB Hit
+      // --------
+      if (dtlb_hit_q && lsu_req_q) begin
+        lsu_valid_o = 1'b1;
+        // exception priority:
+        // PAGE_FAULTS have higher priority than ACCESS_FAULTS
+        // virtual memory based exceptions are PAGE_FAULTS
+        // physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
+
+        // this is a store
+        if (lsu_is_store_q) begin
+          // check if the page is write-able and we are not violating privileges
+          // also check if the dirty flag is set
+          if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };  //to check on wave
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
+            };  //only 32 bits on 34b of lsu_paddr_o are returned.
+          end
+
+          // this is a load
+        end else begin
+          // check for sufficient access privileges - throw a page fault if necessary
+          if (daccess_err) begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
+            };  //only 32 bits on 34b of lsu_paddr_o are returned.
+          end
+        end
+      end else
+
+      // ---------
+      // DTLB Miss
+      // ---------
+      // watch out for exceptions
+      if (ptw_active && !walking_instr) begin
+        // page table walker threw an exception
+        if (ptw_error) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // ptw_error is reported as a page fault; tval is the walked address, sign-extended
+          if (lsu_is_store_q) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{update_vaddr[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end else begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{update_vaddr[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end
+        end
+
+        if (ptw_access_exception) begin
+          // PMP violation during the walk: valid response, access fault matching the access type
+          lsu_valid_o = 1'b1;
+          if (lsu_is_store_q) lsu_exception_o = {riscv::ST_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
+          else lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
+        end
+      end
+    end  // If translation is not enabled, check the paddr immediately against PMPs
+    else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
+      if (lsu_is_store_q) begin
+        lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
+      end else begin
+        lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
+      end
+    end
+  end
+
+  // Load/store PMP check
+  pmp #(  // NOTE(review): i_pmp_ptw in cva6_ptw_sv32 also passes .CVA6Cfg; confirm which form pmp expects
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_data (
+      .addr_i       (lsu_paddr_o),  // checked on the physical address computed by data_interface
+      .priv_lvl_i   (ld_st_priv_lvl_i),  // load/store privilege level, not the fetch priv_lvl_i
+      .access_type_i(pmp_access_type),  // ACCESS_WRITE for stores, ACCESS_READ otherwise
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (pmp_data_allow)  // consumed by data_interface to raise LD/ST access faults
+  );
+
+  // ----------
+  // Registers
+  // ----------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin  // asynchronous active-low reset of the registered LSU request
+      lsu_req_q       <= '0;
+      lsu_is_store_q  <= '0;
+      lsu_vaddr_q     <= '0;
+      misaligned_ex_q <= '0;
+      dtlb_hit_q      <= '0;
+      dtlb_is_4M_q    <= '0;
+      dtlb_pte_q      <= '0;
+    end else begin  // otherwise capture the request and the DTLB response for the next cycle
+      lsu_req_q       <= lsu_req_n;
+      lsu_is_store_q  <= lsu_is_store_n;
+      lsu_vaddr_q     <= lsu_vaddr_n;
+      misaligned_ex_q <= misaligned_ex_n;
+      dtlb_hit_q      <= dtlb_hit_n;
+      dtlb_is_4M_q    <= dtlb_is_4M_n;
+      dtlb_pte_q      <= dtlb_pte_n;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv b/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv
new file mode 100644
index 0000000..4bd736b
--- /dev/null
+++ b/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv
@@ -0,0 +1,400 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq Thales Research & Technology
+// Date: 17/07/2021
+//
+// Additional contributions by:
+//         Sebastien Jacq - sjthales on github.com
+//
+// Description: Hardware-PTW (Page-Table-Walker) for MMU Sv32.
+//              This module is an adaptation of the Sv39 PTW developed
+//              by Florian Zaruba and David Schaffenrath to the Sv32 standard.
+//
+// =========================================================================== //
+// Revisions  :
+// Date        Version  Author       Description
+// 2020-02-17  0.1      S.Jacq       PTW Sv32 for CV32A6
+// =========================================================================== //
+
+/* verilator lint_off WIDTH */
+
+module cva6_ptw_sv32
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int ASID_WIDTH = 1
+) (
+    input  logic clk_i,                  // Clock
+    input  logic rst_ni,                 // Asynchronous reset active low
+    input  logic flush_i,                // flush everything, we need to do this because
+                                         // actually everything we do is speculative at this stage
+                                         // e.g.: there could be a CSR instruction that changes everything
+    output logic ptw_active_o,
+    output logic walking_instr_o,        // set when walking for the ITLB
+    output logic ptw_error_o,            // set when an error occurred
+    output logic ptw_access_exception_o, // set when a PMP access exception occurred
+
+    input  logic          lsu_is_store_i,  // this translation was triggered by a store
+    // PTW memory interface
+    input  dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+
+    // to Shared TLB, update logic
+    output tlb_update_sv32_t shared_tlb_update_o,
+
+    output logic [riscv::VLEN-1:0] update_vaddr_o,
+
+    input logic [ASID_WIDTH-1:0] asid_i,
+
+    // from shared TLB
+    input logic                   shared_tlb_access_i,
+    input logic                   shared_tlb_hit_i,
+    input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i,
+
+    input logic itlb_req_i,
+
+    // from CSR file
+    input logic [riscv::PPNW-1:0] satp_ppn_i,  // ppn from satp
+    input logic                   mxr_i,
+
+    // Performance counters
+    output logic shared_tlb_miss_o,
+
+    // PMP
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
+    output logic [riscv::PLEN-1:0] bad_paddr_o
+
+);
+
+  // input registers
+  logic data_rvalid_q;
+  riscv::xlen_t data_rdata_q;
+
+  riscv::pte_sv32_t pte;
+  assign pte = riscv::pte_sv32_t'(data_rdata_q);
+
+
+  enum logic [2:0] {
+    IDLE,
+    WAIT_GRANT,
+    PTE_LOOKUP,
+    WAIT_RVALID,
+    PROPAGATE_ERROR,
+    PROPAGATE_ACCESS_ERROR,
+    LATENCY
+  }
+      state_q, state_d;
+
+  // SV32 defines two levels of page tables
+  enum logic {
+    LVL1,
+    LVL2
+  }
+      ptw_lvl_q, ptw_lvl_n;
+
+  // is this an instruction page table walk?
+  logic is_instr_ptw_q, is_instr_ptw_n;
+  logic global_mapping_q, global_mapping_n;
+  // latched tag signal
+  logic tag_valid_n, tag_valid_q;
+  // register the ASID
+  logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
+  // register the VPN we need to walk, SV32 defines a 32 bit virtual address
+  logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
+  // 4 byte aligned physical pointer
+  logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
+
+  // Assignments
+  assign update_vaddr_o = vaddr_q;
+
+  assign ptw_active_o = (state_q != IDLE);
+  //assign walking_instr_o = is_instr_ptw_q;
+  assign walking_instr_o = is_instr_ptw_q;
+  // directly output the correct physical address
+  assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
+  assign req_port_o.address_tag   = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
+  // we are never going to kill this request
+  assign req_port_o.kill_req = '0;
+  // we are never going to write with the HPTW
+  assign req_port_o.data_wdata = '0;
+  // we only issue one single request at a time
+  assign req_port_o.data_id = '0;
+
+  // -----------
+  // Shared TLB Update
+  // -----------
+  assign shared_tlb_update_o.vpn = vaddr_q[riscv::SV-1:12];
+  // update the correct page table level
+  assign shared_tlb_update_o.is_4M = (ptw_lvl_q == LVL1);
+  // output the correct ASID
+  assign shared_tlb_update_o.asid = tlb_update_asid_q;
+  // set the global mapping bit
+  assign shared_tlb_update_o.content = pte | (global_mapping_q << 5);
+
+
+  assign req_port_o.tag_valid = tag_valid_q;
+
+  logic allow_access;
+
+  assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
+
+  pmp #(
+      .CVA6Cfg   (CVA6Cfg),
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_ptw (
+      .addr_i       (ptw_pptr_q),
+      // PTW access are always checked as if in S-Mode...
+      .priv_lvl_i   (riscv::PRIV_LVL_S),
+      // ...and they are always loads
+      .access_type_i(riscv::ACCESS_READ),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (allow_access)
+  );
+
+
+  assign req_port_o.data_be = be_gen_32(req_port_o.address_index[1:0], req_port_o.data_size);
+
+  //-------------------
+  // Page table walker
+  //-------------------
+  // A virtual address va is translated into a physical address pa as follows:
+  // 1. Let a be satp.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv32,
+  //    PAGESIZE=2^12 and LEVELS=2.)
+  // 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
+  //    Sv32, PTESIZE=4.)
+  // 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault
+  //    exception.
+  // 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
+  //    Otherwise, this PTE is a pointer to the next level of the page table.
+  //    Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
+  //    a = pte.ppn × PAGESIZE and go to step 2.
+  // 5. A leaf PTE has been found. Determine if the requested memory access
+  //    is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
+  //    raise an access exception. Otherwise, the translation is successful.
+  //    Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
+  //    The translated physical address is given as follows:
+  //      - pa.pgoff = va.pgoff.
+  //      - If i > 0, then this is a superpage translation and
+  //        pa.ppn[i-1:0] = va.vpn[i-1:0].
+  //      - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
+  always_comb begin : ptw
+    // default assignments
+    // PTW memory interface
+    tag_valid_n               = 1'b0;
+    req_port_o.data_req       = 1'b0;
+    req_port_o.data_size      = 2'b10;
+    req_port_o.data_we        = 1'b0;
+    ptw_error_o               = 1'b0;
+    ptw_access_exception_o    = 1'b0;
+    shared_tlb_update_o.valid = 1'b0;
+    is_instr_ptw_n            = is_instr_ptw_q;
+    ptw_lvl_n                 = ptw_lvl_q;
+    ptw_pptr_n                = ptw_pptr_q;
+    state_d                   = state_q;
+    global_mapping_n          = global_mapping_q;
+    // input registers
+    tlb_update_asid_n         = tlb_update_asid_q;
+    vaddr_n                   = vaddr_q;
+
+    shared_tlb_miss_o         = 1'b0;
+
+    case (state_q)
+
+      IDLE: begin
+        // by default we start with the top-most page table
+        ptw_lvl_n        = LVL1;
+        global_mapping_n = 1'b0;
+        is_instr_ptw_n   = 1'b0;
+        // if we got a Shared TLB miss
+        if (shared_tlb_access_i & ~shared_tlb_hit_i) begin
+          ptw_pptr_n = {
+            satp_ppn_i, shared_tlb_vaddr_i[riscv::SV-1:22], 2'b0
+          };  // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4
+          is_instr_ptw_n = itlb_req_i;
+          tlb_update_asid_n = asid_i;
+          vaddr_n = shared_tlb_vaddr_i;
+          state_d = WAIT_GRANT;
+          shared_tlb_miss_o = 1'b1;
+        end
+      end
+
+      WAIT_GRANT: begin
+        // send a request out
+        req_port_o.data_req = 1'b1;
+        // wait for the WAIT_GRANT
+        if (req_port_i.data_gnt) begin
+          // send the tag valid signal one cycle later
+          tag_valid_n = 1'b1;
+          state_d     = PTE_LOOKUP;
+        end
+      end
+
+      PTE_LOOKUP: begin
+        // we wait for the valid signal
+        if (data_rvalid_q) begin
+
+          // check if the global mapping bit is set
+          if (pte.g) global_mapping_n = 1'b1;
+
+          // -------------
+          // Invalid PTE
+          // -------------
+          // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
+          if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR;
+          // -----------
+          // Valid PTE
+          // -----------
+          else begin
+            //state_d = IDLE;
+            state_d = LATENCY;
+            // it is a valid PTE
+            // if pte.r = 1 or pte.x = 1 it is a valid PTE
+            if (pte.r || pte.x) begin
+              // Valid translation found (either 4M or 4K entry)
+              if (is_instr_ptw_q) begin
+                // ------------
+                // Update ITLB
+                // ------------
+                // If page is not executable, we can directly raise an error. This
+                // doesn't put a useless entry into the TLB. The same idea applies
+                // to the access flag since we let the access flag be managed by SW.
+                if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR;
+                else shared_tlb_update_o.valid = 1'b1;
+
+              end else begin
+                // ------------
+                // Update DTLB
+                // ------------
+                // Check if the access flag has been set, otherwise throw a page-fault
+                // and let the software handle those bits.
+                // If page is not readable (there are no write-only pages)
+                // we can directly raise an error. This doesn't put a useless
+                // entry into the TLB.
+                if (pte.a && (pte.r || (pte.x && mxr_i))) begin
+                  shared_tlb_update_o.valid = 1'b1;
+                end else begin
+                  state_d = PROPAGATE_ERROR;
+                end
+                // Request is a store: perform some additional checks
+                // If the request was a store and the page is not write-able, raise an error
+                // the same applies if the dirty flag is not set
+                if (lsu_is_store_i && (!pte.w || !pte.d)) begin
+                  shared_tlb_update_o.valid = 1'b0;
+                  state_d = PROPAGATE_ERROR;
+                end
+              end
+              // check if the ppn is correctly aligned:
+              // 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
+              // exception.
+              if (ptw_lvl_q == LVL1 && pte.ppn[9:0] != '0) begin
+                state_d                   = PROPAGATE_ERROR;
+                shared_tlb_update_o.valid = 1'b0;
+              end
+              // this is a pointer to the next TLB level
+            end else begin
+              // pointer to next level of page table
+              if (ptw_lvl_q == LVL1) begin
+                // we are in the second level now
+                ptw_lvl_n  = LVL2;
+                ptw_pptr_n = {pte.ppn, vaddr_q[21:12], 2'b0};
+              end
+
+              state_d = WAIT_GRANT;
+
+              if (ptw_lvl_q == LVL2) begin
+                // Should already be the last level page table => Error
+                ptw_lvl_n = LVL2;
+                state_d   = PROPAGATE_ERROR;
+              end
+            end
+          end
+
+          // Check if this access was actually allowed from a PMP perspective
+          if (!allow_access) begin
+            shared_tlb_update_o.valid = 1'b0;
+            // we have to return the failed address in bad_addr
+            ptw_pptr_n = ptw_pptr_q;
+            state_d = PROPAGATE_ACCESS_ERROR;
+          end
+        end
+        // we've got a data WAIT_GRANT so tell the cache that the tag is valid
+      end
+      // Propagate error to MMU/LSU
+      PROPAGATE_ERROR: begin
+        state_d     = LATENCY;
+        ptw_error_o = 1'b1;
+      end
+      PROPAGATE_ACCESS_ERROR: begin
+        state_d                = LATENCY;
+        ptw_access_exception_o = 1'b1;
+      end
+      // wait for the rvalid before going back to IDLE
+      WAIT_RVALID: begin
+        if (data_rvalid_q) state_d = IDLE;
+      end
+      LATENCY: begin
+        state_d = IDLE;
+      end
+      default: begin
+        state_d = IDLE;
+      end
+    endcase
+
+    // -------
+    // Flush
+    // -------
+    // should we have flushed before we got an rvalid, wait for it until going back to IDLE
+    if (flush_i) begin
+      // on a flush check whether we are
+      // 1. in the PTE Lookup check whether we still need to wait for an rvalid
+      // 2. waiting for a grant, if so: wait for it
+      // if not, go back to idle
+      if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
+                ((state_q == WAIT_GRANT) && req_port_i.data_gnt))
+        state_d = WAIT_RVALID;
+      else state_d = LATENCY;
+    end
+  end
+
+  // sequential process
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      state_q           <= IDLE;
+      is_instr_ptw_q    <= 1'b0;
+      ptw_lvl_q         <= LVL1;
+      tag_valid_q       <= 1'b0;
+      tlb_update_asid_q <= '0;
+      vaddr_q           <= '0;
+      ptw_pptr_q        <= '0;
+      global_mapping_q  <= 1'b0;
+      data_rdata_q      <= '0;
+      data_rvalid_q     <= 1'b0;
+    end else begin
+      state_q           <= state_d;
+      ptw_pptr_q        <= ptw_pptr_n;
+      is_instr_ptw_q    <= is_instr_ptw_n;
+      ptw_lvl_q         <= ptw_lvl_n;
+      tag_valid_q       <= tag_valid_n;
+      tlb_update_asid_q <= tlb_update_asid_n;
+      vaddr_q           <= vaddr_n;
+      global_mapping_q  <= global_mapping_n;
+      data_rdata_q      <= req_port_i.data_rdata;
+      data_rvalid_q     <= req_port_i.data_rvalid;
+    end
+  end
+
+endmodule
+/* verilator lint_on WIDTH */
diff --git a/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv b/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv
new file mode 100644
index 0000000..98e2a04
--- /dev/null
+++ b/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv
@@ -0,0 +1,367 @@
+// Copyright (c) 2023 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq - Thales Research & Technology
+// Date: 08/03/2023
+//
+// Description: N-way set-associative shared TLB. It allows the number
+//              of ITLB and DTLB entries to be reduced.
+//
+
+/* verilator lint_off WIDTH */
+
+module cva6_shared_tlb_sv32  // Shared Sv32 TLB behind the ITLB/DTLB: looks missed addresses up in per-way tag/PTE SRAMs and returns hits as ITLB/DTLB update records.
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,  // not referenced inside this module body
+    parameter int SHARED_TLB_DEPTH = 64,  // number of sets (words in each way's tag/PTE SRAM)
+    parameter int SHARED_TLB_WAYS = 2,  // number of ways (one tag SRAM + one PTE SRAM per way)
+    parameter int ASID_WIDTH = 1  // width of asid_i and of the ASID captured per lookup
+) (
+    input logic clk_i,   // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // clears every valid bit of the shared TLB (takes priority over an update)
+
+    input logic enable_translation_i,   // CSRs indicate to enable SV32; gates ITLB-miss lookups
+    input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores; gates DTLB-miss lookups
+
+    input logic [ASID_WIDTH-1:0] asid_i,  // ASID captured together with each accepted lookup
+
+    // from TLBs
+    // did we miss? (access & ~hit on either port starts a shared TLB lookup)
+    input logic                   itlb_access_i,
+    input logic                   itlb_hit_i,
+    input logic [riscv::VLEN-1:0] itlb_vaddr_i,
+
+    input logic                   dtlb_access_i,
+    input logic                   dtlb_hit_i,
+    input logic [riscv::VLEN-1:0] dtlb_vaddr_i,
+
+    // to TLBs, update logic (valid only when the registered lookup hits in the shared TLB)
+    output tlb_update_sv32_t itlb_update_o,
+    output tlb_update_sv32_t dtlb_update_o,
+
+    // Performance counters (asserted in the cycle a miss is accepted for lookup)
+    output logic itlb_miss_o,
+    output logic dtlb_miss_o,
+
+    output logic                   shared_tlb_access_o,  // registered: a lookup was issued in the previous cycle
+    output logic                   shared_tlb_hit_o,  // combinational result of the tag comparison
+    output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o,  // registered virtual address of the last issued lookup
+
+    output logic itlb_req_o,  // registered: the last issued lookup came from the ITLB
+
+    // Update shared TLB in case of miss
+    input tlb_update_sv32_t shared_tlb_update_i
+
+);
+
+  function logic [SHARED_TLB_WAYS-1:0] shared_tlb_way_bin2oh(input logic [$clog2(SHARED_TLB_WAYS
+)-1:0] in);  // converts a binary way index into a one-hot way mask
+    logic [SHARED_TLB_WAYS-1:0] out;
+    out     = '0;
+    out[in] = 1'b1;
+    return out;
+  endfunction
+
+  typedef struct packed {  // tag word stored per way and set
+    logic [8:0] asid;   //9 bits wide
+    logic [9:0] vpn1;   //10 bits wide
+    logic [9:0] vpn0;   //10 bits wide
+    logic       is_4M;
+  } shared_tag_t;
+
+  shared_tag_t shared_tag_wr;  // tag to be written on an update
+  shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd;  // tags read back from each way
+
+  logic [SHARED_TLB_DEPTH-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, shared_tag_valid_d;  // valid bits kept in flops, one per set and way
+
+  logic [         SHARED_TLB_WAYS-1:0] shared_tag_valid;  // registered valid bits of the set addressed by the lookup
+
+  logic [         SHARED_TLB_WAYS-1:0] tag_wr_en;
+  logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_wr_addr;
+  logic [     $bits(shared_tag_t)-1:0] tag_wr_data;
+
+  logic [         SHARED_TLB_WAYS-1:0] tag_rd_en;
+  logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_rd_addr;
+  logic [     $bits(shared_tag_t)-1:0] tag_rd_data      [SHARED_TLB_WAYS-1:0];
+
+  logic [         SHARED_TLB_WAYS-1:0] tag_req;
+  logic [         SHARED_TLB_WAYS-1:0] tag_we;
+  logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_addr;
+
+  logic [         SHARED_TLB_WAYS-1:0] pte_wr_en;
+  logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_wr_addr;
+  logic [$bits(riscv::pte_sv32_t)-1:0] pte_wr_data;
+
+  logic [         SHARED_TLB_WAYS-1:0] pte_rd_en;
+  logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_rd_addr;
+  logic [$bits(riscv::pte_sv32_t)-1:0] pte_rd_data      [SHARED_TLB_WAYS-1:0];
+
+  logic [         SHARED_TLB_WAYS-1:0] pte_req;
+  logic [         SHARED_TLB_WAYS-1:0] pte_we;
+  logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_addr;
+
+  logic [9:0] vpn0_d, vpn1_d, vpn0_q, vpn1_q;  // VPN fields of the lookup, held until the next lookup is issued
+
+  riscv::pte_sv32_t [SHARED_TLB_WAYS-1:0] pte;  // PTEs read back from each way
+
+  logic [riscv::VLEN-1-12:0] itlb_vpn_q;  // itlb_vaddr_i[SV-1:12] delayed by one cycle
+  logic [riscv::VLEN-1-12:0] dtlb_vpn_q;  // dtlb_vaddr_i[SV-1:12] delayed by one cycle
+
+  logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d;
+
+  logic shared_tlb_access_q, shared_tlb_access_d;
+  logic shared_tlb_hit_d;
+  logic [riscv::VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d;
+
+  logic itlb_req_d, itlb_req_q;
+  logic dtlb_req_d, dtlb_req_q;
+
+  // replacement strategy: fill an invalid way first, otherwise pick the LFSR-selected way
+  logic [SHARED_TLB_WAYS-1:0] way_valid;
+  logic update_lfsr;  // shift the LFSR
+  logic [$clog2(SHARED_TLB_WAYS)-1:0] inv_way;  // first non-valid encountered
+  logic [$clog2(SHARED_TLB_WAYS)-1:0] rnd_way;  // random index for replacement
+  logic [$clog2(SHARED_TLB_WAYS)-1:0] repl_way;  // way to replace
+  logic [SHARED_TLB_WAYS-1:0] repl_way_oh_d;  // way to replace (onehot)
+  logic all_ways_valid;  // we need to switch repl strategy since all are valid
+
+  assign shared_tlb_access_o = shared_tlb_access_q;
+  assign shared_tlb_hit_o = shared_tlb_hit_d;
+  assign shared_tlb_vaddr_o = shared_tlb_vaddr_q;
+
+  assign itlb_req_o = itlb_req_q;
+
+  ///////////////////////////////////////////////////////
+  // lookup request: miss detection and SRAM read issue
+  ///////////////////////////////////////////////////////
+  always_comb begin : itlb_dtlb_miss
+    itlb_miss_o         = 1'b0;
+    dtlb_miss_o         = 1'b0;
+    vpn0_d              = vpn0_q;
+    vpn1_d              = vpn1_q;
+
+    tag_rd_en           = '0;
+    pte_rd_en           = '0;
+
+    itlb_req_d          = 1'b0;
+    dtlb_req_d          = 1'b0;
+
+    tlb_update_asid_d   = tlb_update_asid_q;
+
+    shared_tlb_access_d = '0;
+    shared_tlb_vaddr_d  = shared_tlb_vaddr_q;
+
+    tag_rd_addr         = '0;
+    pte_rd_addr         = '0;
+
+    // if we got an ITLB miss (only served when there is no DTLB access in the same cycle)
+    if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
+      tag_rd_en           = '1;
+      tag_rd_addr         = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];  // set index = low bits of the VPN
+      pte_rd_en           = '1;
+      pte_rd_addr         = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
+
+      vpn0_d              = itlb_vaddr_i[21:12];
+      vpn1_d              = itlb_vaddr_i[31:22];
+
+      itlb_miss_o         = 1'b1;
+      itlb_req_d          = 1'b1;
+
+      tlb_update_asid_d   = asid_i;
+
+      shared_tlb_access_d = 1'b1;
+      shared_tlb_vaddr_d  = itlb_vaddr_i;
+
+      // we got a DTLB miss
+    end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
+      tag_rd_en           = '1;
+      tag_rd_addr         = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];  // set index = low bits of the VPN
+      pte_rd_en           = '1;
+      pte_rd_addr         = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
+
+      vpn0_d              = dtlb_vaddr_i[21:12];
+      vpn1_d              = dtlb_vaddr_i[31:22];
+
+      dtlb_miss_o         = 1'b1;
+      dtlb_req_d          = 1'b1;
+
+      tlb_update_asid_d   = asid_i;
+
+      shared_tlb_access_d = 1'b1;
+      shared_tlb_vaddr_d  = dtlb_vaddr_i;
+    end
+  end  //itlb_dtlb_miss
+
+  always_comb begin : tag_comparison  // works on the registered request (*_q); presumably lines up with a one-cycle SRAM read - TODO confirm against sram
+    shared_tlb_hit_d = 1'b0;
+    dtlb_update_o = '0;
+    itlb_update_o = '0;
+    // compare every way; NOTE(review): the ASID compare uses the full 9-bit stored field here, while cva6_tlb_sv32 slices asid[ASID_WIDTH-1:0] - confirm upper stored bits are zero
+    for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin
+      if (shared_tag_valid[i] && ((tlb_update_asid_q == shared_tag_rd[i].asid) || pte[i].g)  && vpn1_q == shared_tag_rd[i].vpn1) begin
+        if (shared_tag_rd[i].is_4M || vpn0_q == shared_tag_rd[i].vpn0) begin  // vpn0 is ignored for 4M pages
+          shared_tlb_hit_d = 1'b1;
+          if (itlb_req_q) begin
+            itlb_update_o.valid = 1'b1;
+            itlb_update_o.vpn = itlb_vpn_q;
+            itlb_update_o.is_4M = shared_tag_rd[i].is_4M;
+            itlb_update_o.asid = tlb_update_asid_q;
+            itlb_update_o.content = pte[i];
+          end else if (dtlb_req_q) begin
+            dtlb_update_o.valid = 1'b1;
+            dtlb_update_o.vpn = dtlb_vpn_q;
+            dtlb_update_o.is_4M = shared_tag_rd[i].is_4M;
+            dtlb_update_o.asid = tlb_update_asid_q;
+            dtlb_update_o.content = pte[i];
+          end
+        end
+      end
+    end
+  end  //tag_comparison
+
+  // sequential process: request pipeline registers and the valid-bit array
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      itlb_vpn_q <= '0;
+      dtlb_vpn_q <= '0;
+      tlb_update_asid_q <= '0;
+      shared_tlb_access_q <= '0;
+      shared_tlb_vaddr_q <= '0;
+      shared_tag_valid_q <= '0;
+      vpn0_q <= '0;
+      vpn1_q <= '0;
+      itlb_req_q <= '0;
+      dtlb_req_q <= '0;
+      shared_tag_valid <= '0;
+    end else begin
+      itlb_vpn_q <= itlb_vaddr_i[riscv::SV-1:12];  // sampled every cycle, not only on a miss
+      dtlb_vpn_q <= dtlb_vaddr_i[riscv::SV-1:12];  // sampled every cycle, not only on a miss
+      tlb_update_asid_q <= tlb_update_asid_d;
+      shared_tlb_access_q <= shared_tlb_access_d;
+      shared_tlb_vaddr_q <= shared_tlb_vaddr_d;
+      shared_tag_valid_q <= shared_tag_valid_d;
+      vpn0_q <= vpn0_d;
+      vpn1_q <= vpn1_d;
+      itlb_req_q <= itlb_req_d;
+      dtlb_req_q <= dtlb_req_d;
+      shared_tag_valid <= shared_tag_valid_q[tag_rd_addr];  // valid bits of the addressed set, delayed like the request registers
+    end
+  end
+
+  // ------------------
+  // Update and Flush
+  // ------------------
+  always_comb begin : update_flush
+    shared_tag_valid_d = shared_tag_valid_q;
+    tag_wr_en = '0;
+    pte_wr_en = '0;
+
+    if (flush_i) begin  // flush wins over an update; only valid bits are cleared, no SRAM write is issued
+      shared_tag_valid_d = '0;
+    end else if (shared_tlb_update_i.valid) begin
+      for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin
+        if (repl_way_oh_d[i]) begin  // selected way: mark the set entry valid and write tag + PTE
+          shared_tag_valid_d[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]][i] = 1'b1;
+          tag_wr_en[i] = 1'b1;
+          pte_wr_en[i] = 1'b1;
+        end
+      end
+    end
+  end  //update_flush
+
+  assign shared_tag_wr.asid = shared_tlb_update_i.asid;
+  assign shared_tag_wr.vpn1 = shared_tlb_update_i.vpn[19:10];
+  assign shared_tag_wr.vpn0 = shared_tlb_update_i.vpn[9:0];
+  assign shared_tag_wr.is_4M = shared_tlb_update_i.is_4M;
+
+  assign tag_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];  // same low-VPN set index as used for reads
+  assign tag_wr_data = shared_tag_wr;
+
+  assign pte_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];
+  assign pte_wr_data = shared_tlb_update_i.content;
+
+  assign way_valid = shared_tag_valid_q[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]];  // valid bits of the set being updated
+  assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
+  assign update_lfsr = shared_tlb_update_i.valid & all_ways_valid;  // LFSR advances only when it actually chose the victim
+  assign repl_way_oh_d = (shared_tlb_update_i.valid) ? shared_tlb_way_bin2oh(repl_way) : '0;
+
+  lzc #(
+      .WIDTH(SHARED_TLB_WAYS)
+  ) i_lzc (
+      .in_i   (~way_valid),  // a set bit marks an invalid way
+      .cnt_o  (inv_way),
+      .empty_o(all_ways_valid)
+  );
+
+  lfsr #(
+      .LfsrWidth(8),
+      .OutWidth ($clog2(SHARED_TLB_WAYS))
+  ) i_lfsr (
+      .clk_i (clk_i),
+      .rst_ni(rst_ni),
+      .en_i  (update_lfsr),
+      .out_o (rnd_way)
+  );
+
+  ///////////////////////////////////////////////////////
+  // memory arrays and regs
+  ///////////////////////////////////////////////////////
+
+  assign tag_req  = tag_wr_en | tag_rd_en;
+  assign tag_we   = tag_wr_en;
+  assign tag_addr = tag_wr_en ? tag_wr_addr : tag_rd_addr;  // shared address: the write address is used whenever any way is written
+
+  assign pte_req  = pte_wr_en | pte_rd_en;
+  assign pte_we   = pte_wr_en;
+  assign pte_addr = pte_wr_en ? pte_wr_addr : pte_rd_addr;  // shared address: the write address is used whenever any way is written
+
+  for (genvar i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_sram  // one tag SRAM and one PTE SRAM per way
+    // Tag RAM
+    sram #(
+        .DATA_WIDTH($bits(shared_tag_t)),
+        .NUM_WORDS (SHARED_TLB_DEPTH)
+    ) tag_sram (
+        .clk_i  (clk_i),
+        .rst_ni (rst_ni),
+        .req_i  (tag_req[i]),
+        .we_i   (tag_we[i]),
+        .addr_i (tag_addr),
+        .wuser_i('0),
+        .wdata_i(tag_wr_data),
+        .be_i   ('1),
+        .ruser_o(),
+        .rdata_o(tag_rd_data[i])
+    );
+
+    assign shared_tag_rd[i] = shared_tag_t'(tag_rd_data[i]);
+
+    // PTE RAM
+    sram #(
+        .DATA_WIDTH($bits(riscv::pte_sv32_t)),
+        .NUM_WORDS (SHARED_TLB_DEPTH)
+    ) pte_sram (
+        .clk_i  (clk_i),
+        .rst_ni (rst_ni),
+        .req_i  (pte_req[i]),
+        .we_i   (pte_we[i]),
+        .addr_i (pte_addr),
+        .wuser_i('0),
+        .wdata_i(pte_wr_data),
+        .be_i   ('1),
+        .ruser_o(),
+        .rdata_o(pte_rd_data[i])
+    );
+    assign pte[i] = riscv::pte_sv32_t'(pte_rd_data[i]);
+  end
+endmodule
+
+/* verilator lint_on WIDTH */
diff --git a/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv b/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv
new file mode 100644
index 0000000..79a7c98
--- /dev/null
+++ b/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv
@@ -0,0 +1,281 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq Thales Research & Technology
+// Date: 17/07/2021
+//
+// Additional contributions by:
+//         Sebastien Jacq - sjthales on github.com
+//
+// Description: Translation Lookaside Buffer, Sv32, fully associative
+//              This module is an adaptation of the Sv39 TLB developed
+//              by Florian Zaruba and David Schaffenrath to the Sv32 standard.
+//
+// =========================================================================== //
+// Revisions  :
+// Date        Version  Author       Description
+// 2020-02-17  0.1      S.Jacq       TLB Sv32 for CV32A6
+// =========================================================================== //
+
+module cva6_tlb_sv32  // Fully associative Sv32 TLB: flop-based tags and PTEs, parallel lookup over all entries, tree-PLRU replacement.
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,  // not referenced inside this module body
+    parameter int unsigned TLB_ENTRIES = 4,  // number of entries, all compared on every lookup
+    parameter int unsigned ASID_WIDTH = 1  // number of ASID bits that take part in comparisons
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // Flush signal (takes priority over update_i)
+    // Update TLB
+    input tlb_update_sv32_t update_i,
+    // Lookup signals
+    input logic lu_access_i,  // qualifies hits for the PLRU update only; lu_hit_o is not gated by it
+    input logic [ASID_WIDTH-1:0] lu_asid_i,
+    input logic [riscv::VLEN-1:0] lu_vaddr_i,
+    output riscv::pte_sv32_t lu_content_o,
+    input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
+    input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
+    output logic lu_is_4M_o,
+    output logic lu_hit_o
+);
+
+  // Tag storage. Sv32 defines two levels of page tables, hence the vpn1/vpn0 split.
+  struct packed {
+    logic [8:0] asid;   //9 bits wide
+    logic [9:0] vpn1;   //10 bits wide
+    logic [9:0] vpn0;   //10 bits wide
+    logic       is_4M;
+    logic       valid;
+  } [TLB_ENTRIES-1:0]
+      tags_q, tags_n;
+
+  riscv::pte_sv32_t [TLB_ENTRIES-1:0] content_q, content_n;
+  logic [9:0] vpn0, vpn1;
+  logic [TLB_ENTRIES-1:0] lu_hit;  // to replacement logic
+  logic [TLB_ENTRIES-1:0] replace_en;  // replace the following entry, set by replacement strategy
+  //-------------
+  // Translation
+  //-------------
+  always_comb begin : translation  // purely combinational lookup across all entries
+    vpn0         = lu_vaddr_i[21:12];
+    vpn1         = lu_vaddr_i[31:22];
+
+
+    // default assignment
+    lu_hit       = '{default: 0};
+    lu_hit_o     = 1'b0;
+    lu_content_o = '{default: 0};
+    lu_is_4M_o   = 1'b0;
+
+    for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
+      // first level match, this may be a mega page, check the ASID flags as well
+      // if the entry is associated to a global address, don't match the ASID (ASID is don't care)
+      if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[ASID_WIDTH-1:0]) || content_q[i].g)  && vpn1 == tags_q[i].vpn1) begin
+        if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin  // vpn0 is ignored for 4M pages
+          lu_is_4M_o   = tags_q[i].is_4M;
+          lu_content_o = content_q[i];
+          lu_hit_o     = 1'b1;
+          lu_hit[i]    = 1'b1;
+        end
+      end
+    end
+  end
+
+  logic asid_to_be_flushed_is0;  // indicates that the ASID provided by SFENCE.VMA (rs2) is 0, active high
+  logic vaddr_to_be_flushed_is0;  // indicates that the VADDR provided by SFENCE.VMA (rs1) is 0, active high
+  logic [TLB_ENTRIES-1:0] vaddr_vpn0_match;
+  logic [TLB_ENTRIES-1:0] vaddr_vpn1_match;
+
+
+  assign asid_to_be_flushed_is0  = ~(|asid_to_be_flushed_i);
+  assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
+
+  // ------------------
+  // Update and Flush
+  // ------------------
+  always_comb begin : update_flush  // per entry: a flush is evaluated first, an update is applied only when flush_i is low
+    tags_n    = tags_q;
+    content_n = content_q;
+
+    for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
+
+      vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[21:12] == tags_q[i].vpn0);
+      vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[31:22] == tags_q[i].vpn1);
+
+      if (flush_i) begin
+        // invalidate logic
+        // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
+        if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0;
+        // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
+        else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0))
+          tags_n[i].valid = 1'b0;
+        // the entry is flushed if it's not global and asid and vaddr both match the entry to be flushed ("SFENCE.VMA vaddr asid" case)
+        else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
+          tags_n[i].valid = 1'b0;
+        // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
+        else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0))
+          tags_n[i].valid = 1'b0;
+        // normal replacement: only the entry selected by the PLRU decode is overwritten
+      end else if (update_i.valid & replace_en[i]) begin
+        // update tag array
+        tags_n[i] = '{
+            asid: update_i.asid,
+            vpn1: update_i.vpn[19:10],
+            vpn0: update_i.vpn[9:0],
+            is_4M: update_i.is_4M,
+            valid: 1'b1
+        };
+        // and content as well
+        content_n[i] = update_i.content;
+      end
+    end
+  end
+
+  // -----------------------------------------------
+  // PLRU - Pseudo Least Recently Used Replacement
+  // -----------------------------------------------
+  logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;  // for a power-of-two TLB_ENTRIES only nodes 0..TLB_ENTRIES-2 are addressed below
+  logic en;
+  int unsigned idx_base, shift, new_index;  // scratch variables of the loops below
+  always_comb begin : plru_replacement
+    plru_tree_n = plru_tree_q;
+    en = '0;
+    idx_base = '0;
+    shift = '0;
+    new_index = '0;
+    // The PLRU-tree indexing:
+    // lvl0        0
+    //            / \
+        //           /   \
+        // lvl1     1     2
+        //         / \   / \
+        // lvl2   3   4 5   6
+        //       / \ /\/\  /\
+        //      ... ... ... ...
+        // On a hit, every node on the path to the entry is set to the complement
+        // of the matching index bit, so the tree points away from the entry just used.
+        // E.g. for a TLB with 8 entries, the for-loop below is equivalent to:
+        // unique case (1'b1)
+        // lu_hit[7]: plru_tree_n[0, 2, 6] = {0, 0, 0};
+        // lu_hit[6]: plru_tree_n[0, 2, 6] = {0, 0, 1};
+        // lu_hit[5]: plru_tree_n[0, 2, 5] = {0, 1, 0};
+        // lu_hit[4]: plru_tree_n[0, 2, 5] = {0, 1, 1};
+        // lu_hit[3]: plru_tree_n[0, 1, 4] = {1, 0, 0};
+        // lu_hit[2]: plru_tree_n[0, 1, 4] = {1, 0, 1};
+        // lu_hit[1]: plru_tree_n[0, 1, 3] = {1, 1, 0};
+        // lu_hit[0]: plru_tree_n[0, 1, 3] = {1, 1, 1};
+        // default: begin /* No hit */ end
+        // endcase
+        for (
+        int unsigned i = 0; i < TLB_ENTRIES; i++
+    ) begin
+      // we got a hit: entry i is now the most recently used, so steer the tree away from it
+      if (lu_hit[i] & lu_access_i) begin
+        // Set the nodes to the values we would expect
+        for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
+          idx_base = $unsigned((2 ** lvl) - 1);  // index of the first node on this tree level
+          // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+          shift = $clog2(TLB_ENTRIES) - lvl;
+          // to circumvent the 32 bit integer arithmetic assignment
+          new_index = ~((i >> (shift - 1)) & 32'b1);  // complement of the index bit for this level
+          plru_tree_n[idx_base+(i>>shift)] = new_index[0];
+        end
+      end
+    end
+    // Decode tree to write enable signals
+    // Next for-loop basically creates the following logic for e.g. an 8 entry
+    // TLB (note: pseudo-code obviously):
+    // replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
+    // replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
+    // replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
+    // replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
+    // replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
+    // replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
+    // replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
+    // replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
+    // For each entry traverse the tree. If every tree-node matches
+    // the corresponding bit of the entry's index, this is
+    // the next entry to replace.
+    for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
+      en = 1'b1;
+      for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
+        idx_base = $unsigned((2 ** lvl) - 1);
+        // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+        shift = $clog2(TLB_ENTRIES) - lvl;
+
+        // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
+        new_index = (i >> (shift - 1)) & 32'b1;
+        if (new_index[0]) begin
+          en &= plru_tree_q[idx_base+(i>>shift)];
+        end else begin
+          en &= ~plru_tree_q[idx_base+(i>>shift)];
+        end
+      end
+      replace_en[i] = en;
+    end
+  end
+
+  // sequential process: tags, PTE contents and the PLRU tree are all reset to zero
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      tags_q      <= '{default: 0};
+      content_q   <= '{default: 0};
+      plru_tree_q <= '{default: 0};
+    end else begin
+      tags_q      <= tags_n;
+      content_q   <= content_n;
+      plru_tree_q <= plru_tree_n;
+    end
+  end
+  //--------------
+  // Sanity checks (simulation only, compiled out under VERILATOR)
+  //--------------
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+  initial begin : p_assertions
+    assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1))  // NOTE(review): the $clog2-based PLRU indexing looks like it assumes a power of two - confirm
+    else begin
+      $error("TLB size must be a multiple of 2 and greater than 1");
+      $stop();
+    end
+    assert (ASID_WIDTH >= 1)
+    else begin
+      $error("ASID width must be at least 1");
+      $stop();
+    end
+  end
+
+  // Just for checking: returns the number of set bits in the vector
+  function int countSetBits(logic [TLB_ENTRIES-1:0] vector);
+    automatic int count = 0;
+    foreach (vector[idx]) begin
+      count += vector[idx];
+    end
+    return count;
+  endfunction
+
+  assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1))  // at most one entry may match a lookup
+  else begin
+    $error("More then one hit in TLB!");
+    $stop();
+  end
+  assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1))  // the PLRU decode must select at most one victim
+  else begin
+    $error("More then one TLB entry selected for next replace!");
+    $stop();
+  end
+
+`endif
+  //pragma translate_on
+
+endmodule
diff --git a/test/type_param/core/mmu_sv39/mmu.sv b/test/type_param/core/mmu_sv39/mmu.sv
new file mode 100644
index 0000000..39e9f34
--- /dev/null
+++ b/test/type_param/core/mmu_sv39/mmu.sv
@@ -0,0 +1,519 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19/04/2017
+// Description: Memory Management Unit for Ariane, contains TLB and
+//              address translation unit. SV39 as defined in RISC-V
+//              privilege specification 1.11-WIP
+
+
+module mmu
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg           = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           INSTR_TLB_ENTRIES = 4,  // TLB_ENTRIES of the instruction TLB
+    parameter int unsigned           DATA_TLB_ENTRIES  = 4,  // TLB_ENTRIES of the data TLB
+    parameter int unsigned           ASID_WIDTH        = 1  // width of the ASID ports
+) (
+    input logic clk_i,  // clock
+    input logic rst_ni,  // asynchronous reset, active low
+    input logic flush_i,  // not used in this module body; reaches the PTW through its `.*` connection
+    input logic enable_translation_i,  // enable virtual memory translation for instruction fetches
+    input logic en_ld_st_translation_i,  // enable virtual memory translation for load/stores
+    // IF interface
+    input icache_arsp_t icache_areq_i,
+    output icache_areq_t icache_areq_o,
+    // LSU interface
+    // this is a more minimalistic interface because the actual addressing logic is handled
+    // in the LSU as we distinguish load and stores, what we do here is simple address translation
+    input exception_t misaligned_ex_i,
+    input logic lsu_req_i,  // request address translation
+    input logic [riscv::VLEN-1:0] lsu_vaddr_i,  // virtual address in
+    input logic lsu_is_store_i,  // the translation is requested by a store
+    // if we need to walk the page table we can't grant in the same cycle
+    // Cycle 0
+    output logic                            lsu_dtlb_hit_o,   // sent in the same cycle as the request if translation hits in the DTLB
+    output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o,  // ppn (send same cycle as hit)
+    // Cycle 1
+    output logic lsu_valid_o,  // translation is valid
+    output logic [riscv::PLEN-1:0] lsu_paddr_o,  // translated address
+    output exception_t lsu_exception_o,  // address translation threw an exception
+    // General control signals
+    input riscv::priv_lvl_t priv_lvl_i,  // privilege level used for the instruction fetch checks
+    input riscv::priv_lvl_t ld_st_priv_lvl_i,  // privilege level used for the load/store checks
+    input logic sum_i,  // when set, supervisor-mode loads/stores may access user pages
+    input logic mxr_i,  // not used in this module body; reaches the PTW through its `.*` connection
+    // input logic flag_mprv_i,
+    input logic [riscv::PPNW-1:0] satp_ppn_i,  // reaches the PTW through its `.*` connection
+    input logic [ASID_WIDTH-1:0] asid_i,  // ASID used for TLB lookups (also reaches the PTW)
+    input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,  // forwarded to both TLBs
+    input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,  // forwarded to both TLBs
+    input logic flush_tlb_i,  // drives flush_i of both TLBs
+    // Performance counters
+    output logic itlb_miss_o,  // driven by the PTW through its `.*` connection
+    output logic dtlb_miss_o,  // driven by the PTW through its `.*` connection
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+    // PMP
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
+);
+
+  logic                   iaccess_err;  // insufficient privilege to access this instruction page
+  logic                   daccess_err;  // insufficient privilege to access this data page
+  logic                   ptw_active;  // PTW is currently walking a page table
+  logic                   walking_instr;  // PTW is walking because of an ITLB miss
+  logic                   ptw_error;  // PTW threw an exception
+  logic                   ptw_access_exception;  // PTW threw an access exception (PMPs)
+  logic [riscv::PLEN-1:0] ptw_bad_paddr;  // PTW PMP exception bad physical addr
+
+  logic [riscv::VLEN-1:0] update_vaddr;  // virtual address the PTW reports for a walk
+  tlb_update_t update_ptw_itlb, update_ptw_dtlb;  // TLB refill requests issued by the PTW
+
+  logic        itlb_lu_access;
+  riscv::pte_t itlb_content;
+  logic        itlb_is_2M;
+  logic        itlb_is_1G;
+  logic        itlb_lu_hit;
+
+  logic        dtlb_lu_access;
+  riscv::pte_t dtlb_content;
+  logic        dtlb_is_2M;
+  logic        dtlb_is_1G;
+  logic        dtlb_lu_hit;
+
+
+  // A TLB lookup accompanies every fetch request and every LSU translation request
+  assign itlb_lu_access = icache_areq_i.fetch_req;
+  assign dtlb_lu_access = lsu_req_i;
+
+  // Instruction TLB: looked up with the fetch address, refilled by the PTW
+  tlb #(
+      .CVA6Cfg    (CVA6Cfg),
+      .TLB_ENTRIES(INSTR_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_itlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .update_i(update_ptw_itlb),
+
+      .lu_access_i          (itlb_lu_access),
+      .lu_asid_i            (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i           (icache_areq_i.fetch_vaddr),
+      .lu_content_o         (itlb_content),
+
+      .lu_is_2M_o(itlb_is_2M),
+      .lu_is_1G_o(itlb_is_1G),
+      .lu_hit_o  (itlb_lu_hit)
+  );
+  // Data TLB: looked up with the LSU address, refilled by the PTW
+  tlb #(
+      .CVA6Cfg    (CVA6Cfg),
+      .TLB_ENTRIES(DATA_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_dtlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .update_i(update_ptw_dtlb),
+
+      .lu_access_i          (dtlb_lu_access),
+      .lu_asid_i            (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i           (lsu_vaddr_i),
+      .lu_content_o         (dtlb_content),
+
+      .lu_is_2M_o(dtlb_is_2M),
+      .lu_is_1G_o(dtlb_is_1G),
+      .lu_hit_o  (dtlb_lu_hit)
+  );
+
+  // Page table walker: observes both TLB lookups, refills the TLBs and reports walk errors
+  ptw #(
+      .CVA6Cfg   (CVA6Cfg),
+      .ASID_WIDTH(ASID_WIDTH)
+  ) i_ptw (
+      .clk_i                 (clk_i),
+      .rst_ni                (rst_ni),
+      .ptw_active_o          (ptw_active),
+      .walking_instr_o       (walking_instr),
+      .ptw_error_o           (ptw_error),
+      .ptw_access_exception_o(ptw_access_exception),
+      .enable_translation_i  (enable_translation_i),
+
+      .update_vaddr_o(update_vaddr),
+      .itlb_update_o (update_ptw_itlb),
+      .dtlb_update_o (update_ptw_dtlb),
+
+      .itlb_access_i(itlb_lu_access),
+      .itlb_hit_i   (itlb_lu_hit),
+      .itlb_vaddr_i (icache_areq_i.fetch_vaddr),
+
+      .dtlb_access_i(dtlb_lu_access),
+      .dtlb_hit_i   (dtlb_lu_hit),
+      .dtlb_vaddr_i (lsu_vaddr_i),
+
+      .req_port_i (req_port_i),
+      .req_port_o (req_port_o),
+      .pmpcfg_i,
+      .pmpaddr_i,
+      .bad_paddr_o(ptw_bad_paddr),
+      .*  // binds every remaining ptw port to the same-named signal of this module (e.g. flush_i, asid_i, satp_ppn_i, mxr_i)
+  );
+
+  // ila_1 i_ila_1 (
+  //     .clk(clk_i), // input wire clk
+  //     .probe0({req_port_o.address_tag, req_port_o.address_index}),
+  //     .probe1(req_port_o.data_req), // input wire [63:0]  probe1
+  //     .probe2(req_port_i.data_gnt), // input wire [0:0]  probe2
+  //     .probe3(req_port_i.data_rdata), // input wire [0:0]  probe3
+  //     .probe4(req_port_i.data_rvalid), // input wire [0:0]  probe4
+  //     .probe5(ptw_error), // input wire [1:0]  probe5
+  //     .probe6(update_vaddr), // input wire [0:0]  probe6
+  //     .probe7(update_ptw_itlb.valid), // input wire [0:0]  probe7
+  //     .probe8(update_ptw_dtlb.valid), // input wire [0:0]  probe8
+  //     .probe9(dtlb_lu_access), // input wire [0:0]  probe9
+  //     .probe10(lsu_vaddr_i), // input wire [0:0]  probe10
+  //     .probe11(dtlb_lu_hit), // input wire [0:0]  probe11
+  //     .probe12(itlb_lu_access), // input wire [0:0]  probe12
+  //     .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0]  probe13
+  //     .probe14(itlb_lu_hit) // input wire [0:0]  probe14
+  // );
+
+  //-----------------------
+  // Instruction Interface
+  //-----------------------
+  logic match_any_execute_region;  // fetch_paddr lies inside one of the configured execute regions
+  logic pmp_instr_allow;  // result of the PMP check on fetch_paddr
+
+  // The instruction interface is a simple, purely combinational request/response interface
+  always_comb begin : instr_interface
+    // MMU disabled: just pass through
+    icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+    icache_areq_o.fetch_paddr  = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation
+    // two potential exception sources:
+    // 1. HPTW threw an exception -> signal with a page fault exception
+    // 2. We got an access error because of insufficient permissions -> throw an access exception
+    icache_areq_o.fetch_exception = '0;
+    // Check whether we are allowed to access this memory region from a fetch perspective
+    iaccess_err   = icache_areq_i.fetch_req && enable_translation_i
+                                                 && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
+                                                 || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
+
+    // MMU enabled: address from TLB, request delayed until hit. Error when TLB
+    // hit and no access right or TLB hit and translated address not valid (e.g.
+    // AXI decode error), or when PTW performs walk due to ITLB miss and raises
+    // an error.
+    if (enable_translation_i) begin
+      // we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
+      if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
+        icache_areq_o.fetch_exception = {
+          riscv::INSTR_ACCESS_FAULT,
+          {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+          1'b1
+        };
+      end
+
+      // the request is only acknowledged below: on an ITLB hit or when the PTW reports an error
+      icache_areq_o.fetch_valid = 1'b0;
+
+      // 4K page
+      icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
+      // Mega page
+      if (itlb_is_2M) begin
+        icache_areq_o.fetch_paddr[20:12] = icache_areq_i.fetch_vaddr[20:12];
+      end
+      // Giga page
+      if (itlb_is_1G) begin
+        icache_areq_o.fetch_paddr[29:12] = icache_areq_i.fetch_vaddr[29:12];
+      end
+
+      // ---------
+      // ITLB Hit
+      // --------
+      // if we hit the ITLB output the request signal immediately
+      if (itlb_lu_hit) begin
+        icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+        // we got an access error
+        if (iaccess_err) begin
+          // throw a page fault
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT,
+            {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+            1'b1
+          };
+        end else if (!pmp_instr_allow) begin
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT,
+            {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},  // pad with XLEN-VLEN zeros: fetch_vaddr is indexed up to VLEN-1, as in the branches above
+            1'b1
+          };
+        end
+      end else
+      // ---------
+      // ITLB Miss
+      // ---------
+      // watch out for exceptions happening during walking the page table
+      if (ptw_active && walking_instr) begin
+        icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
+        if (ptw_error)
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
+          };
+        else
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
+          };
+      end
+    end
+    // if it didn't match any execute region throw an `Instruction Access Fault`
+    // or: if we are not translating, check PMPs immediately on the paddr
+    if ((!match_any_execute_region && !ptw_error) || (!enable_translation_i && !pmp_instr_allow)) begin
+      icache_areq_o.fetch_exception = {
+        riscv::INSTR_ACCESS_FAULT,
+        {{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr},
+        1'b1
+      };
+    end
+  end
+
+  // check for execute flag on memory
+  assign match_any_execute_region = config_pkg::is_inside_execute_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
+  );
+
+  // Instruction fetch: PMP check of the physical fetch address
+  pmp #(
+      .CVA6Cfg   (CVA6Cfg),
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_if (
+      .addr_i       (icache_areq_o.fetch_paddr),
+      .priv_lvl_i,
+      // we will always execute on the instruction fetch port
+      .access_type_i(riscv::ACCESS_EXEC),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (pmp_instr_allow)
+  );
+
+  //-----------------------
+  // Data Interface
+  //-----------------------
+  // *_n is the value to be registered, *_q the registered copy used for the cycle-1 outputs
+  logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
+  riscv::pte_t dtlb_pte_n, dtlb_pte_q;
+  exception_t misaligned_ex_n, misaligned_ex_q;
+  logic lsu_req_n, lsu_req_q;
+  logic lsu_is_store_n, lsu_is_store_q;
+  logic dtlb_hit_n, dtlb_hit_q;
+  logic dtlb_is_2M_n, dtlb_is_2M_q;
+  logic dtlb_is_1G_n, dtlb_is_1G_q;
+
+  // check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
+  assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
+
+  // Wires to PMP checks
+  riscv::pmp_access_t pmp_access_type;
+  logic               pmp_data_allow;
+  localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;  // upper bit index of the giga-page offset slice, limited to PPNW-1
+  // The data interface is simpler and only consists of a request/response interface
+  always_comb begin : data_interface
+    // save request and DTLB response
+    lsu_vaddr_n = lsu_vaddr_i;
+    lsu_req_n = lsu_req_i;
+    misaligned_ex_n = misaligned_ex_i;
+    dtlb_pte_n = dtlb_content;
+    dtlb_hit_n = dtlb_lu_hit;
+    lsu_is_store_n = lsu_is_store_i;
+    dtlb_is_2M_n = dtlb_is_2M;
+    dtlb_is_1G_n = dtlb_is_1G;
+
+    // defaults (translation disabled): pass the registered request through untranslated
+    lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
+    lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PLEN-1:12];
+    lsu_valid_o = lsu_req_q;
+    lsu_exception_o = misaligned_ex_q;
+    pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
+
+    // mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
+    misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
+
+    // Check if the User flag is set, then we may only access it in supervisor mode
+    // if SUM is enabled
+    daccess_err = en_ld_st_translation_i && ((ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
+    (ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u));            // this is not a user page but we are in user mode and trying to access it
+    // translation is enabled and no misaligned exception occurred
+    if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
+      lsu_valid_o = 1'b0;
+      // 4K page
+      lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
+      lsu_dtlb_ppn_o = dtlb_content.ppn;
+      // Mega page
+      if (dtlb_is_2M_q) begin
+        lsu_paddr_o[20:12] = lsu_vaddr_q[20:12];
+        lsu_dtlb_ppn_o[20:12] = lsu_vaddr_n[20:12];
+      end
+      // Giga page
+      if (dtlb_is_1G_q) begin
+        lsu_paddr_o[PPNWMin:12] = lsu_vaddr_q[PPNWMin:12];
+        lsu_dtlb_ppn_o[PPNWMin:12] = lsu_vaddr_n[PPNWMin:12];
+      end
+      // ---------
+      // DTLB Hit
+      // --------
+      if (dtlb_hit_q && lsu_req_q) begin
+        lsu_valid_o = 1'b1;
+        // exception priority:
+        // PAGE_FAULTS have higher priority than ACCESS_FAULTS
+        // virtual memory based exceptions are PAGE_FAULTS
+        // physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
+
+        // this is a store
+        if (lsu_is_store_q) begin
+          // check if the page is write-able and we are not violating privileges
+          // also check if the dirty flag is set
+          if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::ST_ACCESS_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+          end
+
+          // this is a load
+        end else begin
+          // check for sufficient access privileges - throw a page fault if necessary
+          if (daccess_err) begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::LD_ACCESS_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+          end
+        end
+      end else
+
+      // ---------
+      // DTLB Miss
+      // ---------
+      // watch out for exceptions
+      if (ptw_active && !walking_instr) begin
+        // page table walker threw an exception
+        if (ptw_error) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // the page table walker can only throw page faults
+          if (lsu_is_store_q) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end else begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end
+        end
+        // NOTE(review): the branch below reports lsu_vaddr_n (the current input, zero-extended) while the page-fault branch above reports update_vaddr - confirm this is intended
+        if (ptw_access_exception) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // Any fault of the page table walk should be based of the original access type
+          if (lsu_is_store_q) begin
+            lsu_exception_o = {
+              riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1
+            };
+          end else begin
+            lsu_exception_o = {
+              riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1
+            };
+          end
+        end
+      end
+    end  // If translation is not enabled, check the paddr immediately against PMPs
+    else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
+      if (lsu_is_store_q) begin
+        lsu_exception_o = {
+          riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1
+        };
+      end else begin
+        lsu_exception_o = {
+          riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1
+        };
+      end
+    end
+  end
+
+  // Load/store PMP check
+  pmp #(
+      .CVA6Cfg   (CVA6Cfg),
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_data (
+      .addr_i       (lsu_paddr_o),
+      .priv_lvl_i   (ld_st_priv_lvl_i),
+      .access_type_i(pmp_access_type),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (pmp_data_allow)
+  );
+
+  // ----------
+  // Registers
+  // ----------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      lsu_vaddr_q     <= '0;
+      lsu_req_q       <= '0;
+      misaligned_ex_q <= '0;
+      dtlb_pte_q      <= '0;
+      dtlb_hit_q      <= '0;
+      lsu_is_store_q  <= '0;
+      dtlb_is_2M_q    <= '0;
+      dtlb_is_1G_q    <= '0;
+    end else begin
+      lsu_vaddr_q     <= lsu_vaddr_n;
+      lsu_req_q       <= lsu_req_n;
+      misaligned_ex_q <= misaligned_ex_n;
+      dtlb_pte_q      <= dtlb_pte_n;
+      dtlb_hit_q      <= dtlb_hit_n;
+      lsu_is_store_q  <= lsu_is_store_n;
+      dtlb_is_2M_q    <= dtlb_is_2M_n;
+      dtlb_is_1G_q    <= dtlb_is_1G_n;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/mmu_sv39/ptw.sv b/test/type_param/core/mmu_sv39/ptw.sv
new file mode 100644
index 0000000..2d0e378
--- /dev/null
+++ b/test/type_param/core/mmu_sv39/ptw.sv
@@ -0,0 +1,409 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: David Schaffenrath, TU Graz
+// Author: Florian Zaruba, ETH Zurich
+// Date: 24.4.2017
+// Description: Hardware-PTW
+
+/* verilator lint_off WIDTH */
+
+module ptw
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int ASID_WIDTH = 1
+) (
+    input  logic clk_i,                   // Clock
+    input  logic rst_ni,                  // Asynchronous reset active low
+    input  logic flush_i,                 // flush everything, we need to do this because
+                                          // actually everything we do is speculative at this stage
+                                          // e.g.: there could be a CSR instruction that changes everything
+    output logic ptw_active_o,
+    output logic walking_instr_o,         // set when walking for TLB
+    output logic ptw_error_o,             // set when an error occurred
+    output logic ptw_access_exception_o,  // set when an PMP access exception occured
+    input  logic enable_translation_i,    // CSRs indicate to enable SV39
+    input  logic en_ld_st_translation_i,  // enable virtual memory translation for load/stores
+
+    input  logic          lsu_is_store_i,  // this translation was triggered by a store
+    // PTW memory interface
+    input  dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+
+
+    // to TLBs, update logic
+    output tlb_update_t itlb_update_o,
+    output tlb_update_t dtlb_update_o,
+
+    output logic [riscv::VLEN-1:0] update_vaddr_o,
+
+    input logic [ ASID_WIDTH-1:0] asid_i,
+    // from TLBs
+    // did we miss?
+    input logic                   itlb_access_i,
+    input logic                   itlb_hit_i,
+    input logic [riscv::VLEN-1:0] itlb_vaddr_i,
+
+    input  logic                   dtlb_access_i,
+    input  logic                   dtlb_hit_i,
+    input  logic [riscv::VLEN-1:0] dtlb_vaddr_i,
+    // from CSR file
+    input  logic [riscv::PPNW-1:0] satp_ppn_i,     // ppn from satp
+    input  logic                   mxr_i,
+    // Performance counters
+    output logic                   itlb_miss_o,
+    output logic                   dtlb_miss_o,
+    // PMP
+
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
+    output logic [riscv::PLEN-1:0] bad_paddr_o
+
+);
+
+  // input registers
+  logic data_rvalid_q;
+  logic [63:0] data_rdata_q;
+
+  riscv::pte_t pte;
+  assign pte = riscv::pte_t'(data_rdata_q);
+
+  enum logic [2:0] {
+    IDLE,
+    WAIT_GRANT,
+    PTE_LOOKUP,
+    WAIT_RVALID,
+    PROPAGATE_ERROR,
+    PROPAGATE_ACCESS_ERROR
+  }
+      state_q, state_d;
+
+  // SV39 defines three levels of page tables
+  enum logic [1:0] {
+    LVL1,
+    LVL2,
+    LVL3
+  }
+      ptw_lvl_q, ptw_lvl_n;
+
+  // is this an instruction page table walk?
+  logic is_instr_ptw_q, is_instr_ptw_n;
+  logic global_mapping_q, global_mapping_n;
+  // latched tag signal
+  logic tag_valid_n, tag_valid_q;
+  // register the ASID
+  logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
+  // register the VPN we need to walk, SV39 defines a 39 bit virtual address
+  logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
+  // 8 byte aligned physical pointer (its three low bits are always assigned 3'b0)
+  logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
+
+  // Assignments
+  assign update_vaddr_o = vaddr_q;
+
+  assign ptw_active_o = (state_q != IDLE);
+  assign walking_instr_o = is_instr_ptw_q;
+  // directly output the correct physical address
+  assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
+  assign req_port_o.address_tag   = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
+  // we are never going to kill this request
+  assign req_port_o.kill_req = '0;
+  // we are never going to write with the HPTW
+  assign req_port_o.data_wdata = 64'b0;
+  // we only issue one single request at a time
+  assign req_port_o.data_id = '0;
+  // -----------
+  // TLB Update
+  // -----------
+  assign itlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
+  assign dtlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
+  // update the correct page table level
+  assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2);
+  assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1);
+  assign dtlb_update_o.is_2M = (ptw_lvl_q == LVL2);
+  assign dtlb_update_o.is_1G = (ptw_lvl_q == LVL1);
+  // output the correct ASID
+  assign itlb_update_o.asid = tlb_update_asid_q;
+  assign dtlb_update_o.asid = tlb_update_asid_q;
+  // set the global mapping bit
+  assign itlb_update_o.content = pte | (global_mapping_q << 5);
+  assign dtlb_update_o.content = pte | (global_mapping_q << 5);
+
+  assign req_port_o.tag_valid = tag_valid_q;
+
+  logic allow_access;
+
+  assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
+
+  pmp #(
+      .CVA6Cfg   (CVA6Cfg),
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_ptw (
+      .addr_i       (ptw_pptr_q),
+      // PTW access are always checked as if in S-Mode...
+      .priv_lvl_i   (riscv::PRIV_LVL_S),
+      // ...and they are always loads
+      .access_type_i(riscv::ACCESS_READ),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (allow_access)
+  );
+
+  //-------------------
+  // Page table walker
+  //-------------------
+  // A virtual address va is translated into a physical address pa as follows:
+  // 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
+  //    PAGESIZE=2^12 and LEVELS=3.)
+  // 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
+  //    Sv39, PTESIZE=8.)
+  // 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
+  //    exception.
+  // 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
+  //    Otherwise, this PTE is a pointer to the next level of the page table.
+  //    Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
+  //    a = pte.ppn × PAGESIZE and go to step 2.
+  // 5. A leaf PTE has been found. Determine if the requested memory access
+  //    is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
+  //    raise an access exception. Otherwise, the translation is successful.
+  //    Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
+  //    The translated physical address is given as follows:
+  //      - pa.pgoff = va.pgoff.
+  //      - If i > 0, then this is a superpage translation and
+  //        pa.ppn[i-1:0] = va.vpn[i-1:0].
+  //      - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
+  always_comb begin : ptw
+    // default assignments
+    // PTW memory interface
+    tag_valid_n            = 1'b0;
+    req_port_o.data_req    = 1'b0;
+    req_port_o.data_be     = 8'hFF;
+    req_port_o.data_size   = 2'b11;
+    req_port_o.data_we     = 1'b0;
+    ptw_error_o            = 1'b0;
+    ptw_access_exception_o = 1'b0;
+    itlb_update_o.valid    = 1'b0;
+    dtlb_update_o.valid    = 1'b0;
+    is_instr_ptw_n         = is_instr_ptw_q;
+    ptw_lvl_n              = ptw_lvl_q;
+    ptw_pptr_n             = ptw_pptr_q;
+    state_d                = state_q;
+    global_mapping_n       = global_mapping_q;
+    // input registers
+    tlb_update_asid_n      = tlb_update_asid_q;
+    vaddr_n                = vaddr_q;
+
+    itlb_miss_o            = 1'b0;
+    dtlb_miss_o            = 1'b0;
+
+    case (state_q)
+
+      IDLE: begin
+        // by default we start with the top-most page table
+        ptw_lvl_n        = LVL1;
+        global_mapping_n = 1'b0;
+        is_instr_ptw_n   = 1'b0;
+        // if we got an ITLB miss
+        if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
+          ptw_pptr_n        = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:30], 3'b0};
+          is_instr_ptw_n    = 1'b1;
+          tlb_update_asid_n = asid_i;
+          vaddr_n           = itlb_vaddr_i;
+          state_d           = WAIT_GRANT;
+          itlb_miss_o       = 1'b1;
+          // we got an DTLB miss
+        end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
+          ptw_pptr_n        = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:30], 3'b0};
+          tlb_update_asid_n = asid_i;
+          vaddr_n           = dtlb_vaddr_i;
+          state_d           = WAIT_GRANT;
+          dtlb_miss_o       = 1'b1;
+        end
+      end
+
+      WAIT_GRANT: begin
+        // send a request out
+        req_port_o.data_req = 1'b1;
+        // wait for the WAIT_GRANT
+        if (req_port_i.data_gnt) begin
+          // send the tag valid signal one cycle later
+          tag_valid_n = 1'b1;
+          state_d     = PTE_LOOKUP;
+        end
+      end
+
+      PTE_LOOKUP: begin
+        // we wait for the valid signal
+        if (data_rvalid_q) begin
+
+          // check if the global mapping bit is set
+          if (pte.g) global_mapping_n = 1'b1;
+
+          // -------------
+          // Invalid PTE
+          // -------------
+          // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
+          if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR;
+          // -----------
+          // Valid PTE
+          // -----------
+          else begin
+            state_d = IDLE;
+            // it is a valid PTE
+            // if pte.r = 1 or pte.x = 1 it is a valid PTE
+            if (pte.r || pte.x) begin
+              // Valid translation found (either 1G, 2M or 4K entry)
+              if (is_instr_ptw_q) begin
+                // ------------
+                // Update ITLB
+                // ------------
+                // If page is not executable, we can directly raise an error. This
+                // doesn't put a useless entry into the TLB. The same idea applies
+                // to the access flag since we let the access flag be managed by SW.
+                if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR;
+                else itlb_update_o.valid = 1'b1;
+
+              end else begin
+                // ------------
+                // Update DTLB
+                // ------------
+                // Check if the access flag has been set, otherwise throw a page-fault
+                // and let the software handle those bits.
+                // If page is not readable (there are no write-only pages)
+                // we can directly raise an error. This doesn't put a useless
+                // entry into the TLB.
+                if (pte.a && (pte.r || (pte.x && mxr_i))) begin
+                  dtlb_update_o.valid = 1'b1;
+                end else begin
+                  state_d = PROPAGATE_ERROR;
+                end
+                // Request is a store: perform some additional checks
+                // If the request was a store and the page is not write-able, raise an error
+                // the same applies if the dirty flag is not set
+                if (lsu_is_store_i && (!pte.w || !pte.d)) begin
+                  dtlb_update_o.valid = 1'b0;
+                  state_d = PROPAGATE_ERROR;
+                end
+              end
+              // check if the ppn is correctly aligned:
+              // 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
+              // exception.
+              if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin
+                state_d             = PROPAGATE_ERROR;
+                dtlb_update_o.valid = 1'b0;
+                itlb_update_o.valid = 1'b0;
+              end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin
+                state_d             = PROPAGATE_ERROR;
+                dtlb_update_o.valid = 1'b0;
+                itlb_update_o.valid = 1'b0;
+              end
+              // this is a pointer to the next TLB level
+            end else begin
+              // pointer to next level of page table
+              if (ptw_lvl_q == LVL1) begin
+                // we are in the second level now
+                ptw_lvl_n  = LVL2;
+                ptw_pptr_n = {pte.ppn, vaddr_q[29:21], 3'b0};
+              end
+
+              if (ptw_lvl_q == LVL2) begin
+                // here we received a pointer to the third level
+                ptw_lvl_n  = LVL3;
+                ptw_pptr_n = {pte.ppn, vaddr_q[20:12], 3'b0};
+              end
+
+              state_d = WAIT_GRANT;
+
+              if (ptw_lvl_q == LVL3) begin
+                // Should already be the last level page table => Error
+                ptw_lvl_n = LVL3;
+                state_d   = PROPAGATE_ERROR;
+              end
+            end
+          end
+
+          // Check if this access was actually allowed from a PMP perspective
+          if (!allow_access) begin
+            itlb_update_o.valid = 1'b0;
+            dtlb_update_o.valid = 1'b0;
+            // we have to return the failed address in bad_addr
+            ptw_pptr_n = ptw_pptr_q;
+            state_d = PROPAGATE_ACCESS_ERROR;
+          end
+        end
+        // we've got a data WAIT_GRANT so tell the cache that the tag is valid
+      end
+      // Propagate error to MMU/LSU
+      PROPAGATE_ERROR: begin
+        state_d     = IDLE;
+        ptw_error_o = 1'b1;
+      end
+      PROPAGATE_ACCESS_ERROR: begin
+        state_d                = IDLE;
+        ptw_access_exception_o = 1'b1;
+      end
+      // wait for the rvalid before going back to IDLE
+      WAIT_RVALID: begin
+        if (data_rvalid_q) state_d = IDLE;
+      end
+      default: begin
+        state_d = IDLE;
+      end
+    endcase
+
+    // -------
+    // Flush
+    // -------
+    // should we have flushed before we got an rvalid, wait for it until going back to IDLE
+    if (flush_i) begin
+      // on a flush check whether we are
+      // 1. in the PTE Lookup check whether we still need to wait for an rvalid
+      // 2. waiting for a grant, if so: wait for it
+      // if not, go back to idle
+      if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
+                ((state_q == WAIT_GRANT) && req_port_i.data_gnt))
+        state_d = WAIT_RVALID;
+      else state_d = IDLE;
+    end
+  end
+
+  // State registers: asynchronous active-low reset, otherwise every *_q takes its next-state value
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      state_q           <= IDLE;  // FSM restarts in IDLE
+      is_instr_ptw_q    <= 1'b0;
+      ptw_lvl_q         <= LVL1;  // walk level restarts at LVL1
+      tag_valid_q       <= 1'b0;
+      tlb_update_asid_q <= '0;
+      vaddr_q           <= '0;
+      ptw_pptr_q        <= '0;
+      global_mapping_q  <= 1'b0;
+      data_rdata_q      <= '0;
+      data_rvalid_q     <= 1'b0;
+    end else begin
+      state_q           <= state_d;
+      ptw_pptr_q        <= ptw_pptr_n;
+      is_instr_ptw_q    <= is_instr_ptw_n;
+      ptw_lvl_q         <= ptw_lvl_n;
+      tag_valid_q       <= tag_valid_n;
+      tlb_update_asid_q <= tlb_update_asid_n;
+      vaddr_q           <= vaddr_n;
+      global_mapping_q  <= global_mapping_n;
+      data_rdata_q      <= req_port_i.data_rdata;  // registered copy of the read data from req_port_i
+      data_rvalid_q     <= req_port_i.data_rvalid;  // registered copy of the read-valid from req_port_i
+    end
+  end
+
+endmodule
+/* verilator lint_on WIDTH */
diff --git a/test/type_param/core/mmu_sv39/tlb.sv b/test/type_param/core/mmu_sv39/tlb.sv
new file mode 100644
index 0000000..3df2cb0
--- /dev/null
+++ b/test/type_param/core/mmu_sv39/tlb.sv
@@ -0,0 +1,290 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: David Schaffenrath, TU Graz
+// Author: Florian Zaruba, ETH Zurich
+// Date: 21.4.2017
+// Description: Translation Lookaside Buffer, SV39
+//              fully set-associative
+
+
+module tlb
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned TLB_ENTRIES = 4,
+    parameter int unsigned ASID_WIDTH = 1
+) (
+    input  logic                          clk_i,                  // Clock
+    input  logic                          rst_ni,                 // Asynchronous reset active low
+    input  logic                          flush_i,                // Flush signal
+    // Update TLB
+    input  tlb_update_t                   update_i,
+    // Lookup signals
+    input  logic                          lu_access_i,
+    input  logic        [ ASID_WIDTH-1:0] lu_asid_i,
+    input  logic        [riscv::VLEN-1:0] lu_vaddr_i,
+    output riscv::pte_t                   lu_content_o,
+    input  logic        [ ASID_WIDTH-1:0] asid_to_be_flushed_i,
+    input  logic        [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
+    output logic                          lu_is_2M_o,
+    output logic                          lu_is_1G_o,
+    output logic                          lu_hit_o
+);
+
+  // SV39 defines three levels of page tables
+  struct packed {
+    logic [ASID_WIDTH-1:0] asid;
+    logic [riscv::VPN2:0]  vpn2;
+    logic [8:0]            vpn1;
+    logic [8:0]            vpn0;
+    logic                  is_2M;
+    logic                  is_1G;
+    logic                  valid;
+  } [TLB_ENTRIES-1:0]
+      tags_q, tags_n;
+
+  riscv::pte_t [TLB_ENTRIES-1:0] content_q, content_n;
+  logic [8:0] vpn0, vpn1;
+  logic [  riscv::VPN2:0] vpn2;
+  logic [TLB_ENTRIES-1:0] lu_hit;  // to replacement logic
+  logic [TLB_ENTRIES-1:0] replace_en;  // replace the following entry, set by replacement strategy
+  //-------------
+  // Translation
+  //-------------
+  always_comb begin : translation
+    vpn0         = lu_vaddr_i[20:12];
+    vpn1         = lu_vaddr_i[29:21];
+    vpn2         = lu_vaddr_i[30+riscv::VPN2:30];
+
+    // defaults: no hit and an all-zero PTE; overridden below by any matching entry
+    lu_hit       = '{default: 0};
+    lu_hit_o     = 1'b0;
+    lu_content_o = '{default: 0};
+    lu_is_1G_o   = 1'b0;
+    lu_is_2M_o   = 1'b0;
+
+    for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
+      // first level match, this may be a giga page, check the ASID flags as well
+      // if the entry is associated to a global address, don't match the ASID (ASID is don't care)
+      if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid) || content_q[i].g)  && vpn2 == tags_q[i].vpn2) begin
+        // giga page: the VPN2 (and ASID/global) match above is already a hit
+        if (tags_q[i].is_1G) begin
+          lu_is_1G_o = 1'b1;
+          lu_content_o = content_q[i];
+          lu_hit_o = 1'b1;
+          lu_hit[i] = 1'b1;
+          // not a giga page hit so check further
+        end else if (vpn1 == tags_q[i].vpn1) begin
+          // a 2M entry hits on VPN2+VPN1 alone, a 4 kB entry additionally needs VPN0 to match
+          // output accordingly
+          if (tags_q[i].is_2M || vpn0 == tags_q[i].vpn0) begin
+            lu_is_2M_o   = tags_q[i].is_2M;
+            lu_content_o = content_q[i];
+            lu_hit_o     = 1'b1;
+            lu_hit[i]    = 1'b1;
+          end
+        end
+      end
+    end
+  end
+
+
+
+  logic asid_to_be_flushed_is0;  // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high
+  logic vaddr_to_be_flushed_is0;  // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high
+  logic [TLB_ENTRIES-1:0] vaddr_vpn0_match;
+  logic [TLB_ENTRIES-1:0] vaddr_vpn1_match;
+  logic [TLB_ENTRIES-1:0] vaddr_vpn2_match;
+
+  assign asid_to_be_flushed_is0  = ~(|asid_to_be_flushed_i);
+  assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
+
+  // ------------------
+  // Update and Flush
+  // ------------------
+  always_comb begin : update_flush
+    tags_n    = tags_q;  // default: keep the current tags
+    content_n = content_q;  // default: keep the current PTE contents
+
+    for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
+
+      vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[20:12] == tags_q[i].vpn0);
+      vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[29:21] == tags_q[i].vpn1);
+      vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+riscv::VPN2:30] == tags_q[i].vpn2);
+
+      if (flush_i) begin
+        // invalidate logic: only the valid bit is cleared, and a flush takes priority over an update
+        // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
+        if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0;
+        // flush vaddr in all address spaces ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
+        else if (asid_to_be_flushed_is0 && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M) ) && (~vaddr_to_be_flushed_is0))
+          tags_n[i].valid = 1'b0;
+        // the entry is flushed if it's not global and asid and vaddr both match the entry to be flushed ("SFENCE.VMA vaddr asid" case)
+        else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
+          tags_n[i].valid = 1'b0;
+        // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
+        else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0))
+          tags_n[i].valid = 1'b0;
+        // normal replacement (not reached while flush_i is set): write the entry selected by replace_en
+      end else if (update_i.valid & replace_en[i]) begin
+        // update tag array
+        tags_n[i] = '{
+            asid: update_i.asid,
+            vpn2: update_i.vpn[18+riscv::VPN2:18],
+            vpn1: update_i.vpn[17:9],
+            vpn0: update_i.vpn[8:0],
+            is_1G: update_i.is_1G,
+            is_2M: update_i.is_2M,
+            valid: 1'b1
+        };
+        // and content as well
+        content_n[i] = update_i.content;
+      end
+    end
+  end
+
+  // -----------------------------------------------
+  // PLRU - Pseudo Least Recently Used Replacement
+  // -----------------------------------------------
+  logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;
+  always_comb begin : plru_replacement
+    plru_tree_n = plru_tree_q;
+    // The PLRU-tree indexing:
+    // lvl0        0
+    //            / \
+        //           /   \
+        // lvl1     1     2
+        //         / \   / \
+        // lvl2   3   4 5   6
+        //       / \ /\/\  /\
+        //      ... ... ... ...
+        // Just predefine which nodes will be set/cleared
+        // E.g. for a TLB with 8 entries, the for-loop is semantically
+        // equivalent to the following pseudo-code (each node gets the INVERSE of the hit index bit):
+        // unique case (1'b1)
+        // lu_hit[7]: plru_tree_n[0, 2, 6] = {0, 0, 0};
+        // lu_hit[6]: plru_tree_n[0, 2, 6] = {0, 0, 1};
+        // lu_hit[5]: plru_tree_n[0, 2, 5] = {0, 1, 0};
+        // lu_hit[4]: plru_tree_n[0, 2, 5] = {0, 1, 1};
+        // lu_hit[3]: plru_tree_n[0, 1, 4] = {1, 0, 0};
+        // lu_hit[2]: plru_tree_n[0, 1, 4] = {1, 0, 1};
+        // lu_hit[1]: plru_tree_n[0, 1, 3] = {1, 1, 0};
+        // lu_hit[0]: plru_tree_n[0, 1, 3] = {1, 1, 1};
+        // default: begin /* No hit */ end
+        // endcase
+        for (
+        int unsigned i = 0; i < TLB_ENTRIES; i++
+    ) begin
+      automatic int unsigned idx_base, shift, new_index;
+      // we got a hit: this entry is now the most recently used, so point the tree away from it
+      if (lu_hit[i] & lu_access_i) begin
+        // set every node on the path to the inverse of the entry's index bit at that level
+        for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
+          idx_base = $unsigned((2 ** lvl) - 1);
+          // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+          shift = $clog2(TLB_ENTRIES) - lvl;
+          // to circumvent the 32 bit integer arithmetic assignment
+          new_index = ~((i >> (shift - 1)) & 32'b1);
+          plru_tree_n[idx_base+(i>>shift)] = new_index[0];
+        end
+      end
+    end
+    // Decode tree to write enable signals
+    // Next for-loop basically creates the following logic for e.g. an 8 entry
+    // TLB (note: pseudo-code obviously):
+    // replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
+    // replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
+    // replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
+    // replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
+    // replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
+    // replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
+    // replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
+    // replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
+    // For each entry traverse the tree. If every tree-node on its path matches
+    // the corresponding bit of the entry's index, this is
+    // the next entry to replace.
+    for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
+      automatic logic en;
+      automatic int unsigned idx_base, shift, new_index;
+      en = 1'b1;
+      for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
+        idx_base = $unsigned((2 ** lvl) - 1);
+        // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+        shift = $clog2(TLB_ENTRIES) - lvl;
+
+        // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
+        new_index = (i >> (shift - 1)) & 32'b1;
+        if (new_index[0]) begin
+          en &= plru_tree_q[idx_base+(i>>shift)];
+        end else begin
+          en &= ~plru_tree_q[idx_base+(i>>shift)];
+        end
+      end
+      replace_en[i] = en;
+    end
+  end
+
+  // registers: tags, PTE contents and PLRU tree, all cleared by the asynchronous active-low reset
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      tags_q      <= '{default: 0};  // also clears every valid bit, so the TLB starts empty
+      content_q   <= '{default: 0};
+      plru_tree_q <= '{default: 0};
+    end else begin
+      tags_q      <= tags_n;
+      content_q   <= content_n;
+      plru_tree_q <= plru_tree_n;
+    end
+  end
+  //--------------
+  // Sanity checks
+  //--------------
+
+  //pragma translate_off
+`ifndef VERILATOR
+
+  initial begin : p_assertions  // parameter sanity checks, evaluated once at simulation start
+    assert ((TLB_ENTRIES > 1) && ((TLB_ENTRIES & (TLB_ENTRIES - 1)) == 0))  // PLRU needs a full binary tree
+    else begin
+      $error("TLB size must be a power of 2 and greater than 1");
+      $stop();
+    end
+    assert (ASID_WIDTH >= 1)  // the tag struct declares asid as [ASID_WIDTH-1:0]
+    else begin
+      $error("ASID width must be at least 1");
+      $stop();
+    end
+  end
+
+  // Assertion helper: number of set bits in a one-bit-per-entry vector
+  function int countSetBits(logic [TLB_ENTRIES-1:0] vector);
+    automatic int ones = 0;
+    for (int bit_idx = TLB_ENTRIES - 1; bit_idx >= 0; bit_idx--) begin
+      ones = ones + vector[bit_idx];
+    end
+    return ones;
+  endfunction
+
+  assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1))  // a lookup may match at most one entry
+  else begin
+    $error("More then one hit in TLB!");
+    $stop();
+  end
+  assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1))  // at most one replacement victim
+  else begin
+    $error("More then one TLB entry selected for next replace!");
+    $stop();
+  end
+
+`endif
+  //pragma translate_on
+
+endmodule
diff --git a/test/type_param/core/mult.sv b/test/type_param/core/mult.sv
new file mode 100644
index 0000000..7270389
--- /dev/null
+++ b/test/type_param/core/mult.sv
@@ -0,0 +1,149 @@
+
+
+module mult
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic                             clk_i,
+    input  logic                             rst_ni,
+    input  logic                             flush_i,
+    input  fu_data_t                         fu_data_i,
+    input  logic                             mult_valid_i,
+    output riscv::xlen_t                     result_o,
+    output logic                             mult_valid_o,
+    output logic                             mult_ready_o,
+    output logic         [TRANS_ID_BITS-1:0] mult_trans_id_o
+);
+  logic mul_valid;
+  logic div_valid;
+  logic div_ready_i;  // receiver of division result is able to accept the result
+  logic [TRANS_ID_BITS-1:0] mul_trans_id;
+  logic [TRANS_ID_BITS-1:0] div_trans_id;
+  riscv::xlen_t mul_result;
+  riscv::xlen_t div_result;
+
+  logic div_valid_op;
+  logic mul_valid_op;
+  // Input Arbitration
+
+  assign mul_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR });
+
+  assign div_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW });
+
+  // ---------------------
+  // Output Arbitration
+  // ---------------------
+  // we give precedence to multiplication as the divider supports stalling and the multiplier is
+  // just a dumb pipelined multiplier
+  assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1;
+  assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id;
+  assign result_o = (mul_valid) ? mul_result : div_result;
+  assign mult_valid_o = div_valid | mul_valid;
+  // mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests
+
+  // ---------------------
+  // Multiplication
+  // ---------------------
+  multiplier #(
+      .CVA6Cfg(CVA6Cfg)
+  ) i_multiplier (
+      .clk_i,
+      .rst_ni,
+      .trans_id_i     (fu_data_i.trans_id),
+      .operation_i    (fu_data_i.operation),
+      .operand_a_i    (fu_data_i.operand_a),
+      .operand_b_i    (fu_data_i.operand_b),
+      .result_o       (mul_result),
+      .mult_valid_i   (mul_valid_op),
+      .mult_valid_o   (mul_valid),
+      .mult_trans_id_o(mul_trans_id),
+      .mult_ready_o   ()                      // this unit is unconditionally ready
+  );
+
+  // ---------------------
+  // Division
+  // ---------------------
+  riscv::xlen_t
+      operand_b,
+      operand_a;  // input operands after input MUX (input silencing, word operations or full inputs)
+  riscv::xlen_t result;  // result before result mux
+
+  logic         div_signed;  // signed or unsigned division
+  logic         rem;  // is it a remainder (as opposed to a division)
+  logic word_op_d, word_op_q;  // remembers whether the division is a word operation (selects sext32 of the result)
+
+  // is this a signed op?
+  assign div_signed = fu_data_i.operation inside {DIV, DIVW, REM, REMW};
+  // is this a modulo?
+  assign rem        = fu_data_i.operation inside {REM, REMU, REMW, REMUW};
+
+  // prepare the input operands and control divider
+  always_comb begin
+    // silence the inputs: the divider sees zero operands unless a division is presented
+    operand_a = '0;
+    operand_b = '0;
+    // control signals: hold the word-operation flag by default
+    word_op_d = word_op_q;
+
+    // we've got a new division operation
+    if (mult_valid_i && fu_data_i.operation inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin
+      // is this a word operation?
+      if (riscv::IS_XLEN64 && (fu_data_i.operation == DIVW || fu_data_i.operation == DIVUW || fu_data_i.operation == REMW || fu_data_i.operation == REMUW)) begin
+        // yes: sign extend the lower 32 bit of the operands for signed operations, zero extend them otherwise
+        if (div_signed) begin
+          operand_a = sext32(fu_data_i.operand_a[31:0]);
+          operand_b = sext32(fu_data_i.operand_b[31:0]);
+        end else begin
+          operand_a = fu_data_i.operand_a[31:0];
+          operand_b = fu_data_i.operand_b[31:0];
+        end
+
+        // save whether we want sign extend the result or not, this is done for all word operations
+        word_op_d = 1'b1;
+      end else begin
+        // regular op
+        operand_a = fu_data_i.operand_a;
+        operand_b = fu_data_i.operand_b;
+        word_op_d = 1'b0;
+      end
+    end
+  end
+
+  // ---------------------
+  // Serial Divider
+  // ---------------------
+  serdiv #(
+      .CVA6Cfg(CVA6Cfg),
+      .WIDTH  (riscv::XLEN)
+  ) i_div (
+      .clk_i    (clk_i),
+      .rst_ni   (rst_ni),
+      .id_i     (fu_data_i.trans_id),
+      .op_a_i   (operand_a),
+      .op_b_i   (operand_b),
+      .opcode_i ({rem, div_signed}),   // 00: udiv, 10: urem, 01: div, 11: rem
+      .in_vld_i (div_valid_op),
+      .in_rdy_o (mult_ready_o),
+      .flush_i  (flush_i),
+      .out_vld_o(div_valid),
+      .out_rdy_i(div_ready_i),
+      .id_o     (div_trans_id),
+      .res_o    (result)
+  );
+
+  // Result multiplexer
+  // word_op_q is set for every word operation (signed or unsigned); on XLEN64 the result is then sign extended via sext32
+  assign div_result = (riscv::IS_XLEN64 && word_op_q) ? sext32(result) : result;
+
+  // ---------------------
+  // Registers
+  // ---------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      word_op_q <= '0;
+    end else begin
+      word_op_q <= word_op_d;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/multiplier.sv b/test/type_param/core/multiplier.sv
new file mode 100644
index 0000000..e13d614
--- /dev/null
+++ b/test/type_param/core/multiplier.sv
@@ -0,0 +1,158 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+//
+// Description: Multiplication Unit with one pipeline register
+//              This unit relies on retiming features of the synthesizer
+//
+
+
+module multiplier
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input  logic                             clk_i,
+    input  logic                             rst_ni,
+    input  logic         [TRANS_ID_BITS-1:0] trans_id_i,
+    input  logic                             mult_valid_i,
+    input  fu_op                             operation_i,
+    input  riscv::xlen_t                     operand_a_i,
+    input  riscv::xlen_t                     operand_b_i,
+    output riscv::xlen_t                     result_o,
+    output logic                             mult_valid_o,
+    output logic                             mult_ready_o,
+    output logic         [TRANS_ID_BITS-1:0] mult_trans_id_o
+);
+  // Carry-less multiplication
+  logic [riscv::XLEN-1:0]
+      clmul_q, clmul_d, clmulr_q, clmulr_d, operand_a, operand_b, operand_a_rev, operand_b_rev;
+  logic clmul_rmode, clmul_hmode;
+
+  if (CVA6Cfg.RVB) begin : gen_bitmanip
+    // carry-less multiply datapath (only generated when CVA6Cfg.RVB is set): decode the reversed modes
+    assign clmul_rmode = (operation_i == CLMULR);
+    assign clmul_hmode = (operation_i == CLMULH);
+
+    // bit-reversed copies of both input operands
+    for (genvar i = 0; i < riscv::XLEN; i++) begin
+      assign operand_a_rev[i] = operand_a_i[(riscv::XLEN-1)-i];
+      assign operand_b_rev[i] = operand_b_i[(riscv::XLEN-1)-i];
+    end
+
+    // CLMULR and CLMULH operate on the bit-reversed operands, CLMUL on the plain ones
+    assign operand_a = (clmul_rmode | clmul_hmode) ? operand_a_rev : operand_a_i;
+    assign operand_b = (clmul_rmode | clmul_hmode) ? operand_b_rev : operand_b_i;
+
+    // carry-less product: XOR in (operand_a << i) for every set bit i of operand_b
+    always_comb begin
+      clmul_d = '0;
+      for (int i = 0; i < riscv::XLEN; i++) begin  // operand_b only has bits 0 .. XLEN-1
+        clmul_d = (|((operand_b >> i) & 1)) ? clmul_d ^ (operand_a << i) : clmul_d;
+      end
+    end
+
+    // bit-reversed product, used by p_selmux for CLMULR and (shifted right by one) CLMULH
+    for (genvar i = 0; i < riscv::XLEN; i++) begin
+      assign clmulr_d[i] = clmul_d[(riscv::XLEN-1)-i];
+    end
+  end
+
+  // Pipeline register
+  logic [TRANS_ID_BITS-1:0] trans_id_q;
+  logic                     mult_valid_q;
+  fu_op operator_d, operator_q;
+  logic [riscv::XLEN*2-1:0] mult_result_d, mult_result_q;
+
+  // control registers
+  logic sign_a, sign_b;
+  logic mult_valid;
+
+  // control signals
+  assign mult_valid_o = mult_valid_q;
+  assign mult_trans_id_o = trans_id_q;
+  assign mult_ready_o = 1'b1;
+
+  assign mult_valid      = mult_valid_i && (operation_i inside {MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR});
+
+  // Sign Select MUX
+  // sign_a / sign_b = 1 makes mult_result_d extend that operand with its own MSB (signed operand)
+  always_comb begin
+    case (operation_i)
+      MULH: begin  // signed x signed multiplication
+        sign_a = 1'b1;
+        sign_b = 1'b1;
+      end
+      MULHSU: begin  // signed x unsigned multiplication
+        sign_a = 1'b1;
+        sign_b = 1'b0;
+      end
+      default: begin  // every other operation: unsigned multiplication
+        sign_a = 1'b0;
+        sign_b = 1'b0;
+      end
+    endcase
+  end
+
+
+  // single stage version
+  assign mult_result_d = $signed(
+      {operand_a_i[riscv::XLEN-1] & sign_a, operand_a_i}
+  ) * $signed(
+      {operand_b_i[riscv::XLEN-1] & sign_b, operand_b_i}
+  );
+
+
+  assign operator_d = operation_i;
+
+  always_comb begin : p_selmux  // pick the result that belongs to the registered operation operator_q
+    unique case (operator_q)
+      MULH, MULHU, MULHSU: result_o = mult_result_q[riscv::XLEN*2-1:riscv::XLEN];  // upper half of the product
+      CLMUL:               result_o = clmul_q;
+      CLMULH:              result_o = clmulr_q >> 1;  // bit-reversed carry-less product shifted right by one
+      CLMULR:              result_o = clmulr_q;
+      // MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
+      default: begin
+        if (operator_q == MULW && riscv::IS_XLEN64) result_o = sext32(mult_result_q[31:0]);
+        else result_o = mult_result_q[riscv::XLEN-1:0];  // including MUL
+      end
+    endcase
+  end
+  if (CVA6Cfg.RVB) begin
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        clmul_q  <= '0;
+        clmulr_q <= '0;
+      end else begin
+        clmul_q  <= clmul_d;
+        clmulr_q <= clmulr_d;
+      end
+    end
+  end
+  // -----------------------
+  // Output pipeline register
+  // -----------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      mult_valid_q  <= '0;
+      trans_id_q    <= '0;
+      operator_q    <= MUL;
+      mult_result_q <= '0;
+    end else begin
+      // the transaction ID is registered alongside the result and returned on mult_trans_id_o
+      trans_id_q    <= trans_id_i;
+      // Output Register: valid, operation and full 2*XLEN product are captured every cycle
+      mult_valid_q  <= mult_valid;
+      operator_q    <= operator_d;
+      mult_result_q <= mult_result_d;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/perf_counters.sv b/test/type_param/core/perf_counters.sv
new file mode 100644
index 0000000..ff6d0d1
--- /dev/null
+++ b/test/type_param/core/perf_counters.sv
@@ -0,0 +1,226 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 06.10.2017
+// Description: Performance counters
+
+
+module perf_counters
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg  = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           NumPorts = 3                            // number of miss ports
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic debug_mode_i,  // debug mode
+    // SRAM like interface
+    input logic [11:0] addr_i,  // read/write address (up to 6 counters possible)
+    input logic we_i,  // write enable
+    input riscv::xlen_t data_i,  // data to write
+    output riscv::xlen_t data_o,  // data to read
+    // from commit stage
+    input  scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,     // the instruction we want to commit
+    input  logic [CVA6Cfg.NrCommitPorts-1:0]              commit_ack_i,       // acknowledge that we are indeed committing
+    // from L1 caches
+    input logic l1_icache_miss_i,
+    input logic l1_dcache_miss_i,
+    // from MMU
+    input logic itlb_miss_i,
+    input logic dtlb_miss_i,
+    // from issue stage
+    input logic sb_full_i,
+    // from frontend
+    input logic if_empty_i,
+    // from PC Gen
+    input exception_t ex_i,
+    input logic eret_i,
+    input bp_resolve_t resolved_branch_i,
+    // for newly added events
+    input exception_t branch_exceptions_i,  //Branch exceptions->execute unit-> branch_exception_o
+    input icache_dreq_t l1_icache_access_i,
+    input dcache_req_i_t [2:0] l1_dcache_access_i,
+    input  logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]miss_vld_bits_i,  //For Cache eviction (3ports-LOAD,STORE,PTW)
+    input logic i_tlb_flush_i,
+    input logic stall_issue_i,  //stall-read operands
+    input logic [31:0] mcountinhibit_i
+);
+  // Six 64-bit event counters (index 1..6); counter i counts the event selected by mhpmevent_q[i].
+  logic [63:0] generic_counter_d[6:1];
+  logic [63:0] generic_counter_q[6:1];
+
+  // access-fault flags: set in generic_counter below, but not wired to any port of this module
+  logic read_access_exception, update_access_exception;
+  logic events[6:1];
+  //internal signal for  MUX select line input
+  logic [4:0] mhpmevent_d[6:1];
+  logic [4:0] mhpmevent_q[6:1];
+  // internal signal to detect event on multiple commit ports
+  logic [CVA6Cfg.NrCommitPorts-1:0] load_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] store_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] branch_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] call_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] return_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] int_event;
+  logic [CVA6Cfg.NrCommitPorts-1:0] fp_event;
+  logic eviction_event;  // D-cache miss while some miss port reports all of its valid bits set
+
+  //Multiplexer: derive the per-cycle event strobes and route one of them to each counter
+  always_comb begin : Mux
+    events[6:1] = '{default: 0};
+    load_event = '{default: 0};
+    store_event = '{default: 0};
+    branch_event = '{default: 0};
+    call_event = '{default: 0};
+    return_event = '{default: 0};
+    int_event = '{default: 0};
+    fp_event = '{default: 0};
+    eviction_event = 1'b0;
+
+    for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
+      load_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == LOAD);
+      store_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == STORE);
+      branch_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == CTRL_FLOW);
+      call_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == CTRL_FLOW && (commit_instr_i[j].op == ADD || commit_instr_i[j].op == JALR) && (commit_instr_i[j].rd == 'd1 || commit_instr_i[j].rd == 'd5));
+      return_event[j] = commit_ack_i[j] & (commit_instr_i[j].op == JALR && commit_instr_i[j].rd == 'd0);
+      int_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == ALU || commit_instr_i[j].fu == MULT);
+      fp_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == FPU || commit_instr_i[j].fu == FPU_VEC);
+    end
+
+    // reduction-AND follows DCACHE_SET_ASSOC and NumPorts (a fixed 8'hFF compare is only right for 8 ways)
+    for (int unsigned p = 0; p < NumPorts; p++) eviction_event |= l1_dcache_miss_i && (&miss_vld_bits_i[p]);
+
+    for (int unsigned i = 1; i <= 6; i++) begin
+      case (mhpmevent_q[i])
+        5'b00000: events[i] = 0;
+        5'b00001: events[i] = l1_icache_miss_i;  //L1 I-Cache misses
+        5'b00010: events[i] = l1_dcache_miss_i;  //L1 D-Cache misses
+        5'b00011: events[i] = itlb_miss_i;  //ITLB misses
+        5'b00100: events[i] = dtlb_miss_i;  //DTLB misses
+        5'b00101: events[i] = |load_event;  //Load accesses
+        5'b00110: events[i] = |store_event;  //Store accesses
+        5'b00111: events[i] = ex_i.valid;  //Exceptions
+        5'b01000: events[i] = eret_i;  //Exception handler returns
+        5'b01001: events[i] = |branch_event;  // Branch instructions
+        5'b01010: events[i] = resolved_branch_i.valid && resolved_branch_i.is_mispredict;  //Branch mispredicts
+        5'b01011: events[i] = branch_exceptions_i.valid;  //Branch exceptions
+        // The standard software calling convention uses register x1 to hold the return address on a call
+        // the unconditional jump is decoded as ADD op
+        5'b01100: events[i] = |call_event;  //Call
+        5'b01101: events[i] = |return_event;  //Return
+        5'b01110: events[i] = sb_full_i;  //MSB Full
+        5'b01111: events[i] = if_empty_i;  //Instruction fetch Empty
+        5'b10000: events[i] = l1_icache_access_i.req;  //L1 I-Cache accesses
+        5'b10001: events[i] = l1_dcache_access_i[0].data_req || l1_dcache_access_i[1].data_req || l1_dcache_access_i[2].data_req;  //L1 D-Cache accesses
+        5'b10010: events[i] = eviction_event;  //eviction
+        5'b10011: events[i] = i_tlb_flush_i;  //I-TLB flush
+        5'b10100: events[i] = |int_event;  //Integer instructions
+        5'b10101: events[i] = |fp_event;  //Floating Point Instructions
+        5'b10110: events[i] = stall_issue_i;  //Pipeline bubbles
+        default: events[i] = 0;
+      endcase
+    end
+  end
+
+  always_comb begin : generic_counter
+    generic_counter_d = generic_counter_q;
+    data_o = 'b0;
+    mhpmevent_d = mhpmevent_q;
+    read_access_exception = 1'b0;
+    update_access_exception = 1'b0;
+
+    // Increment the non-inhibited counters with active events
+    for (int unsigned i = 1; i <= 6; i++) begin
+      if ((!debug_mode_i) && (!we_i)) begin  // no counting in debug mode or while we_i is set
+        if ((events[i]) == 1 && (!mcountinhibit_i[i+2])) begin  // counter i is gated by inhibit bit i+2
+          generic_counter_d[i] = generic_counter_q[i] + 1'b1;
+        end
+      end
+    end
+
+    //Read: counter low word (whole counter unless XLEN == 32), high word (XLEN == 32 only) or event selector
+    unique case (addr_i)
+      riscv::CSR_MHPM_COUNTER_3,
+            riscv::CSR_MHPM_COUNTER_4,
+            riscv::CSR_MHPM_COUNTER_5,
+            riscv::CSR_MHPM_COUNTER_6,
+            riscv::CSR_MHPM_COUNTER_7,
+            riscv::CSR_MHPM_COUNTER_8  :begin
+        if (riscv::XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0];
+        else data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1];
+      end
+      riscv::CSR_MHPM_COUNTER_3H,
+            riscv::CSR_MHPM_COUNTER_4H,
+            riscv::CSR_MHPM_COUNTER_5H,
+            riscv::CSR_MHPM_COUNTER_6H,
+            riscv::CSR_MHPM_COUNTER_7H,
+            riscv::CSR_MHPM_COUNTER_8H :begin
+        if (riscv::XLEN == 32)
+          data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32];
+        else read_access_exception = 1'b1;
+      end
+      riscv::CSR_MHPM_EVENT_3,
+            riscv::CSR_MHPM_EVENT_4,
+            riscv::CSR_MHPM_EVENT_5,
+            riscv::CSR_MHPM_EVENT_6,
+            riscv::CSR_MHPM_EVENT_7,
+            riscv::CSR_MHPM_EVENT_8   :
+      data_o = mhpmevent_q[addr_i-riscv::CSR_MHPM_EVENT_3+1];
+      default: data_o = 'b0;
+    endcase
+
+    //Write: same address map as the read path; any other address raises update_access_exception
+    if (we_i) begin
+      unique case (addr_i)
+        riscv::CSR_MHPM_COUNTER_3,
+            riscv::CSR_MHPM_COUNTER_4,
+            riscv::CSR_MHPM_COUNTER_5,
+            riscv::CSR_MHPM_COUNTER_6,
+            riscv::CSR_MHPM_COUNTER_7,
+            riscv::CSR_MHPM_COUNTER_8  :begin
+          if (riscv::XLEN == 32)
+            generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0] = data_i;
+          else generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1] = data_i;
+        end
+        riscv::CSR_MHPM_COUNTER_3H,
+            riscv::CSR_MHPM_COUNTER_4H,
+            riscv::CSR_MHPM_COUNTER_5H,
+            riscv::CSR_MHPM_COUNTER_6H,
+            riscv::CSR_MHPM_COUNTER_7H,
+            riscv::CSR_MHPM_COUNTER_8H :begin
+          if (riscv::XLEN == 32)
+            generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32] = data_i;
+          else update_access_exception = 1'b1;
+        end
+        riscv::CSR_MHPM_EVENT_3,
+            riscv::CSR_MHPM_EVENT_4,
+            riscv::CSR_MHPM_EVENT_5,
+            riscv::CSR_MHPM_EVENT_6,
+            riscv::CSR_MHPM_EVENT_7,
+            riscv::CSR_MHPM_EVENT_8   :
+        mhpmevent_d[addr_i-riscv::CSR_MHPM_EVENT_3+1] = data_i;
+        default: update_access_exception = 1'b1;
+      endcase
+    end
+  end
+
+  //Registers: counters and event selectors reset to zero (asynchronous, active-low reset)
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      generic_counter_q <= '{default: 0};
+      mhpmevent_q       <= '{default: 0};
+    end else begin
+      generic_counter_q <= generic_counter_d;
+      mhpmevent_q       <= mhpmevent_d;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/pmp/src/pmp.sv b/test/type_param/core/pmp/src/pmp.sv
new file mode 100644
index 0000000..a3adbb9
--- /dev/null
+++ b/test/type_param/core/pmp/src/pmp.sv
@@ -0,0 +1,94 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Moritz Schneider, ETH Zurich
+// Date: 2.10.2019
+// Description: purely combinatorial PMP unit (with extraction for more complex configs such as NAPOT)
+
+module pmp #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg    = config_pkg::cva6_cfg_empty,
+    parameter int unsigned           PLEN       = 34,                          // rv64: 56
+    parameter int unsigned           PMP_LEN    = 32,                          // rv64: 54
+    parameter int unsigned           NR_ENTRIES = 4
+) (
+    // Access being checked
+    input logic [PLEN-1:0] addr_i,
+    input riscv::pmp_access_t access_type_i,
+    input riscv::priv_lvl_t priv_lvl_i,
+    // PMP registers: 16 slots are wired in, only the first NR_ENTRIES are evaluated
+    input logic [15:0][PMP_LEN-1:0] conf_addr_i,
+    input riscv::pmpcfg_t [15:0] conf_i,
+    // Verdict
+    output logic allow_o
+);
+  // Combinational PMP check. Entries are scanned from index 0 upwards; the first one that
+  // applies to the current privilege level and matches addr_i decides allow_o. When none
+  // does, only M-mode is let through. With NR_ENTRIES == 0 every access is granted.
+  if (NR_ENTRIES > 0) begin : gen_pmp
+    logic [NR_ENTRIES-1:0] match;
+
+    // one address matcher per entry; entry 0 gets an all-zero "previous" address
+    for (genvar e = 0; e < NR_ENTRIES; e++) begin
+      logic [PMP_LEN-1:0] conf_addr_prev;
+
+      assign conf_addr_prev = (e != 0) ? conf_addr_i[e-1] : '0;
+
+      pmp_entry #(
+          .PMP_LEN(PMP_LEN),
+          .PLEN   (PLEN),
+          .CVA6Cfg(CVA6Cfg)
+      ) i_pmp_entry (
+          .conf_addr_mode_i(conf_i[e].addr_mode),
+          .conf_addr_prev_i(conf_addr_prev),
+          .conf_addr_i     (conf_addr_i[e]),
+          .addr_i          (addr_i),
+          .match_o         (match[e])
+      );
+    end
+
+    always_comb begin
+      logic decided;  // raised by the first entry that applies and matches
+
+      decided = 1'b0;
+      allow_o = 1'b0;
+      for (int k = 0; k < NR_ENTRIES; k++) begin
+        // an entry binds S and U mode always, M mode only while it is locked
+        if (!decided && (priv_lvl_i != riscv::PRIV_LVL_M || conf_i[k].locked) && match[k]) begin
+          // every requested access bit has to be granted by the entry
+          if ((access_type_i & conf_i[k].access_type) != access_type_i) allow_o = 1'b0;
+          else allow_o = 1'b1;
+          decided = 1'b1;
+        end
+      end
+      if (!decided) begin
+        // no entry took the decision: M-mode passes, every other mode is refused
+        if (priv_lvl_i == riscv::PRIV_LVL_M) allow_o = 1'b1;
+        else allow_o = 1'b0;
+      end
+    end
+  end else assign allow_o = 1'b1;
+
+  // synthesis translate_off
+  // Simulation-only sanity check: in M-mode, if no evaluated entry is locked with an
+  // address mode other than OFF, the access must have been allowed.
+  always_comb begin
+    logic no_locked;
+    no_locked = 1'b0;
+    if (priv_lvl_i == riscv::PRIV_LVL_M) begin
+      no_locked = 1'b1;
+      for (int n = 0; n < NR_ENTRIES; n++) begin
+        if (conf_i[n].locked && conf_i[n].addr_mode != riscv::OFF) no_locked = 1'b0;
+      end
+      if (no_locked == 1'b1) assert (allow_o == 1'b1);
+    end
+  end
+  // synthesis translate_on
+
+endmodule
diff --git a/test/type_param/core/pmp/src/pmp_entry.sv b/test/type_param/core/pmp/src/pmp_entry.sv
new file mode 100644
index 0000000..667ae18
--- /dev/null
+++ b/test/type_param/core/pmp/src/pmp_entry.sv
@@ -0,0 +1,125 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Moritz Schneider, ETH Zurich
+// Date: 2.10.2019
+// Description: single PMP entry
+
+module pmp_entry #(  // address matcher for a single PMP entry (OFF, TOR, NA4 and NAPOT modes)
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned PLEN = 56,
+    parameter int unsigned PMP_LEN = 54
+) (
+    // Input
+    input logic [PLEN-1:0] addr_i,
+
+    // Configuration
+    input logic [PMP_LEN-1:0] conf_addr_i,
+    input logic [PMP_LEN-1:0] conf_addr_prev_i,  // lower bound, only read in TOR mode
+    input riscv::pmp_addr_mode_t conf_addr_mode_i,
+
+    // Output
+    output logic match_o
+);
+  logic [PLEN-1:0] conf_addr_n;  // inverted conf_addr_i with two 1 bits on top
+  logic [$clog2(PLEN)-1:0] trail_ones;  // count delivered by i_lzc
+  logic [PLEN-1:0] base;
+  logic [PLEN-1:0] mask;
+  int unsigned size;  // shift amount of the mask (NA4: 2, NAPOT: trail_ones + 3)
+  assign conf_addr_n = {2'b11, ~conf_addr_i};  // NOTE(review): assumes PLEN == PMP_LEN + 2 — confirm
+  lzc #(
+      .WIDTH(PLEN),
+      .MODE (1'b0)  // NOTE(review): presumably counts trailing zeros of in_i — confirm against lzc
+  ) i_lzc (
+      .in_i   (conf_addr_n),
+      .cnt_o  (trail_ones),
+      .empty_o()
+  );
+
+  always_comb begin
+    case (conf_addr_mode_i)
+      riscv::TOR: begin
+        base = '0;
+        mask = '0;
+        size = '0;
+        // check that the requested address is in between the two
+        // configuration addresses
+        if (addr_i >= ({2'b0, conf_addr_prev_i} << 2) && addr_i < ({2'b0, conf_addr_i} << 2)) begin
+          match_o = 1'b1;
+        end else match_o = 1'b0;
+
+        // synthesis translate_off
+        if (match_o == 0) begin
+          assert (addr_i >= ({2'b0, conf_addr_i} << 2) || addr_i < ({2'b0, conf_addr_prev_i} << 2));
+        end else begin
+          assert (addr_i < ({2'b0, conf_addr_i} << 2) && addr_i >= ({2'b0, conf_addr_prev_i} << 2));
+        end
+        // synthesis translate_on
+
+      end
+      riscv::NA4, riscv::NAPOT: begin
+        // both modes compare addr_i against base under a mask that clears the low `size` bits
+        if (conf_addr_mode_i == riscv::NA4) size = 2;
+        else begin
+          // use the extracted trailing ones
+          size = {{(32 - $clog2(PLEN)) {1'b0}}, trail_ones} + 3;
+        end
+
+        mask = '1 << size;
+        base = ({2'b0, conf_addr_i} << 2) & mask;
+        match_o = (addr_i & mask) == base ? 1'b1 : 1'b0;
+
+        // synthesis translate_off
+        // size extract checks
+        assert (size >= 2);
+        if (conf_addr_mode_i == riscv::NAPOT) begin
+          assert (size > 2);
+          if (size < PMP_LEN) assert (conf_addr_i[size-3] == 0);
+          for (int i = 0; i < PMP_LEN; i++) begin
+            if (size > 3 && i <= size - 4) begin
+              assert (conf_addr_i[i] == 1);  // check that all the rest are ones
+            end
+          end
+        end
+
+        if (size < PLEN - 1) begin
+          if (base + 2 ** size > base) begin  // check for overflow
+            if (match_o == 0) begin
+              assert (addr_i >= base + 2 ** size || addr_i < base);
+            end else begin
+              assert (addr_i < base + 2 ** size && addr_i >= base);
+            end
+          end else begin
+            if (match_o == 0) begin
+              assert (addr_i - 2 ** size >= base || addr_i < base);
+            end else begin
+              assert (addr_i - 2 ** size < base && addr_i >= base);
+            end
+          end
+        end
+        // synthesis translate_on
+
+      end
+      riscv::OFF: begin  // a disabled entry never matches
+        match_o = 1'b0;
+        base = '0;
+        mask = '0;
+        size = '0;
+      end
+      default: begin  // any other mode value is treated like OFF
+        match_o = 0;
+        base = '0;
+        mask = '0;
+        size = '0;
+      end
+    endcase
+  end
+
+endmodule
diff --git a/test/type_param/core/scoreboard.sv b/test/type_param/core/scoreboard.sv
new file mode 100644
index 0000000..5ea29cd
--- /dev/null
+++ b/test/type_param/core/scoreboard.sv
@@ -0,0 +1,452 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.04.2017
+// Description: Scoreboard - keeps track of all decoded, issued and committed instructions
+
+module scoreboard #(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter type rs3_len_t = logic
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    output logic sb_full_o,
+    input logic flush_unissued_instr_i,  // flush only un-issued instructions
+    input logic flush_i,  // flush whole scoreboard
+    input logic unresolved_branch_i,  // we have an unresolved branch
+    // list of clobbered registers to issue stage
+    output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o,
+    output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o,
+
+    // regfile like interface to operand read stage
+    input  logic         [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i,
+    output riscv::xlen_t                                 rs1_o,
+    output logic                                         rs1_valid_o,
+
+    input  logic         [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i,
+    output riscv::xlen_t                                 rs2_o,
+    output logic                                         rs2_valid_o,
+
+    input  logic     [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i,
+    output rs3_len_t                                 rs3_o,
+    output logic                                     rs3_valid_o,
+
+    // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer
+    output ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
+    input  logic                          [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
+
+    // instruction to put on top of scoreboard e.g.: top pointer
+    // we can always put this instruction to the top unless we signal with asserted full_o
+    input  ariane_pkg::scoreboard_entry_t decoded_instr_i,
+    input  logic                          decoded_instr_valid_i,
+    output logic                          decoded_instr_ack_o,
+
+    // instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer
+    output ariane_pkg::scoreboard_entry_t issue_instr_o,
+    output logic                          issue_instr_valid_o,
+    input  logic                          issue_ack_i,
+
+    // write-back port
+    input ariane_pkg::bp_resolve_t resolved_branch_i,
+    input logic [CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0]  trans_id_i,  // transaction ID at which to write the result back
+    input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i,  // write data in
+    input ariane_pkg::exception_t [CVA6Cfg.NrWbPorts-1:0]               ex_i,        // exception from a functional unit (e.g.: ld/st exception)
+    input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i,  // data in is valid
+    input logic x_we_i,  // cvxif we for writeback
+
+    // RVFI
+    output logic [ariane_pkg::TRANS_ID_BITS-1:0] rvfi_issue_pointer_o,
+    output logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] rvfi_commit_pointer_o
+);
+
+  // this is the FIFO struct of the issue queue
+  typedef struct packed {
+    logic issued;  // this bit indicates whether we issued this instruction e.g.: if it is valid
+    logic is_rd_fpr_flag;  // redundant meta info, added for speed
+    ariane_pkg::scoreboard_entry_t sbe;  // this is the score board entry we will send to ex
+  } sb_mem_t;
+  sb_mem_t [ariane_pkg::NR_SB_ENTRIES-1:0] mem_q, mem_n;
+
+  logic issue_full, issue_en;
+  logic [ariane_pkg::TRANS_ID_BITS:0] issue_cnt_n, issue_cnt_q;
+  logic [ariane_pkg::TRANS_ID_BITS-1:0] issue_pointer_n, issue_pointer_q;
+  logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0]
+      commit_pointer_n, commit_pointer_q;
+  logic [$clog2(CVA6Cfg.NrCommitPorts):0] num_commit;
+
+  // the issue queue is full don't issue any new instructions
+  // works since aligned to power of 2
+  assign issue_full = (issue_cnt_q[ariane_pkg::TRANS_ID_BITS] == 1'b1);
+
+  assign sb_full_o  = issue_full;
+
+  ariane_pkg::scoreboard_entry_t decoded_instr;  // plain copy of decoded_instr_i, stored by issue_fifo
+  always_comb begin
+    decoded_instr = decoded_instr_i;
+  end
+
+  // each commit port presents the entry its commit pointer currently selects
+  always_comb begin : commit_ports
+    for (int unsigned cp = 0; cp < CVA6Cfg.NrCommitPorts; cp++) begin
+      commit_instr_o[cp] = mem_q[commit_pointer_q[cp]].sbe;
+      commit_instr_o[cp].trans_id = commit_pointer_q[cp];  // ID is overwritten with the entry index
+    end
+  end
+
+  // an instruction is ready for issue if there is room in the issue FIFO and the decoder says it is valid
+  always_comb begin
+    issue_instr_o          = decoded_instr_i;
+    // make sure we assign the correct trans ID
+    issue_instr_o.trans_id = issue_pointer_q;
+    // we are ready if we are not full and don't have any unresolved branches, but it can be
+    // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i == 1)
+    issue_instr_valid_o    = decoded_instr_valid_i & ~unresolved_branch_i & ~issue_full;
+    decoded_instr_ack_o    = issue_ack_i & ~issue_full;  // the decoder is acknowledged only while not full
+  end
+
+  // Next-state logic of the scoreboard memory. The sections below run in order on mem_n, so a
+  // later section (commit, then flush) overrides what an earlier one wrote in the same cycle.
+  always_comb begin : issue_fifo
+    // default assignment
+    mem_n    = mem_q;
+    issue_en = 1'b0;
+
+    // if we got an acknowledge from the issue stage, put this scoreboard entry in the queue
+    if (decoded_instr_valid_i && decoded_instr_ack_o && !flush_unissued_instr_i) begin
+      // the decoded instruction we put in there is valid (1st bit)
+      // increase the issue counter and advance issue pointer
+      issue_en = 1'b1;
+      mem_n[issue_pointer_q] = {
+        1'b1,  // valid bit
+        (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
+          decoded_instr_i.op
+        )),  // whether rd goes to the fpr
+        decoded_instr  // decoded instruction record
+      };
+    end
+
+    // ------------
+    // FU NONE
+    // ------------
+    for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin
+      // The FU is NONE -> this instruction is valid immediately
+      if (mem_q[i].sbe.fu == ariane_pkg::NONE && mem_q[i].issued) mem_n[i].sbe.valid = 1'b1;
+    end
+
+    // ------------
+    // Write Back
+    // ------------
+    for (int unsigned i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
+      // check if this instruction was issued (e.g.: it could happen after a flush that there is still
+      // something in the pipeline e.g. an incomplete memory operation)
+      if (wt_valid_i[i] && mem_q[trans_id_i[i]].issued) begin
+        mem_n[trans_id_i[i]].sbe.valid  = 1'b1;
+        mem_n[trans_id_i[i]].sbe.result = wbdata_i[i];
+        // save the target address of a branch (needed for debug in commit stage)
+        if (CVA6Cfg.DebugEn) begin
+          mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address;
+        end
+        if (mem_n[trans_id_i[i]].sbe.fu == ariane_pkg::CVXIF && ~x_we_i) begin
+          mem_n[trans_id_i[i]].sbe.rd = 5'b0;  // CVXIF result without write enable: rd is forced to 0
+        end
+        // write the exception back if it is valid
+        if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i];
+        // write the fflags back from the FPU (exception valid is never set), leave tval intact
+        else if(CVA6Cfg.FpPresent && (mem_q[trans_id_i[i]].sbe.fu == ariane_pkg::FPU || mem_q[trans_id_i[i]].sbe.fu == ariane_pkg::FPU_VEC)) begin
+          mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause;
+        end
+      end
+    end
+
+    // ------------
+    // Commit Port
+    // ------------
+    // we've got an acknowledge from commit
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      if (commit_ack_i[i]) begin
+        // this instruction is no longer in issue e.g.: it is considered finished
+        mem_n[commit_pointer_q[i]].issued    = 1'b0;
+        mem_n[commit_pointer_q[i]].sbe.valid = 1'b0;
+      end
+    end
+
+    // ------
+    // Flush
+    // ------
+    if (flush_i) begin
+      for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin
+        // set all valid flags for all entries to zero
+        mem_n[i].issued       = 1'b0;
+        mem_n[i].sbe.valid    = 1'b0;
+        mem_n[i].sbe.ex.valid = 1'b0;
+      end
+    end
+  end
+
+  // FIFO counter updates: num_commit is the number of commit acknowledges in this cycle
+  if (CVA6Cfg.NrCommitPorts == 2) begin : gen_commit_ports
+    assign num_commit = commit_ack_i[1] + commit_ack_i[0];
+  end else begin : gen_one_commit_port
+    assign num_commit = commit_ack_i[0];
+  end
+  // occupancy = old count - commits + issue; both operands are zero-padded to TRANS_ID_BITS+1 bits
+  assign issue_cnt_n = (flush_i) ? '0 : issue_cnt_q - {{ariane_pkg::TRANS_ID_BITS - $clog2(
+      CVA6Cfg.NrCommitPorts
+  ) {1'b0}}, num_commit} + {{ariane_pkg::TRANS_ID_BITS{1'b0}}, issue_en};
+  assign commit_pointer_n[0] = (flush_i) ? '0 : commit_pointer_q[0] + num_commit;
+  assign issue_pointer_n = (flush_i) ? '0 : issue_pointer_q + issue_en;
+
+  // precompute offsets for commit slots: port k follows port 0 at distance k (all zero on flush)
+  for (genvar k = 1; k < CVA6Cfg.NrCommitPorts; k++) begin : gen_cnt_incr
+    assign commit_pointer_n[k] = (flush_i) ? '0 : commit_pointer_n[0] + unsigned'(k);
+  end
+
+  // -------------------
+  // RD clobber process
+  // -------------------
+  // rd_clobber output: output currently clobbered destination registers
+  logic            [2**ariane_pkg::REG_ADDR_SIZE-1:0][ariane_pkg::NR_SB_ENTRIES:0] gpr_clobber_vld;
+  logic            [2**ariane_pkg::REG_ADDR_SIZE-1:0][ariane_pkg::NR_SB_ENTRIES:0] fpr_clobber_vld;
+  ariane_pkg::fu_t [     ariane_pkg::NR_SB_ENTRIES:0]                              clobber_fu;
+
+  always_comb begin : clobber_assign
+    gpr_clobber_vld = '0;
+    fpr_clobber_vld = '0;
+
+    // default (highest entry has lowest prio in arbiter tree below)
+    clobber_fu[ariane_pkg::NR_SB_ENTRIES] = ariane_pkg::NONE;
+    for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin
+      gpr_clobber_vld[i][ariane_pkg::NR_SB_ENTRIES] = 1'b1;  // the default slot always requests
+      fpr_clobber_vld[i][ariane_pkg::NR_SB_ENTRIES] = 1'b1;
+    end
+
+    // check for all valid entries and set the clobber accordingly
+    for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin
+      gpr_clobber_vld[mem_q[i].sbe.rd][i] = mem_q[i].issued & ~mem_q[i].is_rd_fpr_flag;
+      fpr_clobber_vld[mem_q[i].sbe.rd][i] = mem_q[i].issued & mem_q[i].is_rd_fpr_flag;
+      clobber_fu[i]                       = mem_q[i].sbe.fu;
+    end
+
+    // GPR[0] is always free (this clears every request bit for register 0, default slot included)
+    gpr_clobber_vld[0] = '0;
+  end
+
+  for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
+    // get fu that is going to clobber this register (there should be only one)
+    rr_arb_tree #(
+        .NumIn(ariane_pkg::NR_SB_ENTRIES + 1),
+        .DataType(ariane_pkg::fu_t),
+        .ExtPrio(1'b1),
+        .AxiVldRdy(1'b1)
+    ) i_sel_gpr_clobbers (
+        .clk_i  (clk_i),
+        .rst_ni (rst_ni),
+        .flush_i(1'b0),
+        .rr_i   ('0),
+        .req_i  (gpr_clobber_vld[k]),
+        .gnt_o  (),
+        .data_i (clobber_fu),
+        .gnt_i  (1'b1),
+        .req_o  (),
+        .data_o (rd_clobber_gpr_o[k]),
+        .idx_o  ()
+    );
+    if (CVA6Cfg.FpPresent) begin
+      rr_arb_tree #(
+          .NumIn(ariane_pkg::NR_SB_ENTRIES + 1),
+          .DataType(ariane_pkg::fu_t),
+          .ExtPrio(1'b1),
+          .AxiVldRdy(1'b1)
+      ) i_sel_fpr_clobbers (
+          .clk_i  (clk_i),
+          .rst_ni (rst_ni),
+          .flush_i(1'b0),
+          .rr_i   ('0),
+          .req_i  (fpr_clobber_vld[k]),
+          .gnt_o  (),
+          .data_i (clobber_fu),
+          .gnt_i  (1'b1),
+          .req_o  (),
+          .data_o (rd_clobber_fpr_o[k]),
+          .idx_o  ()
+      );
+    end
+  end
+
+  // ----------------------------------
+  // Read Operands (a.k.a forwarding)
+  // ----------------------------------
+  // read operand interface: same logic as register file
+  logic [ariane_pkg::NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0] rs1_fwd_req, rs2_fwd_req, rs3_fwd_req;
+  logic [ariane_pkg::NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] rs_data;
+  logic rs1_valid, rs2_valid, rs3_valid;
+
+  // WB ports have higher prio than entries
+  for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb
+    assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
+        issue_instr_o.op
+    )));
+    assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
+        issue_instr_o.op
+    )));
+    assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
+        issue_instr_o.op
+    )));
+    assign rs_data[k] = wbdata_i[k];
+  end
+  for (genvar k = 0; unsigned'(k) < ariane_pkg::NR_SB_ENTRIES; k++) begin : gen_rs_entries
+    assign rs1_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
+        issue_instr_o.op
+    )));
+    assign rs2_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
+        issue_instr_o.op
+    )));
+    assign rs3_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
+        issue_instr_o.op
+    )));
+    assign rs_data[k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result;
+  end
+
+  // check whether we are accessing GPR[0]
+  assign rs1_valid_o = rs1_valid & ((|rs1_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
+      issue_instr_o.op
+  )));
+  assign rs2_valid_o = rs2_valid & ((|rs2_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
+      issue_instr_o.op
+  )));
+  assign rs3_valid_o = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid & ((|rs3_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
+      issue_instr_o.op
+  ))) : rs3_valid;
+
+  // use fixed prio here
+  // this implicitly gives higher prio to WB ports
+  rr_arb_tree #(
+      .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
+      .DataWidth(riscv::XLEN),
+      .ExtPrio(1'b1),
+      .AxiVldRdy(1'b1)
+  ) i_sel_rs1 (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(1'b0),
+      .rr_i   ('0),
+      .req_i  (rs1_fwd_req),
+      .gnt_o  (),
+      .data_i (rs_data),
+      .gnt_i  (1'b1),
+      .req_o  (rs1_valid),
+      .data_o (rs1_o),
+      .idx_o  ()
+  );
+
+  rr_arb_tree #(
+      .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
+      .DataWidth(riscv::XLEN),
+      .ExtPrio(1'b1),
+      .AxiVldRdy(1'b1)
+  ) i_sel_rs2 (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(1'b0),
+      .rr_i   ('0),
+      .req_i  (rs2_fwd_req),
+      .gnt_o  (),
+      .data_i (rs_data),
+      .gnt_i  (1'b1),
+      .req_o  (rs2_valid),
+      .data_o (rs2_o),
+      .idx_o  ()
+  );
+
+  riscv::xlen_t rs3;
+
+  rr_arb_tree #(
+      .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
+      .DataWidth(riscv::XLEN),
+      .ExtPrio(1'b1),
+      .AxiVldRdy(1'b1)
+  ) i_sel_rs3 (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(1'b0),
+      .rr_i   ('0),
+      .req_i  (rs3_fwd_req),
+      .gnt_o  (),
+      .data_i (rs_data),
+      .gnt_i  (1'b1),
+      .req_o  (rs3_valid),
+      .data_o (rs3),
+      .idx_o  ()
+  );
+
+  if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port
+    assign rs3_o = rs3[riscv::XLEN-1:0];
+  end else begin : gen_fp_three_port
+    assign rs3_o = rs3[CVA6Cfg.FLen-1:0];
+  end
+
+
+  // sequential process
+  always_ff @(posedge clk_i or negedge rst_ni) begin : regs
+    if (!rst_ni) begin
+      mem_q            <= '{default: sb_mem_t'(0)};
+      issue_cnt_q      <= '0;
+      commit_pointer_q <= '0;
+      issue_pointer_q  <= '0;
+    end else begin
+      issue_cnt_q      <= issue_cnt_n;
+      issue_pointer_q  <= issue_pointer_n;
+      mem_q            <= mem_n;
+      commit_pointer_q <= commit_pointer_n;
+    end
+  end
+
+  //RVFI
+  assign rvfi_issue_pointer_o  = issue_pointer_q;
+  assign rvfi_commit_pointer_o = commit_pointer_q;
+
+  //pragma translate_off
+  initial begin
+    assert (ariane_pkg::NR_SB_ENTRIES == 2 ** ariane_pkg::TRANS_ID_BITS)
+    else $fatal(1, "Scoreboard size needs to be a power of two.");
+  end
+
+  // assert that zero is never set
+  assert property (@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE))
+  else $fatal(1, "RD 0 should not bet set");
+  // assert that we never acknowledge a commit if the instruction is not valid
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[0] |-> commit_instr_o[0].valid)
+  else $fatal(1, "Commit acknowledged but instruction is not valid");
+  if (CVA6Cfg.NrCommitPorts == 2) begin : gen_two_commit_ports
+    assert property (
+        @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[1] |-> commit_instr_o[1].valid)
+    else $fatal(1, "Commit acknowledged but instruction is not valid");
+  end
+  // assert that we never give an issue ack signal if the instruction is not valid
+  assert property (@(posedge clk_i) disable iff (!rst_ni) issue_ack_i |-> issue_instr_valid_o)
+  else $fatal(1, "Issue acknowledged but instruction is not valid");
+
+  // there should never be more than one instruction writing the same destination register (except x0)
+  // check that no functional unit is retiring with the same transaction id
+  for (genvar i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
+    for (genvar j = 0; j < CVA6Cfg.NrWbPorts; j++) begin
+      assert property (
+        @(posedge clk_i) disable iff (!rst_ni) wt_valid_i[i] && wt_valid_i[j] && (i != j) |-> (trans_id_i[i] != trans_id_i[j]))
+      else
+        $fatal(
+            1,
+            "Two or more functional units are retiring instructions with the same transaction id!"
+        );
+    end
+  end
+  //pragma translate_on
+endmodule
diff --git a/test/type_param/core/serdiv.sv b/test/type_param/core/serdiv.sv
new file mode 100644
index 0000000..244ee97
--- /dev/null
+++ b/test/type_param/core/serdiv.sv
@@ -0,0 +1,269 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+//         Andreas Traber    <traber@iis.ee.ethz.ch>, ETH Zurich
+//
+// Date: 18.10.2018
+// Description: simple 64bit serial divider
+
+
+module serdiv
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,  // core configuration; not referenced inside this module body
+    parameter WIDTH = 64,  // operand and result width in bits
+    parameter STABLE_HANDSHAKE = 0             // Guarantee a stable in_rdy_o during the input handshake. Keep it at 0 in CVA6
+) (
+    input logic clk_i,  // clock, registers update on the rising edge
+    input logic rst_ni,  // asynchronous reset, active low
+    // input IF
+    input logic [TRANS_ID_BITS-1:0] id_i,  // transaction id, captured together with the operands
+    input logic [WIDTH-1:0] op_a_i,  // operand A (dividend)
+    input logic [WIDTH-1:0] op_b_i,  // operand B (divisor)
+    input logic [1:0] opcode_i,  // 0: udiv, 2: urem, 1: div, 3: rem (bit 0: signed, bit 1: remainder)
+    // handshake
+    input  logic                      in_vld_i, // there is a cycle delay from in_rdy_o->in_vld_i, see issue_read_operands.sv stage
+    output logic in_rdy_o,  // only ever asserted in the IDLE state
+    input logic flush_i,  // abort: forces the FSM back to IDLE and masks both handshakes
+    // output IF
+    output logic out_vld_o,  // res_o / id_o hold a valid result
+    input logic out_rdy_i,  // consumer accepts the result
+    output logic [TRANS_ID_BITS-1:0] id_o,  // transaction id captured at load time
+    output logic [WIDTH-1:0] res_o  // quotient or remainder, as selected by opcode_i[1] at load time
+);
+
+  /////////////////////////////////////
+  // signal declarations
+  /////////////////////////////////////
+
+  enum logic [1:0] {
+    IDLE,
+    DIVIDE,
+    FINISH
+  }
+      state_d, state_q;
+
+  logic [WIDTH-1:0] res_q, res_d;
+  logic [WIDTH-1:0] op_a_q, op_a_d;
+  logic [WIDTH-1:0] op_b_q, op_b_d;
+  logic op_a_sign, op_b_sign;
+  logic op_b_zero, op_b_zero_q, op_b_zero_d;
+  logic op_b_neg_one, op_b_neg_one_q, op_b_neg_one_d;
+
+  logic [TRANS_ID_BITS-1:0] id_q, id_d;
+
+  logic rem_sel_d, rem_sel_q;
+  logic comp_inv_d, comp_inv_q;
+  logic res_inv_d, res_inv_q;
+
+  logic [WIDTH-1:0] add_mux;
+  logic [WIDTH-1:0] add_out;
+  logic [WIDTH-1:0] add_tmp;
+  logic [WIDTH-1:0] b_mux;
+  logic [WIDTH-1:0] out_mux;
+
+  logic [$clog2(WIDTH)-1:0] cnt_q, cnt_d;  // remaining iteration count
+  logic cnt_zero;
+
+  logic [WIDTH-1:0] lzc_a_input, lzc_b_input, op_b;
+  logic [$clog2(WIDTH)-1:0] lzc_a_result, lzc_b_result;
+  logic [$clog2(WIDTH+1)-1:0] shift_a;
+  logic [  $clog2(WIDTH+1):0] div_shift;  // one extra MSB: set when the subtraction below goes negative
+
+  logic a_reg_en, b_reg_en, res_reg_en, ab_comp, pm_sel, load_en;
+  logic lzc_a_no_one, lzc_b_no_one;
+  logic div_res_zero_d, div_res_zero_q;
+
+
+  /////////////////////////////////////
+  // align the input operands
+  // for faster division
+  /////////////////////////////////////
+
+  assign op_a_sign    = op_a_i[$high(op_a_i)];
+  assign op_b_sign    = op_b_i[$high(op_b_i)];
+  assign op_b_zero    = lzc_b_no_one & ~op_b_sign;  // B == 0, assuming lzc.empty_o flags an all-zero input -- TODO confirm
+  assign op_b_neg_one = lzc_b_no_one & op_b_sign;  // signed B == -1: the inverted B fed to the lzc has no bit set
+
+  assign lzc_a_input  = (opcode_i[0] & op_a_sign) ? {~op_a_i[$high(op_a_i)-1:0], 1'b1} : op_a_i;
+  assign lzc_b_input  = (opcode_i[0] & op_b_sign) ? ~op_b_i : op_b_i;
+
+  lzc #(
+      .MODE (1),     // count leading zeros
+      .WIDTH(WIDTH)
+  ) i_lzc_a (
+      .in_i   (lzc_a_input),
+      .cnt_o  (lzc_a_result),
+      .empty_o(lzc_a_no_one)
+  );
+
+  lzc #(
+      .MODE (1),     // count leading zeros
+      .WIDTH(WIDTH)
+  ) i_lzc_b (
+      .in_i   (lzc_b_input),
+      .cnt_o  (lzc_b_result),
+      .empty_o(lzc_b_no_one)
+  );
+
+  assign shift_a = (lzc_a_no_one) ? WIDTH : {1'b0, lzc_a_result};  // treat an lzc input without any set bit as WIDTH leading zeros
+  assign div_shift = {1'b0, lzc_b_result} - shift_a;
+
+  assign op_b = op_b_i <<< $unsigned(div_shift);
+
+  // the division is zero if |opB| > |opA| and can be terminated
+  assign div_res_zero_d = (load_en) ? div_shift[$high(div_shift)] : div_res_zero_q;
+
+  /////////////////////////////////////
+  // Datapath
+  /////////////////////////////////////
+
+  assign pm_sel = load_en & ~(opcode_i[0] & (op_a_sign ^ op_b_sign));
+
+  // muxes
+  assign add_mux = (load_en) ? op_a_i : op_b_q;
+
+  // attention: logical shift by one in case of negative operand B!
+  assign b_mux = (load_en) ? op_b : {comp_inv_q, (op_b_q[$high(op_b_q):1])};
+
+  // in case of bad timing, we could output from regs -> needs a cycle more in the FSM
+  assign out_mux     = (rem_sel_q) ? (op_b_neg_one_q ? '0 : op_a_q) : (op_b_zero_q ? '1 : (op_b_neg_one_q ? op_a_q : res_q));
+
+  // invert if necessary
+  assign res_o = (res_inv_q) ? -$signed(out_mux) : out_mux;
+
+  // main comparator
+  assign ab_comp     = ((op_a_q == op_b_q) | ((op_a_q > op_b_q) ^ comp_inv_q)) & ((|op_a_q) | op_b_zero_q);
+
+  // main adder
+  assign add_tmp = (load_en) ? 0 : op_a_q;
+  assign add_out = (pm_sel) ? add_tmp + add_mux : add_tmp - $signed(add_mux);
+
+  /////////////////////////////////////
+  // FSM, counter
+  /////////////////////////////////////
+
+  assign cnt_zero = (cnt_q == 0);
+  assign cnt_d = (load_en) ? div_shift[$clog2(WIDTH)-1:0] : (~cnt_zero) ? cnt_q - 1 : cnt_q;  // load the iteration count, then count down and hold at zero
+
+  always_comb begin : p_fsm
+    // default
+    state_d    = state_q;
+    in_rdy_o   = 1'b0;
+    out_vld_o  = 1'b0;
+    load_en    = 1'b0;
+    a_reg_en   = 1'b0;
+    b_reg_en   = 1'b0;
+    res_reg_en = 1'b0;
+
+    unique case (state_q)
+      IDLE: begin
+        in_rdy_o = 1'b1;
+
+        if (in_vld_i) begin
+          // CVA6: there is a cycle delay until the valid signal is asserted by the id stage
+          // Ara:  we need a stable handshake
+          in_rdy_o = (STABLE_HANDSHAKE) ? 1'b1 : 1'b0;
+          a_reg_en = 1'b1;
+          b_reg_en = 1'b1;
+          load_en  = 1'b1;
+          state_d  = DIVIDE;
+        end
+      end
+      DIVIDE: begin
+        if (~(div_res_zero_q | op_b_zero_q | op_b_neg_one_q)) begin
+          a_reg_en   = ab_comp;
+          b_reg_en   = 1'b1;
+          res_reg_en = 1'b1;
+        end
+        // can end the division immediately if the result is known
+        if (div_res_zero_q | op_b_zero_q | op_b_neg_one_q) begin
+          out_vld_o = 1'b1;
+          state_d   = FINISH;
+          if (out_rdy_i) begin
+            // in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage
+            state_d = IDLE;
+          end
+        end else if (cnt_zero) begin
+          state_d = FINISH;
+        end
+      end
+      FINISH: begin
+        out_vld_o = 1'b1;
+
+        if (out_rdy_i) begin
+          // in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage
+          state_d = IDLE;
+        end
+      end
+      default: state_d = IDLE;
+    endcase
+
+    if (flush_i) begin  // flush overrides the case statement above; res_reg_en is left as set there
+      in_rdy_o  = 1'b0;
+      out_vld_o = 1'b0;
+      a_reg_en  = 1'b0;
+      b_reg_en  = 1'b0;
+      load_en   = 1'b0;
+      state_d   = IDLE;
+    end
+  end
+
+  /////////////////////////////////////
+  // regs, flags
+  /////////////////////////////////////
+
+  // get flags: each one is captured when load_en is set and held otherwise
+  assign rem_sel_d = (load_en) ? opcode_i[1] : rem_sel_q;
+  assign comp_inv_d = (load_en) ? opcode_i[0] & op_b_sign : comp_inv_q;
+  assign op_b_zero_d = (load_en) ? op_b_zero : op_b_zero_q;
+  assign op_b_neg_one_d = (load_en) ? op_b_neg_one : op_b_neg_one_q;
+  assign res_inv_d       = (load_en) ? (~op_b_zero | opcode_i[1]) & opcode_i[0] & (op_a_sign ^ op_b_sign ^ op_b_neg_one) : res_inv_q;
+
+  // transaction id
+  assign id_d = (load_en) ? id_i : id_q;
+  assign id_o = id_q;
+
+  assign op_a_d = (a_reg_en) ? add_out : op_a_q;
+  assign op_b_d = (b_reg_en) ? b_mux : op_b_q;
+  assign res_d = (load_en) ? '0 : (res_reg_en) ? {res_q[$high(res_q)-1:0], ab_comp} : res_q;  // cleared on load, then one comparator bit is shifted in per enabled cycle
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (~rst_ni) begin
+      state_q        <= IDLE;
+      op_a_q         <= '0;
+      op_b_q         <= '0;
+      res_q          <= '0;
+      cnt_q          <= '0;
+      id_q           <= '0;
+      rem_sel_q      <= 1'b0;
+      comp_inv_q     <= 1'b0;
+      res_inv_q      <= 1'b0;
+      op_b_zero_q    <= 1'b0;
+      op_b_neg_one_q <= 1'b0;
+      div_res_zero_q <= 1'b0;
+    end else begin
+      state_q        <= state_d;
+      op_a_q         <= op_a_d;
+      op_b_q         <= op_b_d;
+      res_q          <= res_d;
+      cnt_q          <= cnt_d;
+      id_q           <= id_d;
+      rem_sel_q      <= rem_sel_d;
+      comp_inv_q     <= comp_inv_d;
+      res_inv_q      <= res_inv_d;
+      op_b_zero_q    <= op_b_zero_d;
+      op_b_neg_one_q <= op_b_neg_one_d;
+      div_res_zero_q <= div_res_zero_d;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/store_buffer.sv b/test/type_param/core/store_buffer.sv
new file mode 100644
index 0000000..d41551d
--- /dev/null
+++ b/test/type_param/core/store_buffer.sv
@@ -0,0 +1,291 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 25.04.2017
+// Description: Store queue persists store requests and pushes them to memory
+//              if they are no longer speculative
+
+
+module store_buffer
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // if we flush we need to pause the transactions on the memory
+                          // otherwise we will run in a deadlock with the memory arbiter
+    input logic stall_st_pending_i,  // Stall issuing non-speculative request
+    output logic         no_st_pending_o, // non-speculative queue is empty (e.g.: everything is committed to the memory hierarchy)
+    output logic         store_buffer_empty_o, // there is no store pending in either the speculative queue or the non-speculative queue
+
+    input  logic [11:0]  page_offset_i,         // check for the page offset (the last 12 bit if the current load matches them)
+    output logic         page_offset_matches_o, // the above input page offset matches -> let the store buffer drain
+
+    input logic commit_i,  // commit the oldest speculative store (entry at the speculative read pointer)
+    output logic commit_ready_o,  // commit queue is ready to accept another commit request
+    output logic ready_o,  // the store queue is ready to accept a new request:
+                           // the speculative queue still has a free slot after this cycle's
+                           // push/commit/flush, or commit_i is asserted
+    input logic valid_i,  // this is a valid store
+    input  logic         valid_without_flush_i, // just tell if the address which we are currently putting is valid and do not take any further action
+
+    input  logic [riscv::PLEN-1:0]  paddr_i,         // physical address of store which needs to be placed in the queue
+    output [riscv::PLEN-1:0] rvfi_mem_paddr_o,  // RVFI: address field of commit_queue_n[commit_read_pointer_n]
+    input riscv::xlen_t data_i,  // data which is placed in the queue
+    input logic [(riscv::XLEN/8)-1:0] be_i,  // byte enable in
+    input logic [1:0] data_size_i,  // type of request we are making (e.g.: bytes to write)
+
+    // D$ interface
+    input  dcache_req_o_t req_port_i,  // only data_gnt is read in this module
+    output dcache_req_i_t req_port_o
+);
+
+  // the store queue has two parts:
+  // 1. Speculative queue
+  // 2. Commit queue which is non-speculative, e.g.: the store will definitely happen.
+  struct packed {
+    logic [riscv::PLEN-1:0] address;
+    riscv::xlen_t data;
+    logic [(riscv::XLEN/8)-1:0] be;
+    logic [1:0] data_size;
+    logic valid;  // this entry is valid, we need this for checking if the address offset matches
+  }
+      speculative_queue_n[DEPTH_SPEC-1:0],
+      speculative_queue_q[DEPTH_SPEC-1:0],
+      commit_queue_n[DEPTH_COMMIT-1:0],
+      commit_queue_q[DEPTH_COMMIT-1:0];
+
+  // keep a status count for both buffers
+  logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt_n, speculative_status_cnt_q;
+  logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt_n, commit_status_cnt_q;
+  // Speculative queue
+  logic [$clog2(DEPTH_SPEC)-1:0] speculative_read_pointer_n, speculative_read_pointer_q;
+  logic [$clog2(DEPTH_SPEC)-1:0] speculative_write_pointer_n, speculative_write_pointer_q;
+  // Commit Queue
+  logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q;
+  logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q;
+
+  assign store_buffer_empty_o = (speculative_status_cnt_q == 0) & no_st_pending_o;
+  // ----------------------------------------
+  // Speculative Queue - Core Interface
+  // ----------------------------------------
+  always_comb begin : core_if
+    automatic logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt;  // working copy so a push and a commit in the same cycle cancel out
+    speculative_status_cnt      = speculative_status_cnt_q;
+
+    // default assignments
+    speculative_status_cnt_n    = speculative_status_cnt_q;
+    speculative_read_pointer_n  = speculative_read_pointer_q;
+    speculative_write_pointer_n = speculative_write_pointer_q;
+    speculative_queue_n         = speculative_queue_q;
+    // LSU interface
+    // the input data is valid: push it (ready_o is not re-checked here, see the overflow assertion below)
+    if (valid_i) begin
+      speculative_queue_n[speculative_write_pointer_q].address = paddr_i;
+      speculative_queue_n[speculative_write_pointer_q].data = data_i;
+      speculative_queue_n[speculative_write_pointer_q].be = be_i;
+      speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i;
+      speculative_queue_n[speculative_write_pointer_q].valid = 1'b1;
+      // advance the write pointer
+      speculative_write_pointer_n = speculative_write_pointer_q + 1'b1;
+      speculative_status_cnt++;
+    end
+
+    // evict the current entry out of this queue, the commit queue will thankfully take it and commit it
+    // to the memory hierarchy
+    if (commit_i) begin
+      // invalidate
+      speculative_queue_n[speculative_read_pointer_q].valid = 1'b0;
+      // advance the read pointer
+      speculative_read_pointer_n = speculative_read_pointer_q + 1'b1;
+      speculative_status_cnt--;
+    end
+
+    speculative_status_cnt_n = speculative_status_cnt;
+
+    // when we flush evict the speculative stores
+    if (flush_i) begin
+      // reset all valid flags
+      for (int unsigned i = 0; i < DEPTH_SPEC; i++) speculative_queue_n[i].valid = 1'b0;
+
+      speculative_write_pointer_n = speculative_read_pointer_q;
+      // also reset the status count
+      speculative_status_cnt_n = 'b0;
+    end
+
+    // we are ready if the speculative queue has a free slot next cycle, or if an entry is committed out of it this cycle
+    ready_o = (speculative_status_cnt_n < (DEPTH_SPEC)) || commit_i;
+  end
+
+  // ----------------------------------------
+  // Commit Queue - Memory Interface
+  // ----------------------------------------
+
+  // we will never kill a request in the store buffer since we already know that the translation is valid
+  // e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
+  assign req_port_o.kill_req = 1'b0;
+  assign req_port_o.data_we = 1'b1;  // we will always write in the store queue
+  assign req_port_o.tag_valid = 1'b0;
+
+  // we do not require an acknowledgement for writes, thus we do not need to identify uniquely the responses
+  assign req_port_o.data_id = '0;
+  // those signals can directly be output to the memory
+  assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
+  // if we got a new request we already saved the tag from the previous cycle
+  assign req_port_o.address_tag   = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH     +
+                                                                                    ariane_pkg::DCACHE_INDEX_WIDTH-1 :
+                                                                                    ariane_pkg::DCACHE_INDEX_WIDTH];
+  assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data;
+  assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be;
+  assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size;
+
+  assign rvfi_mem_paddr_o = commit_queue_n[commit_read_pointer_n].address;  // note: taken from the next-state (_n) queue and pointer, not the registered ones
+
+  always_comb begin : store_if
+    automatic logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt;  // working copy so an eviction and a commit in the same cycle cancel out
+    commit_status_cnt      = commit_status_cnt_q;
+
+    commit_ready_o         = (commit_status_cnt_q < DEPTH_COMMIT);
+    // no store is pending if we don't have any element in the commit queue e.g.: it is empty
+    no_st_pending_o        = (commit_status_cnt_q == 0);
+    // default assignments
+    commit_read_pointer_n  = commit_read_pointer_q;
+    commit_write_pointer_n = commit_write_pointer_q;
+
+    commit_queue_n         = commit_queue_q;
+
+    req_port_o.data_req    = 1'b0;
+
+    // there should be no commit when we are flushing
+    // if the entry in the commit queue is valid and not speculative anymore we can issue this instruction
+    if (commit_queue_q[commit_read_pointer_q].valid && !stall_st_pending_i) begin
+      req_port_o.data_req = 1'b1;
+      if (req_port_i.data_gnt) begin
+        // we can evict it from the commit buffer
+        commit_queue_n[commit_read_pointer_q].valid = 1'b0;
+        // advance the read_pointer
+        commit_read_pointer_n = commit_read_pointer_q + 1'b1;
+        commit_status_cnt--;
+      end
+    end
+    // we ignore the rvalid signal for now as we assume that the store
+    // happened if we got a grant
+
+    // shift the store request from the speculative buffer to the non-speculative
+    if (commit_i) begin
+      commit_queue_n[commit_write_pointer_q] = speculative_queue_q[speculative_read_pointer_q];
+      commit_write_pointer_n = commit_write_pointer_n + 1'b1;  // _n still equals _q here (default assignment above, not written since)
+      commit_status_cnt++;
+    end
+
+    commit_status_cnt_n = commit_status_cnt;
+  end
+
+  // ------------------
+  // Address Checker
+  // ------------------
+  // The load should return the data stored by the most recent store to the
+  // same physical address.  The most direct way to implement this is to
+  // maintain physical addresses in the store buffer.
+
+  // Of course, there are other micro-architectural techniques to accomplish
+  // the same thing: you can interlock and wait for the store buffer to
+  // drain if the load VA matches any store VA modulo the page size (i.e.
+  // bits 11:0).  As a special case, it is correct to bypass if the full VA
+  // matches, and no younger stores' VAs match in bits 11:0.
+  //
+  // checks if the requested load is in the store buffer
+  // page offsets are virtually and physically the same
+  always_comb begin : address_checker
+    page_offset_matches_o = 1'b0;
+
+    // check if bits [11:3] of the page offset are identical (bits [2:0] are ignored) and the entry is valid
+    for (int unsigned i = 0; i < DEPTH_COMMIT; i++) begin
+      // Check if the page offset matches and whether the entry is valid, for the commit queue
+      if ((page_offset_i[11:3] == commit_queue_q[i].address[11:3]) && commit_queue_q[i].valid) begin
+        page_offset_matches_o = 1'b1;
+        break;
+      end
+    end
+
+    for (int unsigned i = 0; i < DEPTH_SPEC; i++) begin
+      // do the same for the speculative queue
+      if ((page_offset_i[11:3] == speculative_queue_q[i].address[11:3]) && speculative_queue_q[i].valid) begin
+        page_offset_matches_o = 1'b1;
+        break;
+      end
+    end
+    // or it matches with the entry we are currently putting into the queue
+    if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin
+      page_offset_matches_o = 1'b1;
+    end
+  end
+
+
+  // registers of the speculative queue
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_spec
+    if (~rst_ni) begin
+      speculative_queue_q         <= '{default: 0};
+      speculative_read_pointer_q  <= '0;
+      speculative_write_pointer_q <= '0;
+      speculative_status_cnt_q    <= '0;
+    end else begin
+      speculative_queue_q         <= speculative_queue_n;
+      speculative_read_pointer_q  <= speculative_read_pointer_n;
+      speculative_write_pointer_q <= speculative_write_pointer_n;
+      speculative_status_cnt_q    <= speculative_status_cnt_n;
+    end
+  end
+
+  // registers of the commit queue
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_commit
+    if (~rst_ni) begin
+      commit_queue_q         <= '{default: 0};
+      commit_read_pointer_q  <= '0;
+      commit_write_pointer_q <= '0;
+      commit_status_cnt_q    <= '0;
+    end else begin
+      commit_queue_q         <= commit_queue_n;
+      commit_read_pointer_q  <= commit_read_pointer_n;
+      commit_write_pointer_q <= commit_write_pointer_n;
+      commit_status_cnt_q    <= commit_status_cnt_n;
+    end
+  end
+
+  ///////////////////////////////////////////////////////
+  // assertions
+  ///////////////////////////////////////////////////////
+
+  //pragma translate_off
+  // assert that commit is never set while we are flushing; this would be counter-intuitive,
+  // as flush and commit are decided in the same stage
+  commit_and_flush :
+  assert property (@(posedge clk_i) rst_ni && flush_i |-> !commit_i)
+  else $error("[Commit Queue] You are trying to commit and flush in the same cycle");
+
+  speculative_buffer_overflow :
+  assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i)
+  else
+    $error("[Speculative Queue] You are trying to push new data although the buffer is not ready");
+
+  speculative_buffer_underflow :
+  assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i)
+  else $error("[Speculative Queue] You are committing although there are no stores to commit");
+
+  commit_buffer_overflow :
+  assert property (@(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_COMMIT) |-> !commit_i)
+  else $error("[Commit Queue] You are trying to commit a store although the buffer is full");
+  //pragma translate_on
+endmodule
+
+
+
diff --git a/test/type_param/core/store_unit.sv b/test/type_param/core/store_unit.sv
new file mode 100644
index 0000000..fb93818
--- /dev/null
+++ b/test/type_param/core/store_unit.sv
@@ -0,0 +1,300 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 22.05.2017
+// Description: Store Unit, takes care of all store requests and atomic memory operations (AMOs)
+
+
+module store_unit  // store / AMO front-end: a small FSM plus data re-aligner in front of store_buffer and amo_buffer
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty  // only CVA6Cfg.RVA is read here; the struct is passed on to both buffers
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // forces the FSM to IDLE and gates st_valid; also forwarded to both buffers
+    input logic stall_st_pending_i,  // forwarded unchanged to store_buffer
+    output logic no_st_pending_o,  // driven by store_buffer; also fed to amo_buffer
+    output logic store_buffer_empty_o,  // driven by store_buffer
+    // store unit input port
+    input logic valid_i,  // a request is being presented on lsu_ctrl_i
+    input lsu_ctrl_t lsu_ctrl_i,  // request; operation, vaddr, data, be and trans_id are read in this module
+    output logic pop_st_o,  // set when valid_i is seen in IDLE/VALID_STORE with a DTLB hit and st_ready, and on a translation exception
+    input logic commit_i,  // forwarded to store_buffer
+    output logic commit_ready_o,  // driven by store_buffer
+    input logic amo_valid_commit_i,  // forwarded to amo_buffer
+    // store unit output port
+    output logic valid_o,  // high in VALID_STORE, and when ex_i.valid is seen outside IDLE
+    output logic [TRANS_ID_BITS-1:0] trans_id_o,  // lsu_ctrl_i.trans_id delayed by one cycle
+    output riscv::xlen_t result_o,  // simply mirrors lsu_ctrl_i.data
+    output exception_t ex_o,  // copy of ex_i
+    // MMU -> Address Translation
+    output logic translation_req_o,  // request address translation
+    output logic [riscv::VLEN-1:0] vaddr_o,  // virtual address out
+    output [riscv::PLEN-1:0] rvfi_mem_paddr_o,  // driven by store_buffer
+    input logic [riscv::PLEN-1:0] paddr_i,  // physical address in
+    input exception_t ex_i,  // copied to ex_o; when valid outside IDLE it cancels the store (see "Access Exception")
+    input  logic                     dtlb_hit_i,       // will be one in the same cycle translation_req was asserted if it hits
+    // address checker
+    input logic [11:0] page_offset_i,  // forwarded to store_buffer
+    output logic page_offset_matches_o,  // driven by store_buffer
+    // D$ interface
+    output amo_req_t amo_req_o,  // driven by amo_buffer, tied to '0 when CVA6Cfg.RVA is not set
+    input amo_resp_t amo_resp_i,  // forwarded to amo_buffer
+    input dcache_req_o_t req_port_i,  // forwarded to store_buffer
+    output dcache_req_i_t req_port_o  // driven by store_buffer
+);
+  // it doesn't matter what we are writing back as stores don't return anything
+  assign result_o = lsu_ctrl_i.data;
+
+  enum logic [1:0] {
+    IDLE,  // reset state; waits for valid_i
+    VALID_STORE,  // request was taken last cycle: raise valid_o and offer the registered data to the buffers
+    WAIT_TRANSLATION,  // DTLB missed; keep the translation request up until dtlb_hit_i
+    WAIT_STORE_READY  // st_ready was low; keep the translation request up until st_ready && dtlb_hit_i
+  }
+      state_d, state_q;
+
+  // store buffer control signals
+  logic st_ready;  // both buffers report ready
+  logic st_valid;  // offer the registered store/AMO to the buffers (suppressed by flush_i and by exceptions)
+  logic st_valid_without_flush;  // same as st_valid but not gated by flush_i; store_buffer only
+  logic instr_is_amo;  // result of is_amo() on the incoming operation
+  assign instr_is_amo = is_amo(lsu_ctrl_i.operation);
+  // keep the data and the byte enable for the second cycle (after address translation)
+  riscv::xlen_t st_data_n, st_data_q;
+  logic [(riscv::XLEN/8)-1:0] st_be_n, st_be_q;
+  logic [1:0] st_data_size_n, st_data_size_q;
+  amo_t amo_op_d, amo_op_q;
+
+  logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
+
+  // output assignments
+  assign vaddr_o    = lsu_ctrl_i.vaddr; // virtual address
+  assign trans_id_o = trans_id_q; // transaction id from previous cycle
+
+  always_comb begin : store_control
+    // defaults; the case statement and the two overrides at the end may overwrite them (last assignment wins)
+    translation_req_o      = 1'b0;
+    valid_o                = 1'b0;
+    st_valid               = 1'b0;
+    st_valid_without_flush = 1'b0;
+    pop_st_o               = 1'b0;
+    ex_o                   = ex_i;
+    trans_id_n             = lsu_ctrl_i.trans_id;
+    state_d                = state_q;
+
+    case (state_q)
+      // we got a valid store
+      IDLE: begin
+        if (valid_i) begin
+          state_d = VALID_STORE;
+          translation_req_o = 1'b1;
+          pop_st_o = 1'b1;
+          // check if translation was valid and we have space in the store buffer
+          // otherwise simply stall
+          if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
+            state_d  = WAIT_TRANSLATION;
+            pop_st_o = 1'b0;
+          end
+
+          if (!st_ready) begin  // evaluated last, so a busy buffer takes priority over a DTLB miss
+            state_d  = WAIT_STORE_READY;
+            pop_st_o = 1'b0;
+          end
+        end
+      end
+
+      VALID_STORE: begin
+        valid_o = 1'b1;
+        // post this store to the store buffer if we are not flushing
+        if (!flush_i) st_valid = 1'b1;
+
+        st_valid_without_flush = 1'b1;
+
+        // we have another request and its not an AMO (the AMO buffer only has depth 1)
+        if ((valid_i && CVA6Cfg.RVA && !instr_is_amo) || (valid_i && !CVA6Cfg.RVA)) begin  // i.e. valid_i && !(RVA && instr_is_amo)
+
+          translation_req_o = 1'b1;
+          state_d = VALID_STORE;
+          pop_st_o = 1'b1;
+
+          if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
+            state_d  = WAIT_TRANSLATION;
+            pop_st_o = 1'b0;
+          end
+
+          if (!st_ready) begin  // same priority as in IDLE: buffer-not-ready wins over DTLB miss
+            state_d  = WAIT_STORE_READY;
+            pop_st_o = 1'b0;
+          end
+          // if we do not have another request go back to idle
+        end else begin
+          state_d = IDLE;
+        end
+      end
+
+      // the store queue is currently full
+      WAIT_STORE_READY: begin
+        // keep the translation request high
+        translation_req_o = 1'b1;
+
+        if (st_ready && dtlb_hit_i) begin  // returns to IDLE without pop_st_o; NOTE(review): presumably the request is still presented and retaken from IDLE - confirm
+          state_d = IDLE;
+        end
+      end
+
+      default: begin
+        // we didn't receive a valid translation, wait for one
+        // but we know that the store queue is not full as we could only have landed here if
+        // it wasn't full
+        if (state_q == WAIT_TRANSLATION && ariane_pkg::MMU_PRESENT) begin
+          translation_req_o = 1'b1;
+
+          if (dtlb_hit_i) begin
+            state_d = IDLE;
+          end
+        end
+      end
+    endcase
+
+    // -----------------
+    // Access Exception
+    // -----------------
+    // we got an address translation exception (access rights, misaligned or page fault)
+    if (ex_i.valid && (state_q != IDLE)) begin
+      // the only difference is that we do not want to store this request
+      pop_st_o = 1'b1;
+      st_valid = 1'b0;
+      state_d  = IDLE;
+      valid_o  = 1'b1;
+    end
+
+    if (flush_i) state_d = IDLE;  // last assignment: a flush overrides every next-state decision above
+  end
+
+  // -----------
+  // Re-aligner
+  // -----------
+  // re-align the write data to comply with the address offset
+  always_comb begin
+    st_be_n = lsu_ctrl_i.be;
+    // don't shift the data if we are going to perform an AMO as we still need to operate on this data
+    st_data_n = (CVA6Cfg.RVA && instr_is_amo) ? lsu_ctrl_i.data[riscv::XLEN-1:0] :
+        data_align(lsu_ctrl_i.vaddr[2:0], {{64 - riscv::XLEN{1'b0}}, lsu_ctrl_i.data});  // data is zero-extended to 64 bit before data_align()
+    st_data_size_n = extract_transfer_size(lsu_ctrl_i.operation);
+    // save AMO op for next cycle
+    if (CVA6Cfg.RVA) begin
+      case (lsu_ctrl_i.operation)  // the word (W) and double-word (D) variants map to the same amo_t value
+        AMO_LRW, AMO_LRD:     amo_op_d = AMO_LR;
+        AMO_SCW, AMO_SCD:     amo_op_d = AMO_SC;
+        AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP;
+        AMO_ADDW, AMO_ADDD:   amo_op_d = AMO_ADD;
+        AMO_ANDW, AMO_ANDD:   amo_op_d = AMO_AND;
+        AMO_ORW, AMO_ORD:     amo_op_d = AMO_OR;
+        AMO_XORW, AMO_XORD:   amo_op_d = AMO_XOR;
+        AMO_MAXW, AMO_MAXD:   amo_op_d = AMO_MAX;
+        AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU;
+        AMO_MINW, AMO_MIND:   amo_op_d = AMO_MIN;
+        AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU;
+        default:              amo_op_d = AMO_NONE;
+      endcase
+    end else begin
+      amo_op_d = AMO_NONE;  // without RVA amo_op_q never leaves AMO_NONE, so everything goes to the store buffer
+    end
+  end
+
+  logic store_buffer_valid, amo_buffer_valid;
+  logic store_buffer_ready, amo_buffer_ready;
+
+  // multiplex between store unit and amo buffer
+  assign store_buffer_valid = st_valid & (amo_op_q == AMO_NONE);
+  assign amo_buffer_valid = st_valid & (amo_op_q != AMO_NONE);
+
+  assign st_ready = store_buffer_ready & amo_buffer_ready;  // a request is only taken when both buffers can accept
+
+  // ---------------
+  // Store Queue
+  // ---------------
+  store_buffer #(
+      .CVA6Cfg(CVA6Cfg)
+  ) store_buffer_i (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .stall_st_pending_i,
+      .no_st_pending_o,
+      .store_buffer_empty_o,
+      .page_offset_i,
+      .page_offset_matches_o,
+      .commit_i,
+      .commit_ready_o,
+      .ready_o              (store_buffer_ready),
+      .valid_i              (store_buffer_valid),
+      // the flush signal can be critical and we need this valid
+      // signal to check whether the page_offset matches or not,
+      // functionally it doesn't make a difference whether we use
+      // the correct valid signal or not as we are flushing
+      // the whole pipeline anyway
+      .valid_without_flush_i(st_valid_without_flush),
+      .paddr_i,
+      .rvfi_mem_paddr_o     (rvfi_mem_paddr_o),
+      .data_i               (st_data_q),
+      .be_i                 (st_be_q),
+      .data_size_i          (st_data_size_q),
+      .req_port_i           (req_port_i),
+      .req_port_o           (req_port_o)
+  );
+
+  if (CVA6Cfg.RVA) begin  // generate-if: the AMO buffer only exists when CVA6Cfg.RVA is set
+    amo_buffer #(
+        .CVA6Cfg(CVA6Cfg)
+    ) i_amo_buffer (
+        .clk_i,
+        .rst_ni,
+        .flush_i,
+        .valid_i           (amo_buffer_valid),
+        .ready_o           (amo_buffer_ready),
+        .paddr_i           (paddr_i),
+        .amo_op_i          (amo_op_q),
+        .data_i            (st_data_q),
+        .data_size_i       (st_data_size_q),
+        .amo_req_o         (amo_req_o),
+        .amo_resp_i        (amo_resp_i),
+        .amo_valid_commit_i(amo_valid_commit_i),
+        .no_st_pending_i   (no_st_pending_o)
+    );
+  end else begin  // no AMO buffer: always ready, never issues a request
+    assign amo_buffer_ready = '1;
+    assign amo_req_o        = '0;
+  end
+
+  // ---------------
+  // Registers
+  // ---------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin  // FSM state plus the request fields captured for the following cycle
+    if (~rst_ni) begin
+      state_q        <= IDLE;
+      st_be_q        <= '0;
+      st_data_q      <= '0;
+      st_data_size_q <= '0;
+      trans_id_q     <= '0;
+      amo_op_q       <= AMO_NONE;
+    end else begin
+      state_q        <= state_d;
+      st_be_q        <= st_be_n;
+      st_data_q      <= st_data_n;
+      trans_id_q     <= trans_id_n;
+      st_data_size_q <= st_data_size_n;
+      amo_op_q       <= amo_op_d;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv b/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv
new file mode 100644
index 0000000..1575595
--- /dev/null
+++ b/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv
@@ -0,0 +1,301 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// ----------------------------
+// AXI to SRAM Adapter
+// ----------------------------
+// Author: Florian Zaruba (zarubaf@iis.ee.ethz.ch)
+//
+// Description: Manages AXI transactions
+//              Supports all burst accesses but only on aligned addresses and with full data width.
+//              Assertions should guide you if there is something unsupported happening.
+//
+module axi2mem #(  // AXI slave -> simple req/we/addr/be memory port adapter, one transaction at a time
+    parameter int unsigned AXI_ID_WIDTH      = 10,
+    parameter int unsigned AXI_ADDR_WIDTH    = 64,
+    parameter int unsigned AXI_DATA_WIDTH    = 64,
+    parameter int unsigned AXI_USER_WIDTH    = 10
+)(
+    input logic                         clk_i,    // Clock
+    input logic                         rst_ni,  // Asynchronous reset active low
+    AXI_BUS.Slave                       slave,   // AXI slave port (project interface)
+    output logic                        req_o,   // memory request
+    output logic                        we_o,    // memory write enable, only set together with req_o
+    output logic [AXI_ADDR_WIDTH-1:0]   addr_o,  // memory address of the current request
+    output logic [AXI_DATA_WIDTH/8-1:0] be_o,    // byte enables, always slave.w_strb
+    output logic [AXI_USER_WIDTH-1:0]   user_o,  // write user bits, always slave.w_user
+    output logic [AXI_DATA_WIDTH-1:0]   data_o,  // write data, always slave.w_data
+    input  logic [AXI_USER_WIDTH-1:0]   user_i,  // read user bits, forwarded to slave.r_user
+    input  logic [AXI_DATA_WIDTH-1:0]   data_i   // read data, forwarded to slave.r_data
+);
+
+    // AXI has the following rules governing the use of bursts:
+    // - for wrapping bursts, the burst length must be 2, 4, 8, or 16
+    // - a burst must not cross a 4KB address boundary
+    // - early termination of bursts is not supported.
+    typedef enum logic [1:0] { FIXED = 2'b00, INCR = 2'b01, WRAP = 2'b10} axi_burst_t;
+
+    localparam LOG_NR_BYTES = $clog2(AXI_DATA_WIDTH/8);  // log2 of the bytes per beat (full data width)
+
+    typedef struct packed {
+        logic [AXI_ID_WIDTH-1:0]   id;
+        logic [AXI_ADDR_WIDTH-1:0] addr;
+        logic [7:0]                len;
+        logic [2:0]                size;
+        axi_burst_t                burst;
+    } ax_req_t;
+
+    // Registers
+    enum logic [2:0] { IDLE, READ, WRITE, SEND_B, WAIT_WVALID }  state_d, state_q;
+    ax_req_t                   ax_req_d, ax_req_q;      // sampled AR/AW channel of the running transaction
+    logic [AXI_ADDR_WIDTH-1:0] req_addr_d, req_addr_q;  // last address sent to the memory
+    logic [7:0]                cnt_d, cnt_q;            // index of the next beat to be requested
+
+    function automatic logic [AXI_ADDR_WIDTH-1:0] get_wrap_boundary (input logic [AXI_ADDR_WIDTH-1:0] unaligned_address, input logic [7:0] len);
+        logic [AXI_ADDR_WIDTH-1:0] wrap_address = '0;
+        //  for wrapping transfers ax_len can only be 1, 3, 7 or 15: keep the address bits above log2(len+1)+LOG_NR_BYTES, clear the rest
+        if (len == 8'd1)
+            wrap_address[AXI_ADDR_WIDTH-1:1+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:1+LOG_NR_BYTES];
+        else if (len == 8'd3)
+            wrap_address[AXI_ADDR_WIDTH-1:2+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:2+LOG_NR_BYTES];
+        else if (len == 8'd7)
+            wrap_address[AXI_ADDR_WIDTH-1:3+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:3+LOG_NR_BYTES];
+        else if (len == 8'd15)
+            wrap_address[AXI_ADDR_WIDTH-1:4+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:4+LOG_NR_BYTES];
+        // any other len is not a legal wrapping burst and yields '0
+        return wrap_address;
+    endfunction
+
+    logic [AXI_ADDR_WIDTH-1:0] aligned_address;      // start address with the byte-lane bits cleared
+    logic [AXI_ADDR_WIDTH-1:0] wrap_boundary;        // lowest address of the wrapping window
+    logic [AXI_ADDR_WIDTH-1:0] upper_wrap_boundary;  // first address above the wrapping window
+    logic [AXI_ADDR_WIDTH-1:0] cons_addr;            // linear address of beat cnt_q
+
+    always_comb begin
+        // address generation
+        aligned_address = {ax_req_q.addr[AXI_ADDR_WIDTH-1:LOG_NR_BYTES], {{LOG_NR_BYTES}{1'b0}}};
+        wrap_boundary = get_wrap_boundary(ax_req_q.addr, ax_req_q.len);
+        // this will overflow
+        upper_wrap_boundary = wrap_boundary + ((ax_req_q.len + 1) << LOG_NR_BYTES);
+        // calculate consecutive address
+        cons_addr = aligned_address + (cnt_q << LOG_NR_BYTES);
+
+        // Transaction attributes
+        // default assignments
+        state_d    = state_q;
+        ax_req_d   = ax_req_q;
+        req_addr_d = req_addr_q;
+        cnt_d      = cnt_q;
+        // Memory default assignments
+        data_o = slave.w_data;
+        user_o = slave.w_user;
+        be_o   = slave.w_strb;
+        we_o   = 1'b0;
+        req_o  = 1'b0;
+        addr_o = '0;
+        // AXI assignments
+        // request
+        slave.aw_ready = 1'b0;
+        slave.ar_ready = 1'b0;
+        // read response channel
+        slave.r_valid  = 1'b0;
+        slave.r_data   = data_i;
+        slave.r_resp   = '0;
+        slave.r_last   = '0;
+        slave.r_id     = ax_req_q.id;
+        slave.r_user   = user_i;
+        // slave write data channel
+        slave.w_ready  = 1'b0;
+        // write response channel
+        slave.b_valid  = 1'b0;
+        slave.b_resp   = 1'b0;
+        slave.b_id     = 1'b0;
+        slave.b_user   = 1'b0;
+
+        case (state_q)
+
+            IDLE: begin
+                // Wait for a read or write; a read wins if both are valid
+                // ------------
+                // Read
+                // ------------
+                if (slave.ar_valid) begin
+                    slave.ar_ready = 1'b1;
+                    // sample ax
+                    ax_req_d       = {slave.ar_id, slave.ar_addr, slave.ar_len, slave.ar_size, slave.ar_burst};
+                    state_d        = READ;
+                    //  we can request the first address, this saves us time
+                    req_o          = 1'b1;
+                    addr_o         = slave.ar_addr;
+                    // save the address
+                    req_addr_d     = slave.ar_addr;
+                    // beat 0 is requested right here, so beat 1 is the next one to request
+                    cnt_d          = 1;
+                // ------------
+                // Write
+                // ------------
+                end else if (slave.aw_valid) begin
+                    slave.aw_ready = 1'b1;
+                    slave.w_ready  = 1'b1;
+                    addr_o         = slave.aw_addr;
+                    // sample ax
+                    ax_req_d       = {slave.aw_id, slave.aw_addr, slave.aw_len, slave.aw_size, slave.aw_burst};
+                    // we've got our first w_valid so start the write process
+                    if (slave.w_valid) begin
+                        req_o          = 1'b1;
+                        we_o           = 1'b1;
+                        state_d        = (slave.w_last) ? SEND_B : WRITE;
+                        cnt_d          = 1;
+                    // we still have to wait for the first w_valid to arrive
+                    end else
+                        state_d = WAIT_WVALID;
+                end
+            end
+
+            // ~> we are still missing a w_valid
+            WAIT_WVALID: begin
+                slave.w_ready = 1'b1;
+                addr_o = ax_req_q.addr;
+                // we can now make our first request
+                if (slave.w_valid) begin
+                    req_o          = 1'b1;
+                    we_o           = 1'b1;
+                    state_d        = (slave.w_last) ? SEND_B : WRITE;
+                    cnt_d          = 1;
+                end
+            end
+
+            READ: begin
+                // keep request to memory high
+                req_o  = 1'b1;
+                addr_o = req_addr_q;
+                // send the response
+                slave.r_valid = 1'b1;
+                slave.r_data  = data_i;
+                slave.r_user  = user_i;
+                slave.r_id    = ax_req_q.id;
+                slave.r_last  = (cnt_q == ax_req_q.len + 1);
+
+                // check that the master is ready, the slave must not wait on this
+                if (slave.r_ready) begin
+                    // ----------------------------
+                    // Next address generation
+                    // ----------------------------
+                    // handle the correct burst type
+                    case (ax_req_q.burst)
+                        FIXED, INCR: addr_o = cons_addr;  // NOTE(review): FIXED advances like INCR here - confirm this is intended
+                        WRAP:  begin
+                            // check if the address reached wrap boundary
+                            if (cons_addr == upper_wrap_boundary) begin
+                                addr_o = wrap_boundary;
+                            // address wrapped beyond boundary: continue from the lower boundary by the overshoot
+                            end else if (cons_addr > upper_wrap_boundary) begin
+                                addr_o = wrap_boundary + (cons_addr - upper_wrap_boundary);
+                            // we are still in the incremental regime
+                            end else begin
+                                addr_o = cons_addr;
+                            end
+                        end
+                    endcase
+                    // we need to change the address here for the upcoming request
+                    // we sent the last beat -> go back to idle
+                    if (slave.r_last) begin
+                        state_d = IDLE;
+                        // we already got everything
+                        req_o = 1'b0;
+                    end
+                    // save the request address for the next cycle
+                    req_addr_d = addr_o;
+                    // advance the beat counter as the master has consumed the read data
+                    cnt_d = cnt_q + 1;
+                    // TODO: configure correct byte-lane
+                end
+            end
+            // ~> we already wrote the first word here
+            WRITE: begin
+
+                slave.w_ready = 1'b1;
+
+                // consume a word here
+                if (slave.w_valid) begin
+                    req_o         = 1'b1;
+                    we_o          = 1'b1;
+                    // ----------------------------
+                    // Next address generation
+                    // ----------------------------
+                    // handle the correct burst type
+                    case (ax_req_q.burst)
+
+                        FIXED, INCR: addr_o = cons_addr;  // NOTE(review): FIXED advances like INCR here - confirm this is intended
+                        WRAP:  begin
+                            // check if the address reached wrap boundary
+                            if (cons_addr == upper_wrap_boundary) begin
+                                addr_o = wrap_boundary;
+                            // address wrapped beyond boundary: continue from the lower boundary by the overshoot
+                            end else if (cons_addr > upper_wrap_boundary) begin
+                                addr_o = wrap_boundary + (cons_addr - upper_wrap_boundary);
+                            // we are still in the incremental regime
+                            end else begin
+                                addr_o = cons_addr;
+                            end
+                        end
+                    endcase
+                    // save the request address for the next cycle
+                    req_addr_d = addr_o;
+                    // advance the beat counter as this write beat has been passed to the memory
+                    cnt_d = cnt_q + 1;
+
+                    if (slave.w_last)
+                        state_d = SEND_B;
+                end
+            end
+            // ~> send a write acknowledge back
+            SEND_B: begin
+                slave.b_valid = 1'b1;
+                slave.b_id    = ax_req_q.id;
+                if (slave.b_ready)
+                    state_d = IDLE;
+            end
+
+        endcase
+    end
+
+    `ifndef SYNTHESIS
+    `ifndef VERILATOR
+    // assert that only full data lane transfers allowed (all four assertions are currently disabled)
+    // assert property (
+    //   @(posedge clk_i) slave.aw_valid |-> (slave.aw_size == LOG_NR_BYTES)) else $fatal ("Only full data lane transfers allowed");
+    // assert property (
+    //   @(posedge clk_i) slave.ar_valid |-> (slave.ar_size == LOG_NR_BYTES)) else $fatal ("Only full data lane transfers allowed");
+    // assert property (
+    //   @(posedge clk_i) slave.aw_valid |-> (slave.aw_addr[LOG_NR_BYTES-1:0] == '0)) else $fatal ("Unaligned accesses are not allowed at the moment");
+    // assert property (
+    //   @(posedge clk_i) slave.ar_valid |-> (slave.ar_addr[LOG_NR_BYTES-1:0] == '0)) else $fatal ("Unaligned accesses are not allowed at the moment");
+    `endif
+    `endif
+    // --------------
+    // Registers
+    // --------------
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            state_q    <= IDLE;
+            ax_req_q   <= '0;
+            req_addr_q <= '0;
+            cnt_q      <= '0;
+        end else begin
+            state_q    <= state_d;
+            ax_req_q   <= ax_req_d;
+            req_addr_q <= req_addr_d;
+            cnt_q      <= cnt_d;
+        end
+    end
+endmodule
+
+
diff --git a/test/type_param/corev_apu/bootrom/bootrom.sv b/test/type_param/corev_apu/bootrom/bootrom.sv
new file mode 100644
index 0000000..58ba804
--- /dev/null
+++ b/test/type_param/corev_apu/bootrom/bootrom.sv
@@ -0,0 +1,225 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: $filename.v
+ *
+ * Description: Auto-generated bootrom
+ */
+
+// Auto-generated code
+module bootrom (  // read-only memory of RomSize 64-bit words with a registered word index
+   input  logic         clk_i,
+   input  logic         req_i,    // when high, the word index taken from addr_i is captured on the clock edge
+   input  logic [63:0]  addr_i,   // byte address; bits [2:0] are ignored
+   output logic [63:0]  rdata_o   // word selected by the captured index, or zero if the index is out of range
+);
+    localparam int RomSize = 186;  // number of 64-bit words in mem
+
+    const logic [RomSize-1:0][63:0] mem = {  // packed concatenation: the first word listed is mem[RomSize-1], the last one is mem[0]
+        64'h00000000_00000068,
+        64'h74646977_2d6f692d,
+        64'h67657200_74666968,
+        64'h732d6765_72007374,
+        64'h70757272_65746e69,
+        64'h00646565_70732d74,
+        64'h6e657272_75630073,
+        64'h656d616e_2d676572,
+        64'h00646564_6e657478,
+        64'h652d7374_70757272,
+        64'h65746e69_00736567,
+        64'h6e617200_656c646e,
+        64'h61687000_72656c6c,
+        64'h6f72746e_6f632d74,
+        64'h70757272_65746e69,
+        64'h00736c6c_65632d74,
+        64'h70757272_65746e69,
+        64'h23007469_6c70732d,
+        64'h626c7400_65707974,
+        64'h2d756d6d_00617369,
+        64'h2c766373_69720073,
+        64'h75746174_73006765,
+        64'h72006570_79745f65,
+        64'h63697665_64007963,
+        64'h6e657571_6572662d,
+        64'h6b636f6c_63007963,
+        64'h6e657571_6572662d,
+        64'h65736162_656d6974,
+        64'h006c6564_6f6d0065,
+        64'h6c626974_61706d6f,
+        64'h6300736c_6c65632d,
+        64'h657a6973_2300736c,
+        64'h6c65632d_73736572,
+        64'h64646123_09000000,
+        64'h02000000_02000000,
+        64'h02000000_006c6f72,
+        64'h746e6f63_cc000000,
+        64'h08000000_03000000,
+        64'h00100000_00000000,
+        64'h00000018_00000000,
+        64'h5b000000_10000000,
+        64'h03000000_07000000,
+        64'h06000000_05000000,
+        64'h04000000_e4000000,
+        64'h10000000_03000000,
+        64'h00007265_6d69745f,
+        64'h6270612c_706c7570,
+        64'h1b000000_0f000000,
+        64'h03000000_00003030,
+        64'h30303030_38314072,
+        64'h656d6974_01000000,
+        64'h02000000_04000000,
+        64'hf9000000_04000000,
+        64'h03000000_02000000,
+        64'hef000000_04000000,
+        64'h03000000_01000000,
+        64'he4000000_04000000,
+        64'h03000000_00c20100,
+        64'hd6000000_04000000,
+        64'h03000000_80f0fa02,
+        64'h3f000000_04000000,
+        64'h03000000_00100000,
+        64'h00000000_00000010,
+        64'h00000000_5b000000,
+        64'h10000000_03000000,
+        64'h00000000_61303535,
+        64'h3631736e_1b000000,
+        64'h09000000_03000000,
+        64'h00000030_30303030,
+        64'h30303140_74726175,
+        64'h01000000_02000000,
+        64'h006c6f72_746e6f63,
+        64'hcc000000_08000000,
+        64'h03000000_00000c00,
+        64'h00000000_00000002,
+        64'h00000000_5b000000,
+        64'h10000000_03000000,
+        64'h07000000_01000000,
+        64'h03000000_01000000,
+        64'hb8000000_10000000,
+        64'h03000000_00000000,
+        64'h30746e69_6c632c76,
+        64'h63736972_1b000000,
+        64'h0d000000_03000000,
+        64'h00000030_30303030,
+        64'h30324074_6e696c63,
+        64'h01000000_b1000000,
+        64'h00000000_03000000,
+        64'h00007375_622d656c,
+        64'h706d6973_00636f73,
+        64'h2d657261_622d656e,
+        64'h61697261_2c687465,
+        64'h1b000000_1f000000,
+        64'h03000000_02000000,
+        64'h0f000000_04000000,
+        64'h03000000_02000000,
+        64'h00000000_04000000,
+        64'h03000000_00636f73,
+        64'h01000000_02000000,
+        64'h00000010_00000000,
+        64'h00000080_00000000,
+        64'h5b000000_10000000,
+        64'h03000000_00007972,
+        64'h6f6d656d_4f000000,
+        64'h07000000_03000000,
+        64'h00303030_30303030,
+        64'h38407972_6f6d656d,
+        64'h01000000_02000000,
+        64'h02000000_02000000,
+        64'h01000000_a9000000,
+        64'h04000000_03000000,
+        64'h00006374_6e692d75,
+        64'h70632c76_63736972,
+        64'h1b000000_0f000000,
+        64'h03000000_94000000,
+        64'h00000000_03000000,
+        64'h01000000_83000000,
+        64'h04000000_03000000,
+        64'h00000000_72656c6c,
+        64'h6f72746e_6f632d74,
+        64'h70757272_65746e69,
+        64'h01000000_79000000,
+        64'h00000000_03000000,
+        64'h00003933_76732c76,
+        64'h63736972_70000000,
+        64'h0b000000_03000000,
+        64'h00006364_66616d69,
+        64'h34367672_66000000,
+        64'h0b000000_03000000,
+        64'h00000076_63736972,
+        64'h00656e61_69726120,
+        64'h2c687465_1b000000,
+        64'h12000000_03000000,
+        64'h00000000_79616b6f,
+        64'h5f000000_05000000,
+        64'h03000000_00000000,
+        64'h5b000000_04000000,
+        64'h03000000_00757063,
+        64'h4f000000_04000000,
+        64'h03000000_80f0fa02,
+        64'h3f000000_04000000,
+        64'h03000000_00000030,
+        64'h40757063_01000000,
+        64'h00800000_2c000000,
+        64'h04000000_03000000,
+        64'h00000000_0f000000,
+        64'h04000000_03000000,
+        64'h01000000_00000000,
+        64'h04000000_03000000,
+        64'h00000000_73757063,
+        64'h01000000_00657261,
+        64'h622d656e_61697261,
+        64'h2c687465_26000000,
+        64'h10000000_03000000,
+        64'h00766564_2d657261,
+        64'h622d656e_61697261,
+        64'h2c687465_1b000000,
+        64'h14000000_03000000,
+        64'h02000000_0f000000,
+        64'h04000000_03000000,
+        64'h02000000_00000000,
+        64'h04000000_03000000,
+        64'h00000000_01000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h0c040000_06010000,
+        64'h00000000_10000000,
+        64'h11000000_28000000,
+        64'h44040000_38000000,
+        64'h4a050000_edfe0dd0,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_ffdff06f,
+        64'h10500073_03c58593,
+        64'h00000597_f1402573,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00040067_07458593,
+        64'h00000597_f1402573,
+        64'h01f41413_00100413
+    };
+
+    logic [$clog2(RomSize)-1:0] addr_q;  // captured word index; no reset, holds its value while req_i is low
+
+    always_ff @(posedge clk_i) begin
+        if (req_i) begin
+            addr_q <= addr_i[$clog2(RomSize)-1+3:3];  // drop the 3 byte-offset bits: one index step per 8-byte word
+        end
+    end
+
+    // this prevents spurious Xes from propagating into
+    // the speculative fetch stage of the core
+    assign rdata_o = (addr_q < RomSize) ? mem[addr_q] : '0;  // addr_q can hold values >= RomSize, which read as zero
+endmodule
diff --git a/test/type_param/corev_apu/clint/axi_lite_interface.sv b/test/type_param/corev_apu/clint/axi_lite_interface.sv
new file mode 100644
index 0000000..c431dc0
--- /dev/null
+++ b/test/type_param/corev_apu/clint/axi_lite_interface.sv
@@ -0,0 +1,170 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 17/07/2017
+// Description: AXI Lite compatible interface
+//
+
+module axi_lite_interface #(
+    parameter int unsigned AXI_ADDR_WIDTH = 64, // width of address_o and of the latched address
+    parameter int unsigned AXI_DATA_WIDTH = 64, // width of data_i/data_o; be_o has one bit per byte
+    parameter int unsigned AXI_ID_WIDTH   = 10, // width of the latched transaction id
+    parameter type         axi_req_t      = ariane_axi::req_t,
+    parameter type         axi_resp_t     = ariane_axi::resp_t
+) (
+    input logic                               clk_i,    // Clock
+    input logic                               rst_ni,   // Asynchronous reset active low
+
+    input  axi_req_t                          axi_req_i,
+    output axi_resp_t                         axi_resp_o,
+
+    output logic [AXI_ADDR_WIDTH-1:0]         address_o,   // latched AR/AW address, driven while en_o is set
+    output logic                              en_o,        // transaction is valid
+    output logic                              we_o,        // write
+    output logic [(AXI_DATA_WIDTH/8)-1:0]     be_o,        // byte enable write
+    input  logic [AXI_DATA_WIDTH-1:0]         data_i,      // read data, forwarded to the R channel
+    output logic [AXI_DATA_WIDTH-1:0]         data_o       // write data, taken from the W channel
+);
+
+    // The RLAST signal is not required, and is considered asserted for every transfer on the read data channel.
+    enum logic [1:0] { IDLE, READ, WRITE, WRITE_B } state_q, state_d;
+    // save the trans id, we will need it for reflection otherwise we are not plug compatible to the AXI standard
+    logic [AXI_ID_WIDTH-1:0]   trans_id_n, trans_id_q;
+    // address register
+    logic [AXI_ADDR_WIDTH-1:0] address_n,  address_q;
+
+    // pass through read data on the read data channel
+    assign axi_resp_o.r.data = data_i;
+    // send back the transaction id we've latched
+    assign axi_resp_o.r.id = trans_id_q;
+    assign axi_resp_o.b.id = trans_id_q;
+    // set r_last to one as defined by the AXI4 - Lite standard
+    assign axi_resp_o.r.last = 1'b1;
+    // we do not support any errors so set response flag to all zeros
+    assign axi_resp_o.b.resp = 2'b0;
+    assign axi_resp_o.r.resp = 2'b0;
+    // output data which we want to write to the slave
+    assign data_o = axi_req_i.w.data;
+    assign be_o = axi_req_i.w.strb;
+    // ------------------------
+    // AXI4-Lite State Machine
+    // ------------------------
+    always_comb begin
+        // default signal assignment: hold state, address and transaction id
+        state_d    = state_q;
+        address_n  = address_q;
+        trans_id_n = trans_id_q;
+
+        // by default no write-channel handshake is accepted and no write response is valid
+        axi_resp_o.aw_ready = 1'b0;
+        axi_resp_o.w_ready  = 1'b0;
+        axi_resp_o.b_valid  = 1'b0;
+
+        axi_resp_o.ar_ready = 1'b0;
+        axi_resp_o.r_valid  = 1'b0;
+
+        address_o      = '0;
+        we_o           = 1'b0;
+        en_o           = 1'b0;
+
+        case (state_q)
+            // we are ready to accept a new request
+            IDLE: begin
+                // we've got a valid write request: accept it by asserting aw_ready (writes win over reads)
+                if (axi_req_i.aw_valid) begin
+                    axi_resp_o.aw_ready = 1'b1;
+                    // this costs performance but the interconnect does not obey the AXI standard
+                    // e.g.: we could wait for aw_valid && w_valid to do the transaction.
+                    state_d = WRITE;
+                    // save address
+                    address_n = axi_req_i.aw.addr;
+                    // save the transaction id for reflection
+                    trans_id_n = axi_req_i.aw.id;
+
+                // we've got a valid read request: accept it by asserting ar_ready
+                end else if (axi_req_i.ar_valid) begin
+                    axi_resp_o.ar_ready = 1'b1;
+                    state_d = READ;
+                    // save address
+                    address_n = axi_req_i.ar.addr;
+                    // save the transaction id for reflection
+                    trans_id_n = axi_req_i.ar.id;
+
+                end
+            end
+            // We've got a read request at least one cycle earlier; the latched address is
+            // presented here and data_i is passed straight through to the read data channel
+            READ: begin
+                // enable the ram-like
+                en_o       = 1'b1;
+                // further assert the correct address
+                address_o = address_q;
+                // the read is valid
+                axi_resp_o.r_valid = 1'b1;
+                // check if we got a valid r_ready and go back to IDLE
+                if (axi_req_i.r_ready)
+                    state_d = IDLE;
+            end
+            // We've got a write request at least one cycle earlier
+            // wait here for the data
+            WRITE: begin
+                if (axi_req_i.w_valid) begin
+                    axi_resp_o.w_ready = 1'b1;
+                    // use the latched address
+                    address_o = address_q;
+                    en_o = 1'b1;
+                    we_o = 1'b1;
+                    // close this request
+                    state_d = WRITE_B;
+                end
+            end
+
+            WRITE_B: begin
+                axi_resp_o.b_valid  = 1'b1;
+                // we've already performed the write here so wait for the ready signal
+                if (axi_req_i.b_ready)
+                    state_d = IDLE;
+            end
+            default:;
+
+        endcase
+    end
+
+    // ------------------------
+    // Registers
+    // ------------------------
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            state_q    <= IDLE;
+            address_q  <= '0;
+            trans_id_q <= '0;
+        end else begin
+            state_q    <= state_d;
+            address_q  <= address_n;
+            trans_id_q <= trans_id_n;
+        end
+    end
+
+    // ------------------------
+    // Assertions
+    // ------------------------
+    // Listen for illegal transactions
+    //pragma translate_off
+    `ifndef VERILATOR
+    // check that the read burst length is just one (only ar.len is checked here)
+    assert property (@(posedge clk_i) axi_req_i.ar_valid |->  ((axi_req_i.ar.len == 8'b0)))
+    else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end
+    // do the same for the write channel
+    assert property (@(posedge clk_i) axi_req_i.aw_valid |->  ((axi_req_i.aw.len == 8'b0)))
+    else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end
+    `endif
+    //pragma translate_on
+endmodule
diff --git a/test/type_param/corev_apu/clint/clint.sv b/test/type_param/corev_apu/clint/clint.sv
new file mode 100644
index 0000000..e76f96d
--- /dev/null
+++ b/test/type_param/corev_apu/clint/clint.sv
@@ -0,0 +1,294 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 15/07/2017
+// Description: A RISC-V privilege spec 1.11 (WIP) compatible CLINT (core local interrupt controller)
+//
+
+// Platforms provide a real-time counter, exposed as a memory-mapped machine-mode register, mtime. mtime must run at
+// constant frequency, and the platform must provide a mechanism for determining the timebase of mtime (device tree).
+
+module clint #(
+    parameter int unsigned AXI_ADDR_WIDTH = 64,
+    parameter int unsigned AXI_DATA_WIDTH = 64,
+    parameter int unsigned AXI_ID_WIDTH   = 10,
+    parameter int unsigned NR_CORES       = 1, // Number of cores therefore also the number of timecmp registers and timer interrupts
+    parameter type         axi_req_t      = ariane_axi::req_t,
+    parameter type         axi_resp_t     = ariane_axi::resp_t
+) (
+    input  logic                clk_i,       // Clock
+    input  logic                rst_ni,      // Asynchronous reset active low
+    input  logic                testmode_i,  // not referenced inside this module
+    input  axi_req_t            axi_req_i,
+    output axi_resp_t           axi_resp_o,
+    input  logic                rtc_i,       // Real-time clock in (usually 32.768 kHz)
+    output logic [NR_CORES-1:0] timer_irq_o, // Timer interrupts
+    output logic [NR_CORES-1:0] ipi_o        // software interrupt (a.k.a inter-process-interrupt)
+);
+    // register byte offsets, compared against address[15:0]
+    localparam logic [15:0] MSIP_BASE     = 16'h0;
+    localparam logic [15:0] MTIMECMP_BASE = 16'h4000;
+    localparam logic [15:0] MTIME_BASE    = 16'hbff8;
+
+    localparam AddrSelWidth = (NR_CORES == 1) ? 1 : $clog2(NR_CORES);
+
+    // signals from AXI 4 Lite
+    logic [AXI_ADDR_WIDTH-1:0] address;
+    logic                      en;
+    logic                      we;
+    logic [7:0]                be;
+    logic [63:0] wdata;
+    logic [63:0] rdata;
+
+
+    // the lower 16 address bits select the register
+    logic [15:0] register_address;
+    assign register_address = address[15:0];
+    // actual registers
+    logic [63:0]               mtime_n, mtime_q;
+    logic [NR_CORES-1:0][63:0] mtimecmp_n, mtimecmp_q;
+    logic [NR_CORES-1:0]       msip_n, msip_q;
+    // increase the timer
+    logic increase_timer;
+
+    // -----------------------------
+    // AXI Interface Logic
+    // -----------------------------
+    axi_lite_interface #(
+        .AXI_ADDR_WIDTH ( AXI_ADDR_WIDTH ),
+        .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+        .AXI_ID_WIDTH   ( AXI_ID_WIDTH   ),
+        .axi_req_t      ( axi_req_t      ),
+        .axi_resp_t     ( axi_resp_t     )
+    ) axi_lite_interface_i (
+        .clk_i      ( clk_i      ),
+        .rst_ni     ( rst_ni     ),
+        .axi_req_i  ( axi_req_i  ),
+        .axi_resp_o ( axi_resp_o ),
+        .address_o  ( address    ),
+        .en_o       ( en         ),
+        .we_o       ( we         ),
+        .be_o       ( be         ),
+        .data_i     ( rdata      ),
+        .data_o     ( wdata      )
+    );
+
+    // -----------------------------
+    // Register Update Logic
+    // -----------------------------
+    // register write logic (requests come from the AXI-Lite adapter above)
+    always_comb begin
+        mtime_n    = mtime_q;
+        mtimecmp_n = mtimecmp_q;
+        msip_n     = msip_q;
+        // RTC says we should increase the timer
+        if (increase_timer)
+            mtime_n = mtime_q + 1;
+
+        // written from the bus - gets priority over the RTC increment
+        if (en && we) begin
+            case (register_address) inside
+                [MSIP_BASE:MSIP_BASE+4*NR_CORES-1]: begin // bounds are inclusive: stop at the last msip byte
+                    msip_n[$unsigned(address[AddrSelWidth-1+2:2])] = wdata[32*address[2]];
+                end
+                // 8 bytes of mtimecmp per core; bounds are inclusive, hence the -1
+                [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES-1]: begin
+                    if (riscv::XLEN == 32) begin
+                        if (be[3:0] == 4'hf)
+                            mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])][31:0] = wdata[31:0];
+                        else
+                            mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])][63:32] = wdata[63:32];
+                        // NOTE(review): half selected by byte enables here, by address[2:0] on the read path
+                    end else begin
+                        mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])] = wdata;
+                    end
+                end
+                // single 64-bit mtime register (low word at +0, high word at +4)
+                [MTIME_BASE:MTIME_BASE+4]: begin
+                    if (riscv::XLEN == 32) begin
+                        if (address[2:0] == 3'h0)
+                            mtime_n[31:0] = wdata[31:0];
+                        else begin
+                            if (address[2:0] == 3'h4)
+                                mtime_n[63:32] = wdata[63:32];
+                        end
+                    end else begin
+                        mtime_n = wdata;
+                    end
+                end
+                default:;
+            endcase
+        end
+    end
+
+    // register read logic: rdata is zero unless a mapped register is read
+    always_comb begin
+        rdata = 'b0;
+
+        if (en && !we) begin
+            case (register_address) inside
+                [MSIP_BASE:MSIP_BASE+4*NR_CORES-1]: begin // same inclusive range as the write path
+                    if (riscv::XLEN == 32)
+                        rdata[31:0] =  msip_q[$unsigned(address[AddrSelWidth-1+2:2])];
+                    else
+                        rdata = msip_q[$unsigned(address[AddrSelWidth-1+2:2])];
+                end
+                // same inclusive range as the write path
+                [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES-1]: begin
+                    if (riscv::XLEN == 32) begin
+                        if (address[2:0] == 3'h0)
+                            rdata[31:0] = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])][31:0];
+                        else begin
+                            if (address[2:0] == 3'h4)
+                                rdata[63:32] = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])][63:32];
+                        end
+
+                    end else begin
+                        rdata = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])];
+                    end
+                end
+
+                [MTIME_BASE:MTIME_BASE+4]: begin
+                    if (riscv::XLEN == 32) begin
+                        if (address[2:0] == 3'h0)
+                            rdata[31:0] = mtime_q[31:0];
+                        else begin
+                            if (address[2:0] == 3'h4)
+                                rdata[63:32] = mtime_q[63:32];
+                        end
+                    end else begin
+                        rdata = mtime_q;
+                    end
+                end
+                default:;
+            endcase
+        end
+    end
+
+    // -----------------------------
+    // IRQ Generation
+    // -----------------------------
+    // The mtime register has a 64-bit precision on all RV32, RV64, and RV128 systems. Platforms provide a 64-bit
+    // memory-mapped machine-mode timer compare register (mtimecmp), which causes a timer interrupt to be posted when the
+    // mtime register contains a value greater than or equal (mtime >= mtimecmp) to the value in the mtimecmp register.
+    // The interrupt remains posted until it is cleared by writing the mtimecmp register. The interrupt will only be taken
+    // if interrupts are enabled and the MTIE bit is set in the mie register.
+    always_comb begin : irq_gen
+        // the timer interrupt of core i is asserted while mtime_q >= mtimecmp_q[i]
+        for (int unsigned i = 0; i < NR_CORES; i++) begin
+            if (mtime_q >= mtimecmp_q[i]) begin
+                timer_irq_o[i] = 1'b1;
+            end else begin
+                timer_irq_o[i] = 1'b0;
+            end
+        end
+    end
+
+    // -----------------------------
+    // RTC time tracking facilities
+    // -----------------------------
+    // 1. Put the RTC input through a classic two stage edge-triggered synchronizer to filter out any
+    //    metastability effects (or at least make them unlikely :-)); its rising edge increments mtime
+    clint_sync_wedge i_sync_edge (
+        .clk_i,
+        .rst_ni,
+        .serial_i  ( rtc_i          ),
+        .r_edge_o  ( increase_timer ),
+        .f_edge_o  (                ), // left open
+        .serial_o  (                )  // left open
+    );
+
+    // Registers
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            mtime_q    <= 64'b0;
+            mtimecmp_q <= 'b0;
+            msip_q     <= '0;
+        end else begin
+            mtime_q    <= mtime_n;
+            mtimecmp_q <= mtimecmp_n;
+            msip_q     <= msip_n;
+        end
+    end
+
+    assign ipi_o = msip_q;
+
+    // -------------
+    // Assertions
+    // --------------
+    //pragma translate_off
+    `ifndef VERILATOR
+    // Static assertion check for appropriate bus width
+        initial begin
+            assert (AXI_DATA_WIDTH == 64) else $fatal(1, "Timer needs to interface with a 64 bit bus, everything else is not supported");
+        end
+    `endif
+    //pragma translate_on
+
+endmodule
+
+// TODO(zarubaf): Replace by common-cells 2.0
+module clint_sync_wedge #(
+    parameter int unsigned STAGES = 2  // forwarded to the clint_sync instance
+) (
+    input  logic clk_i,
+    input  logic rst_ni,    // asynchronous, active low
+    input  logic serial_i,  // input to be synchronized
+    output logic r_edge_o,  // high while the synchronized input is 1 and its delayed copy is 0
+    output logic f_edge_o,  // high while the synchronized input is 0 and its delayed copy is 1
+    output logic serial_o   // delayed copy of the synchronized input
+);
+    // synchronizer output and its one-cycle-delayed copy
+    logic synced, synced_q;
+
+    clint_sync #(
+        .STAGES ( STAGES )
+    ) i_sync (
+        .clk_i    ( clk_i    ),
+        .rst_ni   ( rst_ni   ),
+        .serial_i ( serial_i ),
+        .serial_o ( synced   )
+    );
+
+    // delay register used as the reference for edge detection
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) synced_q <= 1'b0;
+        else         synced_q <= synced;
+    end
+
+    // an edge is a difference between the current and the delayed sample
+    assign r_edge_o = synced   & (~synced_q);
+    assign f_edge_o = synced_q & (~synced);
+    assign serial_o = synced_q;
+endmodule
+
+module clint_sync #(
+    parameter int unsigned STAGES = 2  // number of flip-flops in the chain (any value >= 1)
+) (
+    input  logic clk_i,
+    input  logic rst_ni,    // asynchronous, active low: clears the whole chain
+    input  logic serial_i,  // sampled into bit 0 on every rising clock edge
+    output logic serial_o   // serial_i delayed by STAGES clock cycles
+);
+    // shift register: bit 0 samples serial_i, bit STAGES-1 drives serial_o
+    logic [STAGES-1:0] reg_q;
+
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (!rst_ni) begin
+            reg_q <= 'h0;
+        end else begin
+            reg_q <= STAGES'({reg_q, serial_i}); // shift left, drop the MSB; also legal for STAGES == 1
+        end
+    end
+
+    assign serial_o = reg_q[STAGES-1];
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv b/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv
new file mode 100644
index 0000000..90134ca
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv
@@ -0,0 +1,88 @@
+// Copyright 2015 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`define REGS_MAX_ADR             2'd2
+
+// Bank of TIMER_CNT timer instances behind one APB slave port: PADDR picks the
+// instance that sees PSEL and whose read data / ready / error is returned.
+module apb_timer #(
+    parameter APB_ADDR_WIDTH = 12,  // APB slaves are 4KB by default
+    parameter TIMER_CNT      = 2    // how many timers should be instantiated
+) (
+    input  logic                         HCLK,
+    input  logic                         HRESETn,
+    input  logic [APB_ADDR_WIDTH-1:0]    PADDR,
+    input  logic [31:0]                  PWDATA,
+    input  logic                         PWRITE,
+    input  logic                         PSEL,
+    input  logic                         PENABLE,
+    output logic [31:0]                  PRDATA,
+    output logic                         PREADY,
+    output logic                         PSLVERR,
+
+    output logic [(TIMER_CNT * 2) - 1:0] irq_o // overflow and cmp interrupt
+);
+
+    logic [TIMER_CNT-1:0]           sel_onehot;
+    logic [TIMER_CNT-1:0]           timer_ready;
+    logic [TIMER_CNT-1:0]           timer_err;
+    logic [TIMER_CNT-1:0] [31:0]    timer_rdata;
+    logic [$clog2(TIMER_CNT) - 1:0] timer_idx;
+
+    // timer index: $clog2(TIMER_CNT) address bits starting at bit `REGS_MAX_ADR + 2
+    assign timer_idx = PADDR[$clog2(TIMER_CNT)+ `REGS_MAX_ADR + 1:`REGS_MAX_ADR + 2];
+
+    // one-hot select: only the addressed timer sees PSEL
+    always_comb begin
+        sel_onehot            = '0;
+        sel_onehot[timer_idx] = PSEL;
+    end
+
+    // output mux
+    always_comb begin
+        // response when no timer is selected: zero data, ready, no error
+        PRDATA  = '0;
+        PREADY  = 1'b1;
+        PSLVERR = 1'b0;
+
+        // otherwise forward the response of the addressed timer
+        if (sel_onehot != '0) begin
+            PRDATA  = timer_rdata[timer_idx];
+            PREADY  = timer_ready[timer_idx];
+            PSLVERR = timer_err[timer_idx];
+        end
+    end
+
+
+    genvar t;
+
+    generate
+    // timer t drives the interrupt pair irq_o[2*t+1 : 2*t]
+    for (t = 0; t < TIMER_CNT; t++) begin : TIMER_GEN
+      timer #(
+          .APB_ADDR_WIDTH ( APB_ADDR_WIDTH )
+      ) timer_i (
+          .HCLK       ( HCLK           ),
+          .HRESETn    ( HRESETn        ),
+
+          .PADDR      ( PADDR          ),
+          .PWDATA     ( PWDATA         ),
+          .PWRITE     ( PWRITE         ),
+          .PSEL       ( sel_onehot[t]  ),
+          .PENABLE    ( PENABLE        ),
+          .PRDATA     ( timer_rdata[t] ),
+          .PREADY     ( timer_ready[t] ),
+          .PSLVERR    ( timer_err[t]   ),
+
+          .irq_o      ( irq_o[2*t +: 2] )
+      );
+    end
+endgenerate
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv b/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv
new file mode 100644
index 0000000..1a3a4f1
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv
@@ -0,0 +1,145 @@
+// Copyright 2015 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// define three registers per timer - timer, control (enable + prescaler) and compare registers
+`define REGS_MAX_IDX             'd2
+`define REG_TIMER                 2'b00
+`define REG_TIMER_CTRL            2'b01
+`define REG_CMP                   2'b10
+`define PRESCALER_STARTBIT        'd3
+`define PRESCALER_STOPBIT         'd5
+`define ENABLE_BIT                'd0
+// Single 32-bit APB timer: irq_o[0] flags overflow, irq_o[1] flags compare match.
+module timer
+#(
+    parameter APB_ADDR_WIDTH = 12  //APB slaves are 4KB by default
+)
+(
+    input  logic                      HCLK,
+    input  logic                      HRESETn,
+    input  logic [APB_ADDR_WIDTH-1:0] PADDR,
+    input  logic               [31:0] PWDATA,
+    input  logic                      PWRITE,
+    input  logic                      PSEL,
+    input  logic                      PENABLE,
+    output logic               [31:0] PRDATA,
+    output logic                      PREADY,
+    output logic                      PSLVERR,
+
+    output logic                [1:0] irq_o // overflow and cmp interrupt
+);
+
+    // APB register interface: the word address bits PADDR[3:2] select the register
+    logic [`REGS_MAX_IDX-1:0]       register_adr;
+    assign register_adr = PADDR[`REGS_MAX_IDX + 1:2];
+    // APB logic: we are always ready to capture the data into our regs
+    // not supporting transfer failure
+    assign PREADY  = 1'b1;
+    assign PSLVERR = 1'b0;
+    // registers
+    logic [0:`REGS_MAX_IDX] [31:0]  regs_q, regs_n;
+    logic [31:0] cycle_counter_n, cycle_counter_q;
+
+    logic [2:0] prescaler_int;
+
+    //irq logic
+    always_comb
+    begin
+        irq_o = 2'b0;
+
+        // overflow irq
+        if (regs_q[`REG_TIMER] == 32'hffff_ffff)
+            irq_o[0] = 1'b1;
+
+        // compare match irq if compare reg is set
+        if (regs_q[`REG_CMP] != 'b0 && regs_q[`REG_TIMER] == regs_q[`REG_CMP])
+            irq_o[1] = 1'b1;
+
+    end
+
+    assign prescaler_int = regs_q[`REG_TIMER_CTRL][`PRESCALER_STOPBIT:`PRESCALER_STARTBIT];
+    // register write logic
+    always_comb
+    begin
+        regs_n = regs_q;
+        cycle_counter_n = cycle_counter_q + 1;
+
+        // reset timer after cmp or overflow
+        if (irq_o[0] == 1'b1 || irq_o[1] == 1'b1)
+            regs_n[`REG_TIMER] = 32'b0;
+        else if(regs_q[`REG_TIMER_CTRL][`ENABLE_BIT] && prescaler_int != 'b0 && prescaler_int == cycle_counter_q) // prescaler
+        begin
+            regs_n[`REG_TIMER] = regs_q[`REG_TIMER] + 1; //prescaler mode
+        end
+        else if (regs_q[`REG_TIMER_CTRL][`ENABLE_BIT] && prescaler_int == 'b0) // normal count mode
+            regs_n[`REG_TIMER] = regs_q[`REG_TIMER] + 1;
+
+        // reset prescaler cycle counter (compared against the whole control register)
+        if (cycle_counter_q >= regs_q[`REG_TIMER_CTRL])
+            cycle_counter_n = 32'b0;
+
+        // written from APB bus - gets priority
+        if (PSEL && PENABLE && PWRITE)
+        begin
+            case (register_adr)
+                `REG_TIMER:
+                    regs_n[`REG_TIMER] = PWDATA;
+
+                `REG_TIMER_CTRL:
+                    regs_n[`REG_TIMER_CTRL] = PWDATA;
+
+                `REG_CMP:
+                begin
+                    regs_n[`REG_CMP] = PWDATA;
+                    regs_n[`REG_TIMER] = 32'b0; // reset timer if compare register is written
+                end
+                default: ; // register address 2'b11 is unmapped: the write is ignored
+            endcase
+        end
+    end
+
+    // APB register read logic
+    always_comb
+    begin
+        PRDATA = 'b0;
+
+        if (PSEL && PENABLE && !PWRITE)
+        begin
+            case (register_adr)
+                `REG_TIMER:
+                    PRDATA = regs_q[`REG_TIMER];
+
+                `REG_TIMER_CTRL:
+                    PRDATA = regs_q[`REG_TIMER_CTRL];
+
+                `REG_CMP:
+                    PRDATA = regs_q[`REG_CMP];
+                default: ; // register address 2'b11 is unmapped: PRDATA stays zero
+            endcase
+
+        end
+    end
+    // synchronous part
+    always_ff @(posedge HCLK, negedge HRESETn)
+    begin
+        if(~HRESETn)
+        begin
+            regs_q          <= '{default: 32'b0};
+            cycle_counter_q <= 32'b0;
+        end
+        else
+        begin
+            regs_q          <= regs_n;
+            cycle_counter_q <= cycle_counter_n;
+        end
+    end
+
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv
new file mode 100644
index 0000000..ceaa312
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv
@@ -0,0 +1,449 @@
+// Copyright 2014-2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Igor Loi <igor.loi@unibo.it>
+// Davide Rossi <davide.rossi@unibo.it>
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+`define OKAY   2'b00
+`define EXOKAY 2'b01
+`define SLVERR 2'b10
+`define DECERR 2'b11
+
+module axi2apb // AXI4 slave to APB master bridge: the FSM below issues one APB access per accepted AXI address
+#(
+    parameter AXI4_ADDRESS_WIDTH = 32,
+    parameter AXI4_RDATA_WIDTH   = 32,
+    parameter AXI4_WDATA_WIDTH   = 32,
+    parameter AXI4_ID_WIDTH      = 16,
+    parameter AXI4_USER_WIDTH    = 10,
+    parameter AXI_NUMBYTES       = AXI4_WDATA_WIDTH/8,
+
+    parameter BUFF_DEPTH_SLAVE   = 4,  // BUFFER_DEPTH handed to every axi_*_buffer instance
+    parameter APB_ADDR_WIDTH     = 32  // width of PADDR
+)
+(
+    input logic                           ACLK,
+    input logic                           ARESETn,
+    input logic                           test_en_i,
+    // AXI write address channel
+    input  logic [AXI4_ID_WIDTH-1:0]      AWID_i,
+    input  logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR_i,
+    input  logic [ 7:0]                   AWLEN_i,
+    input  logic [ 2:0]                   AWSIZE_i,
+    input  logic [ 1:0]                   AWBURST_i,
+    input  logic                          AWLOCK_i,
+    input  logic [ 3:0]                   AWCACHE_i,
+    input  logic [ 2:0]                   AWPROT_i,
+    input  logic [ 3:0]                   AWREGION_i,
+    input  logic [ AXI4_USER_WIDTH-1:0]   AWUSER_i,
+    input  logic [ 3:0]                   AWQOS_i,
+    input  logic                          AWVALID_i,
+    output logic                          AWREADY_o,
+    // AXI write data channel
+    input  logic [AXI4_WDATA_WIDTH-1:0]   WDATA_i,
+    input  logic [AXI_NUMBYTES-1:0]       WSTRB_i,
+    input  logic                          WLAST_i,
+    input  logic [AXI4_USER_WIDTH-1:0]    WUSER_i,
+    input  logic                          WVALID_i,
+    output logic                          WREADY_o,
+    // AXI write response channel
+    output logic   [AXI4_ID_WIDTH-1:0]    BID_o,
+    output logic   [ 1:0]                 BRESP_o,
+    output logic                          BVALID_o,
+    output logic   [AXI4_USER_WIDTH-1:0]  BUSER_o,
+    input  logic                          BREADY_i,
+    // AXI read address channel
+    input  logic [AXI4_ID_WIDTH-1:0]      ARID_i,
+    input  logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR_i,
+    input  logic [ 7:0]                   ARLEN_i,
+    input  logic [ 2:0]                   ARSIZE_i,
+    input  logic [ 1:0]                   ARBURST_i,
+    input  logic                          ARLOCK_i,
+    input  logic [ 3:0]                   ARCACHE_i,
+    input  logic [ 2:0]                   ARPROT_i,
+    input  logic [ 3:0]                   ARREGION_i,
+    input  logic [ AXI4_USER_WIDTH-1:0]   ARUSER_i,
+    input  logic [ 3:0]                   ARQOS_i,
+    input  logic                          ARVALID_i,
+    output logic                          ARREADY_o,
+    // AXI read data channel
+    output  logic [AXI4_ID_WIDTH-1:0]     RID_o,
+    output  logic [AXI4_RDATA_WIDTH-1:0]  RDATA_o,
+    output  logic [ 1:0]                  RRESP_o,
+    output  logic                         RLAST_o,
+    output  logic [AXI4_USER_WIDTH-1:0]   RUSER_o,
+    output  logic                         RVALID_o,
+    input   logic                         RREADY_i,
+    // APB master interface
+    output logic                          PENABLE,
+    output logic                          PWRITE,
+    output logic [APB_ADDR_WIDTH-1:0]     PADDR,
+    output logic                          PSEL,
+    output logic [AXI4_WDATA_WIDTH-1:0]   PWDATA,
+    input  logic [AXI4_RDATA_WIDTH-1:0]   PRDATA,
+    input  logic                          PREADY,
+    input  logic                          PSLVERR // not referenced inside this module; BRESP/RRESP are always `OKAY
+);
+    // The signals below sit between the axi_*_buffer instances and the bridge FSM.
+    // --------------------
+    // AXI write address bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]       AWID;
+    logic [AXI4_ADDRESS_WIDTH-1:0]  AWADDR;
+    logic [ 7:0]                    AWLEN;
+    logic [ 2:0]                    AWSIZE;
+    logic [ 1:0]                    AWBURST;
+    logic                           AWLOCK;
+    logic [ 3:0]                    AWCACHE;
+    logic [ 2:0]                    AWPROT;
+    logic [ 3:0]                    AWREGION;
+    logic [ AXI4_USER_WIDTH-1:0]    AWUSER;
+    logic [ 3:0]                    AWQOS;
+    logic                           AWVALID;
+    logic                           AWREADY;
+    // --------------------
+    // AXI write data bus
+    // --------------------
+    logic [AXI4_WDATA_WIDTH-1:0]    WDATA;  // from FIFO
+    logic [AXI_NUMBYTES-1:0]        WSTRB;  // from FIFO
+    logic                           WLAST;  // from FIFO
+    logic [AXI4_USER_WIDTH-1:0]     WUSER;  // from FIFO
+    logic                           WVALID; // from FIFO
+    logic                           WREADY; // TO FIFO
+    // --------------------
+    // AXI write response bus
+    // --------------------
+    logic   [AXI4_ID_WIDTH-1:0]     BID;
+    logic   [ 1:0]                  BRESP;
+    logic                           BVALID;
+    logic   [AXI4_USER_WIDTH-1:0]   BUSER;
+    logic                           BREADY;
+    // --------------------
+    // AXI read address bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]       ARID;
+    logic [AXI4_ADDRESS_WIDTH-1:0]  ARADDR;
+    logic [ 7:0]                    ARLEN;
+    logic [ 2:0]                    ARSIZE;
+    logic [ 1:0]                    ARBURST;
+    logic                           ARLOCK;
+    logic [ 3:0]                    ARCACHE;
+    logic [ 2:0]                    ARPROT;
+    logic [ 3:0]                    ARREGION;
+    logic [ AXI4_USER_WIDTH-1:0]    ARUSER;
+    logic [ 3:0]                    ARQOS;
+    logic                           ARVALID;
+    logic                           ARREADY;
+    // --------------------
+    // AXI read data bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]       RID;
+    logic [AXI4_RDATA_WIDTH-1:0]    RDATA;
+    logic [ 1:0]                    RRESP;
+    logic                           RLAST;
+    logic [AXI4_USER_WIDTH-1:0]     RUSER;
+    logic                           RVALID;
+    logic                           RREADY;
+  // FSM state register (CS) and combinational next state (NS)
+  enum logic [2:0] { IDLE,
+                     DONE_SINGLE_RD,
+                     WAIT_W_PREADY,
+                     WAIT_R_PREADY,
+                     SEND_B_RESP
+                    } CS, NS;
+
+  logic [AXI4_ADDRESS_WIDTH-1:0] address;
+  logic sample_RDATA; // when set, PRDATA is captured into RDATA_Q at the next clock edge
+
+  logic [AXI4_RDATA_WIDTH-1:0] RDATA_Q;
+
+  logic read_req;
+  logic write_req;
+  // APB outputs: PSEL is tied high; PENABLE is asserted whenever the FSM requests a read or a write.
+  assign PENABLE = write_req | read_req;
+  assign PWRITE  = write_req;
+  assign PADDR   = address[APB_ADDR_WIDTH-1:0];
+  assign PWDATA  = WDATA;
+  assign PSEL    = 1'b1;
+
+   // AXI WRITE ADDRESS CHANNEL BUFFER
+   axi_aw_buffer #(
+       .ID_WIDTH     ( AXI4_ID_WIDTH      ),
+       .ADDR_WIDTH   ( AXI4_ADDRESS_WIDTH ),
+       .USER_WIDTH   ( AXI4_USER_WIDTH    ),
+       .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE   )
+   ) slave_aw_buffer_i (
+      .clk_i           ( ACLK        ),
+      .rst_ni          ( ARESETn     ),
+      .test_en_i       ( test_en_i   ),
+
+      .slave_valid_i   ( AWVALID_i   ),
+      .slave_addr_i    ( AWADDR_i    ),
+      .slave_prot_i    ( AWPROT_i    ),
+      .slave_region_i  ( AWREGION_i  ),
+      .slave_len_i     ( AWLEN_i     ),
+      .slave_size_i    ( AWSIZE_i    ),
+      .slave_burst_i   ( AWBURST_i   ),
+      .slave_lock_i    ( AWLOCK_i    ),
+      .slave_cache_i   ( AWCACHE_i   ),
+      .slave_qos_i     ( AWQOS_i     ),
+      .slave_id_i      ( AWID_i      ),
+      .slave_user_i    ( AWUSER_i    ),
+      .slave_ready_o   ( AWREADY_o   ),
+
+      .master_valid_o  ( AWVALID     ),
+      .master_addr_o   ( AWADDR      ),
+      .master_prot_o   ( AWPROT      ),
+      .master_region_o ( AWREGION    ),
+      .master_len_o    ( AWLEN       ),
+      .master_size_o   ( AWSIZE      ),
+      .master_burst_o  ( AWBURST     ),
+      .master_lock_o   ( AWLOCK      ),
+      .master_cache_o  ( AWCACHE     ),
+      .master_qos_o    ( AWQOS       ),
+      .master_id_o     ( AWID        ),
+      .master_user_o   ( AWUSER      ),
+      .master_ready_i  ( AWREADY     )
+   );
+
+   // AXI READ ADDRESS CHANNEL BUFFER
+   axi_ar_buffer #(
+       .ID_WIDTH     ( AXI4_ID_WIDTH      ),
+       .ADDR_WIDTH   ( AXI4_ADDRESS_WIDTH ),
+       .USER_WIDTH   ( AXI4_USER_WIDTH    ),
+       .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE   )
+   ) slave_ar_buffer_i (
+      .clk_i           ( ACLK       ),
+      .rst_ni          ( ARESETn    ),
+      .test_en_i       ( test_en_i  ),
+
+      .slave_valid_i   ( ARVALID_i  ),
+      .slave_addr_i    ( ARADDR_i   ),
+      .slave_prot_i    ( ARPROT_i   ),
+      .slave_region_i  ( ARREGION_i ),
+      .slave_len_i     ( ARLEN_i    ),
+      .slave_size_i    ( ARSIZE_i   ),
+      .slave_burst_i   ( ARBURST_i  ),
+      .slave_lock_i    ( ARLOCK_i   ),
+      .slave_cache_i   ( ARCACHE_i  ),
+      .slave_qos_i     ( ARQOS_i    ),
+      .slave_id_i      ( ARID_i     ),
+      .slave_user_i    ( ARUSER_i   ),
+      .slave_ready_o   ( ARREADY_o  ),
+
+      .master_valid_o  ( ARVALID    ),
+      .master_addr_o   ( ARADDR     ),
+      .master_prot_o   ( ARPROT     ),
+      .master_region_o ( ARREGION   ),
+      .master_len_o    ( ARLEN      ),
+      .master_size_o   ( ARSIZE     ),
+      .master_burst_o  ( ARBURST    ),
+      .master_lock_o   ( ARLOCK     ),
+      .master_cache_o  ( ARCACHE    ),
+      .master_qos_o    ( ARQOS      ),
+      .master_id_o     ( ARID       ),
+      .master_user_o   ( ARUSER     ),
+      .master_ready_i  ( ARREADY    )
+   );
+
+   // AXI WRITE DATA CHANNEL BUFFER
+   axi_w_buffer #(
+       .DATA_WIDTH(AXI4_WDATA_WIDTH),
+       .USER_WIDTH(AXI4_USER_WIDTH),
+       .BUFFER_DEPTH(BUFF_DEPTH_SLAVE)
+   ) slave_w_buffer_i (
+        .clk_i          ( ACLK      ),
+        .rst_ni         ( ARESETn   ),
+        .test_en_i      ( test_en_i ),
+
+        .slave_valid_i  ( WVALID_i  ),
+        .slave_data_i   ( WDATA_i   ),
+        .slave_strb_i   ( WSTRB_i   ),
+        .slave_user_i   ( WUSER_i   ),
+        .slave_last_i   ( WLAST_i   ),
+        .slave_ready_o  ( WREADY_o  ),
+
+        .master_valid_o ( WVALID    ),
+        .master_data_o  ( WDATA     ),
+        .master_strb_o  ( WSTRB     ),
+        .master_user_o  ( WUSER     ),
+        .master_last_o  ( WLAST     ),
+        .master_ready_i ( WREADY    )
+    );
+   // AXI READ DATA CHANNEL BUFFER
+   axi_r_buffer #(
+        .ID_WIDTH     ( AXI4_ID_WIDTH    ),
+        .DATA_WIDTH   ( AXI4_RDATA_WIDTH ),
+        .USER_WIDTH   ( AXI4_USER_WIDTH  ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE )
+   ) slave_r_buffer_i (
+        .clk_i          ( ACLK       ),
+        .rst_ni         ( ARESETn    ),
+        .test_en_i      ( test_en_i  ),
+
+        .slave_valid_i  ( RVALID     ),
+        .slave_data_i   ( RDATA      ),
+        .slave_resp_i   ( RRESP      ),
+        .slave_user_i   ( RUSER      ),
+        .slave_id_i     ( RID        ),
+        .slave_last_i   ( RLAST      ),
+        .slave_ready_o  ( RREADY     ),
+
+        .master_valid_o ( RVALID_o   ),
+        .master_data_o  ( RDATA_o    ),
+        .master_resp_o  ( RRESP_o    ),
+        .master_user_o  ( RUSER_o    ),
+        .master_id_o    ( RID_o      ),
+        .master_last_o  ( RLAST_o    ),
+        .master_ready_i ( RREADY_i   )
+   );
+   // AXI WRITE RESPONSE CHANNEL BUFFER
+   axi_b_buffer #(
+        .ID_WIDTH(AXI4_ID_WIDTH),
+        .USER_WIDTH(AXI4_USER_WIDTH),
+        .BUFFER_DEPTH(BUFF_DEPTH_SLAVE)
+   ) slave_b_buffer (
+        .clk_i          ( ACLK      ),
+        .rst_ni         ( ARESETn   ),
+        .test_en_i      ( test_en_i ),
+
+        .slave_valid_i  ( BVALID    ),
+        .slave_resp_i   ( BRESP     ),
+        .slave_id_i     ( BID       ),
+        .slave_user_i   ( BUSER     ),
+        .slave_ready_o  ( BREADY    ),
+
+        .master_valid_o ( BVALID_o  ),
+        .master_resp_o  ( BRESP_o   ),
+        .master_id_o    ( BID_o     ),
+        .master_user_o  ( BUSER_o   ),
+        .master_ready_i ( BREADY_i  )
+    );
+    // Bridge FSM: combinational outputs and next-state logic (AxLEN/AxSIZE/WSTRB/WLAST are not examined).
+    always_comb begin
+      NS           = CS;   // default: hold the current state so NS is assigned on every path (no latch)
+      read_req     = 1'b0;
+      write_req    = 1'b0;
+      address      = '0;
+
+      sample_RDATA = 1'b0;
+
+      ARREADY      = 1'b0;
+      AWREADY      = 1'b0;
+      WREADY       = 1'b0;
+
+      BVALID       = 1'b0;
+      BRESP        = `OKAY;
+      BID          = AWID;
+      BUSER        = AWUSER;
+
+      RVALID       = 1'b0;
+      RLAST        = 1'b0;
+      RID          = ARID;
+      RUSER        = ARUSER;
+      RRESP        = `OKAY;
+      RDATA        = RDATA_Q;
+
+      case(CS)
+
+        WAIT_R_PREADY: begin // read request is held until the APB side reports PREADY
+            read_req     = 1'b1;
+            address      = ARADDR[APB_ADDR_WIDTH  - 1 : 0];
+            sample_RDATA = PREADY;
+
+            if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+               NS = DONE_SINGLE_RD;
+            end
+        end
+
+        WAIT_W_PREADY: begin // write request is held until the APB side reports PREADY
+            write_req   = 1'b1;
+            address     = AWADDR[APB_ADDR_WIDTH - 1:0];
+            // There is a Pending WRITE!!
+            if (PREADY == 1'b1) begin // APB is READY --> WDATA is LAtched
+                NS = SEND_B_RESP;
+            end
+        end
+
+        IDLE: begin // a pending read (ARVALID) is served before a pending write
+            if (ARVALID == 1'b1) begin
+                read_req     = 1'b1;
+                address      = ARADDR[APB_ADDR_WIDTH - 1:0];
+                sample_RDATA = PREADY;
+
+                if(PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                    NS   = DONE_SINGLE_RD;
+                end else begin // APB not ready
+                    NS = WAIT_R_PREADY;
+                end
+            end else begin
+                if (AWVALID) begin
+                    address =  AWADDR[APB_ADDR_WIDTH - 1:0];
+                    if (WVALID) begin
+                        write_req = 1'b1;
+
+                        // There is a Pending WRITE!!
+                        if (PREADY == 1'b1) begin// APB is READY --> WDATA is LAtched
+                            NS = SEND_B_RESP;
+                        end else begin // APB not READY
+                           NS = WAIT_W_PREADY;
+                        end
+                    end else begin // GOT ADDRESS WRITE, not DATA
+                        write_req       = 1'b0;
+                        address         = '0;
+                        NS              = IDLE;
+                    end
+                end
+            end
+        end
+
+        SEND_B_RESP: begin // offer the write response; AW and W are acknowledged once it is accepted
+
+            BVALID   = 1'b1;
+            address  = '0;
+
+            if (BREADY) begin
+                NS      = IDLE;
+                AWREADY = 1'b1;
+                WREADY  = 1'b1;
+            end
+        end
+
+        DONE_SINGLE_RD: begin // offer the captured read data (RDATA_Q); AR is acknowledged once it is accepted
+
+            RVALID    = 1'b1;
+            RLAST     = 1'b1;
+            address   = '0;
+
+            if (RREADY) begin // ready to send back the rdata
+                NS = IDLE;
+                ARREADY = 1'b1;
+            end
+        end
+
+        default: NS = IDLE;
+
+      endcase
+    end
+    // State register and PRDATA capture register (asynchronous, active-low reset).
+    always_ff @(posedge ACLK, negedge ARESETn) begin
+        if (ARESETn == 1'b0) begin
+            CS      <= IDLE;
+            RDATA_Q <= '0;
+        end else begin
+            CS      <= NS;
+
+            if (sample_RDATA)
+                RDATA_Q <= PRDATA;
+        end
+    end
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv
new file mode 100644
index 0000000..c98b179
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv
@@ -0,0 +1,745 @@
+// Copyright 2014-2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Igor Loi <igor.loi@unibo.it>
+// Davide Rossi <davide.rossi@unibo.it>
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+`define OKAY   2'b00
+`define EXOKAY 2'b01
+`define SLVERR 2'b10
+`define DECERR 2'b11
+
+module axi2apb_64_32 #(
+    parameter int unsigned AXI4_ADDRESS_WIDTH = 32,
+    parameter int unsigned AXI4_RDATA_WIDTH   = 64,
+    parameter int unsigned AXI4_WDATA_WIDTH   = 64,
+    parameter int unsigned AXI4_ID_WIDTH      = 16,
+    parameter int unsigned AXI4_USER_WIDTH    = 10,
+    parameter int unsigned AXI_NUMBYTES       = AXI4_WDATA_WIDTH/8,
+
+    parameter int unsigned BUFF_DEPTH_SLAVE   = 4,
+    parameter int unsigned APB_NUM_SLAVES     = 8,
+    parameter int unsigned APB_ADDR_WIDTH     = 12
+)
+(
+    input logic                           ACLK,
+    input logic                           ARESETn,
+    input logic                           test_en_i,
+    // ---------------------------------------------------------
+    // AXI TARG Port Declarations ------------------------------
+    // ---------------------------------------------------------
+    //AXI write address bus -------------- // USED// -----------
+    input  logic [AXI4_ID_WIDTH-1:0]       AWID_i     ,
+    input  logic [AXI4_ADDRESS_WIDTH-1:0]  AWADDR_i   ,
+    input  logic [ 7:0]                    AWLEN_i    ,
+    input  logic [ 2:0]                    AWSIZE_i   ,
+    input  logic [ 1:0]                    AWBURST_i  ,
+    input  logic                           AWLOCK_i   ,
+    input  logic [ 3:0]                    AWCACHE_i  ,
+    input  logic [ 2:0]                    AWPROT_i   ,
+    input  logic [ 3:0]                    AWREGION_i ,
+    input  logic [ AXI4_USER_WIDTH-1:0]    AWUSER_i   ,
+    input  logic [ 3:0]                    AWQOS_i    ,
+    input  logic                           AWVALID_i  ,
+    output logic                           AWREADY_o  ,
+    // ---------------------------------------------------------
+
+    //AXI write data bus -------------- // USED// --------------
+    input  logic [AXI_NUMBYTES-1:0][7:0]   WDATA_i    ,
+    input  logic [AXI_NUMBYTES-1:0]        WSTRB_i    ,
+    input  logic                           WLAST_i    ,
+    input  logic [AXI4_USER_WIDTH-1:0]     WUSER_i    ,
+    input  logic                           WVALID_i   ,
+    output logic                           WREADY_o   ,
+    // ---------------------------------------------------------
+
+    //AXI write response bus -------------- // USED// ----------
+    output logic   [AXI4_ID_WIDTH-1:0]     BID_o      ,
+    output logic   [ 1:0]                  BRESP_o    ,
+    output logic                           BVALID_o   ,
+    output logic   [AXI4_USER_WIDTH-1:0]   BUSER_o    ,
+    input  logic                           BREADY_i   ,
+    // ---------------------------------------------------------
+
+    //AXI read address bus -------------------------------------
+    input  logic [AXI4_ID_WIDTH-1:0]       ARID_i     ,
+    input  logic [AXI4_ADDRESS_WIDTH-1:0]  ARADDR_i   ,
+    input  logic [ 7:0]                    ARLEN_i    ,
+    input  logic [ 2:0]                    ARSIZE_i   ,
+    input  logic [ 1:0]                    ARBURST_i  ,
+    input  logic                           ARLOCK_i   ,
+    input  logic [ 3:0]                    ARCACHE_i  ,
+    input  logic [ 2:0]                    ARPROT_i   ,
+    input  logic [ 3:0]                    ARREGION_i ,
+    input  logic [ AXI4_USER_WIDTH-1:0]    ARUSER_i   ,
+    input  logic [ 3:0]                    ARQOS_i    ,
+    input  logic                           ARVALID_i  ,
+    output logic                           ARREADY_o  ,
+    // ---------------------------------------------------------
+
+    //AXI read data bus ----------------------------------------
+    output  logic [AXI4_ID_WIDTH-1:0]      RID_o      ,
+    output  logic [AXI4_RDATA_WIDTH-1:0]   RDATA_o    ,
+    output  logic [ 1:0]                   RRESP_o    ,
+    output  logic                          RLAST_o    ,
+    output  logic [AXI4_USER_WIDTH-1:0]    RUSER_o    ,
+    output  logic                          RVALID_o   ,
+    input   logic                          RREADY_i   ,
+    // ---------------------------------------------------------
+
+    output logic                           PENABLE    ,
+    output logic                           PWRITE     ,
+    output logic [APB_ADDR_WIDTH-1:0]      PADDR      ,
+    output logic                           PSEL       ,
+    output logic [31:0]                    PWDATA     ,
+    input  logic [31:0]                    PRDATA     ,
+    input  logic                           PREADY     ,
+    input  logic                           PSLVERR
+);
+
+    // --------------------
+    // AXI write address bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]      AWID;
+    logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR;
+    logic [ 7:0]                   AWLEN;
+    logic [ 2:0]                   AWSIZE;
+    logic [ 1:0]                   AWBURST;
+    logic                          AWLOCK;
+    logic [ 3:0]                   AWCACHE;
+    logic [ 2:0]                   AWPROT;
+    logic [ 3:0]                   AWREGION;
+    logic [ AXI4_USER_WIDTH-1:0]   AWUSER;
+    logic [ 3:0]                   AWQOS;
+    logic                          AWVALID;
+    logic                          AWREADY;
+    // --------------------
+    // AXI write data bus
+    // --------------------
+    logic [1:0][31:0]              WDATA;  // from FIFO
+    logic [AXI_NUMBYTES-1:0]       WSTRB;  // from FIFO
+    logic                          WLAST;  // from FIFO
+    logic [AXI4_USER_WIDTH-1:0]    WUSER;  // from FIFO
+    logic                          WVALID; // from FIFO
+    logic                          WREADY; // TO FIFO
+    // --------------------
+    // AXI write response bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]      BID;
+    logic [ 1:0]                   BRESP;
+    logic                          BVALID;
+    logic [AXI4_USER_WIDTH-1:0]    BUSER;
+    logic                          BREADY;
+    // --------------------
+    // AXI read address bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]      ARID;
+    logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR;
+    logic [ 7:0]                   ARLEN;
+    logic [ 2:0]                   ARSIZE;
+    logic [ 1:0]                   ARBURST;
+    logic                          ARLOCK;
+    logic [ 3:0]                   ARCACHE;
+    logic [ 2:0]                   ARPROT;
+    logic [ 3:0]                   ARREGION;
+    logic [ AXI4_USER_WIDTH-1:0]   ARUSER;
+    logic [ 3:0]                   ARQOS;
+    logic                          ARVALID;
+    logic                          ARREADY;
+    // --------------------
+    // AXI read data bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0]    RID;
+    logic [1:0][31:0]            RDATA;
+    logic [ 1:0]                 RRESP;
+    logic                        RLAST;
+    logic [AXI4_USER_WIDTH-1:0]  RUSER;
+    logic                        RVALID;
+    logic                        RREADY;
+
+    enum logic [3:0] { IDLE,
+                       SINGLE_RD, SINGLE_RD_64,
+                       BURST_RD_1, BURST_RD, BURST_RD_64,
+                       BURST_WR, BURST_WR_64,
+                       SINGLE_WR,SINGLE_WR_64,
+                       WAIT_R_PREADY, WAIT_W_PREADY
+                      } CS, NS;
+
+    logic        W_word_sel;
+
+    logic [APB_ADDR_WIDTH-1:0] address;
+
+    logic        read_req;
+    logic        write_req;
+
+    logic        sample_AR;
+    logic [8:0]  ARLEN_Q;
+    logic        decr_ARLEN;
+
+    logic        sample_AW;
+    logic [8:0]  AWLEN_Q;
+    logic        decr_AWLEN;
+
+    logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR_Q;
+    logic                          incr_ARADDR;
+
+    logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR_Q;
+    logic                          incr_AWADDR;
+
+    logic        sample_RDATA_0; // sample the first 32 bit chunk to be aggregated in 64 bit rdata
+    logic        sample_RDATA_1; // sample the second 32 bit chunk to be aggregated in 64 bit rdata
+    logic [31:0] RDATA_Q_0;
+    logic [31:0] RDATA_Q_1;
+
+    assign PENABLE = write_req | read_req;
+    assign PWRITE  = write_req;
+    assign PADDR   = address[APB_ADDR_WIDTH-1:0];
+
+    assign PWDATA  = WDATA[W_word_sel];
+    assign PSEL    = 1'b1;
+
+    // AXI WRITE ADDRESS CHANNEL BUFFER
+    axi_aw_buffer #(
+        .ID_WIDTH     ( AXI4_ID_WIDTH      ),
+        .ADDR_WIDTH   ( AXI4_ADDRESS_WIDTH ),
+        .USER_WIDTH   ( AXI4_USER_WIDTH    ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE   )
+    ) slave_aw_buffer_i (
+       .clk_i           ( ACLK        ),
+       .rst_ni          ( ARESETn     ),
+       .test_en_i       ( test_en_i   ),
+       .slave_valid_i   ( AWVALID_i   ),
+       .slave_addr_i    ( AWADDR_i    ),
+       .slave_prot_i    ( AWPROT_i    ),
+       .slave_region_i  ( AWREGION_i  ),
+       .slave_len_i     ( AWLEN_i     ),
+       .slave_size_i    ( AWSIZE_i    ),
+       .slave_burst_i   ( AWBURST_i   ),
+       .slave_lock_i    ( AWLOCK_i    ),
+       .slave_cache_i   ( AWCACHE_i   ),
+       .slave_qos_i     ( AWQOS_i     ),
+       .slave_id_i      ( AWID_i      ),
+       .slave_user_i    ( AWUSER_i    ),
+       .slave_ready_o   ( AWREADY_o   ),
+       .master_valid_o  ( AWVALID     ),
+       .master_addr_o   ( AWADDR      ),
+       .master_prot_o   ( AWPROT      ),
+       .master_region_o ( AWREGION    ),
+       .master_len_o    ( AWLEN       ),
+       .master_size_o   ( AWSIZE      ),
+       .master_burst_o  ( AWBURST     ),
+       .master_lock_o   ( AWLOCK      ),
+       .master_cache_o  ( AWCACHE     ),
+       .master_qos_o    ( AWQOS       ),
+       .master_id_o     ( AWID        ),
+       .master_user_o   ( AWUSER      ),
+       .master_ready_i  ( AWREADY     )
+    );
+    // AXI READ ADDRESS CHANNEL BUFFER
+    axi_ar_buffer #(
+        .ID_WIDTH       ( AXI4_ID_WIDTH      ),
+        .ADDR_WIDTH     ( AXI4_ADDRESS_WIDTH ),
+        .USER_WIDTH     ( AXI4_USER_WIDTH    ),
+        .BUFFER_DEPTH   ( BUFF_DEPTH_SLAVE   )
+    ) slave_ar_buffer_i (
+       .clk_i           ( ACLK       ),
+       .rst_ni          ( ARESETn    ),
+       .test_en_i       ( test_en_i  ),
+       .slave_valid_i   ( ARVALID_i  ),
+       .slave_addr_i    ( ARADDR_i   ),
+       .slave_prot_i    ( ARPROT_i   ),
+       .slave_region_i  ( ARREGION_i ),
+       .slave_len_i     ( ARLEN_i    ),
+       .slave_size_i    ( ARSIZE_i   ),
+       .slave_burst_i   ( ARBURST_i  ),
+       .slave_lock_i    ( ARLOCK_i   ),
+       .slave_cache_i   ( ARCACHE_i  ),
+       .slave_qos_i     ( ARQOS_i    ),
+       .slave_id_i      ( ARID_i     ),
+       .slave_user_i    ( ARUSER_i   ),
+       .slave_ready_o   ( ARREADY_o  ),
+       .master_valid_o  ( ARVALID    ),
+       .master_addr_o   ( ARADDR     ),
+       .master_prot_o   ( ARPROT     ),
+       .master_region_o ( ARREGION   ),
+       .master_len_o    ( ARLEN      ),
+       .master_size_o   ( ARSIZE     ),
+       .master_burst_o  ( ARBURST    ),
+       .master_lock_o   ( ARLOCK     ),
+       .master_cache_o  ( ARCACHE    ),
+       .master_qos_o    ( ARQOS      ),
+       .master_id_o     ( ARID       ),
+       .master_user_o   ( ARUSER     ),
+       .master_ready_i  ( ARREADY    )
+    );
+    axi_w_buffer #(
+        .DATA_WIDTH   ( AXI4_WDATA_WIDTH ),
+        .USER_WIDTH   ( AXI4_USER_WIDTH  ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE )
+    ) slave_w_buffer_i (
+         .clk_i          ( ACLK      ),
+         .rst_ni         ( ARESETn   ),
+         .test_en_i      ( test_en_i ),
+         .slave_valid_i  ( WVALID_i  ),
+         .slave_data_i   ( WDATA_i   ),
+         .slave_strb_i   ( WSTRB_i   ),
+         .slave_user_i   ( WUSER_i   ),
+         .slave_last_i   ( WLAST_i   ),
+         .slave_ready_o  ( WREADY_o  ),
+         .master_valid_o ( WVALID    ),
+         .master_data_o  ( WDATA     ),
+         .master_strb_o  ( WSTRB     ),
+         .master_user_o  ( WUSER     ),
+         .master_last_o  ( WLAST     ),
+         .master_ready_i ( WREADY    )
+    );
+    axi_r_buffer #(
+         .ID_WIDTH     ( AXI4_ID_WIDTH    ),
+         .DATA_WIDTH   ( AXI4_RDATA_WIDTH ),
+         .USER_WIDTH   ( AXI4_USER_WIDTH  ),
+         .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE )
+    ) slave_r_buffer_i (
+         .clk_i          ( ACLK       ),
+         .rst_ni         ( ARESETn    ),
+         .test_en_i      ( test_en_i  ),
+         .slave_valid_i  ( RVALID     ),
+         .slave_data_i   ( RDATA      ),
+         .slave_resp_i   ( RRESP      ),
+         .slave_user_i   ( RUSER      ),
+         .slave_id_i     ( RID        ),
+         .slave_last_i   ( RLAST      ),
+         .slave_ready_o  ( RREADY     ),
+         .master_valid_o ( RVALID_o   ),
+         .master_data_o  ( RDATA_o    ),
+         .master_resp_o  ( RRESP_o    ),
+         .master_user_o  ( RUSER_o    ),
+         .master_id_o    ( RID_o      ),
+         .master_last_o  ( RLAST_o    ),
+         .master_ready_i ( RREADY_i   )
+    );
+
+    axi_b_buffer #(
+        .ID_WIDTH       ( AXI4_ID_WIDTH    ),
+        .USER_WIDTH     ( AXI4_USER_WIDTH  ),
+        .BUFFER_DEPTH   ( BUFF_DEPTH_SLAVE )
+    ) slave_b_buffer_i (
+        .clk_i          ( ACLK      ),
+        .rst_ni         ( ARESETn   ),
+        .test_en_i      ( test_en_i ),
+
+        .slave_valid_i  ( BVALID    ),
+        .slave_resp_i   ( BRESP     ),
+        .slave_id_i     ( BID       ),
+        .slave_user_i   ( BUSER     ),
+        .slave_ready_o  ( BREADY    ),
+
+        .master_valid_o ( BVALID_o  ),
+        .master_resp_o  ( BRESP_o   ),
+        .master_id_o    ( BID_o     ),
+        .master_user_o  ( BUSER_o   ),
+        .master_ready_i ( BREADY_i  )
+    );
+
+    always_comb begin // FSM next-state (NS) and output decode for the current state CS
+        read_req   = 1'b0; // defaults: each signal below keeps this value unless the active state overrides it
+        write_req  = 1'b0;
+        W_word_sel = 1'b0; // Write Word Selector
+
+        sample_AW  = 1'b0;
+        decr_AWLEN = 1'b0;
+        sample_AR  = 1'b0;
+        decr_ARLEN = 1'b0;
+
+        incr_AWADDR = 1'b0;
+        incr_ARADDR = 1'b0;
+
+        sample_RDATA_0 = 1'b0;
+        sample_RDATA_1 = 1'b0;
+
+        ARREADY = 1'b0;
+        AWREADY = 1'b0;
+        WREADY  = 1'b0;
+        RDATA   = '0;
+
+        BVALID = 1'b0;
+        BRESP  = `OKAY;
+        BID    = AWID;   // write response echoes the ID/USER of the AW request
+        BUSER  = AWUSER;
+
+        RVALID = 1'b0;
+        RLAST  = 1'b0;
+        RID    = ARID;   // read response echoes the ID/USER of the AR request
+        RUSER  = ARUSER;
+        RRESP  = `OKAY;
+
+        case(CS)
+            // Entered from IDLE when a read was requested while PREADY was low: hold read_req until PREADY.
+            WAIT_R_PREADY: begin
+                sample_AR = 1'b0;
+                read_req  = 1'b1;
+                address   = ARADDR;
+
+                if (PREADY == 1'b1) begin// APB is READY --> RDATA is AVAILABLE
+                    if (ARLEN == 0) begin
+                        case (ARSIZE)
+                            3'h3: begin
+                                NS = SINGLE_RD_64;
+                                if (ARADDR[2:0] == 3'h4)
+                                    sample_RDATA_1 = 1'b1;
+                                else  sample_RDATA_0 = 1'b1;
+                            end
+
+                            default: begin
+                                NS = SINGLE_RD;
+                                if (ARADDR[2:0] == 3'h4)
+                                    sample_RDATA_1 = 1'b1;
+                                else
+                                    sample_RDATA_0 = 1'b1;
+                                end
+                            endcase
+                    end else begin // ARLEN > 0 --> BURST
+                       NS             = BURST_RD_64;
+                       sample_RDATA_0 = 1'b1;
+                       decr_ARLEN     = 1'b1;
+                       incr_ARADDR    = 1'b1;
+                    end
+                end else begin // APB not ready
+                    NS = WAIT_R_PREADY;
+                end
+            end
+            // Entered from IDLE when a write was requested while PREADY was low: hold write_req until PREADY.
+            WAIT_W_PREADY: begin
+                address   = AWADDR;
+                write_req = 1'b1;
+
+                if (AWADDR[2:0] == 3'h4)
+                    W_word_sel = 1'b1;
+                else
+                    W_word_sel = 1'b0;
+
+                // There is a Pending WRITE!!
+                if (PREADY == 1'b1) begin // APB is READY --> WDATA is latched
+                    if (AWLEN == 0) begin // single write
+                        case (AWSIZE)
+                            3'h3: NS = SINGLE_WR_64;
+                            default: NS = SINGLE_WR;
+                        endcase
+                    end else begin // BURST WRITE
+                        sample_AW = 1'b1;
+                        NS        = BURST_WR_64;
+                    end
+                end else begin // APB not READY
+                    NS = WAIT_W_PREADY;
+                end
+            end
+            // Idle: ARVALID is tested first, so a read is served before a simultaneously pending write.
+            IDLE: begin
+                if (ARVALID == 1'b1)  begin
+                    sample_AR = 1'b1;
+                    read_req  = 1'b1;
+                    address   = ARADDR;
+
+                    if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                        if (ARLEN == 0) begin
+                            case (ARSIZE)
+                                3'h3: begin
+                                    NS = SINGLE_RD_64;
+                                    if (ARADDR[2:0] == 4)
+                                        sample_RDATA_1 = 1'b1;
+                                    else
+                                        sample_RDATA_0 = 1'b1;
+                                end
+                                default: begin
+                                    NS = SINGLE_RD;
+                                    if (ARADDR[2:0] == 4)
+                                        sample_RDATA_1 = 1'b1;
+                                    else
+                                        sample_RDATA_0 = 1'b1;
+                                    end
+                            endcase end else begin //ARLEN > 0 --> BURST
+                            NS             = BURST_RD_64;
+                            sample_RDATA_0 = 1'b1;
+                        end
+                    end else begin // APB not ready
+                        NS = WAIT_R_PREADY;
+                    end
+                end else begin
+
+                    if (AWVALID) begin //: _VALID_AW_REQ_
+                        if (WVALID) begin // : _VALID_W_REQ_
+                            write_req = 1'b1;
+                            address   = AWADDR;
+
+                            if (AWADDR[2:0] == 3'h4)
+                                W_word_sel = 1'b1;
+                            else
+                                W_word_sel = 1'b0;
+
+                          // There is a Pending WRITE!!
+                            if (PREADY == 1'b1) begin// APB is READY --> WDATA is latched _APB_SLAVE_READY_
+                                  if(AWLEN == 0) begin //: _SINGLE_WRITE_
+                                        case(AWSIZE)
+                                            3'h3: NS = SINGLE_WR_64;
+                                            default: NS = SINGLE_WR;
+                                        endcase
+                                  end else begin // BURST WRITE
+                                        sample_AW   = 1'b1;
+                                        if ((AWADDR[2:0] == 3'h4) && (WSTRB[7:4] == 0))
+                                          incr_AWADDR = 1'b0;
+                                        else
+                                          incr_AWADDR = 1'b1;
+                                        NS = BURST_WR_64;
+                                  end
+                            end else begin// APB not READY
+                                NS = WAIT_W_PREADY;
+                            end
+                        end else begin // GOT ADDRESS WRITE, not DATA
+                            write_req = 1'b0;
+                            address   = '0;
+                            NS        = IDLE;
+                        end
+                    end else begin// No requests
+                        NS = IDLE;
+                        address =  '0;
+                    end
+                end
+            end
+            // Second access of a single 64-bit write: issued at AWADDR + 4 with W_word_sel = 1.
+            SINGLE_WR_64: begin
+                address    = AWADDR + 4;
+                W_word_sel = 1'b1; // write the Second data chunk
+                write_req  = WVALID;
+                if (WVALID) begin
+                    if (PREADY == 1'b1)
+                        NS = SINGLE_WR;
+                    else
+                        NS = SINGLE_WR_64;
+                end else begin
+                    NS = SINGLE_WR_64;
+                end
+            end
+            // Write response: BVALID is raised; on BREADY, AWREADY and WREADY pulse and the FSM returns to IDLE.
+            SINGLE_WR:  begin
+                BVALID   = 1'b1;
+                address  = '0;
+                if (BREADY)  begin
+                    NS      = IDLE;
+                    AWREADY = 1'b1;
+                    WREADY  = 1'b1;
+                end else begin
+                    NS = SINGLE_WR;
+                end
+            end
+            // Burst write with W_word_sel = 1; PREADY is only waited for when WSTRB[7:4] is all ones.
+            BURST_WR_64: begin
+                W_word_sel = 1'b1; // write the Second data chunk first
+                write_req  = WVALID & (|WSTRB[7:4]);
+                address    = AWADDR_Q; // registered burst address
+
+                if (WVALID) begin
+                    if (&WSTRB[7:4]) begin
+                        if(PREADY == 1'b1) begin
+                            NS          = BURST_WR;
+                            WREADY      = 1'b1; // pop another data from the WDATA fifo
+                            decr_AWLEN  = 1'b1; // decrement the remaining BURST beat
+                            incr_AWADDR = 1'b1; // increment address
+                        end else begin
+                            NS = BURST_WR_64;
+                        end
+                    end else begin
+                        NS = BURST_WR;
+                        WREADY      = 1'b1; // pop another data from the WDATA fifo
+                        decr_AWLEN  = 1'b1; // decrement the remaining BURST beat
+                        incr_AWADDR = 1'b1; // increment address
+                    end
+                end else begin
+                    NS = BURST_WR_64;
+                end
+            end
+            // Burst write with W_word_sel = 0, or the write response once AWLEN_Q has reached 0.
+            BURST_WR: begin
+                address = AWADDR_Q; // registered burst address
+                if (AWLEN_Q == 0) begin // last : _BURST_COMPLETED_
+                    BVALID = 1'b1;
+                    if (BREADY) begin
+                      NS      = IDLE;
+                      AWREADY = 1'b1;
+                    end else
+                      NS = BURST_WR;
+                end else begin //: _BURST_NOT_COMPLETED_
+                    W_word_sel = 1'b0; // selector is 0 in this state (BURST_WR_64 drives 1)
+                    write_req  = WVALID & (&WSTRB[3:0]); // NOTE(review): AND-reduction here vs OR-reduction of WSTRB[7:4] in BURST_WR_64 - confirm intended
+                    if (WVALID) begin
+                        if (PREADY == 1'b1) begin
+                            NS          = BURST_WR_64;
+                            incr_AWADDR = 1'b1;
+                            decr_AWLEN  = 1'b1; //decrement the remaining BURST beat
+                          end else
+                            NS = BURST_WR;
+                    end else begin
+                        NS = BURST_WR_64; // NOTE(review): leaves BURST_WR while WVALID is low, unlike the other wait branches - confirm intended
+                    end
+              end
+            end
+            // Burst read: captures PRDATA via sample_RDATA_1 on PREADY, or ends the burst when ARLEN_Q == 0.
+            BURST_RD_64: begin
+               read_req = 1'b1;
+               address  = ARADDR_Q;
+
+                if (ARLEN_Q == 0) begin // burst completed
+                    NS      = IDLE;
+                    ARREADY = 1'b1;
+                end else begin
+                    if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                        decr_ARLEN     = 1'b1;
+                        sample_RDATA_1 = 1'b1;
+                        NS = BURST_RD;
+
+                        if (ARADDR_Q[2:0] == 3'h4)
+                          incr_ARADDR = 1'b1;
+                        else
+                          incr_ARADDR = 1'b0;
+                      end
+                    else  begin
+                        NS = BURST_RD_64;
+                    end
+                 end
+            end
+            // Burst read data beat: drives RVALID/RDATA/RLAST and, on RREADY, starts the next access or finishes.
+            BURST_RD: begin
+                RVALID   = 1'b1;
+                RDATA[0] = RDATA_Q_0;
+                RDATA[1] = RDATA_Q_1;
+                RLAST    = (ARLEN_Q == 0) ? 1'b1 : 1'b0;
+                address  = ARADDR_Q;
+
+                if (RREADY) begin // ready to send back the rdata
+                    if (ARLEN_Q == 0) begin // burst completed
+                        NS      = IDLE;
+                        ARREADY = 1'b1;
+                    end else begin //: _READ_BURST_NOT_COMPLETED_
+                        read_req = 1'b1;
+                        if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                            sample_RDATA_0 = 1'b1;
+                            NS             = BURST_RD_64;
+                            incr_ARADDR    = 1'b1;
+                            decr_ARLEN     = 1'b1;
+                        end else begin
+                            NS = BURST_RD_1;
+                        end
+                    end
+                end else begin // NOT ready to send back the rdata
+                    NS = BURST_RD;
+                end
+            end
+            // Entered from BURST_RD when the beat was accepted but PREADY was low: hold read_req until PREADY.
+            BURST_RD_1: begin
+                read_req = 1'b1;
+                address  = ARADDR_Q;
+
+                if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                    sample_RDATA_0 = 1'b1;
+                    NS             = BURST_RD_64;
+                    incr_ARADDR    = 1'b1;
+                    decr_ARLEN     = 1'b1;
+                end else begin
+                    NS = BURST_RD_1;
+                end
+            end
+            // Single read response: both captured words are presented with RLAST set until RREADY.
+            SINGLE_RD: begin
+                RVALID   = 1'b1;
+                RDATA[0] = RDATA_Q_0;
+                RDATA[1] = RDATA_Q_1;
+                RLAST    = 1;
+                address  = '0;
+
+                if (RREADY) begin // ready to send back the rdata
+                    NS      = IDLE;
+                    ARREADY = 1'b1;
+                end else begin // NOT ready to send back the rdata
+                    NS = SINGLE_RD;
+                end
+            end
+            // Second access of a single 64-bit read at ARADDR + 4: fills the word the first access did not capture.
+            SINGLE_RD_64: begin
+                read_req       = 1'b1;
+                address        = ARADDR + 4;
+                if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                    NS = SINGLE_RD;
+                    if(ARADDR[2:0] == 3'h4)
+                        sample_RDATA_0 = 1'b1;
+                    else
+                        sample_RDATA_1 = 1'b1;
+                end else begin
+                  NS = SINGLE_RD_64;
+                end
+            end
+            // Any other state encoding falls back to IDLE.
+            default: begin
+                NS      = IDLE;
+                address = '0;
+            end
+        endcase
+    end
+
+    // -----------
+    // Registers
+    // -----------
+    always_ff @(posedge ACLK, negedge ARESETn) begin // FSM state plus burst counters, addresses and read data
+        if (ARESETn == 1'b0) begin
+            CS        <= IDLE;
+            // Read-burst counter and write address pointer
+            ARLEN_Q   <= '0;
+            AWADDR_Q  <= '0;
+            // Write-burst counter, captured read words and read address pointer
+            AWLEN_Q   <= '0;
+            RDATA_Q_0 <= '0;
+            RDATA_Q_1 <= '0;
+            ARADDR_Q  <= '0;
+        end else  begin
+            CS <= NS;
+            // Read counter: sample_AR has priority over decr_ARLEN.
+            if (sample_AR) begin
+                ARLEN_Q <= {ARLEN,1'b0} + 2; // 2*ARLEN + 2 - presumably two 32-bit APB accesses per 64-bit beat; confirm
+            end else if (decr_ARLEN) begin
+                ARLEN_Q <= ARLEN_Q - 1;
+            end
+            // PRDATA is captured into word 0 and/or word 1 as selected by the FSM strobes.
+            if (sample_RDATA_0)
+                RDATA_Q_0 <= PRDATA;
+
+            if (sample_RDATA_1)
+                RDATA_Q_1 <= PRDATA;
+            // Write counter: load 2*AWLEN + 1, or 2*AWLEN when a decrement coincides with the load.
+            case ({sample_AW, decr_AWLEN})
+                2'b00: AWLEN_Q <= AWLEN_Q;
+                2'b01: AWLEN_Q <= AWLEN_Q - 1;
+                2'b10: AWLEN_Q <= {AWLEN, 1'b0} + 1;
+                2'b11: AWLEN_Q <= {AWLEN, 1'b0};
+            endcase
+            // Write address: loaded with AWADDR's low three bits cleared, then advanced in steps of 4.
+            case ({sample_AW, incr_AWADDR})
+                2'b00: AWADDR_Q <= AWADDR_Q;
+                2'b01: AWADDR_Q <= AWADDR_Q + 4;
+                2'b10: AWADDR_Q <= {AWADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000};
+                2'b11: AWADDR_Q <= {AWADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000} + 4;
+            endcase
+            // Read address: same load/advance scheme as the write address above.
+            case({sample_AR, incr_ARADDR})
+                2'b00: ARADDR_Q <= ARADDR_Q;
+                2'b01: ARADDR_Q <= ARADDR_Q + 4;
+                2'b10: ARADDR_Q <= {ARADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000};
+                2'b11: ARADDR_Q <= {ARADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000} + 4;
+            endcase
+        end
+    end
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv
new file mode 100644
index 0000000..e133693
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv
@@ -0,0 +1,74 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module axi_ar_buffer #(
+    parameter int ID_WIDTH     = -1,
+    parameter int ADDR_WIDTH   = -1,
+    parameter int USER_WIDTH   = -1,
+    parameter int BUFFER_DEPTH = -1
+)(
+    // Clock, reset and test enable; all three are passed straight to the slice.
+    input logic                   clk_i,
+    input logic                   rst_ni,
+    input logic                   test_en_i,
+    // AR channel on the slave side: requests enter the buffer here.
+    input  logic                  slave_valid_i,
+    input  logic [ADDR_WIDTH-1:0] slave_addr_i,
+    input  logic [2:0]            slave_prot_i,
+    input  logic [3:0]            slave_region_i,
+    input  logic [7:0]            slave_len_i,
+    input  logic [2:0]            slave_size_i,
+    input  logic [1:0]            slave_burst_i,
+    input  logic                  slave_lock_i,
+    input  logic [3:0]            slave_cache_i,
+    input  logic [3:0]            slave_qos_i,
+    input  logic [ID_WIDTH-1:0]   slave_id_i,
+    input  logic [USER_WIDTH-1:0] slave_user_i,
+    output logic                  slave_ready_o,
+    // AR channel on the master side: buffered requests leave here.
+    output logic                  master_valid_o,
+    output logic [ADDR_WIDTH-1:0] master_addr_o,
+    output logic [2:0]            master_prot_o,
+    output logic [3:0]            master_region_o,
+    output logic [7:0]            master_len_o,
+    output logic [2:0]            master_size_o,
+    output logic [1:0]            master_burst_o,
+    output logic                  master_lock_o,
+    output logic [3:0]            master_cache_o,
+    output logic [3:0]            master_qos_o,
+    output logic [ID_WIDTH-1:0]   master_id_o,
+    output logic [USER_WIDTH-1:0] master_user_o,
+    input  logic                  master_ready_i
+);
+    // 29 fixed bits: cache(4) + prot(3) + lock(1) + burst(2) + size(3) + len(8) + qos(4) + region(4)
+    localparam int PAYLOAD_WIDTH = 29 + ADDR_WIDTH + USER_WIDTH + ID_WIDTH;
+    logic [PAYLOAD_WIDTH-1:0] ar_packed_in;
+    logic [PAYLOAD_WIDTH-1:0] ar_packed_out;
+
+    // Both concatenations list the fields in the same order, so each field leaves on its matching output.
+    assign ar_packed_in = {slave_cache_i, slave_prot_i, slave_lock_i, slave_burst_i, slave_size_i, slave_len_i, slave_qos_i, slave_region_i, slave_addr_i, slave_user_i, slave_id_i};
+    assign {master_cache_o, master_prot_o, master_lock_o, master_burst_o, master_size_o, master_len_o, master_qos_o, master_region_o, master_addr_o, master_user_o, master_id_o} = ar_packed_out;
+    axi_single_slice #(
+        .DATA_WIDTH   ( PAYLOAD_WIDTH ),
+        .BUFFER_DEPTH ( BUFFER_DEPTH  )
+    ) i_axi_single_slice (
+        .clk_i      ( clk_i          ),
+        .rst_ni     ( rst_ni         ),
+        .testmode_i ( test_en_i      ),
+        .valid_i    ( slave_valid_i  ),
+        .data_i     ( ar_packed_in   ),
+        .ready_o    ( slave_ready_o  ),
+        .valid_o    ( master_valid_o ),
+        .data_o     ( ar_packed_out  ),
+        .ready_i    ( master_ready_i )
+    );
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv
new file mode 100644
index 0000000..15b9345
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv
@@ -0,0 +1,74 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module axi_aw_buffer #(
+    parameter int ID_WIDTH     = -1,
+    parameter int ADDR_WIDTH   = -1,
+    parameter int USER_WIDTH   = -1,
+    parameter int BUFFER_DEPTH = -1
+)(
+    // Clock, reset and test enable; all three are passed straight to the slice.
+    input logic                   clk_i,
+    input logic                   rst_ni,
+    input logic                   test_en_i,
+    // AW channel on the slave side: write requests enter the buffer here.
+    input  logic                  slave_valid_i,
+    input  logic [ADDR_WIDTH-1:0] slave_addr_i,
+    input  logic [2:0]            slave_prot_i,
+    input  logic [3:0]            slave_region_i,
+    input  logic [7:0]            slave_len_i,
+    input  logic [2:0]            slave_size_i,
+    input  logic [1:0]            slave_burst_i,
+    input  logic                  slave_lock_i,
+    input  logic [3:0]            slave_cache_i,
+    input  logic [3:0]            slave_qos_i,
+    input  logic [ID_WIDTH-1:0]   slave_id_i,
+    input  logic [USER_WIDTH-1:0] slave_user_i,
+    output logic                  slave_ready_o,
+    // AW channel on the master side: buffered write requests leave here.
+    output logic                  master_valid_o,
+    output logic [ADDR_WIDTH-1:0] master_addr_o,
+    output logic [2:0]            master_prot_o,
+    output logic [3:0]            master_region_o,
+    output logic [7:0]            master_len_o,
+    output logic [2:0]            master_size_o,
+    output logic [1:0]            master_burst_o,
+    output logic                  master_lock_o,
+    output logic [3:0]            master_cache_o,
+    output logic [3:0]            master_qos_o,
+    output logic [ID_WIDTH-1:0]   master_id_o,
+    output logic [USER_WIDTH-1:0] master_user_o,
+    input  logic                  master_ready_i
+);
+    // 29 fixed bits: cache(4) + prot(3) + lock(1) + burst(2) + size(3) + len(8) + qos(4) + region(4)
+    localparam int PAYLOAD_WIDTH = 29 + ADDR_WIDTH + USER_WIDTH + ID_WIDTH;
+    logic [PAYLOAD_WIDTH-1:0] aw_packed_in;
+    logic [PAYLOAD_WIDTH-1:0] aw_packed_out;
+
+    // Both concatenations list the fields in the same order, so each field leaves on its matching output.
+    assign aw_packed_in = {slave_cache_i, slave_prot_i, slave_lock_i, slave_burst_i, slave_size_i, slave_len_i, slave_qos_i, slave_region_i, slave_addr_i, slave_user_i, slave_id_i};
+    assign {master_cache_o, master_prot_o, master_lock_o, master_burst_o, master_size_o, master_len_o, master_qos_o, master_region_o, master_addr_o, master_user_o, master_id_o} = aw_packed_out;
+    axi_single_slice #(
+        .DATA_WIDTH   ( PAYLOAD_WIDTH ),
+        .BUFFER_DEPTH ( BUFFER_DEPTH  )
+    ) i_axi_single_slice (
+        .clk_i      ( clk_i          ),
+        .rst_ni     ( rst_ni         ),
+        .testmode_i ( test_en_i      ),
+        .valid_i    ( slave_valid_i  ),
+        .data_i     ( aw_packed_in   ),
+        .ready_o    ( slave_ready_o  ),
+        .valid_o    ( master_valid_o ),
+        .data_o     ( aw_packed_out  ),
+        .ready_i    ( master_ready_i )
+    );
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv
new file mode 100644
index 0000000..d2576bb
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv
@@ -0,0 +1,54 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module axi_b_buffer #(
+    parameter int ID_WIDTH     = -1,
+    parameter int USER_WIDTH   = -1,
+    parameter int BUFFER_DEPTH = -1
+)(
+   input logic                   clk_i,
+   input logic                   rst_ni,
+   input logic                   test_en_i,
+   // B channel on the slave side: write responses enter the buffer here.
+   input logic                   slave_valid_i,
+   input logic  [1:0]            slave_resp_i,
+   input logic  [ID_WIDTH-1:0]   slave_id_i,
+   input logic  [USER_WIDTH-1:0] slave_user_i,
+   output logic                  slave_ready_o,
+   // B channel on the master side: buffered write responses leave here.
+   output logic                  master_valid_o,
+   output logic [1:0]            master_resp_o,
+   output logic [ID_WIDTH-1:0]   master_id_o,
+   output logic [USER_WIDTH-1:0] master_user_o,
+   input  logic                  master_ready_i
+);
+    // Payload is the 2-bit response plus the user and id fields.
+    localparam int PAYLOAD_WIDTH = 2 + USER_WIDTH + ID_WIDTH;
+    logic [PAYLOAD_WIDTH-1:0] b_packed_in, b_packed_out;
+    // Both concatenations list the fields in the same order, so each field leaves on its matching output.
+    assign b_packed_in = {slave_id_i, slave_user_i, slave_resp_i};
+    assign {master_id_o, master_user_o, master_resp_o} = b_packed_out;
+    axi_single_slice #(
+        .DATA_WIDTH   ( PAYLOAD_WIDTH ),
+        .BUFFER_DEPTH ( BUFFER_DEPTH  )
+    ) i_axi_single_slice (
+        .clk_i      ( clk_i          ),
+        .rst_ni     ( rst_ni         ),
+        .testmode_i ( test_en_i      ),
+        .valid_i    ( slave_valid_i  ),
+        .data_i     ( b_packed_in    ),
+        .ready_o    ( slave_ready_o  ),
+        .valid_o    ( master_valid_o ),
+        .data_o     ( b_packed_out   ),
+        .ready_i    ( master_ready_i )
+    );
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv
new file mode 100644
index 0000000..3c92b25
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv
@@ -0,0 +1,60 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module axi_r_buffer #(
+   parameter ID_WIDTH      = 4,
+   parameter DATA_WIDTH    = 64,
+   parameter USER_WIDTH    = 6,
+   parameter BUFFER_DEPTH  = 8,
+   parameter STRB_WIDTH    = DATA_WIDTH/8   // DO NOT OVERRIDE
+)(
+   input logic                   clk_i,
+   input logic                   rst_ni,
+   input logic                   test_en_i,
+   // R channel on the slave side: read data beats enter the buffer here.
+   input logic                   slave_valid_i,
+   input logic  [DATA_WIDTH-1:0] slave_data_i,
+   input logic  [1:0]            slave_resp_i,
+   input logic  [USER_WIDTH-1:0] slave_user_i,
+   input logic  [ID_WIDTH-1:0]   slave_id_i,
+   input logic                   slave_last_i,
+   output logic                  slave_ready_o,
+   // R channel on the master side: buffered read data beats leave here.
+   output logic                  master_valid_o,
+   output logic [DATA_WIDTH-1:0] master_data_o,
+   output logic [1:0]            master_resp_o,
+   output logic [USER_WIDTH-1:0] master_user_o,
+   output logic [ID_WIDTH-1:0]   master_id_o,
+   output logic                  master_last_o,
+   input  logic                  master_ready_i
+);
+   // Payload is the 2-bit response and 1-bit last flag (3 bits) plus the data, user and id fields.
+   localparam PAYLOAD_WIDTH = 3 + DATA_WIDTH + USER_WIDTH + ID_WIDTH;
+   logic [PAYLOAD_WIDTH-1:0] r_packed_in, r_packed_out;
+   // Both concatenations list the fields in the same order, so each field leaves on its matching output.
+   assign r_packed_in = {slave_id_i, slave_user_i, slave_data_i, slave_resp_i, slave_last_i};
+   assign {master_id_o, master_user_o, master_data_o, master_resp_o, master_last_o} = r_packed_out;
+   axi_single_slice #(
+       .DATA_WIDTH   ( PAYLOAD_WIDTH ),
+       .BUFFER_DEPTH ( BUFFER_DEPTH  )
+   ) i_axi_single_slice (
+       .clk_i      ( clk_i          ),
+       .rst_ni     ( rst_ni         ),
+       .testmode_i ( test_en_i      ),
+       .valid_i    ( slave_valid_i  ),
+       .data_i     ( r_packed_in    ),
+       .ready_o    ( slave_ready_o  ),
+       .valid_o    ( master_valid_o ),
+       .data_o     ( r_packed_out   ),
+       .ready_i    ( master_ready_i )
+   );
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv
new file mode 100644
index 0000000..fe7fbbc
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv
@@ -0,0 +1,51 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Wrapper for a generic fifo
+module axi_single_slice #(
+    parameter int BUFFER_DEPTH = -1,
+    parameter int DATA_WIDTH   = -1
+) (
+    input  logic                  clk_i,    // Clock
+    input  logic                  rst_ni,  // Asynchronous reset active low
+    input  logic                  testmode_i,
+    input  logic                  valid_i,
+    output logic                  ready_o,
+    input  logic [DATA_WIDTH-1:0] data_i,
+    // Output side of the slice
+    input  logic                  ready_i,
+    output logic                  valid_o,
+    output logic [DATA_WIDTH-1:0] data_o
+);
+    // Handshake signals come from the FIFO flags: accept while not full, offer while not empty.
+    logic fifo_full, fifo_empty;
+    logic do_push, do_pop;
+    assign ready_o = ~fifo_full;
+    assign valid_o = ~fifo_empty;
+    assign do_push = valid_i & ready_o;
+    assign do_pop  = ready_i & valid_o;
+    fifo #(
+        .DEPTH        ( BUFFER_DEPTH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .FALL_THROUGH ( 1'b0         )
+    ) i_fifo (
+        .clk_i       ( clk_i      ),
+        .rst_ni      ( rst_ni     ),
+        .testmode_i  ( testmode_i ),
+        .flush_i     ( 1'b0       ),
+        .threshold_o (            ), // NC
+        .full_o      ( fifo_full  ),
+        .empty_o     ( fifo_empty ),
+        .push_i      ( do_push    ),
+        .data_i      ( data_i     ),
+        .pop_i       ( do_pop     ),
+        .data_o      ( data_o     )
+    );
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv
new file mode 100644
index 0000000..91072d6
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv
@@ -0,0 +1,311 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module axi_slice  // Places one buffer instance on each of the five AXI4 channels between the slave and master ports
+#(
+    parameter AXI_ADDR_WIDTH = 32,  // width of the aw/ar address ports
+    parameter AXI_DATA_WIDTH = 64,  // width of the w/r data ports
+    parameter AXI_USER_WIDTH = 6,  // width of the user ports on every channel
+    parameter AXI_ID_WIDTH   = 3,  // width of the id ports
+    parameter SLICE_DEPTH    = 2,  // passed as BUFFER_DEPTH to every channel buffer
+    parameter AXI_STRB_WIDTH = AXI_DATA_WIDTH/8  // width of the w strobe ports; not forwarded to axi_w_buffer
+)
+(
+    input  logic                      clk_i,  // forwarded to every channel buffer
+    input  logic                      rst_ni,  // forwarded to every channel buffer
+    input  logic                      test_en_i,  // forwarded to every channel buffer
+
+    // AXI4 SLAVE port: AW/AR/W payloads are inputs, R/B payloads are outputs
+    //***************************************
+    // WRITE ADDRESS CHANNEL
+    input  logic                      axi_slave_aw_valid_i,
+    input  logic [AXI_ADDR_WIDTH-1:0] axi_slave_aw_addr_i,
+    input  logic [2:0]                axi_slave_aw_prot_i,
+    input  logic [3:0]                axi_slave_aw_region_i,
+    input  logic [7:0]                axi_slave_aw_len_i,
+    input  logic [2:0]                axi_slave_aw_size_i,
+    input  logic [1:0]                axi_slave_aw_burst_i,
+    input  logic                      axi_slave_aw_lock_i,
+    input  logic [3:0]                axi_slave_aw_cache_i,
+    input  logic [3:0]                axi_slave_aw_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]   axi_slave_aw_id_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_slave_aw_user_i,
+    output logic                      axi_slave_aw_ready_o,
+
+    // READ ADDRESS CHANNEL
+    input  logic                      axi_slave_ar_valid_i,
+    input  logic [AXI_ADDR_WIDTH-1:0] axi_slave_ar_addr_i,
+    input  logic [2:0]                axi_slave_ar_prot_i,
+    input  logic [3:0]                axi_slave_ar_region_i,
+    input  logic [7:0]                axi_slave_ar_len_i,
+    input  logic [2:0]                axi_slave_ar_size_i,
+    input  logic [1:0]                axi_slave_ar_burst_i,
+    input  logic                      axi_slave_ar_lock_i,
+    input  logic [3:0]                axi_slave_ar_cache_i,
+    input  logic [3:0]                axi_slave_ar_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]   axi_slave_ar_id_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_slave_ar_user_i,
+    output logic                      axi_slave_ar_ready_o,
+
+    // WRITE DATA CHANNEL
+    input  logic                      axi_slave_w_valid_i,
+    input  logic [AXI_DATA_WIDTH-1:0] axi_slave_w_data_i,
+    input  logic [AXI_STRB_WIDTH-1:0] axi_slave_w_strb_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_slave_w_user_i,
+    input  logic                      axi_slave_w_last_i,
+    output logic                      axi_slave_w_ready_o,
+
+    // READ DATA CHANNEL
+    output logic                      axi_slave_r_valid_o,
+    output logic [AXI_DATA_WIDTH-1:0] axi_slave_r_data_o,
+    output logic [1:0]                axi_slave_r_resp_o,
+    output logic                      axi_slave_r_last_o,
+    output logic [AXI_ID_WIDTH-1:0]   axi_slave_r_id_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_slave_r_user_o,
+    input  logic                      axi_slave_r_ready_i,
+
+    // WRITE RESPONSE CHANNEL
+    output logic                      axi_slave_b_valid_o,
+    output logic [1:0]                axi_slave_b_resp_o,
+    output logic [AXI_ID_WIDTH-1:0]   axi_slave_b_id_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_slave_b_user_o,
+    input  logic                      axi_slave_b_ready_i,
+
+    // AXI4 MASTER port: AW/AR/W payloads are outputs, R/B payloads are inputs
+    //***************************************
+    // WRITE ADDRESS CHANNEL
+    output logic                      axi_master_aw_valid_o,
+    output logic [AXI_ADDR_WIDTH-1:0] axi_master_aw_addr_o,
+    output logic [2:0]                axi_master_aw_prot_o,
+    output logic [3:0]                axi_master_aw_region_o,
+    output logic [7:0]                axi_master_aw_len_o,
+    output logic [2:0]                axi_master_aw_size_o,
+    output logic [1:0]                axi_master_aw_burst_o,
+    output logic                      axi_master_aw_lock_o,
+    output logic [3:0]                axi_master_aw_cache_o,
+    output logic [3:0]                axi_master_aw_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]   axi_master_aw_id_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_master_aw_user_o,
+    input  logic                      axi_master_aw_ready_i,
+
+    // READ ADDRESS CHANNEL
+    output logic                      axi_master_ar_valid_o,
+    output logic [AXI_ADDR_WIDTH-1:0] axi_master_ar_addr_o,
+    output logic [2:0]                axi_master_ar_prot_o,
+    output logic [3:0]                axi_master_ar_region_o,
+    output logic [7:0]                axi_master_ar_len_o,
+    output logic [2:0]                axi_master_ar_size_o,
+    output logic [1:0]                axi_master_ar_burst_o,
+    output logic                      axi_master_ar_lock_o,
+    output logic [3:0]                axi_master_ar_cache_o,
+    output logic [3:0]                axi_master_ar_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]   axi_master_ar_id_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_master_ar_user_o,
+    input  logic                      axi_master_ar_ready_i,
+
+    // WRITE DATA CHANNEL
+    output logic                      axi_master_w_valid_o,
+    output logic [AXI_DATA_WIDTH-1:0] axi_master_w_data_o,
+    output logic [AXI_STRB_WIDTH-1:0] axi_master_w_strb_o,
+    output logic [AXI_USER_WIDTH-1:0] axi_master_w_user_o,
+    output logic                      axi_master_w_last_o,
+    input  logic                      axi_master_w_ready_i,
+
+    // READ DATA CHANNEL
+    input  logic                      axi_master_r_valid_i,
+    input  logic [AXI_DATA_WIDTH-1:0] axi_master_r_data_i,
+    input  logic [1:0]                axi_master_r_resp_i,
+    input  logic                      axi_master_r_last_i,
+    input  logic [AXI_ID_WIDTH-1:0]   axi_master_r_id_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_master_r_user_i,
+    output logic                      axi_master_r_ready_o,
+
+    // WRITE RESPONSE CHANNEL
+    input  logic                      axi_master_b_valid_i,
+    input  logic [1:0]                axi_master_b_resp_i,
+    input  logic [AXI_ID_WIDTH-1:0]   axi_master_b_id_i,
+    input  logic [AXI_USER_WIDTH-1:0] axi_master_b_user_i,
+    output logic                      axi_master_b_ready_o
+);
+
+   // AXI WRITE ADDRESS CHANNEL BUFFER (slave port -> master port)
+   axi_aw_buffer
+   #(
+       .ID_WIDTH     (AXI_ID_WIDTH),
+       .ADDR_WIDTH   (AXI_ADDR_WIDTH),
+       .USER_WIDTH   (AXI_USER_WIDTH),
+       .BUFFER_DEPTH (SLICE_DEPTH)
+   )
+   aw_buffer_i
+   (
+      .clk_i            ( clk_i                  ),
+      .rst_ni           ( rst_ni                 ),
+      .test_en_i        ( test_en_i              ),
+
+      .slave_valid_i    ( axi_slave_aw_valid_i   ),
+      .slave_addr_i     ( axi_slave_aw_addr_i    ),
+      .slave_prot_i     ( axi_slave_aw_prot_i    ),
+      .slave_region_i   ( axi_slave_aw_region_i  ),
+      .slave_len_i      ( axi_slave_aw_len_i     ),
+      .slave_size_i     ( axi_slave_aw_size_i    ),
+      .slave_burst_i    ( axi_slave_aw_burst_i   ),
+      .slave_lock_i     ( axi_slave_aw_lock_i    ),
+      .slave_cache_i    ( axi_slave_aw_cache_i   ),
+      .slave_qos_i      ( axi_slave_aw_qos_i     ),
+      .slave_id_i       ( axi_slave_aw_id_i      ),
+      .slave_user_i     ( axi_slave_aw_user_i    ),
+      .slave_ready_o    ( axi_slave_aw_ready_o   ),
+
+      .master_valid_o   ( axi_master_aw_valid_o  ),
+      .master_addr_o    ( axi_master_aw_addr_o   ),
+      .master_prot_o    ( axi_master_aw_prot_o   ),
+      .master_region_o  ( axi_master_aw_region_o ),
+      .master_len_o     ( axi_master_aw_len_o    ),
+      .master_size_o    ( axi_master_aw_size_o   ),
+      .master_burst_o   ( axi_master_aw_burst_o  ),
+      .master_lock_o    ( axi_master_aw_lock_o   ),
+      .master_cache_o   ( axi_master_aw_cache_o  ),
+      .master_qos_o     ( axi_master_aw_qos_o    ),
+      .master_id_o      ( axi_master_aw_id_o     ),
+      .master_user_o    ( axi_master_aw_user_o   ),
+      .master_ready_i   ( axi_master_aw_ready_i  )
+   );
+
+   // AXI READ ADDRESS CHANNEL BUFFER (slave port -> master port)
+   axi_ar_buffer
+   #(
+       .ID_WIDTH     (AXI_ID_WIDTH),
+       .ADDR_WIDTH   (AXI_ADDR_WIDTH),
+       .USER_WIDTH   (AXI_USER_WIDTH),
+       .BUFFER_DEPTH (SLICE_DEPTH)
+   )
+   ar_buffer_i
+   (
+      .clk_i           ( clk_i                   ),
+      .rst_ni          ( rst_ni                  ),
+      .test_en_i       ( test_en_i               ),
+
+      .slave_valid_i   ( axi_slave_ar_valid_i    ),
+      .slave_addr_i    ( axi_slave_ar_addr_i     ),
+      .slave_prot_i    ( axi_slave_ar_prot_i     ),
+      .slave_region_i  ( axi_slave_ar_region_i   ),
+      .slave_len_i     ( axi_slave_ar_len_i      ),
+      .slave_size_i    ( axi_slave_ar_size_i     ),
+      .slave_burst_i   ( axi_slave_ar_burst_i    ),
+      .slave_lock_i    ( axi_slave_ar_lock_i     ),
+      .slave_cache_i   ( axi_slave_ar_cache_i    ),
+      .slave_qos_i     ( axi_slave_ar_qos_i      ),
+      .slave_id_i      ( axi_slave_ar_id_i       ),
+      .slave_user_i    ( axi_slave_ar_user_i     ),
+      .slave_ready_o   ( axi_slave_ar_ready_o    ),
+
+      .master_valid_o  ( axi_master_ar_valid_o   ),
+      .master_addr_o   ( axi_master_ar_addr_o    ),
+      .master_prot_o   ( axi_master_ar_prot_o    ),
+      .master_region_o ( axi_master_ar_region_o  ),
+      .master_len_o    ( axi_master_ar_len_o     ),
+      .master_size_o   ( axi_master_ar_size_o    ),
+      .master_burst_o  ( axi_master_ar_burst_o   ),
+      .master_lock_o   ( axi_master_ar_lock_o    ),
+      .master_cache_o  ( axi_master_ar_cache_o   ),
+      .master_qos_o    ( axi_master_ar_qos_o     ),
+      .master_id_o     ( axi_master_ar_id_o      ),
+      .master_user_o   ( axi_master_ar_user_o    ),
+      .master_ready_i  ( axi_master_ar_ready_i   )
+   );
+
+   // WRITE DATA CHANNEL BUFFER (slave port -> master port); STRB_WIDTH is not overridden here
+   axi_w_buffer
+   #(
+       .DATA_WIDTH   (AXI_DATA_WIDTH),
+       .USER_WIDTH   (AXI_USER_WIDTH),
+       .BUFFER_DEPTH (SLICE_DEPTH)
+   )
+   w_buffer_i
+   (
+      .clk_i          ( clk_i                 ),
+      .rst_ni         ( rst_ni                ),
+      .test_en_i      ( test_en_i             ),
+
+      .slave_valid_i  ( axi_slave_w_valid_i   ),
+      .slave_data_i   ( axi_slave_w_data_i    ),
+      .slave_strb_i   ( axi_slave_w_strb_i    ),
+      .slave_user_i   ( axi_slave_w_user_i    ),
+      .slave_last_i   ( axi_slave_w_last_i    ),
+      .slave_ready_o  ( axi_slave_w_ready_o   ),
+
+      .master_valid_o ( axi_master_w_valid_o  ),
+      .master_data_o  ( axi_master_w_data_o   ),
+      .master_strb_o  ( axi_master_w_strb_o   ),
+      .master_user_o  ( axi_master_w_user_o   ),
+      .master_last_o  ( axi_master_w_last_o   ),
+      .master_ready_i ( axi_master_w_ready_i  )
+   );
+
+   // READ DATA CHANNEL BUFFER (master port -> slave port: the buffer's slave_* side is fed from axi_master_r_*)
+   axi_r_buffer
+   #(
+       .ID_WIDTH     (AXI_ID_WIDTH),
+       .DATA_WIDTH   (AXI_DATA_WIDTH),
+       .USER_WIDTH   (AXI_USER_WIDTH),
+       .BUFFER_DEPTH (SLICE_DEPTH)
+   )
+   r_buffer_i
+   (
+      .clk_i           ( clk_i                 ),
+      .rst_ni          ( rst_ni                ),
+      .test_en_i       ( test_en_i             ),
+
+      .slave_valid_i   ( axi_master_r_valid_i  ),
+      .slave_data_i    ( axi_master_r_data_i   ),
+      .slave_resp_i    ( axi_master_r_resp_i   ),
+      .slave_user_i    ( axi_master_r_user_i   ),
+      .slave_id_i      ( axi_master_r_id_i     ),
+      .slave_last_i    ( axi_master_r_last_i   ),
+      .slave_ready_o   ( axi_master_r_ready_o  ),
+
+      .master_valid_o  ( axi_slave_r_valid_o   ),
+      .master_data_o   ( axi_slave_r_data_o    ),
+      .master_resp_o   ( axi_slave_r_resp_o    ),
+      .master_user_o   ( axi_slave_r_user_o    ),
+      .master_id_o     ( axi_slave_r_id_o      ),
+      .master_last_o   ( axi_slave_r_last_o    ),
+      .master_ready_i  ( axi_slave_r_ready_i   )
+   );
+
+   // WRITE RESPONSE CHANNEL BUFFER (master port -> slave port: the buffer's slave_* side is fed from axi_master_b_*)
+   axi_b_buffer
+   #(
+       .ID_WIDTH     (AXI_ID_WIDTH),
+       .USER_WIDTH   (AXI_USER_WIDTH),
+       .BUFFER_DEPTH (SLICE_DEPTH)
+   )
+   b_buffer_i
+   (
+      .clk_i           ( clk_i                 ),
+      .rst_ni          ( rst_ni                ),
+      .test_en_i       ( test_en_i             ),
+
+      .slave_valid_i   ( axi_master_b_valid_i  ),
+      .slave_resp_i    ( axi_master_b_resp_i   ),
+      .slave_id_i      ( axi_master_b_id_i     ),
+      .slave_user_i    ( axi_master_b_user_i   ),
+      .slave_ready_o   ( axi_master_b_ready_o  ),
+
+      .master_valid_o  ( axi_slave_b_valid_o   ),
+      .master_resp_o   ( axi_slave_b_resp_o    ),
+      .master_id_o     ( axi_slave_b_id_o      ),
+      .master_user_o   ( axi_slave_b_user_o    ),
+      .master_ready_i  ( axi_slave_b_ready_i   )
+   );
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv
new file mode 100644
index 0000000..2e643a4
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv
@@ -0,0 +1,115 @@
+module axi_slice_wrap #(  // Connects the flat port list of axi_slice to two AXI_BUS interface ports
+    parameter AXI_ADDR_WIDTH    = 32,  // all six parameters are forwarded unchanged to axi_slice
+    parameter AXI_DATA_WIDTH    = 64,
+    parameter AXI_USER_WIDTH    = 6,
+    parameter AXI_ID_WIDTH      = 3,
+    parameter SLICE_DEPTH       = 2,
+    parameter AXI_STRB_WIDTH    = AXI_DATA_WIDTH/8
+)(
+    input logic    clk_i,    // Clock
+    input logic    rst_ni,  // Asynchronous reset active low
+    input logic    test_en_i,  // reaches axi_slice through the .* connection below
+    AXI_BUS.Slave  axi_slave,  // wired to the axi_slave_* ports of axi_slice
+    AXI_BUS.Master axi_master  // wired to the axi_master_* ports of axi_slice
+);
+
+    axi_slice #(
+        .AXI_ADDR_WIDTH ( AXI_ADDR_WIDTH               ),
+        .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+        .AXI_USER_WIDTH ( AXI_USER_WIDTH               ),
+        .AXI_ID_WIDTH   ( AXI_ID_WIDTH                 ),
+        .SLICE_DEPTH    ( SLICE_DEPTH                  ),
+        .AXI_STRB_WIDTH ( AXI_STRB_WIDTH               )
+    ) i_axi_slice (
+        .axi_slave_aw_valid_i   ( axi_slave.aw_valid   ),  // slave side, AW channel
+        .axi_slave_aw_addr_i    ( axi_slave.aw_addr    ),
+        .axi_slave_aw_prot_i    ( axi_slave.aw_prot    ),
+        .axi_slave_aw_region_i  ( axi_slave.aw_region  ),
+        .axi_slave_aw_len_i     ( axi_slave.aw_len     ),
+        .axi_slave_aw_size_i    ( axi_slave.aw_size    ),
+        .axi_slave_aw_burst_i   ( axi_slave.aw_burst   ),
+        .axi_slave_aw_lock_i    ( axi_slave.aw_lock    ),
+        .axi_slave_aw_cache_i   ( axi_slave.aw_cache   ),
+        .axi_slave_aw_qos_i     ( axi_slave.aw_qos     ),
+        .axi_slave_aw_id_i      ( axi_slave.aw_id      ),
+        .axi_slave_aw_user_i    ( axi_slave.aw_user    ),
+        .axi_slave_aw_ready_o   ( axi_slave.aw_ready   ),
+        .axi_slave_ar_valid_i   ( axi_slave.ar_valid   ),  // slave side, AR channel
+        .axi_slave_ar_addr_i    ( axi_slave.ar_addr    ),
+        .axi_slave_ar_prot_i    ( axi_slave.ar_prot    ),
+        .axi_slave_ar_region_i  ( axi_slave.ar_region  ),
+        .axi_slave_ar_len_i     ( axi_slave.ar_len     ),
+        .axi_slave_ar_size_i    ( axi_slave.ar_size    ),
+        .axi_slave_ar_burst_i   ( axi_slave.ar_burst   ),
+        .axi_slave_ar_lock_i    ( axi_slave.ar_lock    ),
+        .axi_slave_ar_cache_i   ( axi_slave.ar_cache   ),
+        .axi_slave_ar_qos_i     ( axi_slave.ar_qos     ),
+        .axi_slave_ar_id_i      ( axi_slave.ar_id      ),
+        .axi_slave_ar_user_i    ( axi_slave.ar_user    ),
+        .axi_slave_ar_ready_o   ( axi_slave.ar_ready   ),
+        .axi_slave_w_valid_i    ( axi_slave.w_valid    ),  // slave side, W channel
+        .axi_slave_w_data_i     ( axi_slave.w_data     ),
+        .axi_slave_w_strb_i     ( axi_slave.w_strb     ),
+        .axi_slave_w_user_i     ( axi_slave.w_user     ),
+        .axi_slave_w_last_i     ( axi_slave.w_last     ),
+        .axi_slave_w_ready_o    ( axi_slave.w_ready    ),
+        .axi_slave_r_valid_o    ( axi_slave.r_valid    ),  // slave side, R channel
+        .axi_slave_r_data_o     ( axi_slave.r_data     ),
+        .axi_slave_r_resp_o     ( axi_slave.r_resp     ),
+        .axi_slave_r_last_o     ( axi_slave.r_last     ),
+        .axi_slave_r_id_o       ( axi_slave.r_id       ),
+        .axi_slave_r_user_o     ( axi_slave.r_user     ),
+        .axi_slave_r_ready_i    ( axi_slave.r_ready    ),
+        .axi_slave_b_valid_o    ( axi_slave.b_valid    ),  // slave side, B channel
+        .axi_slave_b_resp_o     ( axi_slave.b_resp     ),
+        .axi_slave_b_id_o       ( axi_slave.b_id       ),
+        .axi_slave_b_user_o     ( axi_slave.b_user     ),
+        .axi_slave_b_ready_i    ( axi_slave.b_ready    ),
+        .axi_master_aw_valid_o  ( axi_master.aw_valid  ),  // master side, AW channel
+        .axi_master_aw_addr_o   ( axi_master.aw_addr   ),
+        .axi_master_aw_prot_o   ( axi_master.aw_prot   ),
+        .axi_master_aw_region_o ( axi_master.aw_region ),
+        .axi_master_aw_len_o    ( axi_master.aw_len    ),
+        .axi_master_aw_size_o   ( axi_master.aw_size   ),
+        .axi_master_aw_burst_o  ( axi_master.aw_burst  ),
+        .axi_master_aw_lock_o   ( axi_master.aw_lock   ),
+        .axi_master_aw_cache_o  ( axi_master.aw_cache  ),
+        .axi_master_aw_qos_o    ( axi_master.aw_qos    ),
+        .axi_master_aw_id_o     ( axi_master.aw_id     ),
+        .axi_master_aw_user_o   ( axi_master.aw_user   ),
+        .axi_master_aw_ready_i  ( axi_master.aw_ready  ),
+        .axi_master_ar_valid_o  ( axi_master.ar_valid  ),  // master side, AR channel
+        .axi_master_ar_addr_o   ( axi_master.ar_addr   ),
+        .axi_master_ar_prot_o   ( axi_master.ar_prot   ),
+        .axi_master_ar_region_o ( axi_master.ar_region ),
+        .axi_master_ar_len_o    ( axi_master.ar_len    ),
+        .axi_master_ar_size_o   ( axi_master.ar_size   ),
+        .axi_master_ar_burst_o  ( axi_master.ar_burst  ),
+        .axi_master_ar_lock_o   ( axi_master.ar_lock   ),
+        .axi_master_ar_cache_o  ( axi_master.ar_cache  ),
+        .axi_master_ar_qos_o    ( axi_master.ar_qos    ),
+        .axi_master_ar_id_o     ( axi_master.ar_id     ),
+        .axi_master_ar_user_o   ( axi_master.ar_user   ),
+        .axi_master_ar_ready_i  ( axi_master.ar_ready  ),
+        .axi_master_w_valid_o   ( axi_master.w_valid   ),  // master side, W channel
+        .axi_master_w_data_o    ( axi_master.w_data    ),
+        .axi_master_w_strb_o    ( axi_master.w_strb    ),
+        .axi_master_w_user_o    ( axi_master.w_user    ),
+        .axi_master_w_last_o    ( axi_master.w_last    ),
+        .axi_master_w_ready_i   ( axi_master.w_ready   ),
+        .axi_master_r_valid_i   ( axi_master.r_valid   ),  // master side, R channel
+        .axi_master_r_data_i    ( axi_master.r_data    ),
+        .axi_master_r_resp_i    ( axi_master.r_resp    ),
+        .axi_master_r_last_i    ( axi_master.r_last    ),
+        .axi_master_r_id_i      ( axi_master.r_id      ),
+        .axi_master_r_user_i    ( axi_master.r_user    ),
+        .axi_master_r_ready_o   ( axi_master.r_ready   ),
+        .axi_master_b_valid_i   ( axi_master.b_valid   ),  // master side, B channel
+        .axi_master_b_resp_i    ( axi_master.b_resp    ),
+        .axi_master_b_id_i      ( axi_master.b_id      ),
+        .axi_master_b_user_i    ( axi_master.b_user    ),
+        .axi_master_b_ready_o   ( axi_master.b_ready   ),
+        .*  // ports not named above (clk_i, rst_ni, test_en_i) connect implicitly to the same-named wrapper ports
+    );
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv
new file mode 100644
index 0000000..0e89a47
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module axi_w_buffer #(
+    parameter int DATA_WIDTH   = -1,            // width of the data ports
+    parameter int USER_WIDTH   = -1,            // width of the user ports
+    parameter int BUFFER_DEPTH = -1,            // forwarded to axi_single_slice
+    parameter int STRB_WIDTH   = DATA_WIDTH/8   // DO NOT OVERRIDE
+)(
+    input logic                   clk_i,
+    input logic                   rst_ni,
+    input logic                   test_en_i,
+    // Slave side: write-data beat entering the buffer.
+    input logic                   slave_valid_i,
+    input logic  [DATA_WIDTH-1:0] slave_data_i,
+    input logic  [STRB_WIDTH-1:0] slave_strb_i,
+    input logic  [USER_WIDTH-1:0] slave_user_i,
+    input logic                   slave_last_i,
+    output logic                  slave_ready_o,
+    // Master side: write-data beat leaving the buffer.
+    output logic                  master_valid_o,
+    output logic [DATA_WIDTH-1:0] master_data_o,
+    output logic [STRB_WIDTH-1:0] master_strb_o,
+    output logic [USER_WIDTH-1:0] master_user_o,
+    output logic                  master_last_o,
+    input  logic                  master_ready_i
+);
+
+    // All payload fields travel as one vector; layout MSB to LSB: {user, strb, data, last}.
+    localparam int PAYLOAD_WIDTH = USER_WIDTH + STRB_WIDTH + DATA_WIDTH + 1;
+    logic [PAYLOAD_WIDTH-1:0] payload_in, payload_out;
+
+    assign payload_in = {slave_user_i, slave_strb_i, slave_data_i, slave_last_i};
+    assign {master_user_o, master_strb_o, master_data_o, master_last_o} = payload_out;
+
+    axi_single_slice #(
+        .DATA_WIDTH   ( PAYLOAD_WIDTH ),
+        .BUFFER_DEPTH ( BUFFER_DEPTH  )
+    ) i_axi_single_slice (
+        .clk_i   ( clk_i          ), .rst_ni  ( rst_ni         ), .testmode_i ( test_en_i ),
+        // input handshake and packed payload
+        .valid_i ( slave_valid_i  ), .ready_o ( slave_ready_o  ), .data_i ( payload_in  ),
+        // output handshake and packed payload
+        .valid_o ( master_valid_o ), .ready_i ( master_ready_i ), .data_o ( payload_out )
+    );
+endmodule
diff --git a/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh b/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh
new file mode 100644
index 0000000..30c44ac
--- /dev/null
+++ b/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh
@@ -0,0 +1,46 @@
+// Copyright (c) 2020 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+/// Macros to assign the request/response fields of a register bus.
+
+`ifndef REGISTER_INTERFACE_ASSIGN_SVH_
+`define REGISTER_INTERFACE_ASSIGN_SVH_
+// Assigns to lhs one assignment pattern whose members addr, write, wdata, wstrb and valid are copied from rhs.
+`define REG_BUS_ASSIGN_TO_REQ(lhs, rhs) \
+  assign lhs = '{                       \
+    addr: rhs.addr,                     \
+    write: rhs.write,                   \
+    wdata: rhs.wdata,                   \
+    wstrb: rhs.wstrb,                   \
+    valid: rhs.valid                    \
+  };
+// Drives lhs.addr/write/wdata/wstrb/valid from the same-named members of rhs, one assign each; the last line has no continuation so the macro ends there.
+`define REG_BUS_ASSIGN_FROM_REQ(lhs, rhs) \
+  assign lhs.addr = rhs.addr;             \
+  assign lhs.write = rhs.write;           \
+  assign lhs.wdata = rhs.wdata;           \
+  assign lhs.wstrb = rhs.wstrb;           \
+  assign lhs.valid = rhs.valid;
+
+`define REG_BUS_ASSIGN_TO_RSP(lhs, rhs) \
+  assign lhs = '{                       \
+    rdata: rhs.rdata,                   \
+    error: rhs.error,                   \
+    ready: rhs.ready                    \
+  };
+// TO_RSP (above) assigns lhs one pattern built from rhs.rdata/error/ready; FROM_RSP (below) assigns those three members one by one.
+`define REG_BUS_ASSIGN_FROM_RSP(lhs, rhs) \
+  assign lhs.rdata = rhs.rdata;           \
+  assign lhs.error = rhs.error;           \
+  assign lhs.ready = rhs.ready;
+
+`endif
\ No newline at end of file
diff --git a/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh b/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh
new file mode 100644
index 0000000..350d79e
--- /dev/null
+++ b/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh
@@ -0,0 +1,38 @@
+// Copyright (c) 2020 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+/// Macros to define register bus request/response structs.
+
+`ifndef REGISTER_INTERFACE_TYPEDEF_SVH_
+`define REGISTER_INTERFACE_TYPEDEF_SVH_
+// Declares packed struct type req_t with members addr (addr_t), write, wdata (data_t), wstrb (strb_t) and valid.
+`define REG_BUS_TYPEDEF_REQ(req_t, addr_t, data_t, strb_t) \
+    typedef struct packed { \
+        addr_t addr; \
+        logic  write; \
+        data_t wdata; \
+        strb_t wstrb; \
+        logic  valid; \
+    } req_t;
+// Declares packed struct type rsp_t with members rdata (data_t), error and ready.
+`define REG_BUS_TYPEDEF_RSP(rsp_t, data_t) \
+    typedef struct packed { \
+        data_t rdata; \
+        logic  error; \
+        logic  ready; \
+    } rsp_t;
+// Declares both struct types, named <name>_req_t and <name>_rsp_t, by invoking the two macros defined above.
+`define REG_BUS_TYPEDEF_ALL(name, addr_t, data_t, strb_t) \
+    `REG_BUS_TYPEDEF_REQ(name``_req_t, addr_t, data_t, strb_t) \
+    `REG_BUS_TYPEDEF_RSP(name``_rsp_t, data_t)
+
+`endif
diff --git a/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv b/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv
new file mode 100644
index 0000000..f2f14dc
--- /dev/null
+++ b/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv
@@ -0,0 +1,39 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+module apb_to_reg (
+  input  logic          clk_i,      // not referenced inside this module
+  input  logic          rst_ni,     // not referenced inside this module
+  // APB-side ports
+  input  logic          penable_i,
+  input  logic          pwrite_i,
+  input  logic [31:0]   paddr_i,
+  input  logic          psel_i,
+  input  logic [31:0]   pwdata_i,
+  output logic [31:0]   prdata_o,
+  output logic          pready_o,
+  output logic          pslverr_o,
+  // Register-bus port (modport out)
+  REG_BUS.out  reg_o
+);
+
+  // Request path: APB inputs drive the register bus, purely combinationally.
+  assign reg_o.addr  = paddr_i;
+  assign reg_o.write = pwrite_i;
+  assign reg_o.wdata = pwdata_i;
+  assign reg_o.wstrb = '1;  // every strobe bit is set
+  assign reg_o.valid = psel_i & penable_i;
+  // Response path: register-bus inputs drive the APB outputs.
+  assign pready_o  = reg_o.ready;
+  assign pslverr_o = reg_o.error;
+  assign prdata_o  = reg_o.rdata;
+endmodule
diff --git a/test/type_param/corev_apu/register_interface/src/reg_intf.sv b/test/type_param/corev_apu/register_interface/src/reg_intf.sv
new file mode 100644
index 0000000..5923ae3
--- /dev/null
+++ b/test/type_param/corev_apu/register_interface/src/reg_intf.sv
@@ -0,0 +1,43 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A simple register interface.
+///
+/// This is pretty much as simple as it gets. Transactions consist of only one
+/// phase. The master sets the address, write, write data, and write strobe
+/// signals and pulls valid high. Once pulled high, valid must remain high and
+/// none of the signals may change. The transaction completes when both valid
+/// and ready are high. Valid must not depend on ready. The slave presents the
+/// read data and error signals. These signals must be constant while valid and
+/// ready are both high.
+interface REG_BUS #(
+  parameter int ADDR_WIDTH = -1,  /// Width of addr in bits.
+  parameter int DATA_WIDTH = -1   /// Width of rdata/wdata in bits; wstrb has DATA_WIDTH/8 bits.
+)(
+  input logic clk_i
+);
+
+  // Request signals: outputs of modport out, inputs of modport in.
+  logic                    valid;
+  logic                    write;  // 0=read, 1=write
+  logic [ADDR_WIDTH-1:0]   addr;
+  logic [DATA_WIDTH-1:0]   wdata;
+  logic [DATA_WIDTH/8-1:0] wstrb;  // byte-wise strobe
+  // Response signals: inputs of modport out, outputs of modport in.
+  logic                    ready;
+  logic                    error;  // 0=ok, 1=error
+  logic [DATA_WIDTH-1:0]   rdata;
+
+  modport out (output addr, write, wdata, wstrb, valid, input  rdata, error, ready);
+  modport in  (input  addr, write, wdata, wstrb, valid, output rdata, error, ready);
+
+endinterface
diff --git a/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv b/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv
new file mode 100644
index 0000000..0299db6
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv
@@ -0,0 +1,66 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: debug_rom.sv
+ *
+ * Description: Auto-generated bootrom
+ */
+
+// Auto-generated code
+module debug_rom (
+  input  logic         clk_i,    // read clock
+  input  logic         req_i,    // when high, latch a new word index
+  input  logic [63:0]  addr_i,   // byte address; bits [7:3] select the word
+  output logic [63:0]  rdata_o   // addressed word, or zero if out of range
+);
+
+  localparam int unsigned RomSize  = 19;               // number of 64-bit words
+  localparam int unsigned IdxWidth = $clog2(RomSize);  // bits needed to index them
+
+  // ROM image; the last entry of the concatenation is word 0.
+  logic [RomSize-1:0][63:0] mem;
+  assign mem = {
+    64'h00000000_7b200073,
+    64'h7b202473_7b302573,
+    64'h10852423_f1402473,
+    64'ha85ff06f_7b202473,
+    64'h7b302573_10052223,
+    64'h00100073_7b202473,
+    64'h7b302573_10052623,
+    64'h00c51513_00c55513,
+    64'h00000517_fd5ff06f,
+    64'hfa041ce3_00247413,
+    64'h40044403_00a40433,
+    64'hf1402473_02041c63,
+    64'h00147413_40044403,
+    64'h00a40433_10852023,
+    64'hf1402473_00c51513,
+    64'h00c55513_00000517,
+    64'h7b351073_7b241073,
+    64'h0ff0000f_04c0006f,
+    64'h07c0006f_00c0006f
+  };
+
+  logic [IdxWidth-1:0] word_idx_q;
+
+  // Registered word index: only updated in cycles where req_i is set.
+  always_ff @(posedge clk_i) begin
+    if (req_i) begin
+      word_idx_q <= addr_i[3 +: IdxWidth];
+    end
+  end
+
+  // Out-of-range indices read as zero so no Xes reach the core's speculative fetch stage.
+  always_comb begin : p_outmux
+    if (word_idx_q < IdxWidth'(RomSize)) rdata_o = mem[word_idx_q];
+    else                                 rdata_o = '0;
+  end
+
+endmodule
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv
new file mode 100644
index 0000000..45e8878
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv
@@ -0,0 +1,634 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:  dm_csrs.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   30.6.2018
+ *
+ * Description: Debug CSRs. Communication over Debug Transport Module (DTM)
+ */
+
+module dm_csrs #(
+  parameter int unsigned        NrHarts          = 1,  // number of harts; sizes the per-hart ports
+  parameter int unsigned        BusWidth         = 32, // width of the system bus address/data ports
+  parameter logic [NrHarts-1:0] SelectableHarts  = {NrHarts{1'b1}} // mask ANDed into havereset_q
+) (
+  input  logic                              clk_i,           // Clock
+  input  logic                              rst_ni,          // Asynchronous reset active low
+  input  logic                              testmode_i,
+  input  logic                              dmi_rst_ni,      // Debug Module Intf reset active-low
+  input  logic                              dmi_req_valid_i,
+  output logic                              dmi_req_ready_o,
+  input  dm::dmi_req_t                      dmi_req_i,
+  // every request needs a response one cycle later
+  output logic                              dmi_resp_valid_o,
+  input  logic                              dmi_resp_ready_i,
+  output dm::dmi_resp_t                     dmi_resp_o,
+  // global ctrl
+  output logic                              ndmreset_o,      // non-debug module reset active-high
+  output logic                              dmactive_o,      // 1 -> debug-module is active,
+                                                             // 0 -> synchronous re-set
+  // hart status
+  input  dm::hartinfo_t [NrHarts-1:0]       hartinfo_i,      // static hartinfo
+  input  logic [NrHarts-1:0]                halted_i,        // hart is halted
+  input  logic [NrHarts-1:0]                unavailable_i,   // e.g.: powered down
+  input  logic [NrHarts-1:0]                resumeack_i,     // hart acknowledged resume request
+  // hart control
+  output logic [19:0]                       hartsel_o,       // hartselect to ctrl module
+  output logic [NrHarts-1:0]                haltreq_o,       // request to halt a hart
+  output logic [NrHarts-1:0]                resumereq_o,     // request hart to resume
+  output logic                              clear_resumeack_o,
+
+  output logic                              cmd_valid_o,       // debugger writing to cmd field
+  output dm::command_t                      cmd_o,             // abstract command
+  input  logic                              cmderror_valid_i,  // an error occurred
+  input  dm::cmderr_e                       cmderror_i,        // this error occurred
+  input  logic                              cmdbusy_i,         // cmd is currently busy executing
+
+  output logic [dm::ProgBufSize-1:0][31:0]  progbuf_o, // to system bus
+  output logic [dm::DataCount-1:0][31:0]    data_o,
+
+  input  logic [dm::DataCount-1:0][31:0]    data_i,
+  input  logic                              data_valid_i,
+  // system bus access module (SBA)
+  output logic [BusWidth-1:0]               sbaddress_o,
+  input  logic [BusWidth-1:0]               sbaddress_i,
+  output logic                              sbaddress_write_valid_o,
+  // control signals in
+  output logic                              sbreadonaddr_o,
+  output logic                              sbautoincrement_o,
+  output logic [2:0]                        sbaccess_o,
+  // data out
+  output logic                              sbreadondata_o,
+  output logic [BusWidth-1:0]               sbdata_o,
+  output logic                              sbdata_read_valid_o,
+  output logic                              sbdata_write_valid_o,
+  // read data in
+  input  logic [BusWidth-1:0]               sbdata_i,
+  input  logic                              sbdata_valid_i,
+  // control signals
+  input  logic                              sbbusy_i,
+  input  logic                              sberror_valid_i, // bus error occurred
+  input  logic [2:0]                        sberror_i // bus error code, latched into sbcs.sberror
+);
+  // the amount of bits we need to represent all harts
+  localparam int unsigned HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts);
+  localparam int unsigned NrHartsAligned = 2**HartSelLen;
+
+  dm::dtm_op_e dtm_op;
+  assign dtm_op = dm::dtm_op_e'(dmi_req_i.op);
+
+  logic        resp_queue_full;
+  logic        resp_queue_empty;
+  logic        resp_queue_push;
+  logic        resp_queue_pop;
+  logic [31:0] resp_queue_data;
+
+  localparam dm::dm_csr_e DataEnd = dm::dm_csr_e'(dm::Data0 + {4'h0, dm::DataCount} - 8'h1);
+  localparam dm::dm_csr_e ProgBufEnd = dm::dm_csr_e'(dm::ProgBuf0 + {4'h0, dm::ProgBufSize} - 8'h1);
+
+  logic [31:0] haltsum0, haltsum1, haltsum2, haltsum3;
+  logic [((NrHarts-1)/2**5 + 1) * 32 - 1 : 0] halted;
+  logic [(NrHarts-1)/2**5:0][31:0] halted_reshaped0;
+  logic [(NrHarts-1)/2**10:0][31:0] halted_reshaped1;
+  logic [(NrHarts-1)/2**15:0][31:0] halted_reshaped2;
+  logic [((NrHarts-1)/2**10+1)*32-1:0] halted_flat1;
+  logic [((NrHarts-1)/2**15+1)*32-1:0] halted_flat2;
+  logic [31:0] halted_flat3;
+
+  // haltsum0: the 32-bit group of halted_i bits selected by hartsel_o[19:5]
+  logic [14:0] hartsel_idx0;
+  always_comb begin : p_haltsum0
+    halted              = '0;
+    haltsum0            = '0;
+    hartsel_idx0        = hartsel_o[19:5];
+    halted[NrHarts-1:0] = halted_i;
+    halted_reshaped0    = halted;
+    if (hartsel_idx0 < 15'((NrHarts-1)/2**5+1)) begin
+      haltsum0 = halted_reshaped0[hartsel_idx0];
+    end
+  end
+
+  // haltsum1: one OR-reduced bit per haltsum0 group, window selected by hartsel_o[19:10]
+  logic [9:0] hartsel_idx1;
+  always_comb begin : p_reduction1
+    halted_flat1 = '0;
+    haltsum1     = '0;
+    hartsel_idx1 = hartsel_o[19:10];
+
+    for (int unsigned k = 0; k < (NrHarts-1)/2**5+1; k++) begin
+      halted_flat1[k] = |halted_reshaped0[k];
+    end
+    halted_reshaped1 = halted_flat1;
+
+    if (hartsel_idx1 < 10'(((NrHarts-1)/2**10+1))) begin
+      haltsum1 = halted_reshaped1[hartsel_idx1];
+    end
+  end
+
+  // haltsum2: one OR-reduced bit per haltsum1 group, window selected by hartsel_o[19:15]
+  logic [4:0] hartsel_idx2;
+  always_comb begin : p_reduction2
+    halted_flat2 = '0;
+    haltsum2     = '0;
+    hartsel_idx2 = hartsel_o[19:15];
+
+    for (int unsigned k = 0; k < (NrHarts-1)/2**10+1; k++) begin
+      halted_flat2[k] = |halted_reshaped1[k];
+    end
+    halted_reshaped2 = halted_flat2;
+
+    if (hartsel_idx2 < 5'(((NrHarts-1)/2**15+1))) begin
+      haltsum2         = halted_reshaped2[hartsel_idx2];
+    end
+  end
+
+  // haltsum3: one OR-reduced bit per halted_reshaped2 entry (bound == number of entries)
+  always_comb begin : p_reduction3
+    halted_flat3 = '0;
+    for (int unsigned k = 0; k < (NrHarts-1)/2**15+1; k++) begin
+      halted_flat3[k] = |halted_reshaped2[k];
+    end
+    haltsum3 = halted_flat3;
+  end
+
+
+  dm::dmstatus_t      dmstatus;
+  dm::dmcontrol_t     dmcontrol_d, dmcontrol_q;
+  dm::abstractcs_t    abstractcs;
+  dm::cmderr_e        cmderr_d, cmderr_q;
+  dm::command_t       command_d, command_q;
+  logic               cmd_valid_d, cmd_valid_q;
+  dm::abstractauto_t  abstractauto_d, abstractauto_q;
+  dm::sbcs_t          sbcs_d, sbcs_q;
+  logic [63:0]        sbaddr_d, sbaddr_q;
+  logic [63:0]        sbdata_d, sbdata_q;
+
+  logic [NrHarts-1:0] havereset_d, havereset_q;
+  // program buffer
+  logic [dm::ProgBufSize-1:0][31:0] progbuf_d, progbuf_q;
+  logic [dm::DataCount-1:0][31:0] data_d, data_q;
+
+  logic [HartSelLen-1:0] selected_hart;
+
+  // a successful response returns zero
+  assign dmi_resp_o.resp = dm::DTM_SUCCESS;
+  assign dmi_resp_valid_o     = ~resp_queue_empty;
+  assign dmi_req_ready_o      = ~resp_queue_full;
+  assign resp_queue_push      = dmi_req_valid_i & dmi_req_ready_o;
+  // SBA
+  assign sbautoincrement_o = sbcs_q.sbautoincrement;
+  assign sbreadonaddr_o    = sbcs_q.sbreadonaddr;
+  assign sbreadondata_o    = sbcs_q.sbreadondata;
+  assign sbaccess_o        = sbcs_q.sbaccess;
+  assign sbdata_o          = sbdata_q[BusWidth-1:0];
+  assign sbaddress_o       = sbaddr_q[BusWidth-1:0];
+
+  assign hartsel_o         = {dmcontrol_q.hartselhi, dmcontrol_q.hartsello};
+
+  // needed to avoid lint warnings
+  logic [NrHartsAligned-1:0] havereset_d_aligned, havereset_q_aligned,
+                             resumeack_aligned, unavailable_aligned,
+                             halted_aligned;
+  assign resumeack_aligned   = NrHartsAligned'(resumeack_i);
+  assign unavailable_aligned = NrHartsAligned'(unavailable_i);
+  assign halted_aligned      = NrHartsAligned'(halted_i);
+
+  assign havereset_d         = NrHarts'(havereset_d_aligned);
+  assign havereset_q_aligned = NrHartsAligned'(havereset_q);
+
+  dm::hartinfo_t [NrHartsAligned-1:0] hartinfo_aligned;
+  always_comb begin : p_hartinfo_align
+    hartinfo_aligned = '0;
+    hartinfo_aligned[NrHarts-1:0] = hartinfo_i;
+  end
+
+  // helper variables
+  dm::dm_csr_e dm_csr_addr;
+  dm::sbcs_t sbcs;
+  dm::abstractcs_t a_abstractcs;
+  logic [3:0] autoexecdata_idx; // 0 == Data0 ... 11 == Data11
+
+  // Get the data index, i.e. 0 for dm::Data0 up to 11 for dm::Data11
+  assign dm_csr_addr = dm::dm_csr_e'({1'b0, dmi_req_i.addr});
+  // Xilinx Vivado 2020.1 does not allow subtraction of two enums; do the subtraction with logic
+  // types instead.
+  assign autoexecdata_idx = 4'({dm_csr_addr} - {dm::Data0});
+
+  always_comb begin : csr_read_write
+    // --------------------
+    // Static Values (R/O)
+    // --------------------
+    // dmstatus
+    dmstatus    = '0;
+    dmstatus.version = dm::DbgVersion013;
+    // no authentication implemented
+    dmstatus.authenticated = 1'b1;
+    // we do not support halt-on-reset sequence
+    dmstatus.hasresethaltreq = 1'b0;
+    // TODO(zarubaf) things need to change here if we implement the array mask
+    dmstatus.allhavereset = havereset_q_aligned[selected_hart];
+    dmstatus.anyhavereset = havereset_q_aligned[selected_hart];
+
+    dmstatus.allresumeack = resumeack_aligned[selected_hart];
+    dmstatus.anyresumeack = resumeack_aligned[selected_hart];
+
+    dmstatus.allunavail   = unavailable_aligned[selected_hart];
+    dmstatus.anyunavail   = unavailable_aligned[selected_hart];
+
+    // as soon as we are out of the legal Hart region tell the debugger
+    // that there are only non-existent harts
+    dmstatus.allnonexistent = logic'(32'(hartsel_o) > (NrHarts - 1));
+    dmstatus.anynonexistent = logic'(32'(hartsel_o) > (NrHarts - 1));
+
+    // We are not allowed to be in multiple states at once. This is to
+    // make the running/halted and unavailable states exclusive.
+    dmstatus.allhalted    = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+    dmstatus.anyhalted    = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+
+    dmstatus.allrunning   = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+    dmstatus.anyrunning   = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart];
+
+    // abstractcs
+    abstractcs = '0;
+    abstractcs.datacount = dm::DataCount;
+    abstractcs.progbufsize = dm::ProgBufSize;
+    abstractcs.busy = cmdbusy_i;
+    abstractcs.cmderr = cmderr_q;
+
+    // abstractautoexec
+    abstractauto_d = abstractauto_q;
+    abstractauto_d.zero0 = '0;
+
+    // default assignments
+    havereset_d_aligned = NrHartsAligned'(havereset_q);
+    dmcontrol_d         = dmcontrol_q;
+    cmderr_d            = cmderr_q;
+    command_d           = command_q;
+    progbuf_d           = progbuf_q;
+    data_d              = data_q;
+    sbcs_d              = sbcs_q;
+    sbaddr_d            = 64'(sbaddress_i);
+    sbdata_d            = sbdata_q;
+
+    resp_queue_data         = 32'h0;
+    cmd_valid_d             = 1'b0;
+    sbaddress_write_valid_o = 1'b0;
+    sbdata_read_valid_o     = 1'b0;
+    sbdata_write_valid_o    = 1'b0;
+    clear_resumeack_o       = 1'b0;
+
+    // helper variables
+    sbcs         = '0;
+    a_abstractcs = '0;
+
+    // reads
+    if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) begin
+      unique case (dm_csr_addr) inside
+        [(dm::Data0):DataEnd]: begin
+          resp_queue_data = data_q[$clog2(dm::DataCount)'(autoexecdata_idx)];
+          if (!cmdbusy_i) begin
+            // check whether we need to re-execute the command (just give a cmd_valid)
+            cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx];
+          // An abstract command was executing while one of the data registers was read
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        dm::DMControl:    resp_queue_data = dmcontrol_q;
+        dm::DMStatus:     resp_queue_data = dmstatus;
+        dm::Hartinfo:     resp_queue_data = hartinfo_aligned[selected_hart];
+        dm::AbstractCS:   resp_queue_data = abstractcs;
+        dm::AbstractAuto: resp_queue_data = abstractauto_q;
+        // command is read-only
+        dm::Command:    resp_queue_data = '0;
+        [(dm::ProgBuf0):ProgBufEnd]: begin
+          resp_queue_data = progbuf_q[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]];
+          if (!cmdbusy_i) begin
+            // check whether we need to re-execute the command (just give a cmd_valid)
+            // range of autoexecprogbuf is 31:16
+            cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}];
+
+          // An abstract command was executing while one of the progbuf registers was read
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        dm::HaltSum0: resp_queue_data = haltsum0;
+        dm::HaltSum1: resp_queue_data = haltsum1;
+        dm::HaltSum2: resp_queue_data = haltsum2;
+        dm::HaltSum3: resp_queue_data = haltsum3;
+        dm::SBCS: begin
+          resp_queue_data = sbcs_q;
+        end
+        dm::SBAddress0: begin
+          resp_queue_data = sbaddr_q[31:0];
+        end
+        dm::SBAddress1: begin
+          resp_queue_data = sbaddr_q[63:32];
+        end
+        dm::SBData0: begin
+          // access while the SBA was busy
+          if (sbbusy_i || sbcs_q.sbbusyerror) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbdata_read_valid_o = (sbcs_q.sberror == '0);
+            resp_queue_data = sbdata_q[31:0];
+          end
+        end
+        dm::SBData1: begin
+          // access while the SBA was busy
+          if (sbbusy_i || sbcs_q.sbbusyerror) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            resp_queue_data = sbdata_q[63:32];
+          end
+        end
+        default:;
+      endcase
+    end
+
+    // write
+    if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_WRITE) begin
+      unique case (dm_csr_addr) inside
+        [(dm::Data0):DataEnd]: begin
+          if (dm::DataCount > 0) begin
+            // writes while busy are dropped; index is relative to Data0, same as the read path
+            if (!cmdbusy_i) begin
+              data_d[$clog2(dm::DataCount)'(autoexecdata_idx)] = dmi_req_i.data;
+              // check whether we need to re-execute the command (just give a cmd_valid)
+              cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx];
+            //An abstract command was executing while one of the data registers was written
+            end else if (cmderr_q == dm::CmdErrNone) begin
+              cmderr_d = dm::CmdErrBusy;
+            end
+          end
+        end
+        dm::DMControl: begin
+          dmcontrol_d = dmi_req_i.data;
+          // clear the havereset of the selected hart
+          if (dmcontrol_d.ackhavereset) begin
+            havereset_d_aligned[selected_hart] = 1'b0;
+          end
+        end
+        dm::DMStatus:; // writes to the R/O register are ignored
+        dm::Hartinfo:; // hartinfo is R/O
+        // only command error is write-able
+        dm::AbstractCS: begin // W1C
+          // Gets set if an abstract command fails. The bits in this
+          // field remain set until they are cleared by writing 1 to
+          // them. No abstract command is started until the value is
+          // reset to 0.
+          a_abstractcs = dm::abstractcs_t'(dmi_req_i.data);
+          // writes during abstract command execution are not allowed
+          if (!cmdbusy_i) begin
+            cmderr_d = dm::cmderr_e'(~a_abstractcs.cmderr & cmderr_q);
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        dm::Command: begin
+          // writes are ignored if a command is already busy
+          if (!cmdbusy_i) begin
+            cmd_valid_d = 1'b1;
+            command_d = dm::command_t'(dmi_req_i.data);
+          // if there was an attempt to write during a busy execution
+          // and the cmderror field is zero set the busy error
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        dm::AbstractAuto: begin
+          // this field can only be written legally when there is no command executing
+          if (!cmdbusy_i) begin
+            abstractauto_d                 = 32'h0;
+            abstractauto_d.autoexecdata    = 12'(dmi_req_i.data[dm::DataCount-1:0]);
+            abstractauto_d.autoexecprogbuf = 16'(dmi_req_i.data[dm::ProgBufSize-1+16:16]);
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        [(dm::ProgBuf0):ProgBufEnd]: begin
+          // attempts to write them while busy is set does not change their value
+          if (!cmdbusy_i) begin
+            progbuf_d[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]] = dmi_req_i.data;
+            // check whether we need to re-execute the command (just give a cmd_valid)
+            // this should probably throw an error if executed during another command
+            // was busy
+            // range of autoexecprogbuf is 31:16
+            cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}];
+          //An abstract command was executing while one of the progbuf registers was written
+          end else if (cmderr_q == dm::CmdErrNone) begin
+            cmderr_d = dm::CmdErrBusy;
+          end
+        end
+        dm::SBCS: begin
+          // access while the SBA was busy
+          if (sbbusy_i) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbcs = dm::sbcs_t'(dmi_req_i.data);
+            sbcs_d = sbcs;
+            // R/W1C
+            sbcs_d.sbbusyerror = sbcs_q.sbbusyerror & (~sbcs.sbbusyerror);
+            sbcs_d.sberror     = sbcs_q.sberror     & (~sbcs.sberror);
+          end
+        end
+        dm::SBAddress0: begin
+          // access while the SBA was busy
+          if (sbbusy_i || sbcs_q.sbbusyerror) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbaddr_d[31:0] = dmi_req_i.data;
+            sbaddress_write_valid_o = (sbcs_q.sberror == '0);
+          end
+        end
+        dm::SBAddress1: begin
+          // access while the SBA was busy
+          if (sbbusy_i || sbcs_q.sbbusyerror) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbaddr_d[63:32] = dmi_req_i.data;
+          end
+        end
+        dm::SBData0: begin
+          // access while the SBA was busy
+          if (sbbusy_i || sbcs_q.sbbusyerror) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbdata_d[31:0] = dmi_req_i.data;
+            sbdata_write_valid_o = (sbcs_q.sberror == '0);
+          end
+        end
+        dm::SBData1: begin
+          // access while the SBA was busy
+          if (sbbusy_i || sbcs_q.sbbusyerror) begin
+            sbcs_d.sbbusyerror = 1'b1;
+          end else begin
+            sbdata_d[63:32] = dmi_req_i.data;
+          end
+        end
+        default:;
+      endcase
+    end
+    // hart threw a command error and has precedence over bus writes
+    if (cmderror_valid_i) begin
+      cmderr_d = cmderror_i;
+    end
+
+    // update data registers
+    if (data_valid_i) begin
+      data_d = data_i;
+    end
+
+    // set the havereset flag when we did a ndmreset
+    if (ndmreset_o) begin
+      havereset_d_aligned[NrHarts-1:0] = '1;
+    end
+    // -------------
+    // System Bus
+    // -------------
+    // set bus error
+    if (sberror_valid_i) begin
+      sbcs_d.sberror = sberror_i;
+    end
+    // update read data
+    if (sbdata_valid_i) begin
+      sbdata_d = 64'(sbdata_i);
+    end
+
+    // dmcontrol
+    // TODO(zarubaf) we currently do not implement the hartarray mask
+    dmcontrol_d.hasel           = 1'b0;
+    // we do not support resetting an individual hart
+    dmcontrol_d.hartreset       = 1'b0;
+    dmcontrol_d.setresethaltreq = 1'b0;
+    dmcontrol_d.clrresethaltreq = 1'b0;
+    dmcontrol_d.zero1           = '0;
+    dmcontrol_d.zero0           = '0;
+    // Non-writeable, clear only
+    dmcontrol_d.ackhavereset    = 1'b0;
+    if (!dmcontrol_q.resumereq && dmcontrol_d.resumereq) begin
+      clear_resumeack_o = 1'b1;
+    end
+    if (dmcontrol_q.resumereq && resumeack_i) begin
+      dmcontrol_d.resumereq = 1'b0;
+    end
+    // static values for sbcs
+    sbcs_d.sbversion            = 3'd1;
+    sbcs_d.sbbusy               = sbbusy_i;
+    sbcs_d.sbasize              = $bits(sbcs_d.sbasize)'(BusWidth);
+    sbcs_d.sbaccess128          = 1'b0;
+    sbcs_d.sbaccess64           = logic'(BusWidth == 32'd64);
+    sbcs_d.sbaccess32           = logic'(BusWidth == 32'd32);
+    sbcs_d.sbaccess16           = 1'b0;
+    sbcs_d.sbaccess8            = 1'b0;
+    sbcs_d.sbaccess             = (BusWidth == 32'd64) ? 3'd3 : 3'd2;
+  end
+
+  // output multiplexer
+  always_comb begin : p_outmux
+    selected_hart = hartsel_o[HartSelLen-1:0];
+    // default assignment
+    haltreq_o = '0;
+    resumereq_o = '0;
+    if (selected_hart <= HartSelLen'(NrHarts-1)) begin
+      haltreq_o[selected_hart]   = dmcontrol_q.haltreq;
+      resumereq_o[selected_hart] = dmcontrol_q.resumereq;
+    end
+  end
+
+  assign dmactive_o  = dmcontrol_q.dmactive;
+  assign cmd_o       = command_q;
+  assign cmd_valid_o = cmd_valid_q;
+  assign progbuf_o   = progbuf_q;
+  assign data_o      = data_q;
+
+  assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty;
+
+  assign ndmreset_o = dmcontrol_q.ndmreset;
+
+  // response FIFO
+  fifo_v2 #(
+    .dtype            ( logic [31:0]         ),
+    .DEPTH            ( 2                    )
+  ) i_fifo (
+    .clk_i            ( clk_i                ),
+    .rst_ni           ( dmi_rst_ni           ), // reset only when system is re-set
+    .flush_i          ( 1'b0                 ), // we do not need to flush this queue
+    .testmode_i       ( testmode_i           ),
+    .full_o           ( resp_queue_full      ),
+    .empty_o          ( resp_queue_empty     ),
+    .alm_full_o       (                      ),
+    .alm_empty_o      (                      ),
+    .data_i           ( resp_queue_data      ),
+    .push_i           ( resp_queue_push      ),
+    .data_o           ( dmi_resp_o.data      ),
+    .pop_i            ( resp_queue_pop       )
+  );
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    // PoR
+    if (!rst_ni) begin
+      dmcontrol_q    <= '0;
+      // this is the only write-able bit during reset
+      cmderr_q       <= dm::CmdErrNone;
+      command_q      <= '0;
+      cmd_valid_q    <= '0;
+      abstractauto_q <= '0;
+      progbuf_q      <= '0;
+      data_q         <= '0;
+      sbcs_q         <= '0;
+      sbaddr_q       <= '0;
+      sbdata_q       <= '0;
+      havereset_q    <= '1;
+    end else begin
+      havereset_q    <= SelectableHarts & havereset_d;
+      // synchronous re-set of debug module, active-low, except for dmactive
+      if (!dmcontrol_q.dmactive) begin
+        dmcontrol_q.haltreq          <= '0;
+        dmcontrol_q.resumereq        <= '0;
+        dmcontrol_q.hartreset        <= '0;
+        dmcontrol_q.ackhavereset     <= '0;
+        dmcontrol_q.zero1            <= '0;
+        dmcontrol_q.hasel            <= '0;
+        dmcontrol_q.hartsello        <= '0;
+        dmcontrol_q.hartselhi        <= '0;
+        dmcontrol_q.zero0            <= '0;
+        dmcontrol_q.setresethaltreq  <= '0;
+        dmcontrol_q.clrresethaltreq  <= '0;
+        dmcontrol_q.ndmreset         <= '0;
+        // this is the only write-able bit during reset
+        dmcontrol_q.dmactive         <= dmcontrol_d.dmactive;
+        cmderr_q                     <= dm::CmdErrNone;
+        command_q                    <= '0;
+        cmd_valid_q                  <= '0;
+        abstractauto_q               <= '0;
+        progbuf_q                    <= '0;
+        data_q                       <= '0;
+        sbcs_q                       <= '0;
+        sbaddr_q                     <= '0;
+        sbdata_q                     <= '0;
+      end else begin
+        dmcontrol_q                  <= dmcontrol_d;
+        cmderr_q                     <= cmderr_d;
+        command_q                    <= command_d;
+        cmd_valid_q                  <= cmd_valid_d;
+        abstractauto_q               <= abstractauto_d;
+        progbuf_q                    <= progbuf_d;
+        data_q                       <= data_d;
+        sbcs_q                       <= sbcs_d;
+        sbaddr_q                     <= sbaddr_d;
+        sbdata_q                     <= sbdata_d;
+      end
+    end
+  end
+
+endmodule : dm_csrs
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv
new file mode 100755
index 0000000..178259f
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv
@@ -0,0 +1,523 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dm_mem.sv
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+* Date:   11.7.2018
+*
+* Description: Memory module for execution-based debug clients
+*
+*/
+
+module dm_mem #(
+  parameter int unsigned        NrHarts          =  1,
+  parameter int unsigned        BusWidth         = 32,
+  parameter logic [NrHarts-1:0] SelectableHarts  = {NrHarts{1'b1}},
+  parameter int unsigned        DmBaseAddress    = '0
+) (
+  input  logic                             clk_i,       // Clock
+  input  logic                             rst_ni,      // debug module reset
+
+  output logic [NrHarts-1:0]               debug_req_o,
+  input  logic [19:0]                      hartsel_i,
+  // from Ctrl and Status register
+  input  logic [NrHarts-1:0]               haltreq_i,
+  input  logic [NrHarts-1:0]               resumereq_i,
+  input  logic                             clear_resumeack_i,
+
+  // state bits
+  output logic [NrHarts-1:0]               halted_o,    // hart acknowledge halt
+  output logic [NrHarts-1:0]               resuming_o,  // hart is resuming
+
+  input  logic [dm::ProgBufSize-1:0][31:0] progbuf_i,    // program buffer to expose
+
+  input  logic [dm::DataCount-1:0][31:0]   data_i,       // data in
+  output logic [dm::DataCount-1:0][31:0]   data_o,       // data out
+  output logic                             data_valid_o, // data out is valid
+  // abstract command interface
+  input  logic                             cmd_valid_i,
+  input  dm::command_t                     cmd_i,
+  output logic                             cmderror_valid_o,
+  output dm::cmderr_e                      cmderror_o,
+  output logic                             cmdbusy_o,
+  // data interface
+
+  // SRAM interface
+  input  logic                             req_i,
+  input  logic                             we_i,
+  input  logic [BusWidth-1:0]              addr_i,
+  input  logic [BusWidth-1:0]              wdata_i,
+  input  logic [BusWidth/8-1:0]            be_i,
+  output logic [BusWidth-1:0]              rdata_o
+);
+  localparam int unsigned DbgAddressBits = 12;
+  localparam int unsigned HartSelLen     = (NrHarts == 1) ? 1 : $clog2(NrHarts);
+  localparam int unsigned NrHartsAligned = 2**HartSelLen;
+  localparam int unsigned MaxAar         = (BusWidth == 64) ? 4 : 3;
+  localparam bit          HasSndScratch  = (DmBaseAddress != 0);
+  // Depending on whether we are at the zero page or not we either use `x0` or `x10/a0`
+  localparam logic [4:0]  LoadBaseAddr   = (DmBaseAddress == 0) ? 5'd0 : 5'd10;
+
+  localparam logic [DbgAddressBits-1:0] DataBaseAddr        = (dm::DataAddr);
+  localparam logic [DbgAddressBits-1:0] DataEndAddr         = (dm::DataAddr + 4*dm::DataCount - 1);
+  localparam logic [DbgAddressBits-1:0] ProgBufBaseAddr     = (dm::DataAddr - 4*dm::ProgBufSize);
+  localparam logic [DbgAddressBits-1:0] ProgBufEndAddr      = (dm::DataAddr - 1);
+  localparam logic [DbgAddressBits-1:0] AbstractCmdBaseAddr = (ProgBufBaseAddr - 4*10);
+  localparam logic [DbgAddressBits-1:0] AbstractCmdEndAddr  = (ProgBufBaseAddr - 1);
+
+  localparam logic [DbgAddressBits-1:0] WhereToAddr   = 'h300;
+  localparam logic [DbgAddressBits-1:0] FlagsBaseAddr = 'h400;
+  localparam logic [DbgAddressBits-1:0] FlagsEndAddr  = 'h7FF;
+
+  localparam logic [DbgAddressBits-1:0] HaltedAddr    = 'h100;
+  localparam logic [DbgAddressBits-1:0] GoingAddr     = 'h104;
+  localparam logic [DbgAddressBits-1:0] ResumingAddr  = 'h108;
+  localparam logic [DbgAddressBits-1:0] ExceptionAddr = 'h10C;
+
+  logic [dm::ProgBufSize/2-1:0][63:0]   progbuf;
+  logic [7:0][63:0]   abstract_cmd;
+  logic [NrHarts-1:0] halted_d, halted_q;
+  logic [NrHarts-1:0] resuming_d, resuming_q;
+  logic               resume, go, going;
+
+  logic exception;
+  logic unsupported_command;
+
+  logic [63:0] rom_rdata;
+  logic [63:0] rdata_d, rdata_q;
+  logic        word_enable32_q;
+
+  // this is needed to avoid lint warnings related to array indexing
+  // resize hartsel to valid range
+  logic [HartSelLen-1:0] hartsel, wdata_hartsel;
+
+  assign hartsel       = hartsel_i[HartSelLen-1:0];
+  assign wdata_hartsel = wdata_i[HartSelLen-1:0];
+
+  logic [NrHartsAligned-1:0] resumereq_aligned, haltreq_aligned,
+                             halted_d_aligned, halted_q_aligned,
+                             halted_aligned, resumereq_wdata_aligned,
+                             resuming_d_aligned, resuming_q_aligned;
+
+  assign resumereq_aligned       = NrHartsAligned'(resumereq_i);
+  assign haltreq_aligned         = NrHartsAligned'(haltreq_i);
+  assign resumereq_wdata_aligned = NrHartsAligned'(resumereq_i);
+
+  assign halted_q_aligned        = NrHartsAligned'(halted_q);
+  assign halted_d                = NrHarts'(halted_d_aligned);
+  assign resuming_q_aligned      = NrHartsAligned'(resuming_q);
+  assign resuming_d              = NrHarts'(resuming_d_aligned);
+
+  // distinguish whether we need to forward data from the ROM or the FSM
+  // latch the address for this
+  logic fwd_rom_d, fwd_rom_q;
+  dm::ac_ar_cmd_t ac_ar;
+
+  // Abstract Command Access Register
+  assign ac_ar       = dm::ac_ar_cmd_t'(cmd_i.control);
+  assign debug_req_o = haltreq_i;
+  assign halted_o    = halted_q;
+  assign resuming_o  = resuming_q;
+
+  // reshape progbuf
+  assign progbuf = progbuf_i;
+
+  typedef enum logic [1:0] { Idle, Go, Resume, CmdExecuting } state_e;
+  state_e state_d, state_q;
+
+  // hart ctrl queue
+  always_comb begin : p_hart_ctrl_queue
+    // defaults: hold state, report busy, raise neither error nor go/resume
+    state_d          = state_q;
+    cmdbusy_o        = 1'b1;
+    go               = 1'b0;
+    resume           = 1'b0;
+    cmderror_valid_o = 1'b0;
+    cmderror_o       = dm::CmdErrNone;
+    unique case (state_q)
+      Idle: begin
+        // the only state that reports not-busy and may start a new command
+        cmdbusy_o = 1'b0;
+        if (cmd_valid_i) begin
+          if (halted_q_aligned[hartsel] && !unsupported_command) begin
+            state_d = Go; // give the go signal
+          end else begin
+            // hart must be halted for all requests
+            cmderror_valid_o = 1'b1;
+            cmderror_o       = dm::CmdErrorHaltResume;
+          end
+        end
+        // CSRs want to resume; the request is ignored while a halt is requested
+        // or while the resuming_q bit has not been cleared yet
+        if (resumereq_aligned[hartsel] && halted_q_aligned[hartsel] &&
+            !(resuming_q_aligned[hartsel] || haltreq_aligned[hartsel])) begin
+          state_d = Resume;
+        end
+      end
+
+      Go: begin
+        // a program has been scheduled: keep signalling go (busy by default)
+        // until the hart reports `going`, then track the running command
+        go = 1'b1;
+        if (going) begin
+          state_d = CmdExecuting;
+        end
+      end
+
+      Resume: begin
+        // hold the resume flag until the selected hart sets its resuming bit
+        resume = 1'b1;
+        if (resuming_q_aligned[hartsel]) begin
+          state_d = Idle;
+        end
+      end
+
+      CmdExecuting: begin
+        // wait until the hart has halted again
+        if (halted_aligned[hartsel]) begin
+          state_d = Idle;
+        end
+      end
+
+      default: ;
+    endcase
+
+    // only signal once that cmd is unsupported so that we can clear cmderr
+    // in subsequent writes to abstractcs
+    if (unsupported_command && cmd_valid_i) begin
+      cmderror_valid_o = 1'b1;
+      cmderror_o       = dm::CmdErrNotSupported;
+    end
+
+    // evaluated last, so a reported exception overrides any error chosen above
+    if (exception) begin
+      cmderror_valid_o = 1'b1;
+      cmderror_o       = dm::CmdErrorException;
+    end
+  end
+
+  // Read-data mux: choose between the ROM word and the registered FSM word,
+  // then narrow the 64-bit result to the bus width.
+  logic [63:0] word_mux;
+  assign word_mux = fwd_rom_q ? rom_rdata : rdata_q;
+  if (BusWidth == 64) begin : gen_word_mux64
+    assign rdata_o = word_mux;
+  end else begin : gen_word_mux32
+    assign rdata_o = word_enable32_q ? word_mux[63:32] : word_mux[31:0];
+  end
+
+  // read/write logic
+  logic [63:0] data_bits;
+  logic [7:0][7:0] rdata;
+  always_comb begin : p_rw_logic
+    // Decodes requests on the req_i/we_i/addr_i port: flag and data writes, ROM and data reads.
+    halted_d_aligned   = NrHartsAligned'(halted_q);
+    resuming_d_aligned = NrHartsAligned'(resuming_q);
+    rdata_d        = rdata_q;
+    // convert the data in bits representation
+    data_bits      = data_i;
+    rdata          = '0;
+
+    // write data in csr register
+    data_valid_o   = 1'b0;
+    exception      = 1'b0;
+    halted_aligned     = '0;
+    going          = 1'b0;
+
+    // The resume ack signal is lowered when the resume request is deasserted
+    if (clear_resumeack_i) begin
+      resuming_d_aligned[hartsel] = 1'b0;
+    end
+    // we've got a new request
+    if (req_i) begin
+      // this is a write
+      if (we_i) begin
+        unique case (addr_i[DbgAddressBits-1:0]) inside
+          HaltedAddr: begin
+            halted_aligned[wdata_hartsel] = 1'b1;
+            halted_d_aligned[wdata_hartsel] = 1'b1;
+          end
+          GoingAddr: begin
+            going = 1'b1;
+          end
+          ResumingAddr: begin
+            // clear the halted flag as the hart resumed execution
+            halted_d_aligned[wdata_hartsel] = 1'b0;
+            // set the resuming flag which needs to be cleared by the debugger
+            resuming_d_aligned[wdata_hartsel] = 1'b1;
+          end
+          // an exception occurred during execution
+          ExceptionAddr: exception = 1'b1;
+          // core writes data registers; NOTE(review): byte lanes ignore the addr_i offset - confirm
+          [DataBaseAddr:DataEndAddr]: begin
+            data_valid_o = 1'b1;
+            for (int i = 0; i < $bits(be_i); i++) begin
+              if (be_i[i]) begin
+                data_bits[i*8+:8] = wdata_i[i*8+:8];
+              end
+            end
+          end
+          default: ;
+        endcase
+
+      // this is a read
+      end else begin
+        unique case (addr_i[DbgAddressBits-1:0]) inside
+          // variable ROM content
+          WhereToAddr: begin
+            // variable jump to abstract cmd, program_buffer or resume
+            if (resumereq_wdata_aligned[wdata_hartsel]) begin
+              rdata_d = {32'b0, dm::jal('0, 21'(dm::ResumeAddress[11:0])-21'(WhereToAddr))};
+            end
+
+            // there is a command active so jump there
+            if (cmdbusy_o) begin
+              // transfer not set is shortcut to the program buffer if postexec is set
+              // keep this statement narrow to not catch invalid commands
+              if (cmd_i.cmdtype == dm::AccessRegister &&
+                  !ac_ar.transfer && ac_ar.postexec) begin
+                rdata_d = {32'b0, dm::jal('0, 21'(ProgBufBaseAddr)-21'(WhereToAddr))};
+              // this is a legit abstract cmd -> execute it
+              end else begin
+                rdata_d = {32'b0, dm::jal('0, 21'(AbstractCmdBaseAddr)-21'(WhereToAddr))};
+              end
+            end
+          end
+          // 64-bit word k of the data window returns the pair {data_i[2k+1], data_i[2k]}
+          [DataBaseAddr:DataEndAddr]: begin
+            rdata_d = {
+                      data_i[$clog2(dm::DataCount)'(((addr_i[DbgAddressBits-1:3] -
+                          DataBaseAddr[DbgAddressBits-1:3]) << 1) + 1'b1)],
+                      data_i[$clog2(dm::DataCount)'((addr_i[DbgAddressBits-1:3] -
+                          DataBaseAddr[DbgAddressBits-1:3]) << 1)]
+                      };
+          end
+
+          [ProgBufBaseAddr:ProgBufEndAddr]: begin
+            rdata_d = progbuf[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] -
+                          ProgBufBaseAddr[DbgAddressBits-1:3])];
+          end
+
+          // abstract command ROM, addressed in 64-bit words (two instructions each)
+          [AbstractCmdBaseAddr:AbstractCmdEndAddr]: begin
+            // return the correct address index
+            rdata_d = abstract_cmd[3'(addr_i[DbgAddressBits-1:3] -
+                           AbstractCmdBaseAddr[DbgAddressBits-1:3])];
+          end
+          // harts are polling for flags here
+          [FlagsBaseAddr:FlagsEndAddr]: begin
+            // release the corresponding hart
+            if (({addr_i[DbgAddressBits-1:3], 3'b0} - FlagsBaseAddr[DbgAddressBits-1:0]) ==
+              (DbgAddressBits'(hartsel) & {{(DbgAddressBits-3){1'b1}}, 3'b0})) begin
+              rdata[DbgAddressBits'(hartsel) & DbgAddressBits'(3'b111)] = {6'b0, resume, go};
+            end
+            rdata_d = rdata;
+          end
+          default: ;
+        endcase
+      end
+    end
+
+    data_o = data_bits;
+  end
+
+  always_comb begin : p_abstract_cmd_rom
+    // default: command is supported; set below for anything we cannot encode
+    unsupported_command = 1'b0;
+    // default ROM image: slots 0..4 hold ten instructions, two per 64-bit word
+    // if ac_ar.transfer is not set then we can take a shortcut to the program buffer
+    abstract_cmd[0][31:0]  = dm::illegal();
+    // load debug module base address into a0, this is shared among all commands
+    abstract_cmd[0][63:32] = HasSndScratch ? dm::auipc(5'd10, '0) : dm::nop();
+    // clr lowest 12b -> DM base offset
+    abstract_cmd[1][31:0]  = HasSndScratch ? dm::srli(5'd10, 5'd10, 6'd12) : dm::nop();
+    abstract_cmd[1][63:32] = HasSndScratch ? dm::slli(5'd10, 5'd10, 6'd12) : dm::nop();
+    abstract_cmd[2][31:0]  = dm::nop();
+    abstract_cmd[2][63:32] = dm::nop();
+    abstract_cmd[3][31:0]  = dm::nop();
+    abstract_cmd[3][63:32] = dm::nop();
+    abstract_cmd[4][31:0]  = HasSndScratch ? dm::csrr(dm::CSR_DSCRATCH1, 5'd10) : dm::nop();
+    abstract_cmd[4][63:32] = dm::ebreak();
+    abstract_cmd[7:5]      = '0; // remaining slots read as zero
+
+    // this depends on the command being executed
+    unique case (cmd_i.cmdtype)
+      // --------------------
+      // Access Register
+      // --------------------
+      dm::AccessRegister: begin
+        if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && ac_ar.write) begin
+          // store a0 in dscratch1
+          abstract_cmd[0][31:0] = HasSndScratch ? dm::csrw(dm::CSR_DSCRATCH1, 5'd10) : dm::nop();
+          // this range is reserved
+          if (ac_ar.regno[15:14] != '0) begin
+            abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+            unsupported_command = 1'b1;
+          // A0 access needs to be handled separately, as we use A0 to load
+          // the DM address offset need to access DSCRATCH1 in this case
+          end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) &&
+                      (ac_ar.regno[4:0] == 5'd10)) begin
+            // store s0 in dscratch0
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // load from data register
+            abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // and store it in dscratch1, from which slot 4 reloads a0
+            abstract_cmd[3][31:0]  = dm::csrw(dm::CSR_DSCRATCH1, 5'd8);
+            // restore s0 again from dscratch0
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          // GPR/FPR access
+          end else if (ac_ar.regno[12]) begin
+            // determine whether we want to access the floating point register or not
+            if (ac_ar.regno[5]) begin
+              abstract_cmd[2][31:0] =
+                  dm::float_load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end else begin
+              abstract_cmd[2][31:0] =
+                  dm::load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end
+          // CSR access
+          end else begin
+            // data register to CSR
+            // store s0 in dscratch0
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // load from data register
+            abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // and store it in the corresponding CSR
+            abstract_cmd[3][31:0]  = dm::csrw(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8);
+            // restore s0 again from dscratch0
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          end
+        end else if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && !ac_ar.write) begin
+          // store a0 in dscratch1
+          abstract_cmd[0][31:0]  = HasSndScratch ?
+                                   dm::csrw(dm::CSR_DSCRATCH1, LoadBaseAddr) :
+                                   dm::nop();
+          // this range is reserved
+          if (ac_ar.regno[15:14] != '0) begin
+              abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+              unsupported_command = 1'b1;
+          // A0 access needs to be handled separately, as we use A0 to load
+          // the DM address offset need to access DSCRATCH1 in this case
+          end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) &&
+                      (ac_ar.regno[4:0] == 5'd10)) begin
+            // store s0 in dscratch0
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // read the a0 value saved in dscratch1 into s0
+            abstract_cmd[2][63:32] = dm::csrr(dm::CSR_DSCRATCH1, 5'd8);
+            // and store s0 into data section
+            abstract_cmd[3][31:0]  = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // restore s0 again from dscratch0
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          // GPR/FPR access
+          end else if (ac_ar.regno[12]) begin
+            // determine whether we want to access the floating point register or not
+            if (ac_ar.regno[5]) begin
+              abstract_cmd[2][31:0] =
+                  dm::float_store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end else begin
+              abstract_cmd[2][31:0] =
+                  dm::store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr);
+            end
+          // CSR access
+          end else begin
+            // CSR register to data
+            // store s0 in dscratch0
+            abstract_cmd[2][31:0]  = dm::csrw(dm::CSR_DSCRATCH0, 5'd8);
+            // read value from CSR into s0
+            abstract_cmd[2][63:32] = dm::csrr(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8);
+            // and store s0 into data section
+            abstract_cmd[3][31:0]  = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr);
+            // restore s0 again from dscratch0
+            abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8);
+          end
+        end else if (32'(ac_ar.aarsize) >= MaxAar || ac_ar.aarpostincrement == 1'b1) begin
+          // this should happen when e.g. ac_ar.aarsize >= MaxAar
+          // Openocd will try to do an access with aarsize=64 bits
+          // first before falling back to 32 bits.
+          abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap
+          unsupported_command = 1'b1;
+        end
+
+        // Check whether we need to execute the program buffer. When we
+        // get an unsupported command we really should abort instead of
+        // still trying to execute the program buffer, makes it easier
+        // for the debugger to recover
+        if (ac_ar.postexec && !unsupported_command) begin
+          // issue a nop, we will automatically run into the program buffer
+          abstract_cmd[4][63:32] = dm::nop();
+        end
+      end
+      // not supported at the moment
+      // dm::QuickAccess:;
+      // dm::AccessMemory:;
+      default: begin
+        abstract_cmd[0][31:0] = dm::ebreak();
+        unsupported_command = 1'b1;
+      end
+    endcase
+  end
+
+  logic [63:0] rom_addr;
+  assign rom_addr = 64'(addr_i);
+
+  // Depending on whether the debug module is located
+  // at the zero page we can instantiate a simplified version
+  // which only requires one scratch register per hart.
+  // For all other cases we need to set aside
+  // two registers per hart, hence we also need
+  // two scratch registers.
+  if (HasSndScratch) begin : gen_rom_snd_scratch
+    debug_rom i_debug_rom (
+      .clk_i,
+      .req_i,
+      .addr_i  ( rom_addr  ),
+      .rdata_o ( rom_rdata )
+    );
+  end else begin : gen_rom_one_scratch
+    // It uses the zero register (`x0`) as the base
+    // for its loads. The zero register does not need to
+    // be saved.
+    debug_rom_one_scratch i_debug_rom (
+      .clk_i,
+      .req_i,
+      .addr_i  ( rom_addr  ),
+      .rdata_o ( rom_rdata )
+    );
+  end
+
+  // ROM starts at the HaltAddress of the core e.g.: it immediately jumps to
+  // the ROM base address
+  assign fwd_rom_d = logic'(addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (~rst_ni) begin
+      state_q         <= Idle;
+      rdata_q         <= 64'h0;
+      fwd_rom_q       <= '0;
+      word_enable32_q <= '0;
+    end else begin
+      state_q         <= state_d;
+      rdata_q         <= rdata_d;
+      fwd_rom_q       <= fwd_rom_d;
+      word_enable32_q <= addr_i[2]; // selects the 32-bit half in gen_word_mux32
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      resuming_q <= '0;
+      halted_q   <= '0;
+    end else begin
+      resuming_q <= resuming_d & SelectableHarts; // masked-out harts stay 0
+      halted_q   <= halted_d   & SelectableHarts;
+    end
+  end
+
+endmodule : dm_mem
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv
new file mode 100644
index 0000000..971f128
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv
@@ -0,0 +1,436 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:   dm_pkg.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   30.6.2018
+ *
+ * Description: Debug-module package, contains common system definitions.
+ *
+ */
+
+package dm;
+  localparam logic [3:0] DbgVersion013 = 4'h2;
+  // size of program buffer in chunks of 32-bit words
+  localparam logic [4:0] ProgBufSize   = 5'h8;
+
+  // amount of data count registers implemented
+  localparam logic [3:0] DataCount     = 4'h2;
+
+  // address to which a hart should jump when it was requested to halt
+  localparam logic [63:0] HaltAddress = 64'h800;
+  localparam logic [63:0] ResumeAddress = HaltAddress + 4;
+  localparam logic [63:0] ExceptionAddress = HaltAddress + 8;
+
+  // address where data0-15 is shadowed or if shadowed in a CSR
+  // address of the first CSR used for shadowing the data
+  localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here
+
+  // debug registers
+  typedef enum logic [7:0] {
+    Data0        = 8'h04,
+    Data1        = 8'h05,
+    Data2        = 8'h06,
+    Data3        = 8'h07,
+    Data4        = 8'h08,
+    Data5        = 8'h09,
+    Data6        = 8'h0A,
+    Data7        = 8'h0B,
+    Data8        = 8'h0C,
+    Data9        = 8'h0D,
+    Data10       = 8'h0E,
+    Data11       = 8'h0F,
+    DMControl    = 8'h10,
+    DMStatus     = 8'h11, // r/o
+    Hartinfo     = 8'h12,
+    HaltSum1     = 8'h13,
+    HAWindowSel  = 8'h14,
+    HAWindow     = 8'h15,
+    AbstractCS   = 8'h16,
+    Command      = 8'h17,
+    AbstractAuto = 8'h18,
+    DevTreeAddr0 = 8'h19,
+    DevTreeAddr1 = 8'h1A,
+    DevTreeAddr2 = 8'h1B,
+    DevTreeAddr3 = 8'h1C,
+    NextDM       = 8'h1D,
+    ProgBuf0     = 8'h20,
+    ProgBuf1     = 8'h21,
+    ProgBuf2     = 8'h22,
+    ProgBuf3     = 8'h23,
+    ProgBuf4     = 8'h24,
+    ProgBuf5     = 8'h25,
+    ProgBuf6     = 8'h26,
+    ProgBuf7     = 8'h27,
+    ProgBuf8     = 8'h28,
+    ProgBuf9     = 8'h29,
+    ProgBuf10    = 8'h2A,
+    ProgBuf11    = 8'h2B,
+    ProgBuf12    = 8'h2C,
+    ProgBuf13    = 8'h2D,
+    ProgBuf14    = 8'h2E,
+    ProgBuf15    = 8'h2F,
+    AuthData     = 8'h30,
+    HaltSum2     = 8'h34,
+    HaltSum3     = 8'h35,
+    SBAddress3   = 8'h37,
+    SBCS         = 8'h38,
+    SBAddress0   = 8'h39,
+    SBAddress1   = 8'h3A,
+    SBAddress2   = 8'h3B,
+    SBData0      = 8'h3C,
+    SBData1      = 8'h3D,
+    SBData2      = 8'h3E,
+    SBData3      = 8'h3F,
+    HaltSum0     = 8'h40
+  } dm_csr_e;
+
+  // debug causes
+  localparam logic [2:0] CauseBreakpoint = 3'h1;
+  localparam logic [2:0] CauseTrigger    = 3'h2;
+  localparam logic [2:0] CauseRequest    = 3'h3;
+  localparam logic [2:0] CauseSingleStep = 3'h4;
+
+  typedef struct packed {
+    logic [31:23] zero1;
+    logic         impebreak;
+    logic [21:20] zero0;
+    logic         allhavereset;
+    logic         anyhavereset;
+    logic         allresumeack;
+    logic         anyresumeack;
+    logic         allnonexistent;
+    logic         anynonexistent;
+    logic         allunavail;
+    logic         anyunavail;
+    logic         allrunning;
+    logic         anyrunning;
+    logic         allhalted;
+    logic         anyhalted;
+    logic         authenticated;
+    logic         authbusy;
+    logic         hasresethaltreq;
+    logic         devtreevalid;
+    logic [3:0]   version;
+  } dmstatus_t;
+
+  typedef struct packed {
+    logic         haltreq;
+    logic         resumereq;
+    logic         hartreset;
+    logic         ackhavereset;
+    logic         zero1;
+    logic         hasel;
+    logic [25:16] hartsello;
+    logic [15:6]  hartselhi;
+    logic [5:4]   zero0;
+    logic         setresethaltreq;
+    logic         clrresethaltreq;
+    logic         ndmreset;
+    logic         dmactive;
+  } dmcontrol_t;
+
+  typedef struct packed {
+    logic [31:24] zero1;
+    logic [23:20] nscratch;
+    logic [19:17] zero0;
+    logic         dataaccess;
+    logic [15:12] datasize;
+    logic [11:0]  dataaddr;
+  } hartinfo_t;
+
+  typedef enum logic [2:0] {
+    CmdErrNone, CmdErrBusy, CmdErrNotSupported,
+    CmdErrorException, CmdErrorHaltResume,
+    CmdErrorBus, CmdErrorOther = 7
+  } cmderr_e;
+
+  typedef struct packed {
+    logic [31:29] zero3;
+    logic [28:24] progbufsize;
+    logic [23:13] zero2;
+    logic         busy;
+    logic         zero1;
+    cmderr_e      cmderr;
+    logic [7:4]   zero0;
+    logic [3:0]   datacount;
+  } abstractcs_t;
+
+  typedef enum logic [7:0] {
+    AccessRegister = 8'h0,
+    QuickAccess    = 8'h1,
+    AccessMemory   = 8'h2
+  } cmd_e;
+
+  typedef struct packed {
+    cmd_e        cmdtype;
+    logic [23:0] control;
+  } command_t;
+
+  typedef struct packed {
+    logic [31:16] autoexecprogbuf;
+    logic [15:12] zero0;
+    logic [11:0]  autoexecdata;
+  } abstractauto_t;
+
+  typedef struct packed {
+    logic         zero1;
+    logic [22:20] aarsize;
+    logic         aarpostincrement;
+    logic         postexec;
+    logic         transfer;
+    logic         write;
+    logic [15:0]  regno;
+  } ac_ar_cmd_t;
+
+  // DTM
+  typedef enum logic [1:0] {
+    DTM_NOP   = 2'h0,
+    DTM_READ  = 2'h1,
+    DTM_WRITE = 2'h2
+  } dtm_op_e;
+
+  typedef struct packed {
+    logic [31:29] sbversion;
+    logic [28:23] zero0;
+    logic         sbbusyerror;
+    logic         sbbusy;
+    logic         sbreadonaddr;
+    logic [19:17] sbaccess;
+    logic         sbautoincrement;
+    logic         sbreadondata;
+    logic [14:12] sberror;
+    logic [11:5]  sbasize;
+    logic         sbaccess128;
+    logic         sbaccess64;
+    logic         sbaccess32;
+    logic         sbaccess16;
+    logic         sbaccess8;
+  } sbcs_t;
+
+  localparam logic [1:0] DTM_SUCCESS = 2'h0;
+
+  typedef struct packed {
+    logic [6:0]  addr;
+    dtm_op_e     op;
+    logic [31:0] data;
+  } dmi_req_t;
+
+  typedef struct packed  {
+    logic [31:0] data;
+    logic [1:0]  resp;
+  } dmi_resp_t;
+
+  // privilege levels
+  typedef enum logic[1:0] {
+    PRIV_LVL_M = 2'b11,
+    PRIV_LVL_S = 2'b01,
+    PRIV_LVL_U = 2'b00
+  } priv_lvl_t;
+
+  // debugregs in core
+  typedef struct packed {
+    logic [31:28]     xdebugver;
+    logic [27:16]     zero2;
+    logic             ebreakm;
+    logic             zero1;
+    logic             ebreaks;
+    logic             ebreaku;
+    logic             stepie;
+    logic             stopcount;
+    logic             stoptime;
+    logic [8:6]       cause;
+    logic             zero0;
+    logic             mprven;
+    logic             nmip;
+    logic             step;
+    priv_lvl_t        prv;
+  } dcsr_t;
+
+  // CSRs
+  typedef enum logic [11:0] {
+    // Floating-Point CSRs
+    CSR_FFLAGS         = 12'h001,
+    CSR_FRM            = 12'h002,
+    CSR_FCSR           = 12'h003,
+    CSR_FTRAN          = 12'h800,
+    // Supervisor Mode CSRs
+    CSR_SSTATUS        = 12'h100,
+    CSR_SIE            = 12'h104,
+    CSR_STVEC          = 12'h105,
+    CSR_SCOUNTEREN     = 12'h106,
+    CSR_SSCRATCH       = 12'h140,
+    CSR_SEPC           = 12'h141,
+    CSR_SCAUSE         = 12'h142,
+    CSR_STVAL          = 12'h143,
+    CSR_SIP            = 12'h144,
+    CSR_SATP           = 12'h180,
+    // Machine Mode CSRs
+    CSR_MSTATUS        = 12'h300,
+    CSR_MISA           = 12'h301,
+    CSR_MEDELEG        = 12'h302,
+    CSR_MIDELEG        = 12'h303,
+    CSR_MIE            = 12'h304,
+    CSR_MTVEC          = 12'h305,
+    CSR_MCOUNTEREN     = 12'h306,
+    CSR_MSCRATCH       = 12'h340,
+    CSR_MEPC           = 12'h341,
+    CSR_MCAUSE         = 12'h342,
+    CSR_MTVAL          = 12'h343,
+    CSR_MIP            = 12'h344,
+    CSR_PMPCFG0        = 12'h3A0,
+    CSR_PMPADDR0       = 12'h3B0,
+    CSR_MVENDORID      = 12'hF11,
+    CSR_MARCHID        = 12'hF12,
+    CSR_MIMPID         = 12'hF13,
+    CSR_MHARTID        = 12'hF14,
+    CSR_MCYCLE         = 12'hB00,
+    CSR_MINSTRET       = 12'hB02,
+    CSR_DCACHE         = 12'h701,
+    CSR_ICACHE         = 12'h700,
+
+    CSR_TSELECT        = 12'h7A0,
+    CSR_TDATA1         = 12'h7A1,
+    CSR_TDATA2         = 12'h7A2,
+    CSR_TDATA3         = 12'h7A3,
+    CSR_TINFO          = 12'h7A4,
+
+    // Debug CSR
+    CSR_DCSR           = 12'h7b0,
+    CSR_DPC            = 12'h7b1,
+    CSR_DSCRATCH0      = 12'h7b2, // optional
+    CSR_DSCRATCH1      = 12'h7b3, // optional
+
+    // Counters and Timers
+    CSR_CYCLE          = 12'hC00,
+    CSR_TIME           = 12'hC01,
+    CSR_INSTRET        = 12'hC02
+  } csr_reg_t;
+
+  // SBA state
+  typedef enum logic [2:0] {
+    Idle,
+    Read,
+    Write,
+    WaitRead,
+    WaitWrite
+  } sba_state_e;
+
+  // Instruction Generation Helpers
+  function automatic logic [31:0] jal (logic [4:0]  rd,
+                                       logic [20:0] imm);
+    // J-type layout: imm[20|10:1|11|19:12], rd, opcode JAL
+    jal = {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'b110_1111};
+  endfunction
+
+  function automatic logic [31:0] jalr (logic [4:0]  rd,
+                                        logic [4:0]  rs1,
+                                        logic [11:0] offset);
+    // I-type layout: offset, rs1, funct3 = 0, rd, opcode JALR
+    jalr = {offset, rs1, 3'b000, rd, 7'b110_0111};
+  endfunction
+
+  function automatic logic [31:0] andi (logic [4:0]  rd,
+                                        logic [4:0]  rs1,
+                                        logic [11:0] imm);
+    // I-type layout: imm, rs1, funct3 = 7 (ANDI), rd, opcode OP-IMM
+    andi = {imm, rs1, 3'b111, rd, 7'b001_0011};
+  endfunction
+
+  function automatic logic [31:0] slli (logic [4:0] rd,
+                                        logic [4:0] rs1,
+                                        logic [5:0] shamt);
+    // shift-immediate layout: 6 zero bits, shamt, rs1, funct3 = 1 (SLLI), rd, OP-IMM
+    slli = {6'b000000, shamt, rs1, 3'b001, rd, 7'b001_0011};
+  endfunction
+
+  function automatic logic [31:0] srli (logic [4:0] rd,
+                                        logic [4:0] rs1,
+                                        logic [5:0] shamt);
+    // shift-immediate layout: 6 zero bits, shamt, rs1, funct3 = 5 (SRLI), rd, OP-IMM
+    srli = {6'b000000, shamt, rs1, 3'b101, rd, 7'b001_0011};
+  endfunction
+
+  function automatic logic [31:0] load (logic [2:0]  size,
+                                        logic [4:0]  dest,
+                                        logic [4:0]  base,
+                                        logic [11:0] offset);
+    // I-type layout: offset, base, size (funct3), dest, opcode LOAD
+    load = {offset, base, size, dest, 7'b000_0011};
+  endfunction
+
+  function automatic logic [31:0] auipc (logic [4:0]  rd,
+                                         logic [20:0] imm);
+    // U-type: imm[19:0] goes unscrambled into instr[31:12]; imm[20] is unused
+    return {imm[19:0], rd, 7'h17};
+  endfunction
+
+  function automatic logic [31:0] store (logic [2:0]  size,
+                                         logic [4:0]  src,
+                                         logic [4:0]  base,
+                                         logic [11:0] offset);
+    // S-type layout: offset[11:5], src, base, size (funct3), offset[4:0], opcode STORE
+    store = {offset[11:5], src, base, size, offset[4:0], 7'b010_0011};
+  endfunction
+
+  function automatic logic [31:0] float_load (logic [2:0]  size,
+                                              logic [4:0]  dest,
+                                              logic [4:0]  base,
+                                              logic [11:0] offset);
+    // I-type layout: offset, base, size (funct3), dest, opcode LOAD-FP
+    float_load = {offset, base, size, dest, 7'h07};
+  endfunction
+
+  function automatic logic [31:0] float_store (logic [2:0]  size,
+                                               logic [4:0]  src,
+                                               logic [4:0]  base,
+                                               logic [11:0] offset);
+    // S-type layout: offset[11:5], src, base, size (funct3), offset[4:0], opcode STORE-FP
+    float_store = {offset[11:5], src, base, size, offset[4:0], 7'h27};
+  endfunction
+
+  function automatic logic [31:0] csrw (csr_reg_t   csr,
+                                        logic [4:0] rs1);
+    // CSRRW with rd = x0: csr, rs1, funct3 = 1, rd = 0, opcode SYSTEM
+    csrw = {csr, rs1, 3'b001, 5'd0, 7'b111_0011};
+  endfunction
+
+  function automatic logic [31:0] csrr (csr_reg_t   csr,
+                                        logic [4:0] dest);
+    // CSRRS with rs1 = x0: csr, rs1 = 0, funct3 = 2, dest, opcode SYSTEM
+    csrr = {csr, 5'd0, 3'b010, dest, 7'b111_0011};
+  endfunction
+
+  function automatic logic [31:0] branch(logic [4:0]  src2,
+                                         logic [4:0]  src1,
+                                         logic [2:0]  funct3,
+                                         logic [11:0] offset);
+    // B-type layout: offset[11|9:4], src2, src1, funct3, offset[3:0|10], opcode BRANCH
+    branch = {offset[11], offset[9:4], src2, src1, funct3,
+        offset[3:0], offset[10], 7'h63};
+  endfunction
+
+  function automatic logic [31:0] ebreak ();
+    ebreak = 32'h0010_0073; // fixed EBREAK encoding
+  endfunction
+
+  function automatic logic [31:0] wfi ();
+    wfi = 32'h1050_0073; // fixed WFI encoding
+  endfunction
+
+  function automatic logic [31:0] nop ();
+    nop = 32'h0000_0013; // addi x0, x0, 0
+  endfunction
+
+  function automatic logic [31:0] illegal ();
+    illegal = 32'h0000_0000; // all-zero instruction word
+  endfunction
+
+endpackage : dm
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv
new file mode 100644
index 0000000..98c586c
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv
@@ -0,0 +1,170 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dm_sba.sv
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+* Date:   1.8.2018
+*
+* Description: System Bus Access Module
+*
+*/
+module dm_sba #(  // System Bus Access: FSM that issues one read or write at a time on the master port
+  parameter int unsigned BusWidth = 32,      // width in bits of the address and data buses
+  parameter bit          ReadByteEnable = 1  // 1: drive the byte mask on master_be_o during reads too
+) (
+  input  logic                   clk_i,       // Clock
+  input  logic                   rst_ni,      // asynchronous reset, active low (see p_regs)
+  input  logic                   dmactive_i,  // synchronous reset active low; NOTE(review): never read in this module
+  // bus master port (request / grant / read response)
+  output logic                   master_req_o,
+  output logic [BusWidth-1:0]    master_add_o,
+  output logic                   master_we_o,
+  output logic [BusWidth-1:0]    master_wdata_o,
+  output logic [BusWidth/8-1:0]  master_be_o,
+  input  logic                   master_gnt_i,
+  input  logic                   master_r_valid_i,
+  input  logic [BusWidth-1:0]    master_r_rdata_i,
+  // address in, plus strobe telling that the address was written
+  input  logic [BusWidth-1:0]    sbaddress_i,
+  input  logic                   sbaddress_write_valid_i,
+  // control signals in
+  input  logic                   sbreadonaddr_i,
+  output logic [BusWidth-1:0]    sbaddress_o,   // sbaddress_i, or the auto-incremented address
+  input  logic                   sbautoincrement_i,
+  input  logic [2:0]             sbaccess_i,    // access size selector, decoded in p_be_mask
+  // data in
+  input  logic                   sbreadondata_i,
+  input  logic [BusWidth-1:0]    sbdata_i,
+  input  logic                   sbdata_read_valid_i,
+  input  logic                   sbdata_write_valid_i,
+  // read data out
+  output logic [BusWidth-1:0]    sbdata_o,
+  output logic                   sbdata_valid_o,
+  // control signals
+  output logic                   sbbusy_o,
+  output logic                   sberror_valid_o, // bus error occurred
+  output logic [2:0]             sberror_o // bus error occurred
+);
+  // state_q is the registered FSM state, state_d the next state computed in p_fsm
+  dm::sba_state_e state_d, state_q;
+
+  logic [BusWidth-1:0]           address;
+  logic                          req;
+  logic                          gnt;
+  logic                          we;
+  logic [BusWidth/8-1:0]         be;
+  logic [BusWidth/8-1:0]         be_mask;  // byte mask derived from sbaccess_i and be_idx
+  logic [$clog2(BusWidth/8)-1:0] be_idx;   // low address bits: byte offset inside one bus word
+  // busy in every state other than Idle
+  assign sbbusy_o = logic'(state_q != dm::Idle);
+  // Decode sbaccess_i and be_idx into a per-byte-lane mask.
+  always_comb begin : p_be_mask
+    be_mask = '0;
+
+    // generate byte enable mask
+    unique case (sbaccess_i)
+      3'b000: begin  // one byte lane, selected by be_idx
+        be_mask[be_idx] = '1;
+      end
+      3'b001: begin  // two byte lanes, starting at be_idx with bit 0 cleared
+        be_mask[int'({be_idx[$high(be_idx):1], 1'b0}) +: 2] = '1;
+      end
+      3'b010: begin  // four byte lanes: upper or lower half on a 64-bit bus, all lanes otherwise
+        if (BusWidth == 32'd64) be_mask[int'({be_idx[$high(be_idx)], 2'h0}) +: 4] = '1;
+        else                    be_mask = '1;
+      end
+      3'b011: be_mask = '1;  // all byte lanes
+      default: ;             // sbaccess_i > 3: mask stays zero; p_fsm flags an error outside Idle
+    endcase
+  end
+  // Next-state and output logic. Later assignments override earlier ones.
+  always_comb begin : p_fsm
+    req     = 1'b0;
+    address = sbaddress_i;
+    we      = 1'b0;
+    be      = '0;
+    be_idx  = sbaddress_i[$clog2(BusWidth/8)-1:0];
+
+    sberror_o       = '0;
+    sberror_valid_o = 1'b0;
+    sbaddress_o     = sbaddress_i;
+
+    state_d = state_q;
+
+    unique case (state_q)
+      dm::Idle: begin  // the last matching 'if' wins: read-on-data > write > read-on-address
+        // debugger requested a read
+        if (sbaddress_write_valid_i && sbreadonaddr_i)  state_d = dm::Read;
+        // debugger requested a write
+        if (sbdata_write_valid_i) state_d = dm::Write;
+        // perform another read
+        if (sbdata_read_valid_i && sbreadondata_i) state_d = dm::Read;
+      end
+
+      dm::Read: begin  // hold the request until the bus grants it
+        req = 1'b1;
+        if (ReadByteEnable) be = be_mask;
+        if (gnt) state_d = dm::WaitRead;
+      end
+
+      dm::Write: begin  // hold the write request until the bus grants it
+        req = 1'b1;
+        we  = 1'b1;
+        be = be_mask;
+        if (gnt) state_d = dm::WaitWrite;
+      end
+
+      dm::WaitRead: begin  // sbdata_valid_o mirrors master_r_valid_i (see assigns below)
+        if (sbdata_valid_o) begin
+          state_d = dm::Idle;
+          // auto-increment address
+          if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i);
+        end
+      end
+
+      dm::WaitWrite: begin  // write completion is also signalled through master_r_valid_i
+        if (sbdata_valid_o) begin
+          state_d = dm::Idle;
+          // auto-increment address
+          if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i);
+        end
+      end
+
+      default: state_d = dm::Idle; // catch parasitic state
+    endcase
+
+    // handle error case
+    if (sbaccess_i > 3 && state_q != dm::Idle) begin  // overrides whatever the case above chose
+      req             = 1'b0;
+      state_d         = dm::Idle;
+      sberror_valid_o = 1'b1;
+      sberror_o       = 3'd3;  // NOTE(review): presumably the debug-spec error code for a bad size; confirm
+    end
+    // further error handling should go here ...
+  end
+  // state register, reset to Idle
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      state_q <= dm::Idle;
+    end else begin
+      state_q <= state_d;
+    end
+  end
+  // master port wiring; write data and read data are passed through combinationally
+  assign master_req_o    = req;
+  assign master_add_o    = address[BusWidth-1:0];
+  assign master_we_o     = we;
+  assign master_wdata_o  = sbdata_i[BusWidth-1:0];
+  assign master_be_o     = be[BusWidth/8-1:0];
+  assign gnt             = master_gnt_i;
+  assign sbdata_valid_o  = master_r_valid_i;
+  assign sbdata_o        = master_r_rdata_i[BusWidth-1:0];
+
+endmodule : dm_sba
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv
new file mode 100644
index 0000000..9887aef
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv
@@ -0,0 +1,218 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dm_top.sv
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+* Date:   30.6.2018
+*
+* Description: Top-level of debug module (DM). This is an AXI-Slave.
+*              DTM protocol is equal to SiFive's debug protocol to leverage
+*              SW infrastructure re-use. As of version 0.13
+*/
+
+module dm_top #(  // structural top level: wires dm_csrs, dm_sba and dm_mem together
+  parameter int unsigned        NrHarts          = 1,
+  parameter int unsigned        BusWidth         = 32,
+  parameter int unsigned        DmBaseAddress    = 'h1000, // default to non-zero page
+  // Bitmask to select physically available harts for systems
+  // that don't use hart numbers in a contiguous fashion.
+  parameter logic [NrHarts-1:0] SelectableHarts  = {NrHarts{1'b1}},
+  parameter bit                 ReadByteEnable   = 1 // toggle new behavior to drive master_be_o during a read
+) (
+  input  logic                  clk_i,       // clock
+  input  logic                  rst_ni,      // asynchronous reset active low, connect PoR here, not the system reset
+  input  logic                  testmode_i,
+  output logic                  ndmreset_o,  // non-debug module reset
+  output logic                  dmactive_o,  // debug module is active
+  output logic [NrHarts-1:0]    debug_req_o, // async debug request
+  input  logic [NrHarts-1:0]    unavailable_i, // communicate whether the hart is unavailable (e.g.: power down)
+  input  dm::hartinfo_t [NrHarts-1:0] hartinfo_i,
+  // slave port, forwarded to dm_mem
+  input  logic                  slave_req_i,
+  input  logic                  slave_we_i,
+  input  logic [BusWidth-1:0]   slave_addr_i,
+  input  logic [BusWidth/8-1:0] slave_be_i,
+  input  logic [BusWidth-1:0]   slave_wdata_i,
+  output logic [BusWidth-1:0]   slave_rdata_o,
+  // master port, driven by dm_sba
+  output logic                  master_req_o,
+  output logic [BusWidth-1:0]   master_add_o,
+  output logic                  master_we_o,
+  output logic [BusWidth-1:0]   master_wdata_o,
+  output logic [BusWidth/8-1:0] master_be_o,
+  input  logic                  master_gnt_i,
+  input  logic                  master_r_valid_i,
+  input  logic [BusWidth-1:0]   master_r_rdata_i,
+
+  // Connection to DTM - compatible to RocketChip Debug Module
+  input  logic                  dmi_rst_ni,
+  input  logic                  dmi_req_valid_i,
+  output logic                  dmi_req_ready_o,
+  input  dm::dmi_req_t          dmi_req_i,
+  // DMI response channel, driven by dm_csrs
+  output logic                  dmi_resp_valid_o,
+  input  logic                  dmi_resp_ready_i,
+  output dm::dmi_resp_t         dmi_resp_o
+);
+  // Signals between dm_csrs and dm_mem (hart control, abstract commands, data/progbuf)
+  // Debug CSRs
+  logic [NrHarts-1:0]               halted;
+  // logic [NrHarts-1:0]               running;
+  logic [NrHarts-1:0]               resumeack;
+  logic [NrHarts-1:0]               haltreq;
+  logic [NrHarts-1:0]               resumereq;
+  logic                             clear_resumeack;
+  logic                             cmd_valid;
+  dm::command_t                     cmd;
+
+  logic                             cmderror_valid;
+  dm::cmderr_e                      cmderror;
+  logic                             cmdbusy;
+  logic [dm::ProgBufSize-1:0][31:0] progbuf;
+  logic [dm::DataCount-1:0][31:0]   data_csrs_mem;  // dm_csrs.data_o -> dm_mem.data_i
+  logic [dm::DataCount-1:0][31:0]   data_mem_csrs;  // dm_mem.data_o  -> dm_csrs.data_i
+  logic                             data_valid;
+  logic [19:0]                      hartsel;
+  // System Bus Access Module
+  logic [BusWidth-1:0]              sbaddress_csrs_sba;  // dm_csrs.sbaddress_o -> dm_sba.sbaddress_i
+  logic [BusWidth-1:0]              sbaddress_sba_csrs;  // dm_sba.sbaddress_o  -> dm_csrs.sbaddress_i
+  logic                             sbaddress_write_valid;
+  logic                             sbreadonaddr;
+  logic                             sbautoincrement;
+  logic [2:0]                       sbaccess;
+  logic                             sbreadondata;
+  logic [BusWidth-1:0]              sbdata_write;        // dm_csrs.sbdata_o -> dm_sba.sbdata_i
+  logic                             sbdata_read_valid;
+  logic                             sbdata_write_valid;
+  logic [BusWidth-1:0]              sbdata_read;         // dm_sba.sbdata_o  -> dm_csrs.sbdata_i
+  logic                             sbdata_valid;
+  logic                             sbbusy;
+  logic                             sberror_valid;
+  logic [2:0]                       sberror;
+
+  // CSR block: terminates the DMI request/response channels and drives the control signals
+  dm_csrs #(
+    .NrHarts(NrHarts),
+    .BusWidth(BusWidth),
+    .SelectableHarts(SelectableHarts)
+  ) i_dm_csrs (
+    .clk_i,
+    .rst_ni,
+    .testmode_i,
+    .dmi_rst_ni,
+    .dmi_req_valid_i,
+    .dmi_req_ready_o,
+    .dmi_req_i,
+    .dmi_resp_valid_o,
+    .dmi_resp_ready_i,
+    .dmi_resp_o,
+    .ndmreset_o,
+    .dmactive_o,
+    .hartsel_o               ( hartsel               ),
+    .hartinfo_i,
+    .halted_i                ( halted                ),
+    .unavailable_i,
+    .resumeack_i             ( resumeack             ),
+    .haltreq_o               ( haltreq               ),
+    .resumereq_o             ( resumereq             ),
+    .clear_resumeack_o       ( clear_resumeack       ),
+    .cmd_valid_o             ( cmd_valid             ),
+    .cmd_o                   ( cmd                   ),
+    .cmderror_valid_i        ( cmderror_valid        ),
+    .cmderror_i              ( cmderror              ),
+    .cmdbusy_i               ( cmdbusy               ),
+    .progbuf_o               ( progbuf               ),
+    .data_i                  ( data_mem_csrs         ),
+    .data_valid_i            ( data_valid            ),
+    .data_o                  ( data_csrs_mem         ),
+    .sbaddress_o             ( sbaddress_csrs_sba    ),
+    .sbaddress_i             ( sbaddress_sba_csrs    ),
+    .sbaddress_write_valid_o ( sbaddress_write_valid ),
+    .sbreadonaddr_o          ( sbreadonaddr          ),
+    .sbautoincrement_o       ( sbautoincrement       ),
+    .sbaccess_o              ( sbaccess              ),
+    .sbreadondata_o          ( sbreadondata          ),
+    .sbdata_o                ( sbdata_write          ),
+    .sbdata_read_valid_o     ( sbdata_read_valid     ),
+    .sbdata_write_valid_o    ( sbdata_write_valid    ),
+    .sbdata_i                ( sbdata_read           ),
+    .sbdata_valid_i          ( sbdata_valid          ),
+    .sbbusy_i                ( sbbusy                ),
+    .sberror_valid_i         ( sberror_valid         ),
+    .sberror_i               ( sberror               )
+  );
+  // System bus access: owns the master_* port of this module
+  dm_sba #(
+    .BusWidth(BusWidth),
+    .ReadByteEnable(ReadByteEnable)
+  ) i_dm_sba (
+    .clk_i,
+    .rst_ni,
+    .dmactive_i              ( dmactive_o            ),
+    // master_* ports connect by name straight to the top-level ports
+    .master_req_o,
+    .master_add_o,
+    .master_we_o,
+    .master_wdata_o,
+    .master_be_o,
+    .master_gnt_i,
+    .master_r_valid_i,
+    .master_r_rdata_i,
+    // control and data exchanged with i_dm_csrs
+    .sbaddress_i             ( sbaddress_csrs_sba    ),
+    .sbaddress_o             ( sbaddress_sba_csrs    ),
+    .sbaddress_write_valid_i ( sbaddress_write_valid ),
+    .sbreadonaddr_i          ( sbreadonaddr          ),
+    .sbautoincrement_i       ( sbautoincrement       ),
+    .sbaccess_i              ( sbaccess              ),
+    .sbreadondata_i          ( sbreadondata          ),
+    .sbdata_i                ( sbdata_write          ),
+    .sbdata_read_valid_i     ( sbdata_read_valid     ),
+    .sbdata_write_valid_i    ( sbdata_write_valid    ),
+    .sbdata_o                ( sbdata_read           ),
+    .sbdata_valid_o          ( sbdata_valid          ),
+    .sbbusy_o                ( sbbusy                ),
+    .sberror_valid_o         ( sberror_valid         ),
+    .sberror_o               ( sberror               )
+  );
+  // Debug memory: serves the slave_* port and drives debug_req_o
+  dm_mem #(
+    .NrHarts(NrHarts),
+    .BusWidth(BusWidth),
+    .SelectableHarts(SelectableHarts),
+    .DmBaseAddress(DmBaseAddress)
+  ) i_dm_mem (
+    .clk_i,
+    .rst_ni,
+    .debug_req_o,
+    .hartsel_i               ( hartsel               ),
+    .haltreq_i               ( haltreq               ),
+    .resumereq_i             ( resumereq             ),
+    .clear_resumeack_i       ( clear_resumeack       ),
+    .halted_o                ( halted                ),
+    .resuming_o              ( resumeack             ),
+    .cmd_valid_i             ( cmd_valid             ),
+    .cmd_i                   ( cmd                   ),
+    .cmderror_valid_o        ( cmderror_valid        ),
+    .cmderror_o              ( cmderror              ),
+    .cmdbusy_o               ( cmdbusy               ),
+    .progbuf_i               ( progbuf               ),
+    .data_i                  ( data_csrs_mem         ),
+    .data_o                  ( data_mem_csrs         ),
+    .data_valid_o            ( data_valid            ),
+    .req_i                   ( slave_req_i           ),
+    .we_i                    ( slave_we_i            ),
+    .addr_i                  ( slave_addr_i          ),
+    .wdata_i                 ( slave_wdata_i         ),
+    .be_i                    ( slave_be_i            ),
+    .rdata_o                 ( slave_rdata_o         )
+  );
+
+endmodule : dm_top
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv
new file mode 100644
index 0000000..4665c91
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv
@@ -0,0 +1,73 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dmi_cdc.sv
+* Author: Andreas Traber <atraber@iis.ee.ethz.ch>
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+*
+* Description: Clock domain crossings for JTAG to DMI very heavily based
+*              on previous work by Andreas Traber for the PULP project.
+*              This is mainly a wrapper around the existing CDCs.
+*/
+module dmi_cdc (  // two cdc_2phase instances: requests tck_i -> clk_i, responses clk_i -> tck_i
+  // JTAG side (master side)
+  input  logic             tck_i,
+  input  logic             trst_ni,
+  // request channel entering on the JTAG side
+  input  dm::dmi_req_t     jtag_dmi_req_i,
+  output logic             jtag_dmi_ready_o,
+  input  logic             jtag_dmi_valid_i,
+  // response channel leaving on the JTAG side
+  output dm::dmi_resp_t    jtag_dmi_resp_o,
+  output logic             jtag_dmi_valid_o,
+  input  logic             jtag_dmi_ready_i,
+
+  // core side (slave side)
+  input  logic             clk_i,
+  input  logic             rst_ni,
+  // request channel leaving on the core side
+  output dm::dmi_req_t     core_dmi_req_o,
+  output logic             core_dmi_valid_o,
+  input  logic             core_dmi_ready_i,
+  // response channel entering on the core side
+  input dm::dmi_resp_t     core_dmi_resp_i,
+  output logic             core_dmi_ready_o,
+  input  logic             core_dmi_valid_i
+);
+  // Request path: source is the tck_i/trst_ni domain, destination the clk_i/rst_ni domain.
+  cdc_2phase #(.T(dm::dmi_req_t)) i_cdc_req (
+    .src_rst_ni  ( trst_ni          ),
+    .src_clk_i   ( tck_i            ),
+    .src_data_i  ( jtag_dmi_req_i   ),
+    .src_valid_i ( jtag_dmi_valid_i ),
+    .src_ready_o ( jtag_dmi_ready_o ),
+
+    .dst_rst_ni  ( rst_ni           ),
+    .dst_clk_i   ( clk_i            ),
+    .dst_data_o  ( core_dmi_req_o   ),
+    .dst_valid_o ( core_dmi_valid_o ),
+    .dst_ready_i ( core_dmi_ready_i )
+  );
+  // Response path: source is the clk_i/rst_ni domain, destination the tck_i/trst_ni domain.
+  cdc_2phase #(.T(dm::dmi_resp_t)) i_cdc_resp (
+    .src_rst_ni  ( rst_ni           ),
+    .src_clk_i   ( clk_i            ),
+    .src_data_i  ( core_dmi_resp_i  ),
+    .src_valid_i ( core_dmi_valid_i ),
+    .src_ready_o ( core_dmi_ready_o ),
+
+    .dst_rst_ni  ( trst_ni          ),
+    .dst_clk_i   ( tck_i            ),
+    .dst_data_o  ( jtag_dmi_resp_o  ),
+    .dst_valid_o ( jtag_dmi_valid_o ),
+    .dst_ready_i ( jtag_dmi_ready_i )
+  );
+
+endmodule : dmi_cdc
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv
new file mode 100644
index 0000000..c4c7b52
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv
@@ -0,0 +1,271 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+* Copyright and related rights are licensed under the Solderpad Hardware
+* License, Version 0.51 (the “License”); you may not use this file except in
+* compliance with the License.  You may obtain a copy of the License at
+* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+* or agreed to in writing, software, hardware and materials distributed under
+* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+* CONDITIONS OF ANY KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations under the License.
+*
+* File:   dmi_jtag.sv
+* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+* Date:   19.7.2018
+*
+* Description: JTAG DMI (debug module interface)
+*
+*/
+
+module dmi_jtag #(  // JTAG front end: TAP + DMI shift register/FSM (tck_i domain) + CDC to clk_i
+  parameter logic [31:0] IdcodeValue = 32'h00000001  // passed on unchanged to dmi_jtag_tap
+) (
+  input  logic         clk_i,      // DMI Clock
+  input  logic         rst_ni,     // Asynchronous reset active low
+  input  logic         testmode_i,
+
+  output logic         dmi_rst_no, // hard reset
+  output dm::dmi_req_t dmi_req_o,
+  output logic         dmi_req_valid_o,
+  input  logic         dmi_req_ready_i,
+
+  input dm::dmi_resp_t dmi_resp_i,
+  output logic         dmi_resp_ready_o,
+  input  logic         dmi_resp_valid_i,
+
+  input  logic         tck_i,    // JTAG test clock pad
+  input  logic         tms_i,    // JTAG test mode select pad
+  input  logic         trst_ni,  // JTAG test reset pad
+  input  logic         td_i,     // JTAG test data input pad
+  output logic         td_o,     // JTAG test data output pad
+  output logic         tdo_oe_o  // Data out output enable
+);
+  assign       dmi_rst_no = rst_ni;  // the reset output is a plain copy of rst_ni
+  // TAP status and select signals, all driven by i_dmi_jtag_tap
+  logic        test_logic_reset;
+  logic        shift_dr;
+  logic        update_dr;
+  logic        capture_dr;
+  logic        dmi_access;
+  logic        dtmcs_select;
+  logic        dmi_reset;
+  logic        dmi_tdi;
+  logic        dmi_tdo;
+  // request channel towards the CDC (tck_i side)
+  dm::dmi_req_t  dmi_req;
+  logic          dmi_req_ready;
+  logic          dmi_req_valid;
+  // response channel coming back from the CDC (tck_i side)
+  dm::dmi_resp_t dmi_resp;
+  logic          dmi_resp_valid;
+  logic          dmi_resp_ready;
+  // layout of the DMI data register: 7 + 32 + 2 = 41 bits, op in the two LSBs
+  typedef struct packed {
+    logic [6:0]  address;
+    logic [31:0] data;
+    logic [1:0]  op;
+  } dmi_t;
+  // status codes placed in the op field on capture
+  typedef enum logic [1:0] {
+    DMINoError = 2'h0, DMIReservedError = 2'h1,
+    DMIOPFailed = 2'h2, DMIBusy = 2'h3
+  } dmi_error_e;
+
+  typedef enum logic [2:0] { Idle, Read, WaitReadValid, Write, WaitWriteValid } state_e;
+  state_e state_d, state_q;
+
+  logic [$bits(dmi_t)-1:0] dr_d, dr_q;  // shift register, as wide as dmi_t
+  logic [6:0] address_d, address_q;     // address latched from the last accepted request
+  logic [31:0] data_d, data_q;          // write data of the request, or read data from the response
+
+  dmi_t  dmi;
+  assign dmi          = dmi_t'(dr_q);  // structured view of the raw shift register
+  assign dmi_req.addr = address_q;
+  assign dmi_req.data = data_q;
+  assign dmi_req.op   = (state_q == Write) ? dm::DTM_WRITE : dm::DTM_READ;
+  // we will always be ready to accept the data we requested
+  assign dmi_resp_ready = 1'b1;
+
+  logic error_dmi_busy;          // combinational: a busy violation is detected in this cycle
+  dmi_error_e error_d, error_q;  // sticky error, cleared only by the dmi_reset sequence below
+  // Request FSM and sticky-error bookkeeping. Later assignments override earlier ones.
+  always_comb begin : p_fsm
+    error_dmi_busy = 1'b0;
+    // default assignments
+    state_d   = state_q;
+    address_d = address_q;
+    data_d    = data_q;
+    error_d   = error_q;
+
+    dmi_req_valid = 1'b0;
+
+    unique case (state_q)
+      Idle: begin
+        // make sure that no error is sticky
+        if (dmi_access && update_dr && (error_q == DMINoError)) begin
+          // save address and value
+          address_d = dmi.address;
+          data_d = dmi.data;
+          if (dm::dtm_op_e'(dmi.op) == dm::DTM_READ) begin
+            state_d = Read;
+          end else if (dm::dtm_op_e'(dmi.op) == dm::DTM_WRITE) begin
+            state_d = Write;
+          end
+          // else this is a nop and we can stay here
+        end
+      end
+
+      Read: begin
+        dmi_req_valid = 1'b1;
+        if (dmi_req_ready) begin
+          state_d = WaitReadValid;
+        end
+      end
+
+      WaitReadValid: begin
+        // load data into register and shift out
+        if (dmi_resp_valid) begin
+          data_d = dmi_resp.data;
+          state_d = Idle;
+        end
+      end
+
+      Write: begin
+        dmi_req_valid = 1'b1;
+        // request sent, wait for response before going back to idle
+        if (dmi_req_ready) begin
+          state_d = WaitWriteValid;
+        end
+      end
+
+      WaitWriteValid: begin
+        // got a valid answer go back to idle
+        if (dmi_resp_valid) begin
+          state_d = Idle;
+        end
+      end
+
+      default: begin
+        // just wait for idle here
+        if (dmi_resp_valid) begin
+          state_d = Idle;
+        end
+      end
+    endcase
+
+    // update_dr means we got another request but we didn't finish
+    // the one in progress, this state is sticky
+    if (update_dr && state_q != Idle) begin
+      error_dmi_busy = 1'b1;
+    end
+
+    // if capture_dr goes high while we are in the read state
+    // or in the corresponding wait state we are not giving back a valid word
+    // -> throw an error
+    if (capture_dr && state_q inside {Read, WaitReadValid}) begin
+      error_dmi_busy = 1'b1;
+    end
+
+    if (error_dmi_busy) begin
+      error_d = DMIBusy;
+    end
+    // clear sticky error flag
+    if (update_dr && dmi_reset && dtmcs_select) begin  // placed last, so it wins over DMIBusy above
+      error_d = DMINoError;
+    end
+  end
+
+  // shift register
+  assign dmi_tdo = dr_q[0];  // the LSB leaves first
+  // Next value of the shift register. Order matters: capture, then shift, then reset override.
+  always_comb begin : p_shift
+    dr_d    = dr_q;
+
+    if (capture_dr) begin
+      if (dmi_access) begin
+        if (error_q == DMINoError && !error_dmi_busy) begin
+          dr_d = {address_q, data_q, DMINoError};
+        // DMI was busy, report an error
+        end else if (error_q == DMIBusy || error_dmi_busy) begin
+          dr_d = {address_q, data_q, DMIBusy};
+        end  // any other error_q value (without a busy flag) leaves dr_d unchanged
+      end
+    end
+
+    if (shift_dr) begin
+      if (dmi_access) begin
+        dr_d = {dmi_tdi, dr_q[$bits(dr_q)-1:1]};  // right shift, new bit enters at the MSB
+      end
+    end
+
+    if (test_logic_reset) begin
+      dr_d = '0;
+    end
+  end
+  // all state of this module lives in the tck_i domain and is reset by trst_ni
+  always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs
+    if (!trst_ni) begin
+      dr_q      <= '0;
+      state_q   <= Idle;
+      address_q <= '0;
+      data_q    <= '0;
+      error_q   <= DMINoError;
+    end else begin
+      dr_q      <= dr_d;
+      state_q   <= state_d;
+      address_q <= address_d;
+      data_q    <= data_d;
+      error_q   <= error_d;
+    end
+  end
+
+  // ---------
+  // TAP
+  // ---------
+  dmi_jtag_tap #(
+    .IrLength (5),
+    .IdcodeValue(IdcodeValue)
+  ) i_dmi_jtag_tap (
+    .tck_i,
+    .tms_i,
+    .trst_ni,
+    .td_i,
+    .td_o,
+    .tdo_oe_o,
+    .testmode_i,
+    .test_logic_reset_o ( test_logic_reset ),
+    .shift_dr_o         ( shift_dr         ),
+    .update_dr_o        ( update_dr        ),
+    .capture_dr_o       ( capture_dr       ),
+    .dmi_access_o       ( dmi_access       ),
+    .dtmcs_select_o     ( dtmcs_select     ),
+    .dmi_reset_o        ( dmi_reset        ),
+    .dmi_error_i        ( error_q          ),
+    .dmi_tdi_o          ( dmi_tdi          ),
+    .dmi_tdo_i          ( dmi_tdo          )
+  );
+
+  // ---------
+  // CDC
+  // ---------
+  dmi_cdc i_dmi_cdc (
+    // JTAG side (master side)
+    .tck_i,
+    .trst_ni,
+    .jtag_dmi_req_i    ( dmi_req          ),
+    .jtag_dmi_ready_o  ( dmi_req_ready    ),
+    .jtag_dmi_valid_i  ( dmi_req_valid    ),
+    .jtag_dmi_resp_o   ( dmi_resp         ),
+    .jtag_dmi_valid_o  ( dmi_resp_valid   ),
+    .jtag_dmi_ready_i  ( dmi_resp_ready   ),
+    // core side
+    .clk_i,
+    .rst_ni,
+    .core_dmi_req_o    ( dmi_req_o        ),
+    .core_dmi_valid_o  ( dmi_req_valid_o  ),
+    .core_dmi_ready_i  ( dmi_req_ready_i  ),
+    .core_dmi_resp_i   ( dmi_resp_i       ),
+    .core_dmi_ready_o  ( dmi_resp_ready_o ),
+    .core_dmi_valid_i  ( dmi_resp_valid_i )
+  );
+
+endmodule : dmi_jtag
diff --git a/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv
new file mode 100644
index 0000000..c2e8d6e
--- /dev/null
+++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv
@@ -0,0 +1,349 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:   dmi_jtag_tap.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   19.7.2018
+ *
+ * Description: JTAG TAP for DMI (according to debug spec 0.13)
+ *
+ */
+
+module dmi_jtag_tap #(
+  parameter int unsigned IrLength = 5,
+  // JTAG IDCODE Value
+  parameter logic [31:0] IdcodeValue = 32'h00000001
+  // xxxx             version
+  // xxxxxxxxxxxxxxxx part number
+  // xxxxxxxxxxx      manufacturer id
+  // 1                required by standard
+) (
+  input  logic        tck_i,    // JTAG test clock pad
+  input  logic        tms_i,    // JTAG test mode select pad
+  input  logic        trst_ni,  // JTAG test reset pad
+  input  logic        td_i,     // JTAG test data input pad
+  output logic        td_o,     // JTAG test data output pad
+  output logic        tdo_oe_o, // Data out output enable
+  input  logic        testmode_i,
+  output logic        test_logic_reset_o,
+  output logic        shift_dr_o,
+  output logic        update_dr_o,
+  output logic        capture_dr_o,
+
+  // we want to access DMI register
+  output logic        dmi_access_o,
+  // JTAG is interested in writing the DTM CSR register
+  output logic        dtmcs_select_o,
+  // clear error state
+  output logic        dmi_reset_o,
+  input  logic [1:0]  dmi_error_i,
+  // test data to submodule
+  output logic        dmi_tdi_o,
+  // test data in from submodule
+  input  logic        dmi_tdo_i
+);
+
+  // to submodule
+  assign dmi_tdi_o = td_i;
+
+  typedef enum logic [3:0] {
+    TestLogicReset, RunTestIdle, SelectDrScan,
+    CaptureDr, ShiftDr, Exit1Dr, PauseDr, Exit2Dr,
+    UpdateDr, SelectIrScan, CaptureIr, ShiftIr,
+    Exit1Ir, PauseIr, Exit2Ir, UpdateIr
+  } tap_state_e;
+
+  tap_state_e tap_state_q, tap_state_d;
+
+  typedef enum logic [IrLength-1:0] {
+    BYPASS0   = 'h0,
+    IDCODE    = 'h1,
+    DTMCSR    = 'h10,
+    DMIACCESS = 'h11,
+    BYPASS1   = 'h1f
+  } ir_reg_e;
+
+  typedef struct packed {
+    logic [31:18] zero1;
+    logic         dmihardreset;
+    logic         dmireset;
+    logic         zero0;
+    logic [14:12] idle;
+    logic [11:10] dmistat;
+    logic [9:4]   abits;
+    logic [3:0]   version;
+  } dtmcs_t;
+
+  // ----------------
+  // IR logic
+  // ----------------
+
+  // shift register
+  logic [IrLength-1:0]  jtag_ir_shift_d, jtag_ir_shift_q;
+  // IR register -> this gets captured from shift register upon update_ir
+  ir_reg_e              jtag_ir_d, jtag_ir_q;
+  logic capture_ir, shift_ir, update_ir; // pause_ir
+
+  always_comb begin : p_jtag
+    jtag_ir_shift_d = jtag_ir_shift_q;
+    jtag_ir_d       = jtag_ir_q;
+
+    // IR shift register
+    if (shift_ir) begin
+      jtag_ir_shift_d = {td_i, jtag_ir_shift_q[IrLength-1:1]};
+    end
+
+    // capture IR register
+    if (capture_ir) begin
+      jtag_ir_shift_d =  IrLength'(4'b0101);
+    end
+
+    // update IR register
+    if (update_ir) begin
+      jtag_ir_d = ir_reg_e'(jtag_ir_shift_q);
+    end
+
+    // synchronous test-logic reset
+    if (test_logic_reset_o) begin
+      jtag_ir_shift_d = '0;
+      jtag_ir_d       = IDCODE;
+    end
+  end
+
+  always_ff @(posedge tck_i, negedge trst_ni) begin : p_jtag_ir_reg
+    if (!trst_ni) begin
+      jtag_ir_shift_q <= '0;
+      jtag_ir_q       <= IDCODE;
+    end else begin
+      jtag_ir_shift_q <= jtag_ir_shift_d;
+      jtag_ir_q       <= jtag_ir_d;
+    end
+  end
+
+  // ----------------
+  // TAP DR Regs
+  // ----------------
+  // - Bypass
+  // - IDCODE
+  // - DTM CS
+  logic [31:0] idcode_d, idcode_q;  // IDCODE data register: next / registered value
+  logic        idcode_select;  // high while the active instruction is IDCODE
+  logic        bypass_select;  // high for BYPASS0, BYPASS1 and every instruction not listed in p_data_reg_sel
+  dtmcs_t      dtmcs_d, dtmcs_q;  // DTMCS data register: next / registered value
+  logic        bypass_d, bypass_q;  // this is a 1-bit register
+
+  assign dmi_reset_o = dtmcs_q.dmireset;  // taken straight from the register that is also shifted below
+
+  always_comb begin
+    idcode_d = idcode_q;  // default: all three data registers hold their value
+    bypass_d = bypass_q;
+    dtmcs_d  = dtmcs_q;
+
+    if (capture_dr_o) begin  // Capture-DR: parallel-load whichever register is selected
+      if (idcode_select) idcode_d = IdcodeValue;
+      if (bypass_select) bypass_d = 1'b0;
+      if (dtmcs_select_o) begin
+        dtmcs_d  = '{
+                      zero1        : '0,
+                      dmihardreset : 1'b0,
+                      dmireset     : 1'b0,
+                      zero0        : '0,
+                      idle         : 3'd1, // 1: Enter Run-Test/Idle and leave it immediately
+                      dmistat      : dmi_error_i, // 0: No error, 2: Op failed, 3: too fast
+                      abits        : 6'd7, // The size of address in dmi
+                      version      : 4'd1  // Version described in spec version 0.13 (and later?)
+                    };
+      end
+    end
+
+    if (shift_dr_o) begin  // Shift-DR: shift right by one position, td_i enters at the top bit
+      if (idcode_select)  idcode_d = {td_i, 31'(idcode_q >> 1)};
+      if (bypass_select)  bypass_d = td_i;
+      if (dtmcs_select_o) dtmcs_d  = {td_i, 31'(dtmcs_q >> 1)};  // assumes dtmcs_t is 32 bits wide - TODO confirm
+    end
+
+    if (test_logic_reset_o) begin  // placed last so it overrides capture/shift; dtmcs_d is not reset here
+      idcode_d = IdcodeValue;
+      bypass_d = 1'b0;
+    end
+  end
+
+  // ----------------
+  // Data reg select
+  // ----------------
+  always_comb begin : p_data_reg_sel
+    // nothing is selected by default; the active instruction raises exactly one select
+    bypass_select  = 1'b0;
+    idcode_select  = 1'b0;
+    dtmcs_select_o = 1'b0;
+    dmi_access_o   = 1'b0;
+    unique case (jtag_ir_q)
+      DMIACCESS:        dmi_access_o   = 1'b1;
+      DTMCSR:           dtmcs_select_o = 1'b1;
+      IDCODE:           idcode_select  = 1'b1;
+      BYPASS0, BYPASS1: bypass_select  = 1'b1;
+      default:          bypass_select  = 1'b1;  // any other instruction behaves like BYPASS
+    endcase
+  end
+
+  // ----------------
+  // Output select
+  // ----------------
+  logic tdo_mux;  // value registered into td_o by p_tdo_regs
+
+  always_comb begin : p_out_sel
+    // while shift_ir is high the instruction shift register is read out
+    if (shift_ir) begin
+      tdo_mux = jtag_ir_shift_q[0];
+    // otherwise the active instruction decides which data register is read out
+    end else begin
+      unique case (jtag_ir_q)
+        DMIACCESS: tdo_mux = dmi_tdo_i;            // serial data handed in from the DMI
+        DTMCSR:    tdo_mux = dtmcs_q.version[0];   // DTMCS register
+        IDCODE:    tdo_mux = idcode_q[0];          // ID code register
+        default:   tdo_mux = bypass_q;             // BYPASS and everything else
+      endcase
+    end
+  end
+
+  // ----------------
+  // DFT
+  // ----------------
+  logic tck_ni, tck_n;  // tck_ni: output of i_tck_inv, tck_n: output of i_dft_tck_mux
+
+  cluster_clock_inverter i_tck_inv (
+    .clk_o (tck_ni),
+    .clk_i (tck_i)
+  );
+
+  pulp_clock_mux2 i_dft_tck_mux (
+    .clk_o     (tck_n),
+    .clk_sel_i (testmode_i),
+    .clk1_i    (tck_i),   // bypass the inverted clock for testing
+    .clk0_i    (tck_ni)
+  );
+
+  // TDO changes state at the negative edge of TCK; the registers run on tck_n
+  always_ff @(posedge tck_n or negedge trst_ni) begin : p_tdo_regs
+    if (!trst_ni) begin
+      tdo_oe_o <= 1'b0;
+      td_o     <= 1'b0;
+    end else begin
+      tdo_oe_o <= (shift_dr_o | shift_ir);  // output enable only while IR or DR is shifted
+      td_o     <= tdo_mux;
+    end
+  end
+  // ----------------
+  // TAP FSM
+  // ----------------
+  // Next-state and strobe decode of the TAP FSM; purely combinatorial
+  always_comb begin : p_tap_fsm
+
+    test_logic_reset_o = 1'b0;  // defaults: every strobe is low unless its state raises it below
+
+    capture_dr_o       = 1'b0;
+    shift_dr_o         = 1'b0;
+    update_dr_o        = 1'b0;
+
+    capture_ir         = 1'b0;
+    shift_ir           = 1'b0;
+    // pause_ir           = 1'b0; unused
+    update_ir          = 1'b0;
+
+    unique case (tap_state_q)  // in every state tms_i alone chooses between two successor states
+      TestLogicReset: begin
+        tap_state_d = (tms_i) ? TestLogicReset : RunTestIdle;
+        test_logic_reset_o = 1'b1;  // asserted for as long as the FSM stays in this state
+      end
+      RunTestIdle: begin
+        tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+      end
+      // DR Path
+      SelectDrScan: begin
+        tap_state_d = (tms_i) ? SelectIrScan : CaptureDr;
+      end
+      CaptureDr: begin
+        capture_dr_o = 1'b1;
+        tap_state_d = (tms_i) ? Exit1Dr : ShiftDr;
+      end
+      ShiftDr: begin
+        shift_dr_o = 1'b1;
+        tap_state_d = (tms_i) ? Exit1Dr : ShiftDr;
+      end
+      Exit1Dr: begin
+        tap_state_d = (tms_i) ? UpdateDr : PauseDr;
+      end
+      PauseDr: begin
+        tap_state_d = (tms_i) ? Exit2Dr : PauseDr;
+      end
+      Exit2Dr: begin
+        tap_state_d = (tms_i) ? UpdateDr : ShiftDr;
+      end
+      UpdateDr: begin
+        update_dr_o = 1'b1;
+        tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+      end
+      // IR Path
+      SelectIrScan: begin
+        tap_state_d = (tms_i) ? TestLogicReset : CaptureIr;
+      end
+      // In this controller state, the shift register bank in the
+      // Instruction Register parallel loads a pattern of fixed values on
+      // the rising edge of TCK. The two least significant bits must always
+      // be "01" (the IR logic of this module loads 4'b0101 on capture_ir).
+      CaptureIr: begin
+        capture_ir = 1'b1;
+        tap_state_d = (tms_i) ? Exit1Ir : ShiftIr;
+      end
+      // In this controller state, the instruction register gets connected
+      // between TDI and TDO, and the captured pattern gets shifted on
+      // each rising edge of TCK. The instruction available on the TDI
+      // pin is also shifted in to the instruction register.
+      ShiftIr: begin
+        shift_ir = 1'b1;
+        tap_state_d = (tms_i) ? Exit1Ir : ShiftIr;
+      end
+      Exit1Ir: begin
+        tap_state_d = (tms_i) ? UpdateIr : PauseIr;
+      end
+      PauseIr: begin
+        // pause_ir = 1'b1; // unused
+        tap_state_d = (tms_i) ? Exit2Ir : PauseIr;
+      end
+      Exit2Ir: begin
+        tap_state_d = (tms_i) ? UpdateIr : ShiftIr;
+      end
+      // In this controller state, the instruction in the instruction
+      // shift register is copied into the Instruction Register and becomes
+      // the current instruction. NOTE(review): the original text said "falling
+      // edge of TCK", but p_jtag_ir_reg clocks jtag_ir_q on the rising edge of tck_i.
+      UpdateIr: begin
+        update_ir = 1'b1;
+        tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
+      end
+      default: ; // can't actually happen since case is full
+    endcase
+  end
+
+  always_ff @(posedge tck_i, negedge trst_ni) begin : p_regs
+    if (!trst_ni) begin  // asynchronous reset values
+      dtmcs_q     <= '0;
+      bypass_q    <= 1'b0;
+      idcode_q    <= IdcodeValue;
+      tap_state_q <= RunTestIdle;
+    end else begin       // FSM state and the three data registers advance together
+      dtmcs_q     <= dtmcs_d;
+      bypass_q    <= bypass_d;
+      idcode_q    <= idcode_d;
+      tap_state_q <= tap_state_d;
+    end
+  end
+
+endmodule : dmi_jtag_tap
diff --git a/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv b/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv
new file mode 100644
index 0000000..bbf6f94
--- /dev/null
+++ b/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv
@@ -0,0 +1,357 @@
+// Do not edit - auto-generated
+module plic_regs #(  // combinational address decoder for the PLIC register map
+  parameter type reg_req_t  = logic,  // request type; this module reads .valid, .write, .addr and .wdata
+  parameter type reg_rsp_t  = logic   // response type; this module drives .ready, .rdata and .error
+)(
+  input logic [30:0][2:0] prio_i,  // priority registers: read data
+  output logic [30:0][2:0] prio_o,  // priority registers: write data, qualified by prio_we_o
+  output logic [30:0] prio_we_o,
+  output logic [30:0] prio_re_o,
+  input logic [0:0][30:0] ip_i,  // pending bits: read-only, a write to 0xc001000 returns error
+  output logic [0:0] ip_re_o,
+  input logic [1:0][30:0] ie_i,
+  output logic [1:0][30:0] ie_o,
+  output logic [1:0] ie_we_o,
+  output logic [1:0] ie_re_o,
+  input logic [1:0][2:0] threshold_i,
+  output logic [1:0][2:0] threshold_o,
+  output logic [1:0] threshold_we_o,
+  output logic [1:0] threshold_re_o,
+  input logic [1:0][4:0] cc_i,
+  output logic [1:0][4:0] cc_o,
+  output logic [1:0] cc_we_o,
+  output logic [1:0] cc_re_o,
+  // Bus Interface
+  input  reg_req_t req_i,
+  output reg_rsp_t resp_o
+);
+always_comb begin  // defaults first, then decode req_i.addr separately for writes and reads
+  resp_o.ready = 1'b1;  // every request is answered in the same cycle
+  resp_o.rdata = '0;
+  resp_o.error = '0;
+  prio_o = '0;
+  prio_we_o = '0;
+  prio_re_o = '0;
+  ip_re_o = '0;  // default was missing: ip_re_o is otherwise only ever assigned 1'b1, which infers a latch
+  ie_o = '0;
+  ie_we_o = '0;
+  ie_re_o = '0;
+  threshold_o = '0;
+  threshold_we_o = '0;
+  threshold_re_o = '0;
+  cc_o = '0;
+  cc_we_o = '0;
+  cc_re_o = '0;
+  if (req_i.valid) begin
+    if (req_i.write) begin
+      unique case(req_i.addr)
+        32'hc000000: begin
+          prio_o[0][2:0] = req_i.wdata[2:0];
+          prio_we_o[0] = 1'b1;
+        end
+        32'hc000004: begin
+          prio_o[1][2:0] = req_i.wdata[2:0];
+          prio_we_o[1] = 1'b1;
+        end
+        32'hc000008: begin
+          prio_o[2][2:0] = req_i.wdata[2:0];
+          prio_we_o[2] = 1'b1;
+        end
+        32'hc00000c: begin
+          prio_o[3][2:0] = req_i.wdata[2:0];
+          prio_we_o[3] = 1'b1;
+        end
+        32'hc000010: begin
+          prio_o[4][2:0] = req_i.wdata[2:0];
+          prio_we_o[4] = 1'b1;
+        end
+        32'hc000014: begin
+          prio_o[5][2:0] = req_i.wdata[2:0];
+          prio_we_o[5] = 1'b1;
+        end
+        32'hc000018: begin
+          prio_o[6][2:0] = req_i.wdata[2:0];
+          prio_we_o[6] = 1'b1;
+        end
+        32'hc00001c: begin
+          prio_o[7][2:0] = req_i.wdata[2:0];
+          prio_we_o[7] = 1'b1;
+        end
+        32'hc000020: begin
+          prio_o[8][2:0] = req_i.wdata[2:0];
+          prio_we_o[8] = 1'b1;
+        end
+        32'hc000024: begin
+          prio_o[9][2:0] = req_i.wdata[2:0];
+          prio_we_o[9] = 1'b1;
+        end
+        32'hc000028: begin
+          prio_o[10][2:0] = req_i.wdata[2:0];
+          prio_we_o[10] = 1'b1;
+        end
+        32'hc00002c: begin
+          prio_o[11][2:0] = req_i.wdata[2:0];
+          prio_we_o[11] = 1'b1;
+        end
+        32'hc000030: begin
+          prio_o[12][2:0] = req_i.wdata[2:0];
+          prio_we_o[12] = 1'b1;
+        end
+        32'hc000034: begin
+          prio_o[13][2:0] = req_i.wdata[2:0];
+          prio_we_o[13] = 1'b1;
+        end
+        32'hc000038: begin
+          prio_o[14][2:0] = req_i.wdata[2:0];
+          prio_we_o[14] = 1'b1;
+        end
+        32'hc00003c: begin
+          prio_o[15][2:0] = req_i.wdata[2:0];
+          prio_we_o[15] = 1'b1;
+        end
+        32'hc000040: begin
+          prio_o[16][2:0] = req_i.wdata[2:0];
+          prio_we_o[16] = 1'b1;
+        end
+        32'hc000044: begin
+          prio_o[17][2:0] = req_i.wdata[2:0];
+          prio_we_o[17] = 1'b1;
+        end
+        32'hc000048: begin
+          prio_o[18][2:0] = req_i.wdata[2:0];
+          prio_we_o[18] = 1'b1;
+        end
+        32'hc00004c: begin
+          prio_o[19][2:0] = req_i.wdata[2:0];
+          prio_we_o[19] = 1'b1;
+        end
+        32'hc000050: begin
+          prio_o[20][2:0] = req_i.wdata[2:0];
+          prio_we_o[20] = 1'b1;
+        end
+        32'hc000054: begin
+          prio_o[21][2:0] = req_i.wdata[2:0];
+          prio_we_o[21] = 1'b1;
+        end
+        32'hc000058: begin
+          prio_o[22][2:0] = req_i.wdata[2:0];
+          prio_we_o[22] = 1'b1;
+        end
+        32'hc00005c: begin
+          prio_o[23][2:0] = req_i.wdata[2:0];
+          prio_we_o[23] = 1'b1;
+        end
+        32'hc000060: begin
+          prio_o[24][2:0] = req_i.wdata[2:0];
+          prio_we_o[24] = 1'b1;
+        end
+        32'hc000064: begin
+          prio_o[25][2:0] = req_i.wdata[2:0];
+          prio_we_o[25] = 1'b1;
+        end
+        32'hc000068: begin
+          prio_o[26][2:0] = req_i.wdata[2:0];
+          prio_we_o[26] = 1'b1;
+        end
+        32'hc00006c: begin
+          prio_o[27][2:0] = req_i.wdata[2:0];
+          prio_we_o[27] = 1'b1;
+        end
+        32'hc000070: begin
+          prio_o[28][2:0] = req_i.wdata[2:0];
+          prio_we_o[28] = 1'b1;
+        end
+        32'hc000074: begin
+          prio_o[29][2:0] = req_i.wdata[2:0];
+          prio_we_o[29] = 1'b1;
+        end
+        32'hc000078: begin
+          prio_o[30][2:0] = req_i.wdata[2:0];
+          prio_we_o[30] = 1'b1;
+        end
+        32'hc002000: begin
+          ie_o[0][30:0] = req_i.wdata[30:0];
+          ie_we_o[0] = 1'b1;
+        end
+        32'hc002080: begin
+          ie_o[1][30:0] = req_i.wdata[30:0];
+          ie_we_o[1] = 1'b1;
+        end
+        32'hc200000: begin
+          threshold_o[0][2:0] = req_i.wdata[2:0];
+          threshold_we_o[0] = 1'b1;
+        end
+        32'hc201000: begin
+          threshold_o[1][2:0] = req_i.wdata[2:0];
+          threshold_we_o[1] = 1'b1;
+        end
+        32'hc200004: begin
+          cc_o[0][4:0] = req_i.wdata[4:0];
+          cc_we_o[0] = 1'b1;
+        end
+        32'hc201004: begin
+          cc_o[1][4:0] = req_i.wdata[4:0];
+          cc_we_o[1] = 1'b1;
+        end
+        default: resp_o.error = 1'b1;  // write to an unmapped or read-only address
+      endcase
+    end else begin
+      unique case(req_i.addr)
+        32'hc000000: begin
+          resp_o.rdata[2:0] = prio_i[0][2:0];
+          prio_re_o[0] = 1'b1;
+        end
+        32'hc000004: begin
+          resp_o.rdata[2:0] = prio_i[1][2:0];
+          prio_re_o[1] = 1'b1;
+        end
+        32'hc000008: begin
+          resp_o.rdata[2:0] = prio_i[2][2:0];
+          prio_re_o[2] = 1'b1;
+        end
+        32'hc00000c: begin
+          resp_o.rdata[2:0] = prio_i[3][2:0];
+          prio_re_o[3] = 1'b1;
+        end
+        32'hc000010: begin
+          resp_o.rdata[2:0] = prio_i[4][2:0];
+          prio_re_o[4] = 1'b1;
+        end
+        32'hc000014: begin
+          resp_o.rdata[2:0] = prio_i[5][2:0];
+          prio_re_o[5] = 1'b1;
+        end
+        32'hc000018: begin
+          resp_o.rdata[2:0] = prio_i[6][2:0];
+          prio_re_o[6] = 1'b1;
+        end
+        32'hc00001c: begin
+          resp_o.rdata[2:0] = prio_i[7][2:0];
+          prio_re_o[7] = 1'b1;
+        end
+        32'hc000020: begin
+          resp_o.rdata[2:0] = prio_i[8][2:0];
+          prio_re_o[8] = 1'b1;
+        end
+        32'hc000024: begin
+          resp_o.rdata[2:0] = prio_i[9][2:0];
+          prio_re_o[9] = 1'b1;
+        end
+        32'hc000028: begin
+          resp_o.rdata[2:0] = prio_i[10][2:0];
+          prio_re_o[10] = 1'b1;
+        end
+        32'hc00002c: begin
+          resp_o.rdata[2:0] = prio_i[11][2:0];
+          prio_re_o[11] = 1'b1;
+        end
+        32'hc000030: begin
+          resp_o.rdata[2:0] = prio_i[12][2:0];
+          prio_re_o[12] = 1'b1;
+        end
+        32'hc000034: begin
+          resp_o.rdata[2:0] = prio_i[13][2:0];
+          prio_re_o[13] = 1'b1;
+        end
+        32'hc000038: begin
+          resp_o.rdata[2:0] = prio_i[14][2:0];
+          prio_re_o[14] = 1'b1;
+        end
+        32'hc00003c: begin
+          resp_o.rdata[2:0] = prio_i[15][2:0];
+          prio_re_o[15] = 1'b1;
+        end
+        32'hc000040: begin
+          resp_o.rdata[2:0] = prio_i[16][2:0];
+          prio_re_o[16] = 1'b1;
+        end
+        32'hc000044: begin
+          resp_o.rdata[2:0] = prio_i[17][2:0];
+          prio_re_o[17] = 1'b1;
+        end
+        32'hc000048: begin
+          resp_o.rdata[2:0] = prio_i[18][2:0];
+          prio_re_o[18] = 1'b1;
+        end
+        32'hc00004c: begin
+          resp_o.rdata[2:0] = prio_i[19][2:0];
+          prio_re_o[19] = 1'b1;
+        end
+        32'hc000050: begin
+          resp_o.rdata[2:0] = prio_i[20][2:0];
+          prio_re_o[20] = 1'b1;
+        end
+        32'hc000054: begin
+          resp_o.rdata[2:0] = prio_i[21][2:0];
+          prio_re_o[21] = 1'b1;
+        end
+        32'hc000058: begin
+          resp_o.rdata[2:0] = prio_i[22][2:0];
+          prio_re_o[22] = 1'b1;
+        end
+        32'hc00005c: begin
+          resp_o.rdata[2:0] = prio_i[23][2:0];
+          prio_re_o[23] = 1'b1;
+        end
+        32'hc000060: begin
+          resp_o.rdata[2:0] = prio_i[24][2:0];
+          prio_re_o[24] = 1'b1;
+        end
+        32'hc000064: begin
+          resp_o.rdata[2:0] = prio_i[25][2:0];
+          prio_re_o[25] = 1'b1;
+        end
+        32'hc000068: begin
+          resp_o.rdata[2:0] = prio_i[26][2:0];
+          prio_re_o[26] = 1'b1;
+        end
+        32'hc00006c: begin
+          resp_o.rdata[2:0] = prio_i[27][2:0];
+          prio_re_o[27] = 1'b1;
+        end
+        32'hc000070: begin
+          resp_o.rdata[2:0] = prio_i[28][2:0];
+          prio_re_o[28] = 1'b1;
+        end
+        32'hc000074: begin
+          resp_o.rdata[2:0] = prio_i[29][2:0];
+          prio_re_o[29] = 1'b1;
+        end
+        32'hc000078: begin
+          resp_o.rdata[2:0] = prio_i[30][2:0];
+          prio_re_o[30] = 1'b1;
+        end
+        32'hc001000: begin
+          resp_o.rdata[30:0] = ip_i[0][30:0];
+          ip_re_o[0] = 1'b1;
+        end
+        32'hc002000: begin
+          resp_o.rdata[30:0] = ie_i[0][30:0];
+          ie_re_o[0] = 1'b1;
+        end
+        32'hc002080: begin
+          resp_o.rdata[30:0] = ie_i[1][30:0];
+          ie_re_o[1] = 1'b1;
+        end
+        32'hc200000: begin
+          resp_o.rdata[2:0] = threshold_i[0][2:0];
+          threshold_re_o[0] = 1'b1;
+        end
+        32'hc201000: begin
+          resp_o.rdata[2:0] = threshold_i[1][2:0];
+          threshold_re_o[1] = 1'b1;
+        end
+        32'hc200004: begin
+          resp_o.rdata[4:0] = cc_i[0][4:0];
+          cc_re_o[0] = 1'b1;
+        end
+        32'hc201004: begin
+          resp_o.rdata[4:0] = cc_i[1][4:0];
+          cc_re_o[1] = 1'b1;
+        end
+        default: resp_o.error = 1'b1;  // read from an unmapped address
+      endcase
+    end
+  end
+end
+endmodule
diff --git a/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv b/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv
new file mode 100644
index 0000000..2a32102
--- /dev/null
+++ b/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv
@@ -0,0 +1,157 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author:  Florian Zaruba <zaruabf@iis.ee.ethz.ch>
+//
+// Description: Platform level interrupt controller
+
+module plic_top #(
+  parameter int N_SOURCE    = 30,
+  parameter int N_TARGET    = 2,
+  parameter int MAX_PRIO    = 7,
+  parameter int SRCW        = $clog2(N_SOURCE+1),
+  parameter type reg_req_t  = logic,
+  parameter type reg_rsp_t  = logic
+) (
+  input  logic clk_i,    // Clock
+  input  logic rst_ni,  // Asynchronous reset active low
+  // Register bus
+  input  reg_req_t req_i,
+  output reg_rsp_t resp_o,
+  input logic [N_SOURCE-1:0] le_i, // 0:level 1:edge
+  // Interrupt sources
+  input  logic [N_SOURCE-1:0] irq_sources_i,
+  // One interrupt line per target
+  output logic [N_TARGET-1:0] eip_targets_o
+);
+  localparam PRIOW = $clog2(MAX_PRIO+1);  // bits per priority / threshold value
+
+  logic [N_SOURCE-1:0] ip;  // pending bits produced by the gateway
+
+  logic [N_TARGET-1:0][PRIOW-1:0]    threshold_q;
+
+  logic [N_TARGET-1:0]               claim_re;  // per target: claim/complete register is being read
+  logic [N_TARGET-1:0][SRCW-1:0]     claim_id;  // per target: id offered for claiming (0 = none)
+  logic [N_SOURCE-1:0]               claim;     // per source: decoded from claim_re/claim_id
+
+  logic [N_TARGET-1:0]               complete_we;  // per target: claim/complete register is being written
+  logic [N_TARGET-1:0][SRCW-1:0]     complete_id;  // per target: id written as completed
+  logic [N_SOURCE-1:0]               complete;     // per source: decoded from complete_we/complete_id
+
+  logic [N_SOURCE-1:0][PRIOW-1:0]    prio_q;
+  logic [N_TARGET-1:0][N_SOURCE-1:0] ie_q;
+
+  always_comb begin
+    complete = '0;
+    claim    = '0;
+    for (int t = 0; t < N_TARGET; t++) begin
+      if (claim_re[t] && (claim_id[t] != '0)) claim[claim_id[t]-1] = 1'b1;
+      if (complete_we[t] && (complete_id[t] != '0)) complete[complete_id[t]-1] = 1'b1;
+    end
+  end
+
+  // Gateway: one pending bit per source
+  rv_plic_gateway #(
+    .N_SOURCE ( N_SOURCE )
+  ) i_rv_plic_gateway (
+    .clk_i    ( clk_i         ),
+    .rst_ni   ( rst_ni        ),
+    .le       ( le_i          ),
+    .src      ( irq_sources_i ),
+    .complete ( complete      ),
+    .claim    ( claim         ),
+    .ip       ( ip            )
+  );
+
+  // One arbiter per target
+  for (genvar t = 0; t < N_TARGET; t++) begin : gen_target
+    rv_plic_target #(
+      .ALGORITHM ( "SEQUENTIAL" ),
+      .MAX_PRIO  ( MAX_PRIO ),
+      .N_SOURCE  ( N_SOURCE )
+    ) i_target (
+      .clk_i     ( clk_i            ),
+      .rst_ni    ( rst_ni           ),
+      .ip        ( ip               ),
+      .ie        ( ie_q[t]          ),
+      .prio      ( prio_q           ),
+      .threshold ( threshold_q[t]   ),
+      .irq_id    ( claim_id[t]      ),
+      .irq       ( eip_targets_o[t] )
+    );
+  end
+
+  logic [N_TARGET-1:0][PRIOW-1:0] threshold_o;
+  logic [N_TARGET-1:0] threshold_we_o;
+
+  logic [N_SOURCE:0] prio_we_o;
+  logic [N_SOURCE:0][PRIOW-1:0] prio_i, prio_o;
+
+  // TODO(zarubaf): This needs more graceful handling
+  // it will break if the number of sources is larger than 32
+  logic [N_TARGET-1:0] ie_we_o;
+  logic [N_TARGET-1:0][N_SOURCE:0] ie_i, ie_o;
+
+  plic_regs #(
+    .reg_req_t ( reg_req_t ),
+    .reg_rsp_t ( reg_rsp_t )
+  ) i_plic_regs (
+    .prio_i         ( prio_i         ),
+    .prio_o         ( prio_o         ),
+    .prio_we_o      ( prio_we_o      ),
+    .prio_re_o      (                ), // don't care
+    // bit 0 stands for source zero, which is always zero
+    .ip_i           ( {ip, 1'b0}     ),
+    .ip_re_o        (                ), // don't care
+    .ie_i           ( ie_i           ),
+    .ie_o           ( ie_o           ),
+    .ie_we_o        ( ie_we_o        ),
+    .ie_re_o        (                ), // don't care
+    .threshold_i    ( threshold_q    ),
+    .threshold_o    ( threshold_o    ),
+    .threshold_we_o ( threshold_we_o ),
+    .threshold_re_o (                ), // don't care
+    .cc_i           ( claim_id       ),
+    .cc_o           ( complete_id    ),
+    .cc_we_o        ( complete_we    ),
+    .cc_re_o        ( claim_re       ),
+    .req_i          ( req_i          ),
+    .resp_o         ( resp_o         )
+  );
+
+  assign prio_i[0] = '0;  // source zero reads back as priority 0
+
+  for (genvar t = 0; t < N_TARGET; t++) begin
+    assign ie_i[t] = {ie_q[t][N_SOURCE-1:0], 1'b0};
+  end
+
+  for (genvar s = 1; s <= N_SOURCE; s++) begin
+    assign prio_i[s] = prio_q[s - 1];
+  end
+
+  // Priority, enable and threshold registers
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      threshold_q <= '0;
+      ie_q <= '0;
+      prio_q <= '0;
+    end else begin
+      // register index 0 is source zero, so source s uses bus slot s + 1
+      for (int s = 0; s < N_SOURCE; s++) begin
+        prio_q[s] <= prio_we_o[s + 1] ? prio_o[s + 1] : prio_q[s];
+      end
+      for (int t = 0; t < N_TARGET; t++) begin
+        ie_q[t] <= ie_we_o[t] ? ie_o[t][N_SOURCE:1] : ie_q[t];
+        threshold_q[t] <= threshold_we_o[t] ? threshold_o[t] : threshold_q[t];
+      end
+
+    end
+  end
+endmodule
diff --git a/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv
new file mode 100644
index 0000000..c68f78c
--- /dev/null
+++ b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv
@@ -0,0 +1,60 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// RISC-V Platform-Level Interrupt Gateways module
+
+module rv_plic_gateway #(
+  parameter int N_SOURCE = 32
+) (
+  input clk_i,
+  input rst_ni,
+
+  input [N_SOURCE-1:0] src,
+  input [N_SOURCE-1:0] le,      // Level0 Edge1
+
+  input [N_SOURCE-1:0] claim, // $onehot0(claim)
+  input [N_SOURCE-1:0] complete, // $onehot0(complete)
+
+  output logic [N_SOURCE-1:0] ip
+);
+
+// Per-source gateway: derives a set request from src and keeps the
+// pending (ip) and active (ia) state of every source.
+
+logic [N_SOURCE-1:0] src_d; // src delayed by one clk_i cycle
+logic [N_SOURCE-1:0] set;   // Set: (le) ? src & ~src_d : src ;
+logic [N_SOURCE-1:0] ia;    // Interrupt Active
+
+// Set request per source:
+//   le = 1 (edge) : src is high now and was low one cycle ago
+//   le = 0 (level): follows src
+always_comb begin
+  for (int s = 0; s < N_SOURCE; s++) begin
+    set[s] = (le[s]) ? src[s] & ~src_d[s] : src[s];
+  end
+end
+
+// All state is kept in a single clocked process. Every right-hand side
+// reads the register values from before the clock edge.
+//
+// ip (interrupt pending) is set by the source (depending on le) and cleared
+// by claim. While a source is active (ia), set does not affect ip.
+// The RISC-V PLIC spec mentions an optional counter for edge-triggered
+// sources; it is left out because a counter costs substantial logic.
+//
+// ia (interrupt active) controls ip: once set it stays high until the
+// target completes the interrupt, so the source cannot set ip again even
+// though a claim has already cleared it.
+always_ff @(posedge clk_i or negedge rst_ni) begin
+  if (!rst_ni) begin
+    src_d <= '0;
+    ip    <= '0;
+    ia    <= '0;
+  end else begin
+    src_d <= src;
+    ip    <= (ip | (set & ~ia & ~ip)) & (~claim);
+    ia    <= (ia | (set & ~ia)) & (~complete);
+  end
+end
+endmodule
diff --git a/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv
new file mode 100644
index 0000000..26bd69c
--- /dev/null
+++ b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv
@@ -0,0 +1,125 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// RISC-V Platform-Level Interrupt Generator for Target
+//
+// This module basically doing IE & IP based on priority and threshold.
+// Keep in mind that increasing MAX_PRIO affects logic size a lot.
+
+module rv_plic_target #(
+  parameter int N_SOURCE = 32,
+  parameter int MAX_PRIO = 7,
+  parameter     ALGORITHM = "SEQUENTIAL", // SEQUENTIAL | MATRIX
+
+  // Derived widths (do not override these through parameters)
+  parameter int unsigned SRCW  = $clog2(N_SOURCE+1),
+  parameter int unsigned PRIOW = $clog2(MAX_PRIO+1) // Bits to represent MAX_PRIO
+) (
+  input clk_i,
+  input rst_ni,
+
+  input [N_SOURCE-1:0] ip,  // pending bit per source
+  input [N_SOURCE-1:0] ie,  // enable bit per source (for this target)
+
+  input [N_SOURCE-1:0][PRIOW-1:0] prio,  // priority per source
+  input [PRIOW-1:0] threshold,           // only priorities strictly above this raise irq
+
+  output logic            irq,     // registered: a source was selected
+  output logic [SRCW-1:0] irq_id   // registered: selected source index + 1, 0 = none
+);
+
+// Operation
+//   A source is a candidate when it is pending (ip), enabled (ie) and its
+//   priority is strictly greater than threshold. A threshold of all ones
+//   therefore masks every source. irq and irq_id are registered; irq_id is
+//   the selected source index plus one and 0 means "no interrupt".
+//
+//   ALGORITHM picks one of the two implementations below. For any other
+//   string no selection logic is generated and irq / irq_id are left
+//   undriven.
+//
+
+
+
+if (ALGORITHM == "SEQUENTIAL") begin : gen_sequential
+  // Let first implementation be brute-force
+  // As N_SOURCE increasing logic depth increases O(logN)
+  // This approach slows down the simulation.
+  logic [PRIOW:0] max_prio;  // one bit wider than prio so that threshold + 1 cannot wrap to 0
+  logic irq_next;
+  logic [SRCW-1:0] irq_id_next;
+  always_comb begin
+    max_prio = {1'b0, threshold} + 1'b1; // Priority strictly greater than threshold
+    irq_id_next = '0; // default: No Interrupt
+    irq_next = 1'b0;
+    for (int i = N_SOURCE-1 ; i >= 0 ; i--) begin  // descending with >=: on a tie the lowest index wins
+      if ((ip[i] & ie[i]) == 1'b1 && {1'b0, prio[i]} >= max_prio) begin
+        max_prio = {1'b0, prio[i]};
+        irq_id_next = SRCW'(i+1);
+        irq_next = 1'b1;
+      end
+    end // for i
+  end
+
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (!rst_ni) begin
+      irq <= 1'b0;
+      irq_id <= '0;
+    end else begin
+      irq <= irq_next;
+      irq_id <= irq_id_next;
+    end
+  end
+end else if (ALGORITHM == "MATRIX") begin : gen_mat
+  // Second trial : N X N matrix
+  // Set mat[i][j] to 1 if prio[i] >= prio[j] and ip[i] & ie[i] & ip[j] & ie[j]
+  // Comparator depth is just 1 then logN AND gate then Leading One detector
+  // It is to find the max value of priority
+  //
+  // This uses a lot of comparators: (N x (N-1))/2.
+  // So if above approach(ALGORITHM 1) meets timing, don't use this algorithm.
+  logic [N_SOURCE-1:0] is;  // pending and enabled
+
+  logic [N_SOURCE-1:0][N_SOURCE-1:0] mat;  // only entries with j > i are assigned and used
+  logic [N_SOURCE-1:0] merged_row;
+
+  assign is = ip & ie;
+  always_comb begin
+    merged_row[N_SOURCE-1] = is[N_SOURCE-1] & (prio[N_SOURCE-1] > threshold);
+    for (int i = 0 ; i < N_SOURCE-1 ; i++) begin
+      merged_row[i] = 1'b1;
+      for (int j = i+1 ; j < N_SOURCE ; j++) begin
+        mat[i][j] = (prio[i] <= threshold) ? 1'b0 :         // No compare if less than TH
+                    (is[i] & is[j]) ? prio[i] >= prio[j] :
+                    (is[i]) ? 1'b 1 : 1'b 0 ;
+        merged_row[i] = merged_row[i] & mat[i][j]; // all should be 1
+      end // for j
+    end // for i
+  end // always_comb
+
+  // Leading One detector: keeps only the least-significant set bit of merged_row
+  logic [N_SOURCE-1:0] lod;
+  assign lod = merged_row & (~merged_row + 1'b1);
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (!rst_ni) begin
+      irq <= 1'b0;
+      irq_id <= '0; // No interrupt
+    end else if (|lod) begin
+      // as $onehot0(lod), at most one bit set.
+      // so, safely run for loop
+      for (int i = N_SOURCE-1 ; i >= 0 ; i--) begin
+        if (lod[i] == 1'b1) begin
+          irq <= 1'b 1;
+          irq_id <= SRCW'(i + 1);
+        end
+      end // for
+    end else begin
+      // No pending interrupt
+      irq <= 1'b0;
+      irq_id <= '0;
+    end
+  end // always_ff
+end // ALGORITHM
+
+endmodule
+
diff --git a/test/type_param/corev_apu/src/ariane.sv b/test/type_param/corev_apu/src/ariane.sv
new file mode 100644
index 0000000..1ec15ef
--- /dev/null
+++ b/test/type_param/corev_apu/src/ariane.sv
@@ -0,0 +1,86 @@
+// Copyright 2017-2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.03.2017
+// Description: Ariane Top-level module
+
+
+module ariane import ariane_pkg::*; #(  // wrapper: one cva6 core plus, if CVA6Cfg.CvxifEn, an example CV-X-IF coprocessor
+  parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+  parameter bit IsRVFI = bit'(0),
+  parameter type rvfi_probes_t = logic,
+  parameter int unsigned AxiAddrWidth = ariane_axi::AddrWidth,  // NOTE(review): not referenced anywhere in this module
+  parameter int unsigned AxiDataWidth = ariane_axi::DataWidth,  // NOTE(review): not referenced anywhere in this module
+  parameter int unsigned AxiIdWidth   = ariane_axi::IdWidth,    // NOTE(review): not referenced anywhere in this module
+  parameter type axi_ar_chan_t = ariane_axi::ar_chan_t,
+  parameter type axi_aw_chan_t = ariane_axi::aw_chan_t,
+  parameter type axi_w_chan_t  = ariane_axi::w_chan_t,
+  parameter type noc_req_t = ariane_axi::req_t,
+  parameter type noc_resp_t = ariane_axi::resp_t
+) (
+  input  logic                         clk_i,
+  input  logic                         rst_ni,
+  // Core ID, Cluster ID and boot address are considered more or less static
+  input  logic [riscv::VLEN-1:0]       boot_addr_i,  // reset boot address
+  input  logic [riscv::XLEN-1:0]       hart_id_i,    // hart id in a multicore environment (reflected in a CSR)
+
+  // Interrupt inputs
+  input  logic [1:0]                   irq_i,        // level sensitive IR lines, mip & sip (async)
+  input  logic                         ipi_i,        // inter-processor interrupts (async)
+  // Timer facilities
+  input  logic                         time_irq_i,   // timer interrupt in (async)
+  input  logic                         debug_req_i,  // debug request (async)
+  // RISC-V formal interface port (`rvfi`):
+  // Can be left open when formal tracing is not needed.
+  output rvfi_probes_t rvfi_probes_o,
+  // memory side
+  output noc_req_t                     noc_req_o,
+  input  noc_resp_t                    noc_resp_i
+);
+
+  cvxif_pkg::cvxif_req_t  cvxif_req;   // driven by i_cva6
+  cvxif_pkg::cvxif_resp_t cvxif_resp;  // driven only inside gen_example_coprocessor, i.e. undriven when CvxifEn is 0
+
+  cva6 #(  // type and configuration parameters are passed through unchanged
+    .CVA6Cfg ( CVA6Cfg ),
+    .IsRVFI ( IsRVFI ),
+    .rvfi_probes_t ( rvfi_probes_t ),
+    .axi_ar_chan_t (axi_ar_chan_t),
+    .axi_aw_chan_t (axi_aw_chan_t),
+    .axi_w_chan_t (axi_w_chan_t),
+    .noc_req_t (noc_req_t),
+    .noc_resp_t (noc_resp_t)
+  ) i_cva6 (
+    .clk_i                ( clk_i                     ),
+    .rst_ni               ( rst_ni                    ),
+    .boot_addr_i          ( boot_addr_i               ),
+    .hart_id_i            ( hart_id_i                 ),
+    .irq_i                ( irq_i                     ),
+    .ipi_i                ( ipi_i                     ),
+    .time_irq_i           ( time_irq_i                ),
+    .debug_req_i          ( debug_req_i               ),
+    .rvfi_probes_o        ( rvfi_probes_o             ),
+    .cvxif_req_o          ( cvxif_req                 ),
+    .cvxif_resp_i         ( cvxif_resp                ),
+    .noc_req_o            ( noc_req_o                 ),
+    .noc_resp_i           ( noc_resp_i                )
+  );
+
+  if (CVA6Cfg.CvxifEn) begin : gen_example_coprocessor  // elaborated only when the configuration enables CV-X-IF
+    cvxif_example_coprocessor i_cvxif_coprocessor (
+      .clk_i                ( clk_i                          ),
+      .rst_ni               ( rst_ni                         ),
+      .cvxif_req_i          ( cvxif_req                      ),
+      .cvxif_resp_o         ( cvxif_resp                     )
+    );
+  end
+
+endmodule // ariane
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv
new file mode 100644
index 0000000..b3c5615
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv
@@ -0,0 +1,93 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI Reservation Table: one reserved address per AXI ID; grants at most one of clr/set/check per cycle (clr > set > check).
+module axi_res_tbl #(
+    parameter int unsigned AXI_ADDR_WIDTH = 0,  // width of each stored address; must be > 0
+    parameter int unsigned AXI_ID_WIDTH = 0     // table has 2**AXI_ID_WIDTH entries; must be > 0
+) (
+    input  logic                        clk_i,
+    input  logic                        rst_ni,         // asynchronous, active-low reset
+    input  logic [AXI_ADDR_WIDTH-1:0]   clr_addr_i,     // entries equal to this address are zeroed
+    input  logic                        clr_req_i,      // clear request (highest priority)
+    output logic                        clr_gnt_o,      // clear request granted this cycle
+    input  logic [AXI_ADDR_WIDTH-1:0]   set_addr_i,     // address stored on a granted set
+    input  logic [AXI_ID_WIDTH-1:0]     set_id_i,       // entry written on a granted set
+    input  logic                        set_req_i,      // set request (granted only without a clear request)
+    output logic                        set_gnt_o,      // set request granted this cycle
+    input  logic [AXI_ADDR_WIDTH-1:0]   check_addr_i,   // address compared against the selected entry
+    input  logic [AXI_ID_WIDTH-1:0]     check_id_i,     // entry read on a granted check
+    output logic                        check_res_o,    // 1 if entry equals check_addr_i on a granted check, else 0
+    input  logic                        check_req_i,    // check request (lowest priority)
+    output logic                        check_gnt_o     // check request granted this cycle
+);
+
+    localparam integer N_IDS = 2**AXI_ID_WIDTH;  // number of table entries, one per ID value
+
+    // Declarations of Signals and Types
+    logic [N_IDS-1:0][AXI_ADDR_WIDTH-1:0]   tbl_d,                      tbl_q;  // next / registered table
+    logic                                   clr,    // granted clear, evaluated by every entry
+                                            set;    // granted set, applied to entry set_id_i only
+
+    generate for (genvar i = 0; i < N_IDS; ++i) begin: gen_tbl  // next-state logic, one block per entry
+        always_comb begin
+            tbl_d[i] = tbl_q[i];  // default: hold the stored address
+            if (set && i == set_id_i) begin  // set and clr are never both high (if/else-if chain below)
+                tbl_d[i] = set_addr_i;
+            end else if (clr && tbl_q[i] == clr_addr_i) begin  // drop every reservation on the cleared address
+                tbl_d[i] = '0;  // a cleared entry is stored as address 0; there is no separate valid bit
+            end
+        end
+    end endgenerate
+
+    // Table-Managing Logic: fixed-priority arbitration, exactly one request is served per cycle
+    always_comb begin
+        clr         = 1'b0;
+        set         = 1'b0;
+        clr_gnt_o   = 1'b0;
+        set_gnt_o   = 1'b0;
+        check_res_o = 1'b0;
+        check_gnt_o = 1'b0;
+
+        if (clr_req_i) begin  // grants are combinational: asserted in the same cycle as the request
+            clr         = 1'b1;
+            clr_gnt_o   = 1'b1;
+        end else if (set_req_i) begin
+            set         = 1'b1;
+            set_gnt_o   = 1'b1;
+        end else if (check_req_i) begin
+            check_res_o = (tbl_q[check_id_i] == check_addr_i);  // compares the registered value; address 0 matches a cleared entry
+            check_gnt_o = 1'b1;
+        end
+    end
+
+    // Registers
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (~rst_ni) begin
+            tbl_q   <= '0;  // reset leaves every entry at address 0
+        end else begin
+            tbl_q   <= tbl_d;
+        end
+    end
+
+    // Validate parameters: checks run from an initial block and are compiled out when VERILATOR is defined.
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params
+        assert (AXI_ADDR_WIDTH > 0)  // the default of 0 is rejected, so the instantiator must override it
+            else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!");
+        assert (AXI_ID_WIDTH > 0)  // the default of 0 is rejected, so the instantiator must override it
+            else $fatal(1, "AXI_ID_WIDTH must be greater than 0!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv
new file mode 100644
index 0000000..fafdb4f
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv
@@ -0,0 +1,1004 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI RISC-V Atomic Operations (AMOs) Adapter
+//
+// This adapter implements atomic memory operations in accordance with the RVWMO memory consistency
+// model.
+//
+// Interface notes:
+// -  This module has combinational paths between AXI inputs and outputs for minimum latency. Add
+//    slices upstream or downstream or in both directions if combinational paths become too long.
+//    The module adheres to the AXI ready/valid dependency specification to prevent combinational
+//    loops.
+
+module axi_riscv_amos #(
+    // AXI Parameters
+    parameter int unsigned AXI_ADDR_WIDTH       = 0,
+    parameter int unsigned AXI_DATA_WIDTH       = 0,
+    parameter int unsigned AXI_ID_WIDTH         = 0,
+    parameter int unsigned AXI_USER_WIDTH       = 0,
+    // Maximum number of AXI write transactions outstanding at the same time
+    parameter int unsigned AXI_MAX_WRITE_TXNS   = 0,
+    // Word width of the widest RISC-V processor that can issue requests to this module.
+    // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are
+    // supported if `aw_strb` is set correctly.
+    parameter int unsigned RISCV_WORD_WIDTH     = 0,
+    /// Derived Parameters (do NOT change manually!)
+    localparam int unsigned AXI_STRB_WIDTH      = AXI_DATA_WIDTH / 8
+) (
+    input  logic                        clk_i,
+    input  logic                        rst_ni,
+
+    /// Slave Interface
+    input  logic [AXI_ADDR_WIDTH-1:0]   slv_aw_addr_i,
+    input  logic [2:0]                  slv_aw_prot_i,
+    input  logic [3:0]                  slv_aw_region_i,
+    input  logic [5:0]                  slv_aw_atop_i,
+    input  logic [7:0]                  slv_aw_len_i,
+    input  logic [2:0]                  slv_aw_size_i,
+    input  logic [1:0]                  slv_aw_burst_i,
+    input  logic                        slv_aw_lock_i,
+    input  logic [3:0]                  slv_aw_cache_i,
+    input  logic [3:0]                  slv_aw_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]     slv_aw_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_aw_user_i,
+    output logic                        slv_aw_ready_o,
+    input  logic                        slv_aw_valid_i,
+
+    input  logic [AXI_ADDR_WIDTH-1:0]   slv_ar_addr_i,
+    input  logic [2:0]                  slv_ar_prot_i,
+    input  logic [3:0]                  slv_ar_region_i,
+    input  logic [7:0]                  slv_ar_len_i,
+    input  logic [2:0]                  slv_ar_size_i,
+    input  logic [1:0]                  slv_ar_burst_i,
+    input  logic                        slv_ar_lock_i,
+    input  logic [3:0]                  slv_ar_cache_i,
+    input  logic [3:0]                  slv_ar_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]     slv_ar_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_ar_user_i,
+    output logic                        slv_ar_ready_o,
+    input  logic                        slv_ar_valid_i,
+
+    input  logic [AXI_DATA_WIDTH-1:0]   slv_w_data_i,
+    input  logic [AXI_STRB_WIDTH-1:0]   slv_w_strb_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_w_user_i,
+    input  logic                        slv_w_last_i,
+    output logic                        slv_w_ready_o,
+    input  logic                        slv_w_valid_i,
+
+    output logic [AXI_DATA_WIDTH-1:0]   slv_r_data_o,
+    output logic [1:0]                  slv_r_resp_o,
+    output logic                        slv_r_last_o,
+    output logic [AXI_ID_WIDTH-1:0]     slv_r_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   slv_r_user_o,
+    input  logic                        slv_r_ready_i,
+    output logic                        slv_r_valid_o,
+
+    output logic [1:0]                  slv_b_resp_o,
+    output logic [AXI_ID_WIDTH-1:0]     slv_b_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   slv_b_user_o,
+    input  logic                        slv_b_ready_i,
+    output logic                        slv_b_valid_o,
+
+    /// Master Interface
+    output logic [AXI_ADDR_WIDTH-1:0]   mst_aw_addr_o,
+    output logic [2:0]                  mst_aw_prot_o,
+    output logic [3:0]                  mst_aw_region_o,
+    output logic [5:0]                  mst_aw_atop_o,
+    output logic [7:0]                  mst_aw_len_o,
+    output logic [2:0]                  mst_aw_size_o,
+    output logic [1:0]                  mst_aw_burst_o,
+    output logic                        mst_aw_lock_o,
+    output logic [3:0]                  mst_aw_cache_o,
+    output logic [3:0]                  mst_aw_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]     mst_aw_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_aw_user_o,
+    input  logic                        mst_aw_ready_i,
+    output logic                        mst_aw_valid_o,
+
+    output logic [AXI_ADDR_WIDTH-1:0]   mst_ar_addr_o,
+    output logic [2:0]                  mst_ar_prot_o,
+    output logic [3:0]                  mst_ar_region_o,
+    output logic [7:0]                  mst_ar_len_o,
+    output logic [2:0]                  mst_ar_size_o,
+    output logic [1:0]                  mst_ar_burst_o,
+    output logic                        mst_ar_lock_o,
+    output logic [3:0]                  mst_ar_cache_o,
+    output logic [3:0]                  mst_ar_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]     mst_ar_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_ar_user_o,
+    input  logic                        mst_ar_ready_i,
+    output logic                        mst_ar_valid_o,
+
+    output logic [AXI_DATA_WIDTH-1:0]   mst_w_data_o,
+    output logic [AXI_STRB_WIDTH-1:0]   mst_w_strb_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_w_user_o,
+    output logic                        mst_w_last_o,
+    input  logic                        mst_w_ready_i,
+    output logic                        mst_w_valid_o,
+
+    input  logic [AXI_DATA_WIDTH-1:0]   mst_r_data_i,
+    input  logic [1:0]                  mst_r_resp_i,
+    input  logic                        mst_r_last_i,
+    input  logic [AXI_ID_WIDTH-1:0]     mst_r_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   mst_r_user_i,
+    output logic                        mst_r_ready_o,
+    input  logic                        mst_r_valid_i,
+
+    input  logic [1:0]                  mst_b_resp_i,
+    input  logic [AXI_ID_WIDTH-1:0]     mst_b_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   mst_b_user_i,
+    output logic                        mst_b_ready_o,
+    input  logic                        mst_b_valid_i
+);
+
+    localparam int unsigned OUTSTND_BURSTS_WIDTH = $clog2(AXI_MAX_WRITE_TXNS+1);
+    localparam int unsigned AXI_ALU_RATIO        = AXI_DATA_WIDTH/RISCV_WORD_WIDTH;
+
+    // State types
+    typedef enum logic [1:0] { FEEDTHROUGH_AW, WAIT_RESULT_AW, SEND_AW } aw_state_t;
+    aw_state_t   aw_state_d, aw_state_q;
+
+    typedef enum logic [2:0] { FEEDTHROUGH_W, WAIT_DATA_W, WAIT_RESULT_W, WAIT_CHANNEL_W, SEND_W } w_state_t;
+    w_state_t    w_state_d, w_state_q;
+
+    typedef enum logic [1:0] { FEEDTHROUGH_B, WAIT_COMPLETE_B, WAIT_CHANNEL_B, SEND_B } b_state_t;
+    b_state_t    b_state_d, b_state_q;
+
+    typedef enum logic [1:0] { FEEDTHROUGH_AR, WAIT_CHANNEL_AR, SEND_AR } ar_state_t;
+    ar_state_t   ar_state_d, ar_state_q;
+
+    typedef enum logic [1:0] { FEEDTHROUGH_R, WAIT_DATA_R, WAIT_CHANNEL_R, SEND_R } r_state_t;
+    r_state_t    r_state_d, r_state_q;
+
+    typedef enum logic [1:0] { NONE, INVALID, LOAD, STORE } atop_req_t;
+    atop_req_t   atop_valid_d, atop_valid_q;
+
+    // Signal declarations
+    // Transaction FF
+    logic [AXI_ADDR_WIDTH-1:0]          addr_d,         addr_q;
+    logic [AXI_ID_WIDTH-1:0]            id_d,           id_q;
+    logic [AXI_STRB_WIDTH-1:0]          strb_d,         strb_q;
+    logic [2:0]                         size_d,         size_q;
+    logic [5:0]                         atop_d,         atop_q;
+    logic [3:0]                         cache_d,        cache_q;
+    logic [2:0]                         prot_d,         prot_q;
+    logic [3:0]                         qos_d,          qos_q;
+    logic [3:0]                         region_d,       region_q;
+    logic [1:0]                         r_resp_d,       r_resp_q;
+    logic [AXI_USER_WIDTH-1:0]          aw_user_d,      aw_user_q,
+                                        w_user_d,       w_user_q,
+                                        r_user_d,       r_user_q;
+    // Data FF
+    logic [AXI_DATA_WIDTH-1:0]          w_data_d,       w_data_q;       // AMO operand
+    logic [AXI_DATA_WIDTH-1:0]          r_data_d,       r_data_q;       // Data from memory
+    logic [AXI_DATA_WIDTH-1:0]          result_d,       result_q;       // Result of AMO operation
+    logic                               w_d_valid_d,    w_d_valid_q,    // AMO operand valid
+                                        r_d_valid_d,    r_d_valid_q;    // Data from memory valid
+    // Counters
+    logic [OUTSTND_BURSTS_WIDTH-1:0]    w_cnt_d,        w_cnt_q;        // Outstanding W beats
+    logic [OUTSTND_BURSTS_WIDTH-1:0]    w_cnt_req_d,    w_cnt_req_q;    // W beats until AMO can read W
+    logic [OUTSTND_BURSTS_WIDTH-1:0]    w_cnt_inj_d,    w_cnt_inj_q;    // W beats until AMO can insert its W
+    // States
+    logic                               adapter_ready;
+    logic                               transaction_collision;
+    logic                               aw_valid,       aw_ready,       aw_free,
+                                        w_valid,        w_ready,        w_free,
+                                        b_valid,        b_ready,        b_free,
+                                        ar_valid,       ar_ready,       ar_free,
+                                        r_valid,        r_ready,        r_free;
+    // ALU Signals
+    logic [RISCV_WORD_WIDTH-1:0]                        alu_operand_a;
+    logic [RISCV_WORD_WIDTH-1:0]                        alu_operand_b;
+    logic [RISCV_WORD_WIDTH-1:0]                        alu_result;
+    logic [AXI_DATA_WIDTH-1:0]                          alu_result_ext;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0]     op_a;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0]     op_b;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0]     op_a_sign_ext;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0]     op_b_sign_ext;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0]     res;
+    logic [AXI_STRB_WIDTH-1:0][7:0]                     strb_ext;
+    logic                                               sign_a;
+    logic                                               sign_b;
+
+    /**
+     * Calculate ready signals and channel states
+     */
+
+    // Check if all state machines are ready for the next atomic request
+    assign adapter_ready = (aw_state_q == FEEDTHROUGH_AW) &&
+                           ( w_state_q == FEEDTHROUGH_W ) &&
+                           ( b_state_q == FEEDTHROUGH_B ) &&
+                           (ar_state_q == FEEDTHROUGH_AR) &&
+                           ( r_state_q == FEEDTHROUGH_R );
+
+    // Calculate if the channels are free
+    assign aw_free = ~aw_valid | aw_ready;
+    assign  w_free = ~ w_valid |  w_ready;
+    assign  b_free = ~ b_valid |  b_ready;
+    assign ar_free = ~ar_valid | ar_ready;
+    assign  r_free = ~ r_valid |  r_ready;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            aw_valid <= 0;
+            aw_ready <= 0;
+            w_valid  <= 0;
+            w_ready  <= 0;
+            b_valid  <= 0;
+            b_ready  <= 0;
+            ar_valid <= 0;
+            ar_ready <= 0;
+            r_valid  <= 0;
+            r_ready  <= 0;
+        end else begin
+            aw_valid <= mst_aw_valid_o;
+            aw_ready <= mst_aw_ready_i;
+            w_valid  <= mst_w_valid_o;
+            w_ready  <= mst_w_ready_i;
+            b_valid  <= slv_b_valid_o;
+            b_ready  <= slv_b_ready_i;
+            ar_valid <= mst_ar_valid_o;
+            ar_ready <= mst_ar_ready_i;
+            r_valid  <= slv_r_valid_o;
+            r_ready  <= slv_r_ready_i;
+        end
+    end
+
+    // Calculate if the request interferes with the ongoing atomic transaction
+    // The protected bytes go from addr_q up to addr_q + (1 << size_q) - 1
+    // TODO Bursts need special treatment
+    assign transaction_collision = (slv_aw_addr_i < (     addr_q + (8'h01 <<      size_q))) &
+                                   (     addr_q < (slv_aw_addr_i + (8'h01 << slv_aw_size_i)));
+
+    always_comb begin : calc_atop_valid
+        atop_valid_d = atop_valid_q;
+        if (adapter_ready) begin
+            atop_valid_d = NONE;
+            if (slv_aw_valid_i && slv_aw_atop_i) begin
+                // Default is invalid request
+                atop_valid_d = INVALID;
+                // Valid load operation
+                if ((slv_aw_atop_i      ==  axi_pkg::ATOP_ATOMICSWAP) ||
+                    (slv_aw_atop_i[5:3] == {axi_pkg::ATOP_ATOMICLOAD , axi_pkg::ATOP_LITTLE_END})) begin
+                    atop_valid_d = LOAD;
+                end
+                // Valid store operation
+                if (slv_aw_atop_i[5:3] == {axi_pkg::ATOP_ATOMICSTORE, axi_pkg::ATOP_LITTLE_END}) begin
+                    atop_valid_d = STORE;
+                end
+                // Invalidate valid request if control signals do not match
+                // Burst or exclusive access
+                if (slv_aw_len_i | slv_aw_lock_i) begin
+                    atop_valid_d = INVALID;
+                end
+                // Unsupported size
+                if (slv_aw_size_i > $clog2(RISCV_WORD_WIDTH/8)) begin
+                    atop_valid_d = INVALID;
+                end
+            end
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_atop_valid
+        if(~rst_ni) begin
+            atop_valid_q <= NONE;
+        end else begin
+            atop_valid_q <= atop_valid_d;
+        end
+    end
+
+    /**
+     * Write Channel: AW, W, B
+     */
+
+    /*====================================================================
+    =                                 AW                                 =
+    ====================================================================*/
+    always_comb begin : axi_aw_channel
+        // Defaults AXI Bus
+        mst_aw_id_o     = slv_aw_id_i;
+        mst_aw_addr_o   = slv_aw_addr_i;
+        mst_aw_len_o    = slv_aw_len_i;
+        mst_aw_size_o   = slv_aw_size_i;
+        mst_aw_burst_o  = slv_aw_burst_i;
+        mst_aw_lock_o   = slv_aw_lock_i;
+        mst_aw_cache_o  = slv_aw_cache_i;
+        mst_aw_prot_o   = slv_aw_prot_i;
+        mst_aw_qos_o    = slv_aw_qos_i;
+        mst_aw_region_o = slv_aw_region_i;
+        mst_aw_atop_o   = 6'b0;
+        mst_aw_user_o   = slv_aw_user_i;
+        // Defaults FF
+        addr_d          = addr_q;
+        id_d            = id_q;
+        size_d          = size_q;
+        atop_d          = atop_q;
+        cache_d         = cache_q;
+        prot_d          = prot_q;
+        qos_d           = qos_q;
+        region_d        = region_q;
+        aw_user_d       = aw_user_q;
+        w_cnt_inj_d     = w_cnt_inj_q;
+        // State Machine
+        aw_state_d      = aw_state_q;
+
+        // Default control: Block AW channel if...
+        if (slv_aw_valid_i && slv_aw_atop_i) begin
+            // Block if atomic request
+            mst_aw_valid_o = 1'b0;
+            slv_aw_ready_o = 1'b0;
+        end else if (w_cnt_q == AXI_MAX_WRITE_TXNS) begin
+            // Block if counter is overflowing
+            mst_aw_valid_o = 1'b0;
+            slv_aw_ready_o = 1'b0;
+        end else if (slv_aw_valid_i && transaction_collision && !adapter_ready) begin
+            // Block requests to the same address as current atomic transaction
+            mst_aw_valid_o = 1'b0;
+            slv_aw_ready_o = 1'b0;
+        end else begin
+            // Forward
+            mst_aw_valid_o  = slv_aw_valid_i;
+            slv_aw_ready_o  = mst_aw_ready_i;
+        end
+
+        // Count W burst to know when to inject the W data
+        if (w_cnt_inj_q && mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+            w_cnt_inj_d = w_cnt_inj_q - 1;
+        end
+
+        unique case (aw_state_q)
+
+            FEEDTHROUGH_AW: begin
+                // Feedthrough slave to master until atomic operation is detected
+                if (slv_aw_valid_i && slv_aw_atop_i && adapter_ready) begin
+                    // Acknowledge atomic transaction
+                    slv_aw_ready_o = 1'b1;
+                    // Remember request
+                    atop_d    = slv_aw_atop_i;
+                    addr_d    = slv_aw_addr_i;
+                    id_d      = slv_aw_id_i;
+                    size_d    = slv_aw_size_i;
+                    cache_d   = slv_aw_cache_i;
+                    prot_d    = slv_aw_prot_i;
+                    qos_d     = slv_aw_qos_i;
+                    region_d  = slv_aw_region_i;
+                    aw_user_d = slv_aw_user_i;
+                    // If valid AMO --> wait for result
+                    if (atop_valid_d != INVALID) begin
+                        aw_state_d = WAIT_RESULT_AW;
+                    end
+                end
+
+            end // FEEDTHROUGH_AW
+
+            WAIT_RESULT_AW, SEND_AW: begin
+                // If the result is ready and the channel is free --> inject AW request
+                if ((r_d_valid_q && w_d_valid_q && aw_free) || (aw_state_q == SEND_AW)) begin
+                    // Block
+                    slv_aw_ready_o  = 1'b0;
+                    // Make write request
+                    mst_aw_valid_o  = 1'b1;
+                    mst_aw_addr_o   = addr_q;
+                    mst_aw_len_o    = 8'h00;
+                    mst_aw_id_o     = id_q;
+                    mst_aw_size_o   = size_q;
+                    mst_aw_burst_o  = 2'b00;
+                    mst_aw_lock_o   = 1'b0;
+                    mst_aw_cache_o  = cache_q;
+                    mst_aw_prot_o   = prot_q;
+                    mst_aw_qos_o    = qos_q;
+                    mst_aw_region_o = region_q;
+                    mst_aw_user_o   = aw_user_q;
+                    // Check if request is acknowledged
+                    if (mst_aw_ready_i) begin
+                        aw_state_d = FEEDTHROUGH_AW;
+                    end else begin
+                        aw_state_d = SEND_AW;
+                    end
+                    // Remember outstanding W beats before injected request
+                    if (aw_state_q == WAIT_RESULT_AW) begin
+                        if (w_cnt_q && mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+                            w_cnt_inj_d = w_cnt_q - 1;
+                        end else begin
+                            w_cnt_inj_d = w_cnt_q;
+                        end
+                    end
+                end
+            end // WAIT_RESULT_AW, SEND_AW
+
+            default: aw_state_d = FEEDTHROUGH_AW;
+
+        endcase
+    end // axi_aw_channel
+
+    /*====================================================================
+    =                                 W                                  =
+    ====================================================================*/
+    always_comb begin : axi_w_channel
+        // Defaults AXI Bus
+        mst_w_data_o = slv_w_data_i;
+        mst_w_strb_o = slv_w_strb_i;
+        mst_w_last_o = slv_w_last_i;
+        mst_w_user_o = slv_w_user_i;
+        // Defaults FF
+        strb_d       = strb_q;
+        w_user_d     = w_user_q;
+        w_data_d     = w_data_q;
+        result_d     = result_q;
+        w_d_valid_d  = w_d_valid_q;
+        w_cnt_req_d  = w_cnt_req_q;
+        // State Machine
+        w_state_d    = w_state_q;
+
+        // Default control
+        // Make sure no data is sent without knowing if it's atomic
+        if (w_cnt_q == 0) begin
+            // Stall W as it precedes the AW request
+            slv_w_ready_o = 1'b0;
+            mst_w_valid_o = 1'b0;
+        end else begin
+            mst_w_valid_o = slv_w_valid_i;
+            slv_w_ready_o = mst_w_ready_i;
+        end
+
+        unique case (w_state_q)
+
+            FEEDTHROUGH_W: begin
+                if (adapter_ready) begin
+                    // Reset read flag
+                    w_d_valid_d = 1'b0;
+                    result_d    = '0;
+
+                    if (atop_valid_d != NONE) begin
+                        // Check if data is also available and does not belong to previous request
+                        if (w_cnt_q == 0) begin
+                            // Block downstream
+                            mst_w_valid_o = 1'b0;
+                            // Fetch data and wait for all data
+                            slv_w_ready_o  = 1'b1;
+                            if (slv_w_valid_i) begin
+                                if (atop_valid_d != INVALID) begin
+                                    w_data_d    = slv_w_data_i;
+                                    strb_d      = slv_w_strb_i;
+                                    w_user_d    = slv_w_user_i;
+                                    w_d_valid_d = 1'b1;
+                                    w_state_d   = WAIT_RESULT_W;
+                                end
+                            end else begin
+                                w_cnt_req_d = '0;
+                                w_state_d   = WAIT_DATA_W;
+                            end
+                        end else begin
+                            // Remember the amount of outstanding bursts and count down
+                            if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+                                w_cnt_req_d = w_cnt_q - 1;
+                            end else begin
+                                w_cnt_req_d = w_cnt_q;
+                            end
+                            w_state_d   = WAIT_DATA_W;
+                        end
+                    end
+                end
+            end // FEEDTHROUGH_W
+
+            WAIT_DATA_W: begin
+                // Count W beats until data arrives that belongs to the AMO request
+                if (w_cnt_req_q == 0) begin
+                    // Block downstream
+                    mst_w_valid_o = 1'b0;
+                    // Ready upstream
+                    slv_w_ready_o = 1'b1;
+
+                    if (slv_w_valid_i) begin
+                        if (atop_valid_q == INVALID) begin
+                            w_state_d    = FEEDTHROUGH_W;
+                        end else begin
+                            w_data_d    = slv_w_data_i;
+                            strb_d      = slv_w_strb_i;
+                            w_user_d    = slv_w_user_i;
+                            w_d_valid_d = 1'b1;
+                            w_state_d   = WAIT_RESULT_W;
+                        end
+                    end
+                end else if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+                    w_cnt_req_d = w_cnt_req_q - 1;
+                end
+            end // WAIT_DATA_W
+
+            WAIT_RESULT_W: begin
+                // If the result is ready, try to write it
+                if (r_d_valid_q && w_d_valid_q && aw_free) begin
+                    // Check if W channel is free and make sure data is not interleaved
+                    result_d = alu_result_ext;
+                    if (w_free && w_cnt_q == 0) begin
+                        // Block
+                        slv_w_ready_o = 1'b0;
+                        // Send write data
+                        mst_w_valid_o = 1'b1;
+                        mst_w_data_o  = alu_result_ext;
+                        mst_w_last_o  = 1'b1;
+                        mst_w_strb_o  = strb_q;
+                        mst_w_user_o  = w_user_q;
+                        if (mst_w_ready_i) begin
+                            w_state_d = FEEDTHROUGH_W;
+                        end else begin
+                            w_state_d = SEND_W;
+                        end
+                    end else begin
+                        w_state_d = WAIT_CHANNEL_W;
+                    end
+                end
+            end // WAIT_RESULT_W
+
+            WAIT_CHANNEL_W, SEND_W: begin
+                // Wait to not interleave the data
+                if ((w_free && w_cnt_inj_q == 0) || (w_state_q == SEND_W)) begin
+                    // Block
+                    slv_w_ready_o = 1'b0;
+                    // Send write data
+                    mst_w_valid_o = 1'b1;
+                    mst_w_data_o  = result_q;
+                    mst_w_last_o  = 1'b1;
+                    mst_w_strb_o  = strb_q;
+                    mst_w_user_o  = w_user_q;
+                    if (mst_w_ready_i) begin
+                        w_state_d = FEEDTHROUGH_W;
+                    end else begin
+                        w_state_d = SEND_W;
+                    end
+                end
+            end // WAIT_CHANNEL_W, SEND_W
+
+            default: w_state_d = FEEDTHROUGH_W;
+
+        endcase
+    end // axi_w_channel
+
+    /*====================================================================
+    =                                 B                                  =
+    ====================================================================*/
+    always_comb begin : axi_b_channel
+        // Defaults AXI Bus
+        mst_b_ready_o = slv_b_ready_i;
+        slv_b_id_o    = mst_b_id_i;
+        slv_b_resp_o  = mst_b_resp_i;
+        slv_b_user_o  = mst_b_user_i;
+        slv_b_valid_o = mst_b_valid_i;
+        // State Machine
+        b_state_d     = b_state_q;
+
+        unique case (b_state_q)
+
+            FEEDTHROUGH_B: begin
+                if (adapter_ready) begin
+                    if (atop_valid_d == LOAD || atop_valid_d == STORE) begin
+                        // Wait until write is complete
+                        b_state_d = WAIT_COMPLETE_B;
+                    end else if (atop_valid_d == INVALID) begin
+                        // Inject B error resp once the channel is free
+                        if (b_free) begin
+                            // Block downstream
+                            mst_b_ready_o = 1'b0;
+                            // Write B response
+                            slv_b_valid_o = 1'b1;
+                            slv_b_id_o    = slv_aw_id_i;
+                            slv_b_resp_o  = axi_pkg::RESP_SLVERR;
+                            slv_b_user_o  = '0;
+                            if (!slv_b_ready_i) begin
+                                b_state_d = SEND_B;
+                            end
+                        end else begin
+                            b_state_d = WAIT_CHANNEL_B;
+                        end
+                    end
+                end
+            end // FEEDTHROUGH_B
+
+            WAIT_CHANNEL_B, SEND_B: begin
+                if (b_free || (b_state_q == SEND_B)) begin
+                    // Block downstream
+                    mst_b_ready_o = 1'b0;
+                    // Write B response
+                    slv_b_valid_o = 1'b1;
+                    slv_b_id_o    = id_q;
+                    slv_b_resp_o  = axi_pkg::RESP_SLVERR;
+                    slv_b_user_o  = '0;
+                    if (slv_b_ready_i) begin
+                        b_state_d = FEEDTHROUGH_B;
+                    end else begin
+                        b_state_d = SEND_B;
+                    end
+                end
+            end // WAIT_CHANNEL_B, SEND_B
+
+            WAIT_COMPLETE_B: begin
+                if (mst_b_valid_i && (mst_b_id_i == id_q)) begin
+                    b_state_d = FEEDTHROUGH_B;
+                end
+            end // WAIT_COMPLETE_B
+
+            default: b_state_d = FEEDTHROUGH_B;
+
+        endcase
+    end // axi_b_channel
+
+    // Count downstream write bursts whose AW has been accepted but whose last W beat has not yet been transferred.
+    always_comb begin
+        w_cnt_d = w_cnt_q; // hold the current count by default
+        if (mst_aw_valid_o && mst_aw_ready_i) begin // AW handshake on the master port: one more burst open
+            w_cnt_d += 1;
+        end
+        if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin // handshake of a last W beat: one burst finished
+            w_cnt_d -= 1;
+        end // the two checks are independent, so both firing in one cycle leaves the count unchanged
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : axi_write_channel_ff // registers of the write path (AW/W/B state, counters, latched request fields)
+        if(~rst_ni) begin // asynchronous, active-low reset: FSMs go to their FEEDTHROUGH states, all other registers to zero
+            aw_state_q  <= FEEDTHROUGH_AW;
+            w_state_q   <= FEEDTHROUGH_W;
+            b_state_q   <= FEEDTHROUGH_B;
+            w_cnt_q     <= '0;
+            w_cnt_req_q <= '0;
+            w_cnt_inj_q <= '0;
+            addr_q      <= '0;
+            id_q        <= '0;
+            size_q      <= '0;
+            strb_q      <= '0;
+            cache_q     <= '0;
+            prot_q      <= '0;
+            qos_q       <= '0;
+            region_q    <= '0;
+            aw_user_q   <= '0;
+            w_user_q    <= '0;
+            w_data_q    <= '0;
+            result_q    <= '0;
+            w_d_valid_q <= '0;
+            atop_q      <= 6'b0;
+        end else begin // on every rising clock edge each register takes its combinational next value (_d)
+            aw_state_q  <= aw_state_d;
+            w_state_q   <= w_state_d;
+            b_state_q   <= b_state_d;
+            w_cnt_q     <= w_cnt_d;
+            w_cnt_req_q <= w_cnt_req_d;
+            w_cnt_inj_q <= w_cnt_inj_d;
+            addr_q      <= addr_d;
+            id_q        <= id_d;
+            size_q      <= size_d;
+            strb_q      <= strb_d;
+            cache_q     <= cache_d;
+            prot_q      <= prot_d;
+            qos_q       <= qos_d;
+            region_q    <= region_d;
+            aw_user_q   <= aw_user_d;
+            w_user_q    <= w_user_d;
+            w_data_q    <= w_data_d;
+            result_q    <= result_d;
+            w_d_valid_q <= w_d_valid_d;
+            atop_q      <= atop_d;
+        end
+    end
+
+    /**
+    * Read Channel: AR, R
+    */
+
+    /*====================================================================
+    =                                AR                                  =
+    ====================================================================*/
+    always_comb begin : axi_ar_channel // drives the master AR port: forwards slave reads, or injects a read built from the AW request fields
+        // Defaults AXI Bus
+        mst_ar_id_o     = slv_ar_id_i;
+        mst_ar_addr_o   = slv_ar_addr_i;
+        mst_ar_len_o    = slv_ar_len_i;
+        mst_ar_size_o   = slv_ar_size_i;
+        mst_ar_burst_o  = slv_ar_burst_i;
+        mst_ar_lock_o   = slv_ar_lock_i;
+        mst_ar_cache_o  = slv_ar_cache_i;
+        mst_ar_prot_o   = slv_ar_prot_i;
+        mst_ar_qos_o    = slv_ar_qos_i;
+        mst_ar_region_o = slv_ar_region_i;
+        mst_ar_user_o   = slv_ar_user_i;
+        mst_ar_valid_o  = 1'b0; // handshake signals stay low unless a state below forwards or injects
+        slv_ar_ready_o  = 1'b0;
+        // State Machine
+        ar_state_d      = ar_state_q;
+
+        unique case (ar_state_q)
+
+            FEEDTHROUGH_AR: begin
+                // Feed through
+                mst_ar_valid_o = slv_ar_valid_i;
+                slv_ar_ready_o = mst_ar_ready_i;
+
+                if (adapter_ready) begin // adapter_ready, atop_valid_d and ar_free are driven outside this block
+                    if (atop_valid_d == LOAD | atop_valid_d == STORE) begin
+                        if (ar_free) begin
+                            // Acquire channel
+                            slv_ar_ready_o  = 1'b0;
+                            // Immediately start read request
+                            mst_ar_valid_o  = 1'b1;
+                            mst_ar_addr_o   = slv_aw_addr_i; // request fields are taken live from the slave AW inputs
+                            mst_ar_id_o     = slv_aw_id_i;
+                            mst_ar_len_o    = 8'h00; // len 0: a single-beat read
+                            mst_ar_size_o   = slv_aw_size_i;
+                            mst_ar_burst_o  = 2'b00;
+                            mst_ar_lock_o   = 1'h0;
+                            mst_ar_cache_o  = slv_aw_cache_i;
+                            mst_ar_prot_o   = slv_aw_prot_i;
+                            mst_ar_qos_o    = slv_aw_qos_i;
+                            mst_ar_region_o = slv_aw_region_i;
+                            mst_ar_user_o   = slv_aw_user_i;
+                            if (!mst_ar_ready_i) begin
+                                // Hold read request but do not depend on AW
+                                ar_state_d = SEND_AR;
+                            end
+                        end else begin
+                            // Wait until AR is free
+                            ar_state_d   = WAIT_CHANNEL_AR;
+                        end
+                    end
+                end
+            end // FEEDTHROUGH_AR
+
+            WAIT_CHANNEL_AR, SEND_AR: begin
+                // Issue read request
+                if (ar_free || (ar_state_q == SEND_AR)) begin // in SEND_AR the request is held regardless of ar_free
+                    // Inject read request
+                    mst_ar_valid_o  = 1'b1;
+                    mst_ar_addr_o   = addr_q; // here the registered copies (_q) are used instead of the live slv_aw_* inputs
+                    mst_ar_id_o     = id_q;
+                    mst_ar_len_o    = 8'h00;
+                    mst_ar_size_o   = size_q;
+                    mst_ar_burst_o  = 2'b00;
+                    mst_ar_lock_o   = 1'h0;
+                    mst_ar_cache_o  = cache_q;
+                    mst_ar_prot_o   = prot_q;
+                    mst_ar_qos_o    = qos_q;
+                    mst_ar_region_o = region_q;
+                    mst_ar_user_o   = aw_user_q;
+                    if (mst_ar_ready_i) begin
+                        // Request acknowledged
+                        ar_state_d = FEEDTHROUGH_AR;
+                    end else begin
+                        // Hold read request
+                        ar_state_d = SEND_AR;
+                    end
+                end else begin
+                    // Wait until AR is free
+                    mst_ar_valid_o = slv_ar_valid_i; // keep forwarding the slave's own AR traffic while waiting
+                    slv_ar_ready_o = mst_ar_ready_i;
+                end
+            end // WAIT_CHANNEL_AR, SEND_AR
+
+            default: ar_state_d = FEEDTHROUGH_AR;
+
+        endcase
+    end // axi_ar_channel
+
+    /*====================================================================
+    =                                 R                                  =
+    ====================================================================*/
+    always_comb begin : axi_r_channel // drives the R channel: forwards downstream read data, captures the matching read beat, or injects an R response
+        // Defaults AXI Bus
+        mst_r_ready_o = slv_r_ready_i;
+        slv_r_id_o    = mst_r_id_i;
+        slv_r_data_o  = mst_r_data_i;
+        slv_r_resp_o  = mst_r_resp_i;
+        slv_r_last_o  = mst_r_last_i;
+        slv_r_user_o  = mst_r_user_i;
+        slv_r_valid_o = mst_r_valid_i;
+        // Defaults FF
+        r_data_d      = r_data_q;
+        r_resp_d      = r_resp_q;
+        r_user_d      = r_user_q;
+        r_d_valid_d   = r_d_valid_q;
+        // State Machine
+        r_state_d     = r_state_q;
+
+        unique case (r_state_q)
+
+            FEEDTHROUGH_R: begin
+                if (adapter_ready) begin // adapter_ready, atop_valid_d and r_free are driven outside this block
+                    // Reset read flag
+                    r_d_valid_d = 1'b0;
+
+                    if (atop_valid_d == LOAD || atop_valid_d == STORE) begin
+                        // Wait for R response to read data
+                        r_state_d = WAIT_DATA_R;
+                    end else if (atop_valid_d == INVALID) begin
+                        // Send R response once channel is free
+                        if (r_free) begin
+                            // Acquire the R channel
+                            // Block downstream
+                            mst_r_ready_o = 1'b0;
+                            // Send R error response
+                            slv_r_valid_o = 1'b1;
+                            slv_r_data_o  = '0;
+                            slv_r_id_o    = slv_aw_id_i; // ID is taken live from the slave AW input in this state
+                            slv_r_last_o  = 1'b1;
+                            slv_r_resp_o  = axi_pkg::RESP_SLVERR;
+                            slv_r_user_o  = '0;
+                            if (!slv_r_ready_i) begin
+                                // Hold R response
+                                r_state_d = SEND_R;
+                            end
+                        end else begin
+                            r_state_d = WAIT_CHANNEL_R;
+                        end
+                    end
+                end
+            end // FEEDTHROUGH_R
+
+            WAIT_DATA_R: begin
+                // Read data
+                if (mst_r_valid_i && (mst_r_id_i == id_q)) begin // only a beat whose ID equals the registered id_q is captured
+                    // Acknowledge downstream and block upstream
+                    mst_r_ready_o = 1'b1;
+                    slv_r_valid_o = 1'b0;
+                    // Store data
+                    r_data_d    = mst_r_data_i;
+                    r_resp_d    = mst_r_resp_i;
+                    r_user_d    = mst_r_user_i;
+                    r_d_valid_d = 1'b1;
+                    if (atop_valid_q == STORE) begin // STORE: return to feed-through without injecting an R response
+                        r_state_d = FEEDTHROUGH_R;
+                    end else begin
+                        // Wait for B resp before injecting R
+                        r_state_d = WAIT_CHANNEL_R;
+                    end
+                end
+            end // WAIT_DATA_R
+
+            WAIT_CHANNEL_R, SEND_R: begin
+                // Wait for the R channel to become free and B response to be valid
+                // TODO: Use b_state_d to be one cycle quicker
+                if ((r_free && (b_state_q != WAIT_COMPLETE_B)) || (r_state_q == SEND_R)) begin
+                    // Block downstream
+                    mst_r_ready_o = 1'b0;
+                    // Send R response
+                    slv_r_valid_o = 1'b1;
+                    slv_r_data_o  = r_data_q;
+                    slv_r_id_o    = id_q;
+                    slv_r_last_o  = 1'b1;
+                    slv_r_resp_o  = r_resp_q;
+                    slv_r_user_o  = r_user_q;
+                    if (atop_valid_q == INVALID) begin // error case: override the stored data/resp/user with a zeroed SLVERR response
+                        slv_r_data_o = '0;
+                        slv_r_resp_o = axi_pkg::RESP_SLVERR;
+                        slv_r_user_o = '0;
+                    end
+                    if (slv_r_ready_i) begin
+                        r_state_d = FEEDTHROUGH_R;
+                    end else begin
+                        r_state_d = SEND_R; // response not yet accepted: keep presenting it next cycle
+                    end
+                end
+            end // WAIT_CHANNEL_R, SEND_R
+
+            default: r_state_d = FEEDTHROUGH_R;
+
+        endcase
+    end // axi_r_channel
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : axi_read_channel_ff // registers of the read path (AR/R state and captured R beat)
+        if(~rst_ni) begin // asynchronous, active-low reset: FSMs to FEEDTHROUGH, captured data cleared
+            ar_state_q  <= FEEDTHROUGH_AR;
+            r_state_q   <= FEEDTHROUGH_R;
+            r_data_q    <= '0;
+            r_resp_q    <= '0;
+            r_user_q    <= '0;
+            r_d_valid_q <= 1'b0;
+        end else begin // on every rising clock edge each register takes its combinational next value (_d)
+            ar_state_q  <= ar_state_d;
+            r_state_q   <= r_state_d;
+            r_data_q    <= r_data_d;
+            r_resp_q    <= r_resp_d;
+            r_user_q    <= r_user_d;
+            r_d_valid_q <= r_d_valid_d;
+        end
+    end
+
+    /**
+     * ALU
+     */
+
+    assign op_a           = r_data_q & strb_ext; // captured read data, restricted to the bits enabled in strb_ext
+    assign op_b           = w_data_q & strb_ext; // registered w_data_q, restricted to the same bits
+    assign sign_a         = |(op_a & ~(strb_ext >> 1)); // OR of op_a bits set in strb_ext but not in (strb_ext >> 1), i.e. the top bit of each enabled run
+    assign sign_b         = |(op_b & ~(strb_ext >> 1)); // same extraction for op_b; NOTE(review): assumes strb_ext is a packed vector (declared outside this excerpt)
+    assign alu_result_ext = res; // res is produced by the generate block below
+
+    generate // picks how ALU operands/result map onto the AXI-wide data, depending on AXI_ALU_RATIO and RISCV_WORD_WIDTH
+        if (AXI_ALU_RATIO == 1 && RISCV_WORD_WIDTH == 32) begin // direct connection; no sign extension in this variant
+            assign alu_operand_a  = op_a;
+            assign alu_operand_b  = op_b;
+            assign res            = alu_result;
+        end else if (AXI_ALU_RATIO == 1 && RISCV_WORD_WIDTH == 64) begin // direct connection; operands sign-extended for SMAX/SMIN
+            assign res        = alu_result;
+            always_comb begin
+                op_a_sign_ext = op_a | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_a}} & ~strb_ext); // fill every bit not enabled in strb_ext with sign_a
+                op_b_sign_ext = op_b | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_b}} & ~strb_ext); // likewise with sign_b
+
+                if (atop_q[2:0] == axi_pkg::ATOP_SMAX || atop_q[2:0] == axi_pkg::ATOP_SMIN) begin
+                    // Sign extend
+                    alu_operand_a = op_a_sign_ext;
+                    alu_operand_b = op_b_sign_ext;
+                end else begin
+                    // No sign extension necessary
+                    alu_operand_a = op_a;
+                    alu_operand_b = op_b;
+                end
+            end
+        end else begin // all remaining configurations: address bits of addr_q select which word goes through the ALU
+            always_comb begin
+                op_a_sign_ext = op_a | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_a}} & ~strb_ext); // fill every bit not enabled in strb_ext with sign_a
+                op_b_sign_ext = op_b | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_b}} & ~strb_ext); // likewise with sign_b
+
+                if (atop_q[2:0] == axi_pkg::ATOP_SMAX || atop_q[2:0] == axi_pkg::ATOP_SMIN) begin
+                    // Sign extend
+                    alu_operand_a = op_a_sign_ext[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]];
+                    alu_operand_b = op_b_sign_ext[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]];
+                end else begin
+                    // No sign extension necessary
+                    alu_operand_a = op_a[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]];
+                    alu_operand_b = op_b[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]];
+                end
+                res = '0; // every element of res other than the addressed one stays zero
+                res[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]] = alu_result; // place the ALU result at the same index the operands were read from
+            end
+        end
+    endgenerate
+
+    generate // expand each bit of the registered strobe strb_q into one byte of the mask strb_ext
+        for (genvar i = 0; i < AXI_STRB_WIDTH; i++) begin
+            always_comb begin
+                if (strb_q[i]) begin
+                    strb_ext[i] = 8'hFF; // strobe bit set: all eight bits of element i enabled
+                end else begin
+                    strb_ext[i] = 8'h00; // strobe bit clear: element i fully masked
+                end
+            end
+        end
+    endgenerate
+
+    axi_riscv_amos_alu #( // the ALU instance that computes alu_result from the two selected operands
+        .DATA_WIDTH ( RISCV_WORD_WIDTH )
+    ) i_amo_alu (
+        .amo_op_i           ( atop_q        ), // registered atomic operation code
+        .amo_operand_a_i    ( alu_operand_a ), // derived from op_a (masked r_data_q)
+        .amo_operand_b_i    ( alu_operand_b ), // derived from op_b (masked w_data_q)
+        .amo_result_o       ( alu_result    )
+    );
+
+    // Validate parameters: simulation-only checks, compiled out when VERILATOR is defined.
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params // runs once at simulation start; each failed assert ends the run via $fatal
+        assert (AXI_ADDR_WIDTH > 0)
+            else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!");
+        assert (AXI_DATA_WIDTH > 0)
+            else $fatal(1, "AXI_DATA_WIDTH must be greater than 0!");
+        assert (AXI_ID_WIDTH > 0)
+            else $fatal(1, "AXI_ID_WIDTH must be greater than 0!");
+        assert (AXI_MAX_WRITE_TXNS > 0)
+            else $fatal(1, "AXI_MAX_WRITE_TXNS must be greater than 0!");
+        assert (RISCV_WORD_WIDTH == 32 || RISCV_WORD_WIDTH == 64)
+            else $fatal(1, "RISCV_WORD_WIDTH must be 32 or 64!");
+        assert (RISCV_WORD_WIDTH <= AXI_DATA_WIDTH)
+            else $fatal(1, "RISCV_WORD_WIDTH must not be greater than AXI_DATA_WIDTH!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv
new file mode 100644
index 0000000..40a52b0
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv
@@ -0,0 +1,78 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI RISC-V Atomic Operations (AMOs) ALU: purely combinational; computes amo_result_o from two operands according to amo_op_i.
+module axi_riscv_amos_alu # (
+    parameter int unsigned DATA_WIDTH = 0 // operand/result width in bits; the 0 default must be overridden (checked below)
+) (
+    input  logic [5:0]              amo_op_i, // operation code, compared against axi_pkg::ATOP_* constants
+    input  logic [DATA_WIDTH-1:0]   amo_operand_a_i,
+    input  logic [DATA_WIDTH-1:0]   amo_operand_b_i,
+    output logic [DATA_WIDTH-1:0]   amo_result_o
+);
+
+    logic [DATA_WIDTH:0] adder_sum; // one bit wider than the operands; bit DATA_WIDTH serves as the sign of a - b in the min/max arms
+    logic [DATA_WIDTH:0] adder_operand_a, adder_operand_b;
+
+    assign adder_sum = adder_operand_a + adder_operand_b;
+
+    always_comb begin
+
+        adder_operand_a = $signed(amo_operand_a_i); // defaults: both operands sign-extended by one bit
+        adder_operand_b = $signed(amo_operand_b_i);
+
+        amo_result_o = amo_operand_a_i; // result when amo_op_i matches neither branch below
+
+        if (amo_op_i == axi_pkg::ATOP_ATOMICSWAP) begin
+            // Swap operation
+            amo_result_o = amo_operand_b_i;
+        end else if ((amo_op_i[5:4] == axi_pkg::ATOP_ATOMICLOAD) | (amo_op_i[5:4] == axi_pkg::ATOP_ATOMICSTORE)) begin
+            // Atomic load or atomic store: amo_op_i[2:0] selects the operation
+            unique case (amo_op_i[2:0])
+                // every arm below (including default) overrides the operand_a preset made above
+                axi_pkg::ATOP_ADD: amo_result_o = adder_sum[DATA_WIDTH-1:0];
+                axi_pkg::ATOP_CLR: amo_result_o = amo_operand_a_i & (~amo_operand_b_i);
+                axi_pkg::ATOP_SET: amo_result_o = amo_operand_a_i | amo_operand_b_i;
+                axi_pkg::ATOP_EOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i;
+                axi_pkg::ATOP_SMAX: begin
+                    adder_operand_b = -$signed(amo_operand_b_i); // adder_sum becomes a - b; its top bit is set when a < b (signed)
+                    amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_b_i : amo_operand_a_i;
+                end
+                axi_pkg::ATOP_SMIN: begin
+                    adder_operand_b = -$signed(amo_operand_b_i); // same signed a - b comparison, opposite selection
+                    amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_a_i : amo_operand_b_i;
+                end
+                axi_pkg::ATOP_UMAX: begin
+                    adder_operand_a = $unsigned(amo_operand_a_i); // zero-extend instead of sign-extend for the unsigned compare
+                    adder_operand_b = -$unsigned(amo_operand_b_i); // adder_sum becomes a - b; its top bit is set when a < b (unsigned)
+                    amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_b_i : amo_operand_a_i;
+                end
+                axi_pkg::ATOP_UMIN: begin
+                    adder_operand_a = $unsigned(amo_operand_a_i);
+                    adder_operand_b = -$unsigned(amo_operand_b_i); // same unsigned a - b comparison, opposite selection
+                    amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_a_i : amo_operand_b_i;
+                end
+                default: amo_result_o = '0;
+            endcase
+        end
+    end
+
+    // Validate parameters: simulation-only check, compiled out when VERILATOR is defined.
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params // runs once at simulation start
+        assert (DATA_WIDTH > 0)
+            else $fatal(1, "DATA_WIDTH must be greater than 0!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv
new file mode 100644
index 0000000..f9b72d3
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv
@@ -0,0 +1,400 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI RISC-V Atomics ("A" Extension) Adapter
+//
+// This AXI adapter implements the RISC-V "A" extension and adheres to the RVWMO memory consistency
+// model.
+//
+// Maintainer: Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+module axi_riscv_atomics #(
+    /// AXI Parameters
+    parameter int unsigned AXI_ADDR_WIDTH = 0,
+    parameter int unsigned AXI_DATA_WIDTH = 0,
+    parameter int unsigned AXI_ID_WIDTH = 0,
+    parameter int unsigned AXI_USER_WIDTH = 0,
+    // Maximum number of AXI write bursts outstanding at the same time
+    parameter int unsigned AXI_MAX_WRITE_TXNS = 0,
+    // Word width of the widest RISC-V processor that can issue requests to this module.
+    // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are
+    // supported if `aw_strb` is set correctly.
+    parameter int unsigned RISCV_WORD_WIDTH = 0,
+    /// Derived Parameters (do NOT change manually!)
+    localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8
+) (
+    input logic                         clk_i,
+    input logic                         rst_ni,
+
+    /// Slave Interface
+    input  logic [AXI_ADDR_WIDTH-1:0]   slv_aw_addr_i,
+    input  logic [2:0]                  slv_aw_prot_i,
+    input  logic [3:0]                  slv_aw_region_i,
+    input  logic [5:0]                  slv_aw_atop_i,
+    input  logic [7:0]                  slv_aw_len_i,
+    input  logic [2:0]                  slv_aw_size_i,
+    input  logic [1:0]                  slv_aw_burst_i,
+    input  logic                        slv_aw_lock_i,
+    input  logic [3:0]                  slv_aw_cache_i,
+    input  logic [3:0]                  slv_aw_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]     slv_aw_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_aw_user_i,
+    output logic                        slv_aw_ready_o,
+    input  logic                        slv_aw_valid_i,
+
+    input  logic [AXI_ADDR_WIDTH-1:0]   slv_ar_addr_i,
+    input  logic [2:0]                  slv_ar_prot_i,
+    input  logic [3:0]                  slv_ar_region_i,
+    input  logic [7:0]                  slv_ar_len_i,
+    input  logic [2:0]                  slv_ar_size_i,
+    input  logic [1:0]                  slv_ar_burst_i,
+    input  logic                        slv_ar_lock_i,
+    input  logic [3:0]                  slv_ar_cache_i,
+    input  logic [3:0]                  slv_ar_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]     slv_ar_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_ar_user_i,
+    output logic                        slv_ar_ready_o,
+    input  logic                        slv_ar_valid_i,
+
+    input  logic [AXI_DATA_WIDTH-1:0]   slv_w_data_i,
+    input  logic [AXI_STRB_WIDTH-1:0]   slv_w_strb_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_w_user_i,
+    input  logic                        slv_w_last_i,
+    output logic                        slv_w_ready_o,
+    input  logic                        slv_w_valid_i,
+
+    output logic [AXI_DATA_WIDTH-1:0]   slv_r_data_o,
+    output logic [1:0]                  slv_r_resp_o,
+    output logic                        slv_r_last_o,
+    output logic [AXI_ID_WIDTH-1:0]     slv_r_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   slv_r_user_o,
+    input  logic                        slv_r_ready_i,
+    output logic                        slv_r_valid_o,
+
+    output logic [1:0]                  slv_b_resp_o,
+    output logic [AXI_ID_WIDTH-1:0]     slv_b_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   slv_b_user_o,
+    input  logic                        slv_b_ready_i,
+    output logic                        slv_b_valid_o,
+
+    /// Master Interface
+    output logic [AXI_ADDR_WIDTH-1:0]   mst_aw_addr_o,
+    output logic [2:0]                  mst_aw_prot_o,
+    output logic [3:0]                  mst_aw_region_o,
+    output logic [5:0]                  mst_aw_atop_o,
+    output logic [7:0]                  mst_aw_len_o,
+    output logic [2:0]                  mst_aw_size_o,
+    output logic [1:0]                  mst_aw_burst_o,
+    output logic                        mst_aw_lock_o,
+    output logic [3:0]                  mst_aw_cache_o,
+    output logic [3:0]                  mst_aw_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]     mst_aw_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_aw_user_o,
+    input  logic                        mst_aw_ready_i,
+    output logic                        mst_aw_valid_o,
+
+    output logic [AXI_ADDR_WIDTH-1:0]   mst_ar_addr_o,
+    output logic [2:0]                  mst_ar_prot_o,
+    output logic [3:0]                  mst_ar_region_o,
+    output logic [7:0]                  mst_ar_len_o,
+    output logic [2:0]                  mst_ar_size_o,
+    output logic [1:0]                  mst_ar_burst_o,
+    output logic                        mst_ar_lock_o,
+    output logic [3:0]                  mst_ar_cache_o,
+    output logic [3:0]                  mst_ar_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]     mst_ar_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_ar_user_o,
+    input  logic                        mst_ar_ready_i,
+    output logic                        mst_ar_valid_o,
+
+    output logic [AXI_DATA_WIDTH-1:0]   mst_w_data_o,
+    output logic [AXI_STRB_WIDTH-1:0]   mst_w_strb_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_w_user_o,
+    output logic                        mst_w_last_o,
+    input  logic                        mst_w_ready_i,
+    output logic                        mst_w_valid_o,
+
+    input  logic [AXI_DATA_WIDTH-1:0]   mst_r_data_i,
+    input  logic [1:0]                  mst_r_resp_i,
+    input  logic                        mst_r_last_i,
+    input  logic [AXI_ID_WIDTH-1:0]     mst_r_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   mst_r_user_i,
+    output logic                        mst_r_ready_o,
+    input  logic                        mst_r_valid_i,
+
+    input  logic [1:0]                  mst_b_resp_i,
+    input  logic [AXI_ID_WIDTH-1:0]     mst_b_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   mst_b_user_i,
+    output logic                        mst_b_ready_o,
+    input  logic                        mst_b_valid_i
+);
+
+    // Make the entire address range exclusively accessible. Since the AMO adapter does not support
+    // address ranges, it would not make sense to expose the address range as a parameter of this
+    // module.
+    localparam longint unsigned ADDR_BEGIN  = '0;
+    localparam longint unsigned ADDR_END    = {AXI_ADDR_WIDTH{1'b1}};
+
+    logic [AXI_ADDR_WIDTH-1:0]   int_axi_aw_addr;
+    logic [2:0]                  int_axi_aw_prot;
+    logic [3:0]                  int_axi_aw_region;
+    logic [5:0]                  int_axi_aw_atop;
+    logic [7:0]                  int_axi_aw_len;
+    logic [2:0]                  int_axi_aw_size;
+    logic [1:0]                  int_axi_aw_burst;
+    logic                        int_axi_aw_lock;
+    logic [3:0]                  int_axi_aw_cache;
+    logic [3:0]                  int_axi_aw_qos;
+    logic [AXI_ID_WIDTH-1:0]     int_axi_aw_id;
+    logic [AXI_USER_WIDTH-1:0]   int_axi_aw_user;
+    logic                        int_axi_aw_ready;
+    logic                        int_axi_aw_valid;
+
+    logic [AXI_ADDR_WIDTH-1:0]   int_axi_ar_addr;
+    logic [2:0]                  int_axi_ar_prot;
+    logic [3:0]                  int_axi_ar_region;
+    logic [7:0]                  int_axi_ar_len;
+    logic [2:0]                  int_axi_ar_size;
+    logic [1:0]                  int_axi_ar_burst;
+    logic                        int_axi_ar_lock;
+    logic [3:0]                  int_axi_ar_cache;
+    logic [3:0]                  int_axi_ar_qos;
+    logic [AXI_ID_WIDTH-1:0]     int_axi_ar_id;
+    logic [AXI_USER_WIDTH-1:0]   int_axi_ar_user;
+    logic                        int_axi_ar_ready;
+    logic                        int_axi_ar_valid;
+
+    logic [AXI_DATA_WIDTH-1:0]   int_axi_w_data;
+    logic [AXI_STRB_WIDTH-1:0]   int_axi_w_strb;
+    logic [AXI_USER_WIDTH-1:0]   int_axi_w_user;
+    logic                        int_axi_w_last;
+    logic                        int_axi_w_ready;
+    logic                        int_axi_w_valid;
+
+    logic [AXI_DATA_WIDTH-1:0]   int_axi_r_data;
+    logic [1:0]                  int_axi_r_resp;
+    logic                        int_axi_r_last;
+    logic [AXI_ID_WIDTH-1:0]     int_axi_r_id;
+    logic [AXI_USER_WIDTH-1:0]   int_axi_r_user;
+    logic                        int_axi_r_ready;
+    logic                        int_axi_r_valid;
+
+    logic [1:0]                  int_axi_b_resp;
+    logic [AXI_ID_WIDTH-1:0]     int_axi_b_id;
+    logic [AXI_USER_WIDTH-1:0]   int_axi_b_user;
+    logic                        int_axi_b_ready;
+    logic                        int_axi_b_valid;
+
+    axi_riscv_amos #(
+        .AXI_ADDR_WIDTH     (AXI_ADDR_WIDTH),
+        .AXI_DATA_WIDTH     (AXI_DATA_WIDTH),
+        .AXI_ID_WIDTH       (AXI_ID_WIDTH),
+        .AXI_USER_WIDTH     (AXI_USER_WIDTH),
+        .AXI_MAX_WRITE_TXNS (AXI_MAX_WRITE_TXNS),
+        .RISCV_WORD_WIDTH   (RISCV_WORD_WIDTH)
+    ) i_amos (
+        .clk_i              ( clk_i             ),
+        .rst_ni             ( rst_ni            ),
+        .slv_aw_addr_i      ( slv_aw_addr_i     ),
+        .slv_aw_prot_i      ( slv_aw_prot_i     ),
+        .slv_aw_region_i    ( slv_aw_region_i   ),
+        .slv_aw_atop_i      ( slv_aw_atop_i     ),
+        .slv_aw_len_i       ( slv_aw_len_i      ),
+        .slv_aw_size_i      ( slv_aw_size_i     ),
+        .slv_aw_burst_i     ( slv_aw_burst_i    ),
+        .slv_aw_lock_i      ( slv_aw_lock_i     ),
+        .slv_aw_cache_i     ( slv_aw_cache_i    ),
+        .slv_aw_qos_i       ( slv_aw_qos_i      ),
+        .slv_aw_id_i        ( slv_aw_id_i       ),
+        .slv_aw_user_i      ( slv_aw_user_i     ),
+        .slv_aw_ready_o     ( slv_aw_ready_o    ),
+        .slv_aw_valid_i     ( slv_aw_valid_i    ),
+        .slv_ar_addr_i      ( slv_ar_addr_i     ),
+        .slv_ar_prot_i      ( slv_ar_prot_i     ),
+        .slv_ar_region_i    ( slv_ar_region_i   ),
+        .slv_ar_len_i       ( slv_ar_len_i      ),
+        .slv_ar_size_i      ( slv_ar_size_i     ),
+        .slv_ar_burst_i     ( slv_ar_burst_i    ),
+        .slv_ar_lock_i      ( slv_ar_lock_i     ),
+        .slv_ar_cache_i     ( slv_ar_cache_i    ),
+        .slv_ar_qos_i       ( slv_ar_qos_i      ),
+        .slv_ar_id_i        ( slv_ar_id_i       ),
+        .slv_ar_user_i      ( slv_ar_user_i     ),
+        .slv_ar_ready_o     ( slv_ar_ready_o    ),
+        .slv_ar_valid_i     ( slv_ar_valid_i    ),
+        .slv_w_data_i       ( slv_w_data_i      ),
+        .slv_w_strb_i       ( slv_w_strb_i      ),
+        .slv_w_user_i       ( slv_w_user_i      ),
+        .slv_w_last_i       ( slv_w_last_i      ),
+        .slv_w_ready_o      ( slv_w_ready_o     ),
+        .slv_w_valid_i      ( slv_w_valid_i     ),
+        .slv_r_data_o       ( slv_r_data_o      ),
+        .slv_r_resp_o       ( slv_r_resp_o      ),
+        .slv_r_last_o       ( slv_r_last_o      ),
+        .slv_r_id_o         ( slv_r_id_o        ),
+        .slv_r_user_o       ( slv_r_user_o      ),
+        .slv_r_ready_i      ( slv_r_ready_i     ),
+        .slv_r_valid_o      ( slv_r_valid_o     ),
+        .slv_b_resp_o       ( slv_b_resp_o      ),
+        .slv_b_id_o         ( slv_b_id_o        ),
+        .slv_b_user_o       ( slv_b_user_o      ),
+        .slv_b_ready_i      ( slv_b_ready_i     ),
+        .slv_b_valid_o      ( slv_b_valid_o     ),
+        .mst_aw_addr_o      ( int_axi_aw_addr   ),
+        .mst_aw_prot_o      ( int_axi_aw_prot   ),
+        .mst_aw_region_o    ( int_axi_aw_region ),
+        .mst_aw_atop_o      ( int_axi_aw_atop   ),
+        .mst_aw_len_o       ( int_axi_aw_len    ),
+        .mst_aw_size_o      ( int_axi_aw_size   ),
+        .mst_aw_burst_o     ( int_axi_aw_burst  ),
+        .mst_aw_lock_o      ( int_axi_aw_lock   ),
+        .mst_aw_cache_o     ( int_axi_aw_cache  ),
+        .mst_aw_qos_o       ( int_axi_aw_qos    ),
+        .mst_aw_id_o        ( int_axi_aw_id     ),
+        .mst_aw_user_o      ( int_axi_aw_user   ),
+        .mst_aw_ready_i     ( int_axi_aw_ready  ),
+        .mst_aw_valid_o     ( int_axi_aw_valid  ),
+        .mst_ar_addr_o      ( int_axi_ar_addr   ),
+        .mst_ar_prot_o      ( int_axi_ar_prot   ),
+        .mst_ar_region_o    ( int_axi_ar_region ),
+        .mst_ar_len_o       ( int_axi_ar_len    ),
+        .mst_ar_size_o      ( int_axi_ar_size   ),
+        .mst_ar_burst_o     ( int_axi_ar_burst  ),
+        .mst_ar_lock_o      ( int_axi_ar_lock   ),
+        .mst_ar_cache_o     ( int_axi_ar_cache  ),
+        .mst_ar_qos_o       ( int_axi_ar_qos    ),
+        .mst_ar_id_o        ( int_axi_ar_id     ),
+        .mst_ar_user_o      ( int_axi_ar_user   ),
+        .mst_ar_ready_i     ( int_axi_ar_ready  ),
+        .mst_ar_valid_o     ( int_axi_ar_valid  ),
+        .mst_w_data_o       ( int_axi_w_data    ),
+        .mst_w_strb_o       ( int_axi_w_strb    ),
+        .mst_w_user_o       ( int_axi_w_user    ),
+        .mst_w_last_o       ( int_axi_w_last    ),
+        .mst_w_ready_i      ( int_axi_w_ready   ),
+        .mst_w_valid_o      ( int_axi_w_valid   ),
+        .mst_r_data_i       ( int_axi_r_data    ),
+        .mst_r_resp_i       ( int_axi_r_resp    ),
+        .mst_r_last_i       ( int_axi_r_last    ),
+        .mst_r_id_i         ( int_axi_r_id      ),
+        .mst_r_user_i       ( int_axi_r_user    ),
+        .mst_r_ready_o      ( int_axi_r_ready   ),
+        .mst_r_valid_i      ( int_axi_r_valid   ),
+        .mst_b_resp_i       ( int_axi_b_resp    ),
+        .mst_b_id_i         ( int_axi_b_id      ),
+        .mst_b_user_i       ( int_axi_b_user    ),
+        .mst_b_ready_o      ( int_axi_b_ready   ),
+        .mst_b_valid_i      ( int_axi_b_valid   )
+    );
+
+    axi_riscv_lrsc #(
+        .ADDR_BEGIN     (ADDR_BEGIN),
+        .ADDR_END       (ADDR_END),
+        .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
+        .AXI_DATA_WIDTH (AXI_DATA_WIDTH),
+        .AXI_ID_WIDTH   (AXI_ID_WIDTH),
+        .AXI_USER_WIDTH (AXI_USER_WIDTH)
+    ) i_lrsc (
+        .clk_i              ( clk_i             ),
+        .rst_ni             ( rst_ni            ),
+        .slv_aw_addr_i      ( int_axi_aw_addr   ),
+        .slv_aw_prot_i      ( int_axi_aw_prot   ),
+        .slv_aw_region_i    ( int_axi_aw_region ),
+        .slv_aw_atop_i      ( int_axi_aw_atop   ),
+        .slv_aw_len_i       ( int_axi_aw_len    ),
+        .slv_aw_size_i      ( int_axi_aw_size   ),
+        .slv_aw_burst_i     ( int_axi_aw_burst  ),
+        .slv_aw_lock_i      ( int_axi_aw_lock   ),
+        .slv_aw_cache_i     ( int_axi_aw_cache  ),
+        .slv_aw_qos_i       ( int_axi_aw_qos    ),
+        .slv_aw_id_i        ( int_axi_aw_id     ),
+        .slv_aw_user_i      ( int_axi_aw_user   ),
+        .slv_aw_ready_o     ( int_axi_aw_ready  ),
+        .slv_aw_valid_i     ( int_axi_aw_valid  ),
+        .slv_ar_addr_i      ( int_axi_ar_addr   ),
+        .slv_ar_prot_i      ( int_axi_ar_prot   ),
+        .slv_ar_region_i    ( int_axi_ar_region ),
+        .slv_ar_len_i       ( int_axi_ar_len    ),
+        .slv_ar_size_i      ( int_axi_ar_size   ),
+        .slv_ar_burst_i     ( int_axi_ar_burst  ),
+        .slv_ar_lock_i      ( int_axi_ar_lock   ),
+        .slv_ar_cache_i     ( int_axi_ar_cache  ),
+        .slv_ar_qos_i       ( int_axi_ar_qos    ),
+        .slv_ar_id_i        ( int_axi_ar_id     ),
+        .slv_ar_user_i      ( int_axi_ar_user   ),
+        .slv_ar_ready_o     ( int_axi_ar_ready  ),
+        .slv_ar_valid_i     ( int_axi_ar_valid  ),
+        .slv_w_data_i       ( int_axi_w_data    ),
+        .slv_w_strb_i       ( int_axi_w_strb    ),
+        .slv_w_user_i       ( int_axi_w_user    ),
+        .slv_w_last_i       ( int_axi_w_last    ),
+        .slv_w_ready_o      ( int_axi_w_ready   ),
+        .slv_w_valid_i      ( int_axi_w_valid   ),
+        .slv_r_data_o       ( int_axi_r_data    ),
+        .slv_r_resp_o       ( int_axi_r_resp    ),
+        .slv_r_last_o       ( int_axi_r_last    ),
+        .slv_r_id_o         ( int_axi_r_id      ),
+        .slv_r_user_o       ( int_axi_r_user    ),
+        .slv_r_ready_i      ( int_axi_r_ready   ),
+        .slv_r_valid_o      ( int_axi_r_valid   ),
+        .slv_b_resp_o       ( int_axi_b_resp    ),
+        .slv_b_id_o         ( int_axi_b_id      ),
+        .slv_b_user_o       ( int_axi_b_user    ),
+        .slv_b_ready_i      ( int_axi_b_ready   ),
+        .slv_b_valid_o      ( int_axi_b_valid   ),
+        .mst_aw_addr_o      ( mst_aw_addr_o     ),
+        .mst_aw_prot_o      ( mst_aw_prot_o     ),
+        .mst_aw_region_o    ( mst_aw_region_o   ),
+        .mst_aw_atop_o      ( mst_aw_atop_o     ),
+        .mst_aw_len_o       ( mst_aw_len_o      ),
+        .mst_aw_size_o      ( mst_aw_size_o     ),
+        .mst_aw_burst_o     ( mst_aw_burst_o    ),
+        .mst_aw_lock_o      ( mst_aw_lock_o     ),
+        .mst_aw_cache_o     ( mst_aw_cache_o    ),
+        .mst_aw_qos_o       ( mst_aw_qos_o      ),
+        .mst_aw_id_o        ( mst_aw_id_o       ),
+        .mst_aw_user_o      ( mst_aw_user_o     ),
+        .mst_aw_ready_i     ( mst_aw_ready_i    ),
+        .mst_aw_valid_o     ( mst_aw_valid_o    ),
+        .mst_ar_addr_o      ( mst_ar_addr_o     ),
+        .mst_ar_prot_o      ( mst_ar_prot_o     ),
+        .mst_ar_region_o    ( mst_ar_region_o   ),
+        .mst_ar_len_o       ( mst_ar_len_o      ),
+        .mst_ar_size_o      ( mst_ar_size_o     ),
+        .mst_ar_burst_o     ( mst_ar_burst_o    ),
+        .mst_ar_lock_o      ( mst_ar_lock_o     ),
+        .mst_ar_cache_o     ( mst_ar_cache_o    ),
+        .mst_ar_qos_o       ( mst_ar_qos_o      ),
+        .mst_ar_id_o        ( mst_ar_id_o       ),
+        .mst_ar_user_o      ( mst_ar_user_o     ),
+        .mst_ar_ready_i     ( mst_ar_ready_i    ),
+        .mst_ar_valid_o     ( mst_ar_valid_o    ),
+        .mst_w_data_o       ( mst_w_data_o      ),
+        .mst_w_strb_o       ( mst_w_strb_o      ),
+        .mst_w_user_o       ( mst_w_user_o      ),
+        .mst_w_last_o       ( mst_w_last_o      ),
+        .mst_w_ready_i      ( mst_w_ready_i     ),
+        .mst_w_valid_o      ( mst_w_valid_o     ),
+        .mst_r_data_i       ( mst_r_data_i      ),
+        .mst_r_resp_i       ( mst_r_resp_i      ),
+        .mst_r_last_i       ( mst_r_last_i      ),
+        .mst_r_id_i         ( mst_r_id_i        ),
+        .mst_r_user_i       ( mst_r_user_i      ),
+        .mst_r_ready_o      ( mst_r_ready_o     ),
+        .mst_r_valid_i      ( mst_r_valid_i     ),
+        .mst_b_resp_i       ( mst_b_resp_i      ),
+        .mst_b_id_i         ( mst_b_id_i        ),
+        .mst_b_user_i       ( mst_b_user_i      ),
+        .mst_b_ready_o      ( mst_b_ready_o     ),
+        .mst_b_valid_i      ( mst_b_valid_i     )
+    );
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv
new file mode 100644
index 0000000..ad3505c
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv
@@ -0,0 +1,151 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Wrapper for the AXI RISC-V Atomics Adapter that exposes AXI SystemVerilog interfaces.
+//
+// See the header of `axi_riscv_atomics` for a description.
+//
+// Maintainer: Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+module axi_riscv_atomics_wrap #(
+    /// AXI Parameters (forwarded unchanged to the `axi_riscv_atomics` instance below)
+    parameter int unsigned AXI_ADDR_WIDTH = 0,
+    parameter int unsigned AXI_DATA_WIDTH = 0,
+    parameter int unsigned AXI_ID_WIDTH = 0,
+    parameter int unsigned AXI_USER_WIDTH = 0,
+    /// Maximum number of AXI bursts outstanding at the same time
+    parameter int unsigned AXI_MAX_WRITE_TXNS = 0,
+    // Word width of the widest RISC-V processor that can issue requests to this module.
+    // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are
+    // supported if `aw_strb` is set correctly.
+    parameter int unsigned RISCV_WORD_WIDTH = 0,
+    /// Derived Parameters (do NOT change manually!)
+    localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 // only used by the assertion at the end of this module
+) (
+    input  logic    clk_i,
+    input  logic    rst_ni,
+    AXI_BUS.Master  mst, // unpacked onto the flat mst_* ports of i_atomics
+    AXI_BUS.Slave   slv  // unpacked onto the flat slv_* ports of i_atomics
+);
+    // The wrapper contains no logic of its own: each interface member is tied to the flat port of the same name.
+    axi_riscv_atomics #(
+        .AXI_ADDR_WIDTH     (AXI_ADDR_WIDTH),
+        .AXI_DATA_WIDTH     (AXI_DATA_WIDTH),
+        .AXI_ID_WIDTH       (AXI_ID_WIDTH),
+        .AXI_USER_WIDTH     (AXI_USER_WIDTH),
+        .AXI_MAX_WRITE_TXNS (AXI_MAX_WRITE_TXNS),
+        .RISCV_WORD_WIDTH   (RISCV_WORD_WIDTH)
+    ) i_atomics (
+        .clk_i           ( clk_i         ),
+        .rst_ni          ( rst_ni        ),
+        .slv_aw_addr_i   ( slv.aw_addr   ), // slave side, AW channel
+        .slv_aw_prot_i   ( slv.aw_prot   ),
+        .slv_aw_region_i ( slv.aw_region ),
+        .slv_aw_atop_i   ( slv.aw_atop   ),
+        .slv_aw_len_i    ( slv.aw_len    ),
+        .slv_aw_size_i   ( slv.aw_size   ),
+        .slv_aw_burst_i  ( slv.aw_burst  ),
+        .slv_aw_lock_i   ( slv.aw_lock   ),
+        .slv_aw_cache_i  ( slv.aw_cache  ),
+        .slv_aw_qos_i    ( slv.aw_qos    ),
+        .slv_aw_id_i     ( slv.aw_id     ),
+        .slv_aw_user_i   ( slv.aw_user   ),
+        .slv_aw_ready_o  ( slv.aw_ready  ),
+        .slv_aw_valid_i  ( slv.aw_valid  ),
+        .slv_ar_addr_i   ( slv.ar_addr   ), // slave side, AR channel
+        .slv_ar_prot_i   ( slv.ar_prot   ),
+        .slv_ar_region_i ( slv.ar_region ),
+        .slv_ar_len_i    ( slv.ar_len    ),
+        .slv_ar_size_i   ( slv.ar_size   ),
+        .slv_ar_burst_i  ( slv.ar_burst  ),
+        .slv_ar_lock_i   ( slv.ar_lock   ),
+        .slv_ar_cache_i  ( slv.ar_cache  ),
+        .slv_ar_qos_i    ( slv.ar_qos    ),
+        .slv_ar_id_i     ( slv.ar_id     ),
+        .slv_ar_user_i   ( slv.ar_user   ),
+        .slv_ar_ready_o  ( slv.ar_ready  ),
+        .slv_ar_valid_i  ( slv.ar_valid  ),
+        .slv_w_data_i    ( slv.w_data    ), // slave side, W channel
+        .slv_w_strb_i    ( slv.w_strb    ),
+        .slv_w_user_i    ( slv.w_user    ),
+        .slv_w_last_i    ( slv.w_last    ),
+        .slv_w_ready_o   ( slv.w_ready   ),
+        .slv_w_valid_i   ( slv.w_valid   ),
+        .slv_r_data_o    ( slv.r_data    ), // slave side, R channel
+        .slv_r_resp_o    ( slv.r_resp    ),
+        .slv_r_last_o    ( slv.r_last    ),
+        .slv_r_id_o      ( slv.r_id      ),
+        .slv_r_user_o    ( slv.r_user    ),
+        .slv_r_ready_i   ( slv.r_ready   ),
+        .slv_r_valid_o   ( slv.r_valid   ),
+        .slv_b_resp_o    ( slv.b_resp    ), // slave side, B channel
+        .slv_b_id_o      ( slv.b_id      ),
+        .slv_b_user_o    ( slv.b_user    ),
+        .slv_b_ready_i   ( slv.b_ready   ),
+        .slv_b_valid_o   ( slv.b_valid   ),
+        .mst_aw_addr_o   ( mst.aw_addr   ), // master side, AW channel
+        .mst_aw_prot_o   ( mst.aw_prot   ),
+        .mst_aw_region_o ( mst.aw_region ),
+        .mst_aw_atop_o   ( mst.aw_atop   ),
+        .mst_aw_len_o    ( mst.aw_len    ),
+        .mst_aw_size_o   ( mst.aw_size   ),
+        .mst_aw_burst_o  ( mst.aw_burst  ),
+        .mst_aw_lock_o   ( mst.aw_lock   ),
+        .mst_aw_cache_o  ( mst.aw_cache  ),
+        .mst_aw_qos_o    ( mst.aw_qos    ),
+        .mst_aw_id_o     ( mst.aw_id     ),
+        .mst_aw_user_o   ( mst.aw_user   ),
+        .mst_aw_ready_i  ( mst.aw_ready  ),
+        .mst_aw_valid_o  ( mst.aw_valid  ),
+        .mst_ar_addr_o   ( mst.ar_addr   ), // master side, AR channel
+        .mst_ar_prot_o   ( mst.ar_prot   ),
+        .mst_ar_region_o ( mst.ar_region ),
+        .mst_ar_len_o    ( mst.ar_len    ),
+        .mst_ar_size_o   ( mst.ar_size   ),
+        .mst_ar_burst_o  ( mst.ar_burst  ),
+        .mst_ar_lock_o   ( mst.ar_lock   ),
+        .mst_ar_cache_o  ( mst.ar_cache  ),
+        .mst_ar_qos_o    ( mst.ar_qos    ),
+        .mst_ar_id_o     ( mst.ar_id     ),
+        .mst_ar_user_o   ( mst.ar_user   ),
+        .mst_ar_ready_i  ( mst.ar_ready  ),
+        .mst_ar_valid_o  ( mst.ar_valid  ),
+        .mst_w_data_o    ( mst.w_data    ), // master side, W channel
+        .mst_w_strb_o    ( mst.w_strb    ),
+        .mst_w_user_o    ( mst.w_user    ),
+        .mst_w_last_o    ( mst.w_last    ),
+        .mst_w_ready_i   ( mst.w_ready   ),
+        .mst_w_valid_o   ( mst.w_valid   ),
+        .mst_r_data_i    ( mst.r_data    ), // master side, R channel
+        .mst_r_resp_i    ( mst.r_resp    ),
+        .mst_r_last_i    ( mst.r_last    ),
+        .mst_r_id_i      ( mst.r_id      ),
+        .mst_r_user_i    ( mst.r_user    ),
+        .mst_r_ready_o   ( mst.r_ready   ),
+        .mst_r_valid_i   ( mst.r_valid   ),
+        .mst_b_resp_i    ( mst.b_resp    ), // master side, B channel
+        .mst_b_id_i      ( mst.b_id      ),
+        .mst_b_user_i    ( mst.b_user    ),
+        .mst_b_ready_o   ( mst.b_ready   ),
+        .mst_b_valid_i   ( mst.b_valid   )
+    );
+
+    // Validate parameters.  Wrapped in translate_off/on pragmas and compiled out when VERILATOR is defined.
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params
+        assert (AXI_STRB_WIDTH == AXI_DATA_WIDTH/8) // NOTE(review): AXI_STRB_WIDTH is a localparam defined as this very expression, so the check holds by construction
+            else $fatal(1, "AXI_STRB_WIDTH must equal AXI_DATA_WIDTH/8!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv
new file mode 100644
index 0000000..82c132c
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv
@@ -0,0 +1,509 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI RISC-V LR/SC Adapter
+//
+// This adapter adds support for AXI4 exclusive accesses to a slave that natively does not support
+// exclusive accesses.  It is to be placed between that slave and the upstream master port, so that
+// the `mst` port of this module drives the slave and the `slv` port of this module is driven by
+// the upstream master.
+//
+// Exclusive accesses are only enabled for a range of addresses specified through parameters.  All
+// addresses within that range are guaranteed to fulfill the constraints described in A7.2 of the
+// AXI4 standard, both for normal and exclusive memory accesses.  Addresses outside that range
+// behave like a slave that does not support exclusive memory accesses (see AXI4, A7.2.5).
+//
+// Limitations:
+//  -   The adapter allows at most one read and one write access to be outstanding at any given
+//      time.
+//  -   The adapter does not support exclusive burst accesses.  Only single words can be
+//      reserved.
+//
+// Maintainer: Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+module axi_riscv_lrsc #(
+    /// Exclusively-accessible address range (closed interval from ADDR_BEGIN to ADDR_END)
+    parameter longint unsigned ADDR_BEGIN = 0,
+    parameter longint unsigned ADDR_END = 0,
+    /// AXI Parameters
+    parameter int unsigned AXI_ADDR_WIDTH = 0,
+    parameter int unsigned AXI_DATA_WIDTH = 0,
+    parameter int unsigned AXI_ID_WIDTH = 0,
+    parameter int unsigned AXI_USER_WIDTH = 0,
+    /// Derived Parameters (do NOT change manually!)
+    localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 // width of the w_strb ports below
+) (
+    input logic                         clk_i,
+    input logic                         rst_ni, // asynchronous, active-low (see the register process below)
+
+    /// Slave Interface (faces the upstream master)
+    input  logic [AXI_ADDR_WIDTH-1:0]   slv_aw_addr_i,
+    input  logic [2:0]                  slv_aw_prot_i,
+    input  logic [3:0]                  slv_aw_region_i,
+    input  logic [5:0]                  slv_aw_atop_i,
+    input  logic [7:0]                  slv_aw_len_i,
+    input  logic [2:0]                  slv_aw_size_i,
+    input  logic [1:0]                  slv_aw_burst_i,
+    input  logic                        slv_aw_lock_i,
+    input  logic [3:0]                  slv_aw_cache_i,
+    input  logic [3:0]                  slv_aw_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]     slv_aw_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_aw_user_i,
+    output logic                        slv_aw_ready_o,
+    input  logic                        slv_aw_valid_i,
+
+    input  logic [AXI_ADDR_WIDTH-1:0]   slv_ar_addr_i,
+    input  logic [2:0]                  slv_ar_prot_i,
+    input  logic [3:0]                  slv_ar_region_i,
+    input  logic [7:0]                  slv_ar_len_i,
+    input  logic [2:0]                  slv_ar_size_i,
+    input  logic [1:0]                  slv_ar_burst_i,
+    input  logic                        slv_ar_lock_i,
+    input  logic [3:0]                  slv_ar_cache_i,
+    input  logic [3:0]                  slv_ar_qos_i,
+    input  logic [AXI_ID_WIDTH-1:0]     slv_ar_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_ar_user_i,
+    output logic                        slv_ar_ready_o,
+    input  logic                        slv_ar_valid_i,
+
+    input  logic [AXI_DATA_WIDTH-1:0]   slv_w_data_i,
+    input  logic [AXI_STRB_WIDTH-1:0]   slv_w_strb_i,
+    input  logic [AXI_USER_WIDTH-1:0]   slv_w_user_i,
+    input  logic                        slv_w_last_i,
+    output logic                        slv_w_ready_o,
+    input  logic                        slv_w_valid_i,
+
+    output logic [AXI_DATA_WIDTH-1:0]   slv_r_data_o,
+    output logic [1:0]                  slv_r_resp_o,
+    output logic                        slv_r_last_o,
+    output logic [AXI_ID_WIDTH-1:0]     slv_r_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   slv_r_user_o,
+    input  logic                        slv_r_ready_i,
+    output logic                        slv_r_valid_o,
+
+    output logic [1:0]                  slv_b_resp_o,
+    output logic [AXI_ID_WIDTH-1:0]     slv_b_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   slv_b_user_o,
+    input  logic                        slv_b_ready_i,
+    output logic                        slv_b_valid_o,
+
+    /// Master Interface (drives the downstream slave)
+    output logic [AXI_ADDR_WIDTH-1:0]   mst_aw_addr_o,
+    output logic [2:0]                  mst_aw_prot_o,
+    output logic [3:0]                  mst_aw_region_o,
+    output logic [5:0]                  mst_aw_atop_o,
+    output logic [7:0]                  mst_aw_len_o,
+    output logic [2:0]                  mst_aw_size_o,
+    output logic [1:0]                  mst_aw_burst_o,
+    output logic                        mst_aw_lock_o,
+    output logic [3:0]                  mst_aw_cache_o,
+    output logic [3:0]                  mst_aw_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]     mst_aw_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_aw_user_o,
+    input  logic                        mst_aw_ready_i,
+    output logic                        mst_aw_valid_o,
+
+    output logic [AXI_ADDR_WIDTH-1:0]   mst_ar_addr_o,
+    output logic [2:0]                  mst_ar_prot_o,
+    output logic [3:0]                  mst_ar_region_o,
+    output logic [7:0]                  mst_ar_len_o,
+    output logic [2:0]                  mst_ar_size_o,
+    output logic [1:0]                  mst_ar_burst_o,
+    output logic                        mst_ar_lock_o,
+    output logic [3:0]                  mst_ar_cache_o,
+    output logic [3:0]                  mst_ar_qos_o,
+    output logic [AXI_ID_WIDTH-1:0]     mst_ar_id_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_ar_user_o,
+    input  logic                        mst_ar_ready_i,
+    output logic                        mst_ar_valid_o,
+
+    output logic [AXI_DATA_WIDTH-1:0]   mst_w_data_o,
+    output logic [AXI_STRB_WIDTH-1:0]   mst_w_strb_o,
+    output logic [AXI_USER_WIDTH-1:0]   mst_w_user_o,
+    output logic                        mst_w_last_o,
+    input  logic                        mst_w_ready_i,
+    output logic                        mst_w_valid_o,
+
+    input  logic [AXI_DATA_WIDTH-1:0]   mst_r_data_i,
+    input  logic [1:0]                  mst_r_resp_i,
+    input  logic                        mst_r_last_i,
+    input  logic [AXI_ID_WIDTH-1:0]     mst_r_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   mst_r_user_i,
+    output logic                        mst_r_ready_o,
+    input  logic                        mst_r_valid_i,
+
+    input  logic [1:0]                  mst_b_resp_i,
+    input  logic [AXI_ID_WIDTH-1:0]     mst_b_id_i,
+    input  logic [AXI_USER_WIDTH-1:0]   mst_b_user_i,
+    output logic                        mst_b_ready_o,
+    input  logic                        mst_b_valid_i
+);
+
+    // Declarations of Signals and Types
+    // IDs for the reservation-table check/set ports, plus the ID latched from the last accepted AW.
+    logic [AXI_ID_WIDTH-1:0]        art_check_id,
+                                    art_set_id,
+                                    w_id_d,                     w_id_q;
+    // Addresses for the reservation-table ports, the two clear requesters, and the latched AW address.
+    logic [AXI_ADDR_WIDTH-1:0]      art_check_addr,
+                                    art_clr_addr,
+                                    art_set_addr,
+                                    rd_clr_addr,
+                                    wr_clr_addr,
+                                    w_addr_d,                   w_addr_q;
+    // Request/grant pairs towards the reservation table (art_*) and the clear arbiter (rd_clr_*, wr_clr_*).
+    logic                           art_check_req,              art_check_gnt,
+                                    art_clr_req,                art_clr_gnt,
+                                    art_set_req,                art_set_gnt,
+                                    rd_clr_req,                 rd_clr_gnt,
+                                    wr_clr_req,                 wr_clr_gnt;
+    // Result of a reservation check; only evaluated in AW_IDLE while art_check_gnt is high.
+    logic                           art_check_res;
+    // Flags marking the in-flight write (b_excl) / read (r_excl) as an exclusive access handled by this adapter.
+    logic                           b_excl_d,                   b_excl_q,
+                                    r_excl_d,                   r_excl_q;
+    // Read-channel FSM state.
+    typedef enum logic [1:0]    {R_IDLE, R_WAIT_AR, R_WAIT_R} r_state_t;
+    r_state_t                       r_state_d,                  r_state_q;
+    // Write-channel FSM state.
+    typedef enum logic [2:0]    {AW_IDLE, W_FORWARD, W_BYPASS, W_WAIT_ART_CLR, W_DROP, B_FORWARD,
+                                B_INJECT} w_state_t;
+    w_state_t                       w_state_d,                  w_state_q;
+
+    // AR and R Channel
+
+    // Time-Invariant Signal Assignments (plain pass-through, except that the lock bit is tied low)
+    assign mst_ar_addr_o      = slv_ar_addr_i;
+    assign mst_ar_prot_o      = slv_ar_prot_i;
+    assign mst_ar_region_o    = slv_ar_region_i;
+    assign mst_ar_len_o       = slv_ar_len_i;
+    assign mst_ar_size_o      = slv_ar_size_i;
+    assign mst_ar_burst_o     = slv_ar_burst_i;
+    assign mst_ar_lock_o      = 1'b0; // the exclusive flag is never propagated downstream
+    assign mst_ar_cache_o     = slv_ar_cache_i;
+    assign mst_ar_qos_o       = slv_ar_qos_i;
+    assign mst_ar_id_o        = slv_ar_id_i;
+    assign mst_ar_user_o      = slv_ar_user_i;
+    assign slv_r_data_o       = mst_r_data_i;
+    assign slv_r_last_o       = mst_r_last_i;
+    assign slv_r_id_o         = mst_r_id_i;
+    assign slv_r_user_o       = mst_r_user_i;
+
+    // FSM for Time-Variant Signal Assignments (read side: AR/R handshakes, R response, reservation set)
+    always_comb begin // defaults first; the case statement below overrides them per state
+        mst_ar_valid_o  = 1'b0;
+        slv_ar_ready_o  = 1'b0;
+        mst_r_ready_o   = 1'b0;
+        slv_r_valid_o   = 1'b0;
+        slv_r_resp_o    = '0;
+        art_set_addr    = '0;
+        art_set_id      = '0;
+        art_set_req     = 1'b0;
+        rd_clr_addr     = '0;
+        rd_clr_req      = 1'b0; // NOTE(review): never set to 1 anywhere in this module, so the read-side clear input stays idle
+        r_excl_d        = r_excl_q;
+        r_state_d       = r_state_q;
+
+        case (r_state_q)
+            // Wait for an AR.  In-range exclusive single-beat reads first request a reservation.
+            R_IDLE: begin
+                if (slv_ar_valid_i) begin
+                    if (slv_ar_addr_i >= ADDR_BEGIN && slv_ar_addr_i <= ADDR_END && slv_ar_lock_i &&
+                            slv_ar_len_i == 8'h00) begin
+                        // Inside exclusively-accessible address range and exclusive access and no
+                        // burst
+                        art_set_addr    = slv_ar_addr_i;
+                        art_set_id      = slv_ar_id_i;
+                        art_set_req     = 1'b1;
+                        r_excl_d        = 1'b1;
+                        if (art_set_gnt) begin // the AR is only offered downstream once the set request is granted
+                            mst_ar_valid_o = 1'b1;
+                            if (mst_ar_ready_i) begin
+                                slv_ar_ready_o = 1'b1;
+                                r_state_d = R_WAIT_R;
+                            end else begin
+                                r_state_d = R_WAIT_AR;
+                            end
+                        end
+                    end else begin
+                        // Outside exclusively-accessible address range or regular access or burst
+                        r_excl_d = 1'b0;
+                        mst_ar_valid_o = 1'b1;
+                        if (mst_ar_ready_i) begin
+                            slv_ar_ready_o = 1'b1;
+                            r_state_d = R_WAIT_R;
+                        end else begin
+                            r_state_d = R_WAIT_AR;
+                        end
+                    end
+                end
+            end
+            // The AR was offered downstream but not accepted yet: pass the handshake through until it completes.
+            R_WAIT_AR: begin
+                mst_ar_valid_o = slv_ar_valid_i;
+                slv_ar_ready_o = mst_ar_ready_i;
+                if (mst_ar_ready_i && mst_ar_valid_o) begin
+                    r_state_d = R_WAIT_R;
+                end
+            end
+            // Forward R beats.  A response with resp[1] == 0 is rewritten to {1'b0, r_excl_q}; others pass unchanged.
+            R_WAIT_R: begin
+                mst_r_ready_o = slv_r_ready_i;
+                slv_r_valid_o = mst_r_valid_i;
+                if (mst_r_resp_i[1] == 1'b0) begin
+                    slv_r_resp_o = {1'b0, r_excl_q};
+                end else begin
+                    slv_r_resp_o = mst_r_resp_i;
+                end
+                if (mst_r_valid_i && mst_r_ready_o && mst_r_last_i) begin
+                    r_excl_d    = 1'b0;
+                    r_state_d   = R_IDLE;
+                end
+            end
+
+            default: begin
+                r_state_d = R_IDLE;
+            end
+        endcase
+    end
+
+    // AW, W and B Channel
+
+    // Time-Invariant Signal Assignments (plain pass-through, except that the lock bit is tied low)
+    assign mst_aw_addr_o    = slv_aw_addr_i;
+    assign mst_aw_prot_o    = slv_aw_prot_i;
+    assign mst_aw_region_o  = slv_aw_region_i;
+    assign mst_aw_atop_o    = slv_aw_atop_i;
+    assign mst_aw_len_o     = slv_aw_len_i;
+    assign mst_aw_size_o    = slv_aw_size_i;
+    assign mst_aw_burst_o   = slv_aw_burst_i;
+    assign mst_aw_lock_o    = 1'b0; // the exclusive flag is never propagated downstream
+    assign mst_aw_cache_o   = slv_aw_cache_i;
+    assign mst_aw_qos_o     = slv_aw_qos_i;
+    assign mst_aw_id_o      = slv_aw_id_i;
+    assign mst_aw_user_o    = slv_aw_user_i;
+    assign mst_w_data_o     = slv_w_data_i;
+    assign mst_w_strb_o     = slv_w_strb_i;
+    assign mst_w_user_o     = slv_w_user_i;
+    assign mst_w_last_o     = slv_w_last_i;
+    // Latch address and ID of every accepted AW; used later for the reservation clear and the injected B.
+    always_comb begin
+        w_addr_d    = w_addr_q;
+        w_id_d      = w_id_q;
+        if (slv_aw_valid_i && slv_aw_ready_o) begin
+            w_addr_d    = slv_aw_addr_i;
+            w_id_d      = slv_aw_id_i;
+        end
+    end
+
+    // FSM for Time-Variant Signal Assignments (write side: AW/W/B handshakes, reservation check and clear)
+    always_comb begin // defaults first; the case statement below overrides them per state
+        mst_aw_valid_o  = 1'b0;
+        slv_aw_ready_o  = 1'b0;
+        mst_w_valid_o   = 1'b0;
+        slv_w_ready_o   = 1'b0;
+        slv_b_valid_o   = 1'b0;
+        mst_b_ready_o   = 1'b0;
+        slv_b_resp_o    = '0;
+        slv_b_id_o      = '0;
+        slv_b_user_o    = '0;
+        art_check_addr  = '0;
+        art_check_id    = '0;
+        art_check_req   = 1'b0;
+        wr_clr_addr     = '0;
+        wr_clr_req      = 1'b0;
+        b_excl_d        = b_excl_q;
+        w_state_d       = w_state_q;
+
+        case (w_state_q)
+            // Accept a new AW and decide how its W beats are handled (forward, bypass or drop).
+            AW_IDLE: begin
+                if (slv_aw_valid_i) begin
+                    // New AW, and W channel is idle
+                    if (slv_aw_addr_i >= ADDR_BEGIN && slv_aw_addr_i <= ADDR_END) begin
+                        // Inside exclusively-accessible address range
+                        if (slv_aw_lock_i && slv_aw_len_i == 8'h00) begin
+                            // Exclusive access and no burst, so check if reservation exists
+                            art_check_addr  = slv_aw_addr_i;
+                            art_check_id    = slv_aw_id_i;
+                            art_check_req   = 1'b1;
+                            if (art_check_gnt) begin
+                                if (art_check_res) begin
+                                    // Yes, so forward downstream
+                                    mst_aw_valid_o = 1'b1;
+                                    if (mst_aw_ready_i) begin
+                                        slv_aw_ready_o    = 1'b1;
+                                        b_excl_d        = 1'b1;
+                                        w_state_d       = W_FORWARD;
+                                    end
+                                end else begin
+                                    // No, drop in W channel.
+                                    slv_aw_ready_o    = 1'b1; // AW is consumed here and never shown downstream
+                                    w_state_d       = W_DROP;
+                                end
+                            end
+                        end else begin
+                            // Non-exclusive access or burst, so forward downstream
+                            mst_aw_valid_o = 1'b1;
+                            if (mst_aw_ready_i) begin
+                                slv_aw_ready_o    = 1'b1;
+                                w_state_d       = W_FORWARD;
+                            end
+                        end
+                    end else begin
+                        // Outside exclusively-accessible address range, so bypass any
+                        // modifications.
+                        mst_aw_valid_o = 1'b1;
+                        slv_aw_ready_o = mst_aw_ready_i;
+                        if (slv_aw_ready_o) begin
+                            w_state_d = W_BYPASS;
+                        end
+                    end
+                end
+            end
+            // Forward W beats; once the last beat is accepted, request a reservation clear for the latched address.
+            W_FORWARD: begin
+                mst_w_valid_o = slv_w_valid_i;
+                slv_w_ready_o = mst_w_ready_i;
+                if (slv_w_valid_i && slv_w_ready_o && slv_w_last_i) begin
+                    wr_clr_addr = w_addr_q;
+                    wr_clr_req  = 1'b1;
+                    if (wr_clr_gnt) begin
+                        w_state_d = B_FORWARD;
+                    end else begin
+                        w_state_d = W_WAIT_ART_CLR;
+                    end
+                end
+            end
+            // Forward W beats without issuing any reservation clear.
+            W_BYPASS: begin
+                mst_w_valid_o = slv_w_valid_i;
+                slv_w_ready_o = mst_w_ready_i;
+                if (slv_w_valid_i && slv_w_ready_o && slv_w_last_i) begin
+                    w_state_d = B_FORWARD;
+                end
+            end
+            // Keep the clear request asserted until it is granted.
+            W_WAIT_ART_CLR: begin
+                wr_clr_addr = w_addr_q;
+                wr_clr_req  = 1'b1;
+                if (wr_clr_gnt) begin
+                    w_state_d = B_FORWARD;
+                end
+            end
+            // Swallow the W beats of a failed exclusive write; mst_w_valid_o keeps its default of 0.
+            W_DROP: begin
+                slv_w_ready_o = 1'b1;
+                if (slv_w_valid_i && slv_w_last_i) begin
+                    w_state_d = B_INJECT;
+                end
+            end
+            // Relay the downstream B response; when resp[1] == 0, resp[0] is replaced by b_excl_q.
+            B_FORWARD: begin
+                mst_b_ready_o   = slv_b_ready_i;
+                slv_b_valid_o   = mst_b_valid_i;
+                slv_b_resp_o[1] = mst_b_resp_i[1];
+                slv_b_resp_o[0] = (mst_b_resp_i[1] == 1'b0) ? b_excl_q : mst_b_resp_i[0];
+                slv_b_user_o    = mst_b_user_i;
+                slv_b_id_o      = mst_b_id_i;
+                if (slv_b_valid_o && slv_b_ready_i) begin
+                    b_excl_d    = 1'b0;
+                    w_state_d   = AW_IDLE;
+                end
+            end
+            // Generate the B response locally for a dropped write: resp 2'b00 with the latched ID (user stays '0).
+            B_INJECT: begin
+                slv_b_id_o = w_id_q;
+                slv_b_resp_o = 2'b00;
+                slv_b_valid_o = 1'b1;
+                if (slv_b_ready_i) begin
+                    w_state_d = AW_IDLE;
+                end
+            end
+
+            default: begin
+                w_state_d = AW_IDLE;
+            end
+        endcase
+    end
+
+    // AXI Reservation Table (set by the read FSM, checked by the write FSM, cleared via the arbiter below)
+    axi_res_tbl #(
+        .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
+        .AXI_ID_WIDTH   (AXI_ID_WIDTH)
+    ) i_art (
+        .clk_i                  (clk_i),
+        .rst_ni                 (rst_ni),
+        .clr_addr_i             (art_clr_addr),
+        .clr_req_i              (art_clr_req),
+        .clr_gnt_o              (art_clr_gnt),
+        .set_addr_i             (art_set_addr),
+        .set_id_i               (art_set_id),
+        .set_req_i              (art_set_req),
+        .set_gnt_o              (art_set_gnt),
+        .check_addr_i           (art_check_addr),
+        .check_id_i             (art_check_id),
+        .check_res_o            (art_check_res),
+        .check_req_i            (art_check_req),
+        .check_gnt_o            (art_check_gnt)
+    );
+
+    // ART Clear Arbiter (merges the read-side and write-side clear requests onto the table's single clear port)
+    stream_arbiter #(
+        .DATA_T     (logic[AXI_ADDR_WIDTH-1:0]),
+        .N_INP      (2)
+    ) i_non_excl_acc_arb (
+        .clk_i          (clk_i),
+        .rst_ni         (rst_ni),
+        .inp_data_i     ({rd_clr_addr,  wr_clr_addr}),
+        .inp_valid_i    ({rd_clr_req,   wr_clr_req}),
+        .inp_ready_o    ({rd_clr_gnt,   wr_clr_gnt}),
+        .oup_data_o     (art_clr_addr),
+        .oup_valid_o    (art_clr_req),
+        .oup_ready_i    (art_clr_gnt)
+    );
+
+    // Registers (asynchronous active-low reset returns both FSMs to their idle states)
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (~rst_ni) begin
+            b_excl_q    <= 1'b0;
+            r_excl_q    <= 1'b0;
+            r_state_q   <= R_IDLE;
+            w_addr_q    <= '0;
+            w_id_q      <= '0;
+            w_state_q   <= AW_IDLE;
+        end else begin
+            b_excl_q    <= b_excl_d;
+            r_excl_q    <= r_excl_d;
+            r_state_q   <= r_state_d;
+            w_addr_q    <= w_addr_d;
+            w_id_q      <= w_id_d;
+            w_state_q   <= w_state_d;
+        end
+    end
+
+    // Validate parameters.  Wrapped in translate_off/on pragmas and compiled out when VERILATOR is defined.
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params
+        assert (ADDR_END > ADDR_BEGIN)
+            else $fatal(1, "ADDR_END must be greater than ADDR_BEGIN!");
+        assert (AXI_ADDR_WIDTH > 0)
+            else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!");
+        assert (AXI_DATA_WIDTH > 0)
+            else $fatal(1, "AXI_DATA_WIDTH must be greater than 0!");
+        assert (AXI_ID_WIDTH > 0)
+            else $fatal(1, "AXI_ID_WIDTH must be greater than 0!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv
new file mode 100644
index 0000000..3eb409b
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv
@@ -0,0 +1,148 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Wrapper for the AXI RISC-V LR/SC Adapter that exposes AXI SystemVerilog interfaces.
+//
+// See the header of `axi_riscv_lrsc` for a description.
+//
+// Maintainer: Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+module axi_riscv_lrsc_wrap #( // connects two AXI_BUS interface ports to the flat ports of axi_riscv_lrsc
+    /// Exclusively-accessible address range (closed interval from ADDR_BEGIN to ADDR_END)
+    parameter longint unsigned ADDR_BEGIN = 0,
+    parameter longint unsigned ADDR_END = 0,
+    /// AXI Parameters (all default to 0; each is forwarded unchanged to axi_riscv_lrsc)
+    parameter int unsigned AXI_ADDR_WIDTH = 0,
+    parameter int unsigned AXI_DATA_WIDTH = 0,
+    parameter int unsigned AXI_ID_WIDTH = 0,
+    parameter int unsigned AXI_USER_WIDTH = 0,
+    /// Derived Parameters (do NOT change manually!)
+    localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 // one strobe bit per data byte
+) (
+    input  logic    clk_i,
+    input  logic    rst_ni,
+    AXI_BUS.Master  mst, // wired to the mst_* ports of i_lrsc
+    AXI_BUS.Slave   slv  // wired to the slv_* ports of i_lrsc
+);
+
+    axi_riscv_lrsc #( // the wrapped adapter; this module adds no logic of its own
+        .ADDR_BEGIN     (ADDR_BEGIN),
+        .ADDR_END       (ADDR_END),
+        .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
+        .AXI_DATA_WIDTH (AXI_DATA_WIDTH),
+        .AXI_ID_WIDTH   (AXI_ID_WIDTH),
+        .AXI_USER_WIDTH (AXI_USER_WIDTH)
+    ) i_lrsc (
+        .clk_i           ( clk_i         ),
+        .rst_ni          ( rst_ni        ),
+        .slv_aw_addr_i   ( slv.aw_addr   ), // slv port: AW channel
+        .slv_aw_prot_i   ( slv.aw_prot   ),
+        .slv_aw_region_i ( slv.aw_region ),
+        .slv_aw_atop_i   ( slv.aw_atop   ),
+        .slv_aw_len_i    ( slv.aw_len    ),
+        .slv_aw_size_i   ( slv.aw_size   ),
+        .slv_aw_burst_i  ( slv.aw_burst  ),
+        .slv_aw_lock_i   ( slv.aw_lock   ),
+        .slv_aw_cache_i  ( slv.aw_cache  ),
+        .slv_aw_qos_i    ( slv.aw_qos    ),
+        .slv_aw_id_i     ( slv.aw_id     ),
+        .slv_aw_user_i   ( slv.aw_user   ),
+        .slv_aw_ready_o  ( slv.aw_ready  ),
+        .slv_aw_valid_i  ( slv.aw_valid  ),
+        .slv_ar_addr_i   ( slv.ar_addr   ), // slv port: AR channel
+        .slv_ar_prot_i   ( slv.ar_prot   ),
+        .slv_ar_region_i ( slv.ar_region ),
+        .slv_ar_len_i    ( slv.ar_len    ),
+        .slv_ar_size_i   ( slv.ar_size   ),
+        .slv_ar_burst_i  ( slv.ar_burst  ),
+        .slv_ar_lock_i   ( slv.ar_lock   ),
+        .slv_ar_cache_i  ( slv.ar_cache  ),
+        .slv_ar_qos_i    ( slv.ar_qos    ),
+        .slv_ar_id_i     ( slv.ar_id     ),
+        .slv_ar_user_i   ( slv.ar_user   ),
+        .slv_ar_ready_o  ( slv.ar_ready  ),
+        .slv_ar_valid_i  ( slv.ar_valid  ),
+        .slv_w_data_i    ( slv.w_data    ), // slv port: W channel
+        .slv_w_strb_i    ( slv.w_strb    ),
+        .slv_w_user_i    ( slv.w_user    ),
+        .slv_w_last_i    ( slv.w_last    ),
+        .slv_w_ready_o   ( slv.w_ready   ),
+        .slv_w_valid_i   ( slv.w_valid   ),
+        .slv_r_data_o    ( slv.r_data    ), // slv port: R channel
+        .slv_r_resp_o    ( slv.r_resp    ),
+        .slv_r_last_o    ( slv.r_last    ),
+        .slv_r_id_o      ( slv.r_id      ),
+        .slv_r_user_o    ( slv.r_user    ),
+        .slv_r_ready_i   ( slv.r_ready   ),
+        .slv_r_valid_o   ( slv.r_valid   ),
+        .slv_b_resp_o    ( slv.b_resp    ), // slv port: B channel
+        .slv_b_id_o      ( slv.b_id      ),
+        .slv_b_user_o    ( slv.b_user    ),
+        .slv_b_ready_i   ( slv.b_ready   ),
+        .slv_b_valid_o   ( slv.b_valid   ),
+        .mst_aw_addr_o   ( mst.aw_addr   ), // mst port: AW channel
+        .mst_aw_prot_o   ( mst.aw_prot   ),
+        .mst_aw_region_o ( mst.aw_region ),
+        .mst_aw_atop_o   ( mst.aw_atop   ),
+        .mst_aw_len_o    ( mst.aw_len    ),
+        .mst_aw_size_o   ( mst.aw_size   ),
+        .mst_aw_burst_o  ( mst.aw_burst  ),
+        .mst_aw_lock_o   ( mst.aw_lock   ),
+        .mst_aw_cache_o  ( mst.aw_cache  ),
+        .mst_aw_qos_o    ( mst.aw_qos    ),
+        .mst_aw_id_o     ( mst.aw_id     ),
+        .mst_aw_user_o   ( mst.aw_user   ),
+        .mst_aw_ready_i  ( mst.aw_ready  ),
+        .mst_aw_valid_o  ( mst.aw_valid  ),
+        .mst_ar_addr_o   ( mst.ar_addr   ), // mst port: AR channel
+        .mst_ar_prot_o   ( mst.ar_prot   ),
+        .mst_ar_region_o ( mst.ar_region ),
+        .mst_ar_len_o    ( mst.ar_len    ),
+        .mst_ar_size_o   ( mst.ar_size   ),
+        .mst_ar_burst_o  ( mst.ar_burst  ),
+        .mst_ar_lock_o   ( mst.ar_lock   ),
+        .mst_ar_cache_o  ( mst.ar_cache  ),
+        .mst_ar_qos_o    ( mst.ar_qos    ),
+        .mst_ar_id_o     ( mst.ar_id     ),
+        .mst_ar_user_o   ( mst.ar_user   ),
+        .mst_ar_ready_i  ( mst.ar_ready  ),
+        .mst_ar_valid_o  ( mst.ar_valid  ),
+        .mst_w_data_o    ( mst.w_data    ), // mst port: W channel
+        .mst_w_strb_o    ( mst.w_strb    ),
+        .mst_w_user_o    ( mst.w_user    ),
+        .mst_w_last_o    ( mst.w_last    ),
+        .mst_w_ready_i   ( mst.w_ready   ),
+        .mst_w_valid_o   ( mst.w_valid   ),
+        .mst_r_data_i    ( mst.r_data    ), // mst port: R channel
+        .mst_r_resp_i    ( mst.r_resp    ),
+        .mst_r_last_i    ( mst.r_last    ),
+        .mst_r_id_i      ( mst.r_id      ),
+        .mst_r_user_i    ( mst.r_user    ),
+        .mst_r_ready_o   ( mst.r_ready   ),
+        .mst_r_valid_i   ( mst.r_valid   ),
+        .mst_b_resp_i    ( mst.b_resp    ), // mst port: B channel
+        .mst_b_id_i      ( mst.b_id      ),
+        .mst_b_user_i    ( mst.b_user    ),
+        .mst_b_ready_o   ( mst.b_ready   ),
+        .mst_b_valid_i   ( mst.b_valid   )
+    );
+
+    // Validate parameters (guarded by translate_off and compiled out when VERILATOR is defined).
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params
+        assert (AXI_STRB_WIDTH == AXI_DATA_WIDTH/8) // holds by construction: localparam above is this expression
+            else $fatal(1, "AXI_STRB_WIDTH must equal AXI_DATA_WIDTH/8!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/tb/ariane_axi_pkg.sv b/test/type_param/corev_apu/tb/ariane_axi_pkg.sv
new file mode 100644
index 0000000..c750336
--- /dev/null
+++ b/test/type_param/corev_apu/tb/ariane_axi_pkg.sv
@@ -0,0 +1,109 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:   ariane_axi_pkg.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   17.8.2018
+ *
+ * Description: Contains Ariane's AXI channel and request/response types (user fields included)
+ */
+
+package ariane_axi; // AXI typedefs whose widths are taken from cva6_config_pkg
+
+    localparam IdWidth   = cva6_config_pkg::CVA6ConfigAxiIdWidth; // Recommended by AXI standard
+    localparam UserWidth = cva6_config_pkg::CVA6ConfigDataUserWidth;
+    localparam AddrWidth = cva6_config_pkg::CVA6ConfigAxiAddrWidth;
+    localparam DataWidth = cva6_config_pkg::CVA6ConfigAxiDataWidth;
+    localparam StrbWidth = DataWidth / 8; // one strobe bit per data byte
+
+    typedef logic [IdWidth-1:0]   id_t;
+    typedef logic [AddrWidth-1:0] addr_t;
+    typedef logic [DataWidth-1:0] data_t;
+    typedef logic [StrbWidth-1:0] strb_t;
+    typedef logic [UserWidth-1:0] user_t;
+
+    // AW Channel (same fields as the AR channel below, plus atop)
+    typedef struct packed {
+        id_t              id;
+        addr_t            addr;
+        axi_pkg::len_t    len;
+        axi_pkg::size_t   size;
+        axi_pkg::burst_t  burst;
+        logic             lock;
+        axi_pkg::cache_t  cache;
+        axi_pkg::prot_t   prot;
+        axi_pkg::qos_t    qos;
+        axi_pkg::region_t region;
+        axi_pkg::atop_t   atop;
+        user_t            user;
+    } aw_chan_t;
+
+    // W Channel - AXI4 doesn't define a wid
+    typedef struct packed {
+        data_t data;
+        strb_t strb;
+        logic  last;
+        user_t user;
+    } w_chan_t;
+
+    // B Channel
+    typedef struct packed {
+        id_t            id;
+        axi_pkg::resp_t resp;
+        user_t          user;
+    } b_chan_t;
+
+    // AR Channel (no atop field, otherwise identical to the AW channel)
+    typedef struct packed {
+        id_t             id;
+        addr_t            addr;
+        axi_pkg::len_t    len;
+        axi_pkg::size_t   size;
+        axi_pkg::burst_t  burst;
+        logic             lock;
+        axi_pkg::cache_t  cache;
+        axi_pkg::prot_t   prot;
+        axi_pkg::qos_t    qos;
+        axi_pkg::region_t region;
+        user_t            user;
+    } ar_chan_t;
+
+    // R Channel
+    typedef struct packed {
+        id_t            id;
+        data_t          data;
+        axi_pkg::resp_t resp;
+        logic           last;
+        user_t          user;
+    } r_chan_t;
+
+    // Request struct: AW/W/AR payloads with their valid flags, plus the B/R ready flags
+    typedef struct packed {
+        aw_chan_t aw;
+        logic     aw_valid;
+        w_chan_t  w;
+        logic     w_valid;
+        logic     b_ready;
+        ar_chan_t ar;
+        logic     ar_valid;
+        logic     r_ready;
+    } req_t;
+
+    typedef struct packed { // Response struct: AW/AR/W ready flags, plus B/R payloads with their valid flags
+        logic     aw_ready;
+        logic     ar_ready;
+        logic     w_ready;
+        logic     b_valid;
+        b_chan_t  b;
+        logic     r_valid;
+        r_chan_t  r;
+    } resp_t;
+
+endpackage
diff --git a/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv b/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv
new file mode 100644
index 0000000..378b0d6
--- /dev/null
+++ b/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv
@@ -0,0 +1,102 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the “License”); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File:   ariane_axi_soc_pkg.sv
+ * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * Date:   17.8.2018
+ *
+ * Description: Contains Ariane's AXI slave-side (wider ID) types on SoC (user fields included)
+ */
+
+package ariane_axi_soc; // slave-side AXI typedefs; widths are inherited from the ariane_axi package
+
+    localparam UserWidth = ariane_axi::UserWidth;
+    localparam AddrWidth = ariane_axi::AddrWidth;
+    localparam DataWidth = ariane_axi::DataWidth;
+    localparam StrbWidth = DataWidth / 8; // one strobe bit per data byte
+    localparam IdWidth = ariane_axi::IdWidth;
+    localparam IdWidthSlave = IdWidth + $clog2(ariane_soc::NrSlaves); // IdWidth widened by $clog2(NrSlaves) bits
+
+    typedef logic [IdWidth-1:0] id_t;
+    typedef logic [IdWidthSlave-1:0] id_slv_t; // the id type used by every *_slv_t channel below
+    typedef logic [AddrWidth-1:0] addr_t;
+    typedef logic [DataWidth-1:0] data_t;
+    typedef logic [StrbWidth-1:0] strb_t;
+    typedef logic [UserWidth-1:0] user_t;
+
+    // AW Channel - Slave (id is id_slv_t; includes atop)
+    typedef struct packed {
+        id_slv_t          id;
+        addr_t            addr;
+        axi_pkg::len_t    len;
+        axi_pkg::size_t   size;
+        axi_pkg::burst_t  burst;
+        logic             lock;
+        axi_pkg::cache_t  cache;
+        axi_pkg::prot_t   prot;
+        axi_pkg::qos_t    qos;
+        axi_pkg::region_t region;
+        axi_pkg::atop_t   atop;
+        user_t            user;
+    } aw_chan_slv_t;
+
+    // B Channel - Slave
+    typedef struct packed {
+        id_slv_t        id;
+        axi_pkg::resp_t resp;
+        user_t          user;
+    } b_chan_slv_t;
+
+    // AR Channel - Slave (id is id_slv_t; no atop field)
+    typedef struct packed {
+        id_slv_t          id;
+        addr_t            addr;
+        axi_pkg::len_t    len;
+        axi_pkg::size_t   size;
+        axi_pkg::burst_t  burst;
+        logic             lock;
+        axi_pkg::cache_t  cache;
+        axi_pkg::prot_t   prot;
+        axi_pkg::qos_t    qos;
+        axi_pkg::region_t region;
+        user_t            user;
+    } ar_chan_slv_t;
+
+    // R Channel - Slave
+    typedef struct packed {
+        id_slv_t        id;
+        data_t          data;
+        axi_pkg::resp_t resp;
+        logic           last;
+        user_t          user;
+    } r_chan_slv_t;
+
+    typedef struct packed { // Request struct: AW/W/AR payloads with their valid flags, plus the B/R ready flags
+        aw_chan_slv_t aw;
+        logic         aw_valid;
+        ariane_axi::w_chan_t      w; // W type is reused from ariane_axi: it has no id field to widen
+        logic         w_valid;
+        logic         b_ready;
+        ar_chan_slv_t ar;
+        logic         ar_valid;
+        logic         r_ready;
+    } req_slv_t;
+
+    typedef struct packed { // Response struct: AW/AR/W ready flags, plus B/R payloads with their valid flags
+        logic         aw_ready;
+        logic         ar_ready;
+        logic         w_ready;
+        logic         b_valid;
+        b_chan_slv_t  b;
+        logic         r_valid;
+        r_chan_slv_t  r;
+    } resp_slv_t;
+
+endpackage
diff --git a/test/type_param/corev_apu/tb/ariane_peripherals.sv b/test/type_param/corev_apu/tb/ariane_peripherals.sv
new file mode 100644
index 0000000..9865af4
--- /dev/null
+++ b/test/type_param/corev_apu/tb/ariane_peripherals.sv
@@ -0,0 +1,619 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "register_interface/assign.svh"
+`include "register_interface/typedef.svh"
+
+// Xilinx Peripherals
+module ariane_peripherals #(
+    parameter int AxiAddrWidth = -1,
+    parameter int AxiDataWidth = -1,
+    parameter int AxiIdWidth   = -1,
+    parameter int AxiUserWidth = 1,
+    parameter bit InclUART     = 1,
+    parameter bit InclSPI      = 0,
+    parameter bit InclEthernet = 0,
+    parameter bit InclGPIO     = 0,
+    parameter bit InclTimer    = 1
+) (
+    input  logic       clk_i           , // Clock
+    input  logic       rst_ni          , // Asynchronous reset active low
+    AXI_BUS.Slave      plic            ,
+    AXI_BUS.Slave      uart            ,
+    AXI_BUS.Slave      spi             ,
+    AXI_BUS.Slave      ethernet        ,
+    AXI_BUS.Slave      timer           ,
+    output logic [1:0] irq_o           ,
+    // UART
+    input  logic       rx_i            ,
+    output logic       tx_o            ,
+    // Ethernet
+    input  wire        eth_txck        ,
+    input  wire        eth_rxck        ,
+    input  wire        eth_rxctl       ,
+    input  wire [3:0]  eth_rxd         ,
+    output wire        eth_rst_n       ,
+    output wire        eth_tx_en       ,
+    output wire [3:0]  eth_txd         ,
+    inout  wire        phy_mdio        ,
+    output logic       eth_mdc         ,
+    // MDIO Interface
+    inout              mdio            ,
+    output             mdc             ,
+    // SPI
+    output logic       spi_clk_o       ,
+    output logic       spi_mosi        ,
+    input  logic       spi_miso        ,
+    output logic       spi_ss
+);
+
+    // ---------------
+    // 1. PLIC
+    // ---------------
+    logic [ariane_soc::NumSources-1:0] irq_sources;
+
+    // Unused interrupt sources
+    assign irq_sources[ariane_soc::NumSources-1:7] = '0;
+
+    REG_BUS #(
+        .ADDR_WIDTH ( 32 ),
+        .DATA_WIDTH ( 32 )
+    ) reg_bus (clk_i);
+
+    logic         plic_penable;
+    logic         plic_pwrite;
+    logic [31:0]  plic_paddr;
+    logic         plic_psel;
+    logic [31:0]  plic_pwdata;
+    logic [31:0]  plic_prdata;
+    logic         plic_pready;
+    logic         plic_pslverr;
+
+    axi2apb_64_32 #(
+        .AXI4_ADDRESS_WIDTH ( AxiAddrWidth  ),
+        .AXI4_RDATA_WIDTH   ( AxiDataWidth  ),
+        .AXI4_WDATA_WIDTH   ( AxiDataWidth  ),
+        .AXI4_ID_WIDTH      ( AxiIdWidth    ),
+        .AXI4_USER_WIDTH    ( AxiUserWidth  ),
+        .BUFF_DEPTH_SLAVE   ( 2             ),
+        .APB_ADDR_WIDTH     ( 32            )
+    ) i_axi2apb_64_32_plic (
+        .ACLK      ( clk_i          ),
+        .ARESETn   ( rst_ni         ),
+        .test_en_i ( 1'b0           ),
+        .AWID_i    ( plic.aw_id     ),
+        .AWADDR_i  ( plic.aw_addr   ),
+        .AWLEN_i   ( plic.aw_len    ),
+        .AWSIZE_i  ( plic.aw_size   ),
+        .AWBURST_i ( plic.aw_burst  ),
+        .AWLOCK_i  ( plic.aw_lock   ),
+        .AWCACHE_i ( plic.aw_cache  ),
+        .AWPROT_i  ( plic.aw_prot   ),
+        .AWREGION_i( plic.aw_region ),
+        .AWUSER_i  ( plic.aw_user   ),
+        .AWQOS_i   ( plic.aw_qos    ),
+        .AWVALID_i ( plic.aw_valid  ),
+        .AWREADY_o ( plic.aw_ready  ),
+        .WDATA_i   ( plic.w_data    ),
+        .WSTRB_i   ( plic.w_strb    ),
+        .WLAST_i   ( plic.w_last    ),
+        .WUSER_i   ( plic.w_user    ),
+        .WVALID_i  ( plic.w_valid   ),
+        .WREADY_o  ( plic.w_ready   ),
+        .BID_o     ( plic.b_id      ),
+        .BRESP_o   ( plic.b_resp    ),
+        .BVALID_o  ( plic.b_valid   ),
+        .BUSER_o   ( plic.b_user    ),
+        .BREADY_i  ( plic.b_ready   ),
+        .ARID_i    ( plic.ar_id     ),
+        .ARADDR_i  ( plic.ar_addr   ),
+        .ARLEN_i   ( plic.ar_len    ),
+        .ARSIZE_i  ( plic.ar_size   ),
+        .ARBURST_i ( plic.ar_burst  ),
+        .ARLOCK_i  ( plic.ar_lock   ),
+        .ARCACHE_i ( plic.ar_cache  ),
+        .ARPROT_i  ( plic.ar_prot   ),
+        .ARREGION_i( plic.ar_region ),
+        .ARUSER_i  ( plic.ar_user   ),
+        .ARQOS_i   ( plic.ar_qos    ),
+        .ARVALID_i ( plic.ar_valid  ),
+        .ARREADY_o ( plic.ar_ready  ),
+        .RID_o     ( plic.r_id      ),
+        .RDATA_o   ( plic.r_data    ),
+        .RRESP_o   ( plic.r_resp    ),
+        .RLAST_o   ( plic.r_last    ),
+        .RUSER_o   ( plic.r_user    ),
+        .RVALID_o  ( plic.r_valid   ),
+        .RREADY_i  ( plic.r_ready   ),
+        .PENABLE   ( plic_penable   ),
+        .PWRITE    ( plic_pwrite    ),
+        .PADDR     ( plic_paddr     ),
+        .PSEL      ( plic_psel      ),
+        .PWDATA    ( plic_pwdata    ),
+        .PRDATA    ( plic_prdata    ),
+        .PREADY    ( plic_pready    ),
+        .PSLVERR   ( plic_pslverr   )
+    );
+
+    apb_to_reg i_apb_to_reg (
+        .clk_i     ( clk_i        ),
+        .rst_ni    ( rst_ni       ),
+        .penable_i ( plic_penable ),
+        .pwrite_i  ( plic_pwrite  ),
+        .paddr_i   ( plic_paddr   ),
+        .psel_i    ( plic_psel    ),
+        .pwdata_i  ( plic_pwdata  ),
+        .prdata_o  ( plic_prdata  ),
+        .pready_o  ( plic_pready  ),
+        .pslverr_o ( plic_pslverr ),
+        .reg_o     ( reg_bus      )
+    );
+
+    // define reg type according to REG_BUS above
+    `REG_BUS_TYPEDEF_ALL(plic, logic[31:0], logic[31:0], logic[3:0])
+    plic_req_t plic_req;
+    plic_rsp_t plic_rsp;
+
+    // assign REG_BUS.out to (req_t, rsp_t) pair
+    `REG_BUS_ASSIGN_TO_REQ(plic_req, reg_bus)
+    `REG_BUS_ASSIGN_FROM_RSP(reg_bus, plic_rsp)
+
+    plic_top #(
+      .N_SOURCE    ( ariane_soc::NumSources  ),
+      .N_TARGET    ( ariane_soc::NumTargets  ),
+      .MAX_PRIO    ( ariane_soc::MaxPriority ),
+      .reg_req_t   ( plic_req_t              ),
+      .reg_rsp_t   ( plic_rsp_t              )
+    ) i_plic (
+      .clk_i,
+      .rst_ni,
+      .req_i         ( plic_req    ),
+      .resp_o        ( plic_rsp    ),
+      .le_i          ( '0          ), // 0:level 1:edge
+      .irq_sources_i ( irq_sources ),
+      .eip_targets_o ( irq_o       )
+    );
+
+    // ---------------
+    // 2. UART
+    // ---------------
+    logic         uart_penable;
+    logic         uart_pwrite;
+    logic [31:0]  uart_paddr;
+    logic         uart_psel;
+    logic [31:0]  uart_pwdata;
+    logic [31:0]  uart_prdata;
+    logic         uart_pready;
+    logic         uart_pslverr;
+
+    axi2apb_64_32 #(
+        .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ),
+        .AXI4_RDATA_WIDTH   ( AxiDataWidth ),
+        .AXI4_WDATA_WIDTH   ( AxiDataWidth ),
+        .AXI4_ID_WIDTH      ( AxiIdWidth   ),
+        .AXI4_USER_WIDTH    ( AxiUserWidth ),
+        .BUFF_DEPTH_SLAVE   ( 2            ),
+        .APB_ADDR_WIDTH     ( 32           )
+    ) i_axi2apb_64_32_uart (
+        .ACLK      ( clk_i          ),
+        .ARESETn   ( rst_ni         ),
+        .test_en_i ( 1'b0           ),
+        .AWID_i    ( uart.aw_id     ),
+        .AWADDR_i  ( uart.aw_addr   ),
+        .AWLEN_i   ( uart.aw_len    ),
+        .AWSIZE_i  ( uart.aw_size   ),
+        .AWBURST_i ( uart.aw_burst  ),
+        .AWLOCK_i  ( uart.aw_lock   ),
+        .AWCACHE_i ( uart.aw_cache  ),
+        .AWPROT_i  ( uart.aw_prot   ),
+        .AWREGION_i( uart.aw_region ),
+        .AWUSER_i  ( uart.aw_user   ),
+        .AWQOS_i   ( uart.aw_qos    ),
+        .AWVALID_i ( uart.aw_valid  ),
+        .AWREADY_o ( uart.aw_ready  ),
+        .WDATA_i   ( uart.w_data    ),
+        .WSTRB_i   ( uart.w_strb    ),
+        .WLAST_i   ( uart.w_last    ),
+        .WUSER_i   ( uart.w_user    ),
+        .WVALID_i  ( uart.w_valid   ),
+        .WREADY_o  ( uart.w_ready   ),
+        .BID_o     ( uart.b_id      ),
+        .BRESP_o   ( uart.b_resp    ),
+        .BVALID_o  ( uart.b_valid   ),
+        .BUSER_o   ( uart.b_user    ),
+        .BREADY_i  ( uart.b_ready   ),
+        .ARID_i    ( uart.ar_id     ),
+        .ARADDR_i  ( uart.ar_addr   ),
+        .ARLEN_i   ( uart.ar_len    ),
+        .ARSIZE_i  ( uart.ar_size   ),
+        .ARBURST_i ( uart.ar_burst  ),
+        .ARLOCK_i  ( uart.ar_lock   ),
+        .ARCACHE_i ( uart.ar_cache  ),
+        .ARPROT_i  ( uart.ar_prot   ),
+        .ARREGION_i( uart.ar_region ),
+        .ARUSER_i  ( uart.ar_user   ),
+        .ARQOS_i   ( uart.ar_qos    ),
+        .ARVALID_i ( uart.ar_valid  ),
+        .ARREADY_o ( uart.ar_ready  ),
+        .RID_o     ( uart.r_id      ),
+        .RDATA_o   ( uart.r_data    ),
+        .RRESP_o   ( uart.r_resp    ),
+        .RLAST_o   ( uart.r_last    ),
+        .RUSER_o   ( uart.r_user    ),
+        .RVALID_o  ( uart.r_valid   ),
+        .RREADY_i  ( uart.r_ready   ),
+        .PENABLE   ( uart_penable   ),
+        .PWRITE    ( uart_pwrite    ),
+        .PADDR     ( uart_paddr     ),
+        .PSEL      ( uart_psel      ),
+        .PWDATA    ( uart_pwdata    ),
+        .PRDATA    ( uart_prdata    ),
+        .PREADY    ( uart_pready    ),
+        .PSLVERR   ( uart_pslverr   )
+    );
+
+    if (InclUART) begin : gen_uart
+        apb_uart i_apb_uart (
+            .CLK     ( clk_i           ),
+            .RSTN    ( rst_ni          ),
+            .PSEL    ( uart_psel       ),
+            .PENABLE ( uart_penable    ),
+            .PWRITE  ( uart_pwrite     ),
+            .PADDR   ( uart_paddr[4:2] ),
+            .PWDATA  ( uart_pwdata     ),
+            .PRDATA  ( uart_prdata     ),
+            .PREADY  ( uart_pready     ),
+            .PSLVERR ( uart_pslverr    ),
+            .INT     ( irq_sources[0]  ),
+            .OUT1N   (                 ), // keep open
+            .OUT2N   (                 ), // keep open
+            .RTSN    (                 ), // no flow control
+            .DTRN    (                 ), // no flow control
+            .CTSN    ( 1'b0            ),
+            .DSRN    ( 1'b0            ),
+            .DCDN    ( 1'b0            ),
+            .RIN     ( 1'b0            ),
+            .SIN     ( rx_i            ),
+            .SOUT    ( tx_o            )
+        );
+    end else begin
+        assign irq_sources[0] = 1'b0;
+        /* pragma translate_off */
+        mock_uart i_mock_uart (
+            .clk_i     ( clk_i        ),
+            .rst_ni    ( rst_ni       ),
+            .penable_i ( uart_penable ),
+            .pwrite_i  ( uart_pwrite  ),
+            .paddr_i   ( uart_paddr   ),
+            .psel_i    ( uart_psel    ),
+            .pwdata_i  ( uart_pwdata  ),
+            .prdata_o  ( uart_prdata  ),
+            .pready_o  ( uart_pready  ),
+            .pslverr_o ( uart_pslverr )
+        );
+        /* pragma translate_on */
+    end
+
+    // ---------------
+    // 3. SPI
+    // ---------------
+    if (InclSPI) begin : gen_spi
+        logic [31:0] s_axi_spi_awaddr;
+        logic [7:0]  s_axi_spi_awlen;
+        logic [2:0]  s_axi_spi_awsize;
+        logic [1:0]  s_axi_spi_awburst;
+        logic [0:0]  s_axi_spi_awlock;
+        logic [3:0]  s_axi_spi_awcache;
+        logic [2:0]  s_axi_spi_awprot;
+        logic [3:0]  s_axi_spi_awregion;
+        logic [3:0]  s_axi_spi_awqos;
+        logic        s_axi_spi_awvalid;
+        logic        s_axi_spi_awready;
+        logic [31:0] s_axi_spi_wdata;
+        logic [3:0]  s_axi_spi_wstrb;
+        logic        s_axi_spi_wlast;
+        logic        s_axi_spi_wvalid;
+        logic        s_axi_spi_wready;
+        logic [1:0]  s_axi_spi_bresp;
+        logic        s_axi_spi_bvalid;
+        logic        s_axi_spi_bready;
+        logic [31:0] s_axi_spi_araddr;
+        logic [7:0]  s_axi_spi_arlen;
+        logic [2:0]  s_axi_spi_arsize;
+        logic [1:0]  s_axi_spi_arburst;
+        logic [0:0]  s_axi_spi_arlock;
+        logic [3:0]  s_axi_spi_arcache;
+        logic [2:0]  s_axi_spi_arprot;
+        logic [3:0]  s_axi_spi_arregion;
+        logic [3:0]  s_axi_spi_arqos;
+        logic        s_axi_spi_arvalid;
+        logic        s_axi_spi_arready;
+        logic [31:0] s_axi_spi_rdata;
+        logic [1:0]  s_axi_spi_rresp;
+        logic        s_axi_spi_rlast;
+        logic        s_axi_spi_rvalid;
+        logic        s_axi_spi_rready;
+
+        xlnx_axi_clock_converter i_xlnx_axi_clock_converter_spi (
+            .s_axi_aclk     ( clk_i              ),
+            .s_axi_aresetn  ( rst_ni             ),
+
+            .s_axi_awid     ( spi.aw_id          ),
+            .s_axi_awaddr   ( spi.aw_addr[31:0]  ),
+            .s_axi_awlen    ( spi.aw_len         ),
+            .s_axi_awsize   ( spi.aw_size        ),
+            .s_axi_awburst  ( spi.aw_burst       ),
+            .s_axi_awlock   ( spi.aw_lock        ),
+            .s_axi_awcache  ( spi.aw_cache       ),
+            .s_axi_awprot   ( spi.aw_prot        ),
+            .s_axi_awregion ( spi.aw_region      ),
+            .s_axi_awqos    ( spi.aw_qos         ),
+            .s_axi_awvalid  ( spi.aw_valid       ),
+            .s_axi_awready  ( spi.aw_ready       ),
+            .s_axi_wdata    ( spi.w_data         ),
+            .s_axi_wstrb    ( spi.w_strb         ),
+            .s_axi_wlast    ( spi.w_last         ),
+            .s_axi_wvalid   ( spi.w_valid        ),
+            .s_axi_wready   ( spi.w_ready        ),
+            .s_axi_bid      ( spi.b_id           ),
+            .s_axi_bresp    ( spi.b_resp         ),
+            .s_axi_bvalid   ( spi.b_valid        ),
+            .s_axi_bready   ( spi.b_ready        ),
+            .s_axi_arid     ( spi.ar_id          ),
+            .s_axi_araddr   ( spi.ar_addr[31:0]  ),
+            .s_axi_arlen    ( spi.ar_len         ),
+            .s_axi_arsize   ( spi.ar_size        ),
+            .s_axi_arburst  ( spi.ar_burst       ),
+            .s_axi_arlock   ( spi.ar_lock        ),
+            .s_axi_arcache  ( spi.ar_cache       ),
+            .s_axi_arprot   ( spi.ar_prot        ),
+            .s_axi_arregion ( spi.ar_region      ),
+            .s_axi_arqos    ( spi.ar_qos         ),
+            .s_axi_arvalid  ( spi.ar_valid       ),
+            .s_axi_arready  ( spi.ar_ready       ),
+            .s_axi_rid      ( spi.r_id           ),
+            .s_axi_rdata    ( spi.r_data         ),
+            .s_axi_rresp    ( spi.r_resp         ),
+            .s_axi_rlast    ( spi.r_last         ),
+            .s_axi_rvalid   ( spi.r_valid        ),
+            .s_axi_rready   ( spi.r_ready        ),
+
+            .m_axi_awaddr   ( s_axi_spi_awaddr   ),
+            .m_axi_awlen    ( s_axi_spi_awlen    ),
+            .m_axi_awsize   ( s_axi_spi_awsize   ),
+            .m_axi_awburst  ( s_axi_spi_awburst  ),
+            .m_axi_awlock   ( s_axi_spi_awlock   ),
+            .m_axi_awcache  ( s_axi_spi_awcache  ),
+            .m_axi_awprot   ( s_axi_spi_awprot   ),
+            .m_axi_awregion ( s_axi_spi_awregion ),
+            .m_axi_awqos    ( s_axi_spi_awqos    ),
+            .m_axi_awvalid  ( s_axi_spi_awvalid  ),
+            .m_axi_awready  ( s_axi_spi_awready  ),
+            .m_axi_wdata    ( s_axi_spi_wdata    ),
+            .m_axi_wstrb    ( s_axi_spi_wstrb    ),
+            .m_axi_wlast    ( s_axi_spi_wlast    ),
+            .m_axi_wvalid   ( s_axi_spi_wvalid   ),
+            .m_axi_wready   ( s_axi_spi_wready   ),
+            .m_axi_bresp    ( s_axi_spi_bresp    ),
+            .m_axi_bvalid   ( s_axi_spi_bvalid   ),
+            .m_axi_bready   ( s_axi_spi_bready   ),
+            .m_axi_araddr   ( s_axi_spi_araddr   ),
+            .m_axi_arlen    ( s_axi_spi_arlen    ),
+            .m_axi_arsize   ( s_axi_spi_arsize   ),
+            .m_axi_arburst  ( s_axi_spi_arburst  ),
+            .m_axi_arlock   ( s_axi_spi_arlock   ),
+            .m_axi_arcache  ( s_axi_spi_arcache  ),
+            .m_axi_arprot   ( s_axi_spi_arprot   ),
+            .m_axi_arregion ( s_axi_spi_arregion ),
+            .m_axi_arqos    ( s_axi_spi_arqos    ),
+            .m_axi_arvalid  ( s_axi_spi_arvalid  ),
+            .m_axi_arready  ( s_axi_spi_arready  ),
+            .m_axi_rdata    ( s_axi_spi_rdata    ),
+            .m_axi_rresp    ( s_axi_spi_rresp    ),
+            .m_axi_rlast    ( s_axi_spi_rlast    ),
+            .m_axi_rvalid   ( s_axi_spi_rvalid   ),
+            .m_axi_rready   ( s_axi_spi_rready   )
+        );
+
+        xlnx_axi_quad_spi i_xlnx_axi_quad_spi (
+            .ext_spi_clk    ( clk_i                  ),
+            .s_axi4_aclk    ( clk_i                  ),
+            .s_axi4_aresetn ( rst_ni                 ),
+            .s_axi4_awaddr  ( s_axi_spi_awaddr[23:0] ),
+            .s_axi4_awlen   ( s_axi_spi_awlen        ),
+            .s_axi4_awsize  ( s_axi_spi_awsize       ),
+            .s_axi4_awburst ( s_axi_spi_awburst      ),
+            .s_axi4_awlock  ( s_axi_spi_awlock       ),
+            .s_axi4_awcache ( s_axi_spi_awcache      ),
+            .s_axi4_awprot  ( s_axi_spi_awprot       ),
+            .s_axi4_awvalid ( s_axi_spi_awvalid      ),
+            .s_axi4_awready ( s_axi_spi_awready      ),
+            .s_axi4_wdata   ( s_axi_spi_wdata        ),
+            .s_axi4_wstrb   ( s_axi_spi_wstrb        ),
+            .s_axi4_wlast   ( s_axi_spi_wlast        ),
+            .s_axi4_wvalid  ( s_axi_spi_wvalid       ),
+            .s_axi4_wready  ( s_axi_spi_wready       ),
+            .s_axi4_bresp   ( s_axi_spi_bresp        ),
+            .s_axi4_bvalid  ( s_axi_spi_bvalid       ),
+            .s_axi4_bready  ( s_axi_spi_bready       ),
+            .s_axi4_araddr  ( s_axi_spi_araddr[23:0] ),
+            .s_axi4_arlen   ( s_axi_spi_arlen        ),
+            .s_axi4_arsize  ( s_axi_spi_arsize       ),
+            .s_axi4_arburst ( s_axi_spi_arburst      ),
+            .s_axi4_arlock  ( s_axi_spi_arlock       ),
+            .s_axi4_arcache ( s_axi_spi_arcache      ),
+            .s_axi4_arprot  ( s_axi_spi_arprot       ),
+            .s_axi4_arvalid ( s_axi_spi_arvalid      ),
+            .s_axi4_arready ( s_axi_spi_arready      ),
+            .s_axi4_rdata   ( s_axi_spi_rdata        ),
+            .s_axi4_rresp   ( s_axi_spi_rresp        ),
+            .s_axi4_rlast   ( s_axi_spi_rlast        ),
+            .s_axi4_rvalid  ( s_axi_spi_rvalid       ),
+            .s_axi4_rready  ( s_axi_spi_rready       ),
+
+            .io0_i          ( '0                     ),
+            .io0_o          ( spi_mosi               ),
+            .io0_t          ( '0                     ),
+            .io1_i          ( spi_miso               ),
+            .io1_o          (                        ),
+            .io1_t          ( '0                     ),
+            .ss_i           ( '0                     ),
+            .ss_o           ( spi_ss                 ),
+            .ss_t           ( '0                     ),
+            .sck_o          ( spi_clk_o              ),
+            .sck_i          ( '0                     ),
+            .sck_t          (                        ),
+            .ip2intc_irpt   ( irq_sources[1]         )
+            // .ip2intc_irpt   ( irq_sources[1]         )
+        );
+        // assign irq_sources [1] = 1'b0;
+    end else begin
+        assign spi_clk_o = 1'b0;
+        assign spi_mosi = 1'b0;
+        assign spi_ss = 1'b0;
+
+        assign irq_sources [1] = 1'b0;
+        assign spi.aw_ready = 1'b1;
+        assign spi.ar_ready = 1'b1;
+        assign spi.w_ready = 1'b1;
+
+        assign spi.b_valid = spi.aw_valid;
+        assign spi.b_id = spi.aw_id;
+        assign spi.b_resp = axi_pkg::RESP_SLVERR;
+        assign spi.b_user = '0;
+
+        assign spi.r_valid = spi.ar_valid;
+        assign spi.r_resp = axi_pkg::RESP_SLVERR;
+        assign spi.r_data = 'hdeadbeef;
+        assign spi.r_last = 1'b1;
+    end
+
+
+    // ---------------
+    // 4. Ethernet
+    // ---------------
+    if (0) // constant-false: the (empty) Ethernet branch is never elaborated, the tie-off stub below always is
+      begin
+      end
+    else
+      begin
+        assign irq_sources [2] = 1'b0; // interrupt source 2 is tied low
+        assign ethernet.aw_ready = 1'b1; // AW, AR and W requests are always accepted
+        assign ethernet.ar_ready = 1'b1;
+        assign ethernet.w_ready = 1'b1;
+
+        assign ethernet.b_valid = ethernet.aw_valid; // write response is raised combinationally with AW valid
+        assign ethernet.b_id = ethernet.aw_id;
+        assign ethernet.b_resp = axi_pkg::RESP_SLVERR; // every write is answered with a slave error
+        assign ethernet.b_user = '0;
+
+        assign ethernet.r_valid = ethernet.ar_valid; // read response is raised combinationally with AR valid
+        assign ethernet.r_resp = axi_pkg::RESP_SLVERR; // every read is answered with a slave error
+        assign ethernet.r_data = 'hdeadbeef; // recognisable dummy read data
+        assign ethernet.r_last = 1'b1; // NOTE(review): r_id / r_user are not driven in this branch -- confirm that is intended
+    end
+
+    // ---------------
+    // 5. Timer
+    // ---------------
+    if (InclTimer) begin : gen_timer // AXI-to-APB bridge feeding an APB timer. NOTE(review): no else branch -- timer.* responses and irq_sources[6:3] have no driver here when InclTimer is 0; confirm
+        logic         timer_penable; // APB signals between the bridge (i_axi2apb_64_32_timer) and the timer (i_timer)
+        logic         timer_pwrite;
+        logic [31:0]  timer_paddr;
+        logic         timer_psel;
+        logic [31:0]  timer_pwdata;
+        logic [31:0]  timer_prdata;
+        logic         timer_pready;
+        logic         timer_pslverr;
+
+        axi2apb_64_32 #(
+            .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ),
+            .AXI4_RDATA_WIDTH   ( AxiDataWidth ),
+            .AXI4_WDATA_WIDTH   ( AxiDataWidth ),
+            .AXI4_ID_WIDTH      ( AxiIdWidth   ),
+            .AXI4_USER_WIDTH    ( AxiUserWidth ),
+            .BUFF_DEPTH_SLAVE   ( 2            ),
+            .APB_ADDR_WIDTH     ( 32           )
+        ) i_axi2apb_64_32_timer (
+            .ACLK      ( clk_i           ),
+            .ARESETn   ( rst_ni          ),
+            .test_en_i ( 1'b0            ),
+            .AWID_i    ( timer.aw_id     ), // AXI write address channel
+            .AWADDR_i  ( timer.aw_addr   ),
+            .AWLEN_i   ( timer.aw_len    ),
+            .AWSIZE_i  ( timer.aw_size   ),
+            .AWBURST_i ( timer.aw_burst  ),
+            .AWLOCK_i  ( timer.aw_lock   ),
+            .AWCACHE_i ( timer.aw_cache  ),
+            .AWPROT_i  ( timer.aw_prot   ),
+            .AWREGION_i( timer.aw_region ),
+            .AWUSER_i  ( timer.aw_user   ),
+            .AWQOS_i   ( timer.aw_qos    ),
+            .AWVALID_i ( timer.aw_valid  ),
+            .AWREADY_o ( timer.aw_ready  ),
+            .WDATA_i   ( timer.w_data    ), // AXI write data channel
+            .WSTRB_i   ( timer.w_strb    ),
+            .WLAST_i   ( timer.w_last    ),
+            .WUSER_i   ( timer.w_user    ),
+            .WVALID_i  ( timer.w_valid   ),
+            .WREADY_o  ( timer.w_ready   ),
+            .BID_o     ( timer.b_id      ), // AXI write response channel
+            .BRESP_o   ( timer.b_resp    ),
+            .BVALID_o  ( timer.b_valid   ),
+            .BUSER_o   ( timer.b_user    ),
+            .BREADY_i  ( timer.b_ready   ),
+            .ARID_i    ( timer.ar_id     ), // AXI read address channel
+            .ARADDR_i  ( timer.ar_addr   ),
+            .ARLEN_i   ( timer.ar_len    ),
+            .ARSIZE_i  ( timer.ar_size   ),
+            .ARBURST_i ( timer.ar_burst  ),
+            .ARLOCK_i  ( timer.ar_lock   ),
+            .ARCACHE_i ( timer.ar_cache  ),
+            .ARPROT_i  ( timer.ar_prot   ),
+            .ARREGION_i( timer.ar_region ),
+            .ARUSER_i  ( timer.ar_user   ),
+            .ARQOS_i   ( timer.ar_qos    ),
+            .ARVALID_i ( timer.ar_valid  ),
+            .ARREADY_o ( timer.ar_ready  ),
+            .RID_o     ( timer.r_id      ), // AXI read data channel
+            .RDATA_o   ( timer.r_data    ),
+            .RRESP_o   ( timer.r_resp    ),
+            .RLAST_o   ( timer.r_last    ),
+            .RUSER_o   ( timer.r_user    ),
+            .RVALID_o  ( timer.r_valid   ),
+            .RREADY_i  ( timer.r_ready   ),
+            .PENABLE   ( timer_penable   ), // APB side, wired to i_timer below
+            .PWRITE    ( timer_pwrite    ),
+            .PADDR     ( timer_paddr     ),
+            .PSEL      ( timer_psel      ),
+            .PWDATA    ( timer_pwdata    ),
+            .PRDATA    ( timer_prdata    ),
+            .PREADY    ( timer_pready    ),
+            .PSLVERR   ( timer_pslverr   )
+        );
+
+        apb_timer #(
+                .APB_ADDR_WIDTH ( 32 ),
+                .TIMER_CNT      ( 2  )
+        ) i_timer (
+            .HCLK    ( clk_i            ),
+            .HRESETn ( rst_ni           ),
+            .PSEL    ( timer_psel       ),
+            .PENABLE ( timer_penable    ),
+            .PWRITE  ( timer_pwrite     ),
+            .PADDR   ( timer_paddr      ),
+            .PWDATA  ( timer_pwdata     ),
+            .PRDATA  ( timer_prdata     ),
+            .PREADY  ( timer_pready     ),
+            .PSLVERR ( timer_pslverr    ),
+            .irq_o   ( irq_sources[6:3] ) // the timer's four interrupt lines drive interrupt sources 3..6
+        );
+    end
+endmodule
diff --git a/test/type_param/corev_apu/tb/ariane_soc_pkg.sv b/test/type_param/corev_apu/tb/ariane_soc_pkg.sv
new file mode 100644
index 0000000..cc57f80
--- /dev/null
+++ b/test/type_param/corev_apu/tb/ariane_soc_pkg.sv
@@ -0,0 +1,68 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Description: Contains SoC information as constants
+package ariane_soc; // SoC-wide constants: interrupt counts, crossbar port indices and the address map
+  // Interrupt targets: M-Mode Hart, S-Mode Hart
+  localparam int unsigned NumTargets = 2;
+  // Interrupt sources: Uart, SPI, Ethernet, remainder reserved
+  localparam int unsigned NumSources = 30;
+  localparam int unsigned MaxPriority = 7; // presumably the highest interrupt priority level -- TODO confirm against the consumer
+
+  localparam NrSlaves = 2; // actually masters, but slaves on the crossbar (ariane_testharness sizes its slave[] AXI_BUS array with this)
+
+  typedef enum int unsigned { // crossbar master-port index per peripheral; ariane_testharness uses these as master[] indices and addr_map idx values
+    DRAM     = 0,
+    GPIO     = 1,
+    Ethernet = 2,
+    SPI      = 3,
+    Timer    = 4,
+    UART     = 5,
+    PLIC     = 6,
+    CLINT    = 7,
+    ROM      = 8,
+    Debug    = 9
+  } axi_slaves_t;
+
+  localparam NB_PERIPHERALS = Debug + 1; // number of entries in axi_slaves_t (10); Debug must stay the last enumerator
+
+  // Size of each address window; ariane_testharness computes end_addr as <X>Base + <X>Length
+  localparam logic[63:0] DebugLength    = 64'h1000;
+  localparam logic[63:0] ROMLength      = 64'h10000;
+  localparam logic[63:0] CLINTLength    = 64'hC0000;
+  localparam logic[63:0] PLICLength     = 64'h3FF_FFFF; // NOTE(review): the only length of the form 2^n-1 (PLICBase + this = 64'h0FFF_FFFF) -- confirm it matches the end_addr convention of the others
+  localparam logic[63:0] UARTLength     = 64'h1000;
+  localparam logic[63:0] TimerLength    = 64'h1000;
+  localparam logic[63:0] SPILength      = 64'h800000;
+  localparam logic[63:0] EthernetLength = 64'h10000;
+  localparam logic[63:0] GPIOLength     = 64'h1000;
+  localparam logic[63:0] DRAMLength     = 64'h40000000; // 1GByte of DDR (split between two chips on Genesys2)
+  localparam logic[63:0] SRAMLength     = 64'h1800000;  // 24 MByte of SRAM
+  // Instantiate AXI protocol checkers (1'b0 here; the code that reads this flag is outside this package)
+  localparam bit GenProtocolChecker = 1'b0;
+
+  typedef enum logic [63:0] { // start address of each peripheral's address window
+    DebugBase    = 64'h0000_0000,
+    ROMBase      = 64'h0001_0000,
+    CLINTBase    = 64'h0200_0000,
+    PLICBase     = 64'h0C00_0000,
+    UARTBase     = 64'h1000_0000,
+    TimerBase    = 64'h1800_0000,
+    SPIBase      = 64'h2000_0000,
+    EthernetBase = 64'h3000_0000,
+    GPIOBase     = 64'h4000_0000,
+    DRAMBase     = 64'h8000_0000
+  } soc_bus_start_t;
+
+  localparam NrRegion = 1;
+  localparam logic [NrRegion-1:0][NB_PERIPHERALS-1:0] ValidRule = {{NrRegion * NB_PERIPHERALS}{1'b1}}; // all NrRegion*NB_PERIPHERALS bits set; presumably marks every rule valid -- consumer not visible here
+
+endpackage
diff --git a/test/type_param/corev_apu/tb/ariane_testharness.sv b/test/type_param/corev_apu/tb/ariane_testharness.sv
new file mode 100644
index 0000000..3530da0
--- /dev/null
+++ b/test/type_param/corev_apu/tb/ariane_testharness.sv
@@ -0,0 +1,807 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.03.2017
+// Description: Test-harness for Ariane
+//              Instantiates an AXI-Bus and memories
+
+`include "axi/assign.svh"
+
+module ariane_testharness #(
+  parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg,
+  parameter bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace),
+  //
+  parameter int unsigned AXI_USER_WIDTH    = ariane_pkg::AXI_USER_WIDTH,
+  parameter int unsigned AXI_USER_EN       = ariane_pkg::AXI_USER_EN,
+  parameter int unsigned AXI_ADDRESS_WIDTH = 64,
+  parameter int unsigned AXI_DATA_WIDTH    = 64,
+  parameter bit          InclSimDTM        = 1'b1,
+  parameter int unsigned NUM_WORDS         = 2**25,         // memory size
+  parameter bit          StallRandomOutput = 1'b0,
+  parameter bit          StallRandomInput  = 1'b0
+) (
+  input  logic                           clk_i,
+  input  logic                           rtc_i,
+  input  logic                           rst_ni,
+  output logic [31:0]                    exit_o
+);
+
+  localparam [7:0] hart_id = '0;
+  
+  localparam type rvfi_instr_t = struct packed {
+      logic [config_pkg::NRET-1:0]                  valid;
+      logic [config_pkg::NRET*64-1:0]               order;
+      logic [config_pkg::NRET*config_pkg::ILEN-1:0] insn;
+      logic [config_pkg::NRET-1:0]                  trap;
+      logic [config_pkg::NRET*riscv::XLEN-1:0]      cause;
+      logic [config_pkg::NRET-1:0]                  halt;
+      logic [config_pkg::NRET-1:0]                  intr;
+      logic [config_pkg::NRET*2-1:0]                mode;
+      logic [config_pkg::NRET*2-1:0]                ixl;
+      logic [config_pkg::NRET*5-1:0]                rs1_addr;
+      logic [config_pkg::NRET*5-1:0]                rs2_addr;
+      logic [config_pkg::NRET*riscv::XLEN-1:0]      rs1_rdata;
+      logic [config_pkg::NRET*riscv::XLEN-1:0]      rs2_rdata;
+      logic [config_pkg::NRET*5-1:0]                rd_addr;
+      logic [config_pkg::NRET*riscv::XLEN-1:0]      rd_wdata;
+      logic [config_pkg::NRET*riscv::XLEN-1:0]      pc_rdata;
+      logic [config_pkg::NRET*riscv::XLEN-1:0]      pc_wdata;
+      logic [config_pkg::NRET*riscv::VLEN-1:0]      mem_addr;
+      logic [config_pkg::NRET*riscv::PLEN-1:0]      mem_paddr;
+      logic [config_pkg::NRET*(riscv::XLEN/8)-1:0]  mem_rmask;
+      logic [config_pkg::NRET*(riscv::XLEN/8)-1:0]  mem_wmask;
+      logic [config_pkg::NRET*riscv::XLEN-1:0]      mem_rdata;
+      logic [config_pkg::NRET*riscv::XLEN-1:0]      mem_wdata;
+  };
+    
+  localparam type rvfi_probes_t = struct packed { 
+    logic [ariane_pkg::TRANS_ID_BITS-1:0] issue_pointer; 
+    logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] commit_pointer; 
+    logic                            flush_unissued_instr;
+    logic                            decoded_instr_valid;
+    logic                            decoded_instr_ack;
+    logic                            flush;
+    logic                            issue_instr_ack;
+    logic                            fetch_entry_valid;
+    logic [31:0]                     instruction;
+    logic                            is_compressed;
+    riscv::xlen_t                    rs1_forwarding;
+    riscv::xlen_t                    rs2_forwarding;
+    ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr;
+    ariane_pkg::exception_t ex_commit; 
+    riscv::priv_lvl_t priv_lvl;
+    ariane_pkg::lsu_ctrl_t                       lsu_ctrl;
+    logic [((CVA6Cfg.CvxifEn || CVA6Cfg.RVV) ? 5 : 4)-1:0][riscv::XLEN-1:0] wbdata;
+    logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack;
+    logic [riscv::PLEN-1:0] mem_paddr;
+    logic debug_mode;
+    logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata;
+  };
+
+  // disable test-enable
+  logic        test_en;
+  logic        ndmreset;
+  logic        ndmreset_n;
+  logic        debug_req_core;
+
+  int          jtag_enable;
+  logic        init_done;
+  logic [31:0] jtag_exit, dmi_exit;
+  logic [31:0] rvfi_exit;
+
+  logic        jtag_TCK;
+  logic        jtag_TMS;
+  logic        jtag_TDI;
+  logic        jtag_TRSTn;
+  logic        jtag_TDO_data;
+  logic        jtag_TDO_driven;
+
+  logic        debug_req_valid;
+  logic        debug_req_ready;
+  logic        debug_resp_valid;
+  logic        debug_resp_ready;
+
+  logic        jtag_req_valid;
+  logic [6:0]  jtag_req_bits_addr;
+  logic [1:0]  jtag_req_bits_op;
+  logic [31:0] jtag_req_bits_data;
+  logic        jtag_resp_ready;
+  logic        jtag_resp_valid;
+
+  logic        dmi_req_valid;
+  logic        dmi_resp_ready;
+  logic        dmi_resp_valid;
+
+  dm::dmi_req_t  jtag_dmi_req;
+  dm::dmi_req_t  dmi_req;
+
+  dm::dmi_req_t  debug_req;
+  dm::dmi_resp_t debug_resp;
+
+  assign test_en = 1'b0;
+
+  AXI_BUS #(
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH       ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH          ),
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidth ),
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH          )
+  ) slave[ariane_soc::NrSlaves-1:0]();
+
+  AXI_BUS #(
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidthSlave ),
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH               )
+  ) master[ariane_soc::NB_PERIPHERALS-1:0]();
+
+  rstgen i_rstgen_main (
+    .clk_i        ( clk_i                ),
+    .rst_ni       ( rst_ni & (~ndmreset) ),
+    .test_mode_i  ( test_en              ),
+    .rst_no       ( ndmreset_n           ),
+    .init_no      (                      ) // keep open
+  );
+
+  // ---------------
+  // Debug
+  // ---------------
+  assign init_done = rst_ni;
+
+  logic debug_enable;
+  initial begin
+    if (!$value$plusargs("jtag_rbb_enable=%b", jtag_enable)) jtag_enable = 'h0;
+    if ($test$plusargs("debug_disable")) debug_enable = 'h0; else debug_enable = 'h1;
+    if (riscv::XLEN != 32 & riscv::XLEN != 64) $error("XLEN different from 32 and 64");
+  end
+
+  // debug interface mux: jtag_enable[0] selects the JTAG path (jtag_*), otherwise the SimDTM path (dmi_*)
+  assign debug_req_valid     = (jtag_enable[0]) ? jtag_req_valid     : dmi_req_valid;
+  assign debug_resp_ready    = (jtag_enable[0]) ? jtag_resp_ready    : dmi_resp_ready;
+  assign debug_req           = (jtag_enable[0]) ? jtag_dmi_req       : dmi_req;
+  if (ariane_pkg::RVFI) begin
+    assign exit_o              = (jtag_enable[0]) ? jtag_exit          : rvfi_exit;
+  end else begin
+    assign exit_o              = (jtag_enable[0]) ? jtag_exit          : dmi_exit;
+  end
+  assign jtag_resp_valid     = (jtag_enable[0]) ? debug_resp_valid   : 1'b0;
+  assign dmi_resp_valid      = (jtag_enable[0]) ? 1'b0               : debug_resp_valid;
+
+  // SiFive's SimJTAG Module
+  // Converts to DPI calls
+  SimJTAG i_SimJTAG (
+    .clock                ( clk_i                ),
+    .reset                ( ~rst_ni              ),
+    .enable               ( jtag_enable[0]       ),
+    .init_done            ( init_done            ),
+    .jtag_TCK             ( jtag_TCK             ),
+    .jtag_TMS             ( jtag_TMS             ),
+    .jtag_TDI             ( jtag_TDI             ),
+    .jtag_TRSTn           ( jtag_TRSTn           ),
+    .jtag_TDO_data        ( jtag_TDO_data        ),
+    .jtag_TDO_driven      ( jtag_TDO_driven      ),
+    .exit                 ( jtag_exit            )
+  );
+
+  dmi_jtag i_dmi_jtag (
+    .clk_i            ( clk_i           ),
+    .rst_ni           ( rst_ni          ),
+    .testmode_i       ( test_en         ),
+    .dmi_req_o        ( jtag_dmi_req    ),
+    .dmi_req_valid_o  ( jtag_req_valid  ),
+    .dmi_req_ready_i  ( debug_req_ready ),
+    .dmi_resp_i       ( debug_resp      ),
+    .dmi_resp_ready_o ( jtag_resp_ready ),
+    .dmi_resp_valid_i ( jtag_resp_valid ),
+    .dmi_rst_no       (                 ), // not connected
+    .tck_i            ( jtag_TCK        ),
+    .tms_i            ( jtag_TMS        ),
+    .trst_ni          ( jtag_TRSTn      ),
+    .td_i             ( jtag_TDI        ),
+    .td_o             ( jtag_TDO_data   ),
+    .tdo_oe_o         ( jtag_TDO_driven )
+  );
+
+  // SiFive's SimDTM Module
+  // Converts to DPI calls; only instantiated when InclSimDTM is set, otherwise its outputs are tied off below
+  logic [1:0] debug_req_bits_op; // raw 2-bit op code, cast to dm::dtm_op_e for dmi_req.op on the next line
+  assign dmi_req.op = dm::dtm_op_e'(debug_req_bits_op);
+
+  if (InclSimDTM) begin
+    SimDTM i_SimDTM (
+      .clk                  ( clk_i                 ),
+      .reset                ( ~rst_ni               ),
+      .debug_req_valid      ( dmi_req_valid         ),
+      .debug_req_ready      ( debug_req_ready       ),
+      .debug_req_bits_addr  ( dmi_req.addr          ),
+      .debug_req_bits_op    ( debug_req_bits_op     ),
+      .debug_req_bits_data  ( dmi_req.data          ),
+      .debug_resp_valid     ( dmi_resp_valid        ),
+      .debug_resp_ready     ( dmi_resp_ready        ),
+      .debug_resp_bits_resp ( debug_resp.resp       ),
+      .debug_resp_bits_data ( debug_resp.data       ),
+      .exit                 ( dmi_exit              )
+    );
+  end else begin // no SimDTM: never raise a DMI request and report exit code 0
+    assign dmi_req_valid = '0;
+    assign debug_req_bits_op = '0;
+    assign dmi_exit = 1'b0; // NOTE(review): dmi_resp_ready, dmi_req.addr and dmi_req.data are not driven in this branch -- confirm they are don't-care here
+  end
+
+  // This delay window allows the core to read and execute init code
+  // from the bootrom before the first debug request can interrupt
+  // the core. This is needed in cases where an FSBL is involved that
+  // expects a0 and a1 to be initialized with the hart id and a
+  // pointer to the device tree, respectively.
+  localparam int unsigned DmiDelCycles = 500;
+
+  logic debug_req_core_ungtd;
+  int dmi_del_cnt_d, dmi_del_cnt_q;
+
+  assign dmi_del_cnt_d  = (dmi_del_cnt_q) ? dmi_del_cnt_q - 1 : 0;
+  assign debug_req_core = (dmi_del_cnt_q) ? 1'b0 :
+                          (!debug_enable) ? 1'b0 : debug_req_core_ungtd;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_dmi_del_cnt
+    if(!rst_ni) begin
+      dmi_del_cnt_q <= DmiDelCycles;
+    end else begin
+      dmi_del_cnt_q <= dmi_del_cnt_d;
+    end
+  end
+
+  ariane_axi::req_t    dm_axi_m_req;
+  ariane_axi::resp_t   dm_axi_m_resp;
+
+  logic                dm_slave_req;
+  logic                dm_slave_we;
+  logic [64-1:0]       dm_slave_addr;
+  logic [64/8-1:0]     dm_slave_be;
+  logic [64-1:0]       dm_slave_wdata;
+  logic [64-1:0]       dm_slave_rdata;
+
+  logic                dm_master_req;
+  logic [64-1:0]       dm_master_add;
+  logic                dm_master_we;
+  logic [64-1:0]       dm_master_wdata;
+  logic [64/8-1:0]     dm_master_be;
+  logic                dm_master_gnt;
+  logic                dm_master_r_valid;
+  logic [64-1:0]       dm_master_r_rdata;
+
+  // debug module
+  dm_top #(
+    .NrHarts              ( 1                           ),
+    .BusWidth             ( AXI_DATA_WIDTH              ),
+    .SelectableHarts      ( 1'b1                        )
+  ) i_dm_top (
+    .clk_i                ( clk_i                       ),
+    .rst_ni               ( rst_ni                      ), // PoR
+    .testmode_i           ( test_en                     ),
+    .ndmreset_o           ( ndmreset                    ),
+    .dmactive_o           (                             ), // active debug session
+    .debug_req_o          ( debug_req_core_ungtd        ),
+    .unavailable_i        ( '0                          ),
+    .hartinfo_i           ( {ariane_pkg::DebugHartInfo} ),
+    .slave_req_i          ( dm_slave_req                ),
+    .slave_we_i           ( dm_slave_we                 ),
+    .slave_addr_i         ( dm_slave_addr               ),
+    .slave_be_i           ( dm_slave_be                 ),
+    .slave_wdata_i        ( dm_slave_wdata              ),
+    .slave_rdata_o        ( dm_slave_rdata              ),
+    .master_req_o         ( dm_master_req               ),
+    .master_add_o         ( dm_master_add               ),
+    .master_we_o          ( dm_master_we                ),
+    .master_wdata_o       ( dm_master_wdata             ),
+    .master_be_o          ( dm_master_be                ),
+    .master_gnt_i         ( dm_master_gnt               ),
+    .master_r_valid_i     ( dm_master_r_valid           ),
+    .master_r_rdata_i     ( dm_master_r_rdata           ),
+    .dmi_rst_ni           ( rst_ni                      ),
+    .dmi_req_valid_i      ( debug_req_valid             ),
+    .dmi_req_ready_o      ( debug_req_ready             ),
+    .dmi_req_i            ( debug_req                   ),
+    .dmi_resp_valid_o     ( debug_resp_valid            ),
+    .dmi_resp_ready_i     ( debug_resp_ready            ),
+    .dmi_resp_o           ( debug_resp                  )
+  );
+
+
+  axi2mem #(
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidthSlave ),
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH               )
+  ) i_dm_axi2mem (
+    .clk_i      ( clk_i                     ),
+    .rst_ni     ( rst_ni                    ),
+    .slave      ( master[ariane_soc::Debug] ),
+    .req_o      ( dm_slave_req              ),
+    .we_o       ( dm_slave_we               ),
+    .addr_o     ( dm_slave_addr             ),
+    .be_o       ( dm_slave_be               ),
+    .user_o     (                           ),
+    .data_o     ( dm_slave_wdata            ),
+    .user_i     ( '0                        ),
+    .data_i     ( dm_slave_rdata            )
+  );
+
+  `AXI_ASSIGN_FROM_REQ(slave[1], dm_axi_m_req)
+  `AXI_ASSIGN_TO_RESP(dm_axi_m_resp, slave[1])
+
+  axi_adapter #(
+    .CVA6Cfg               ( CVA6Cfg                   ),
+    .DATA_WIDTH            ( AXI_DATA_WIDTH            ),
+    .axi_req_t             ( ariane_axi::req_t         ),
+    .axi_rsp_t             ( ariane_axi::resp_t        )
+  ) i_dm_axi_master (
+    .clk_i                 ( clk_i                     ),
+    .rst_ni                ( rst_ni                    ),
+    .req_i                 ( dm_master_req             ),
+    .type_i                ( ariane_pkg::SINGLE_REQ    ),
+    .amo_i                 ( ariane_pkg::AMO_NONE      ),
+    .gnt_o                 ( dm_master_gnt             ),
+    .addr_i                ( dm_master_add             ),
+    .we_i                  ( dm_master_we              ),
+    .wdata_i               ( dm_master_wdata           ),
+    .be_i                  ( dm_master_be              ),
+    .size_i                ( 2'b11                     ), // always do 64bit here and use byte enables to gate
+    .id_i                  ( '0                        ),
+    .valid_o               ( dm_master_r_valid         ),
+    .rdata_o               ( dm_master_r_rdata         ),
+    .id_o                  (                           ),
+    .critical_word_o       (                           ),
+    .critical_word_valid_o (                           ),
+    .axi_req_o             ( dm_axi_m_req              ),
+    .axi_resp_i            ( dm_axi_m_resp             )
+  );
+
+
+  // ---------------
+  // ROM
+  // ---------------
+  logic                         rom_req;
+  logic [AXI_ADDRESS_WIDTH-1:0] rom_addr;
+  logic [AXI_DATA_WIDTH-1:0]    rom_rdata;
+
+  axi2mem #(
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidthSlave ),
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH               )
+  ) i_axi2rom (
+    .clk_i  ( clk_i                   ),
+    .rst_ni ( ndmreset_n              ),
+    .slave  ( master[ariane_soc::ROM] ),
+    .req_o  ( rom_req                 ),
+    .we_o   (                         ),
+    .addr_o ( rom_addr                ),
+    .be_o   (                         ),
+    .user_o (                         ),
+    .data_o (                         ),
+    .user_i ( '0                      ),
+    .data_i ( rom_rdata               )
+  );
+
+  bootrom i_bootrom (
+    .clk_i      ( clk_i     ),
+    .req_i      ( rom_req   ),
+    .addr_i     ( rom_addr  ),
+    .rdata_o    ( rom_rdata )
+  );
+
+  // ------------------------------
+  // GPIO
+  // ------------------------------
+
+  // GPIO not implemented, adding an error slave here
+
+  ariane_axi_soc::req_slv_t  gpio_req;
+  ariane_axi_soc::resp_slv_t gpio_resp;
+  `AXI_ASSIGN_TO_REQ(gpio_req, master[ariane_soc::GPIO])
+  `AXI_ASSIGN_FROM_RESP(master[ariane_soc::GPIO], gpio_resp)
+  axi_err_slv #(
+    .AxiIdWidth ( ariane_axi_soc::IdWidthSlave ),
+    .req_t      ( ariane_axi_soc::req_slv_t    ),
+    .resp_t     ( ariane_axi_soc::resp_slv_t   )
+  ) i_gpio_err_slv (
+    .clk_i      ( clk_i      ),
+    .rst_ni     ( ndmreset_n ),
+    .test_i     ( test_en    ),
+    .slv_req_i  ( gpio_req ),
+    .slv_resp_o ( gpio_resp )
+  );
+
+
+  // ------------------------------
+  // Memory + Exclusive Access
+  // ------------------------------
+  AXI_BUS #(
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidthSlave ),
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH               )
+  ) dram();
+
+  logic                         req;
+  logic                         we;
+  logic [AXI_ADDRESS_WIDTH-1:0] addr;
+  logic [AXI_DATA_WIDTH/8-1:0]  be;
+  logic [AXI_DATA_WIDTH-1:0]    wdata;
+  logic [AXI_DATA_WIDTH-1:0]    rdata;
+  logic [AXI_USER_WIDTH-1:0]    wuser;
+  logic [AXI_USER_WIDTH-1:0]    ruser;
+
+  axi_riscv_atomics_wrap #(
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidthSlave ),
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH               ),
+    .AXI_MAX_WRITE_TXNS ( 1  ),
+    .RISCV_WORD_WIDTH   ( 64 )
+  ) i_axi_riscv_atomics (
+    .clk_i,
+    .rst_ni ( ndmreset_n               ),
+    .slv    ( master[ariane_soc::DRAM] ),
+    .mst    ( dram                     )
+  );
+
+  AXI_BUS #(
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidthSlave ),
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH               )
+  ) dram_delayed();
+
+  axi_delayer_intf #(
+    .AXI_ID_WIDTH        ( ariane_axi_soc::IdWidthSlave ),
+    .AXI_ADDR_WIDTH      ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH      ( AXI_DATA_WIDTH               ),
+    .AXI_USER_WIDTH      ( AXI_USER_WIDTH               ),
+    .STALL_RANDOM_INPUT  ( StallRandomInput             ),
+    .STALL_RANDOM_OUTPUT ( StallRandomOutput            ),
+    .FIXED_DELAY_INPUT   ( 0                            ),
+    .FIXED_DELAY_OUTPUT  ( 0                            )
+  ) i_axi_delayer (
+    .clk_i  ( clk_i        ),
+    .rst_ni ( ndmreset_n   ),
+    .slv    ( dram         ),
+    .mst    ( dram_delayed )
+  );
+
+  axi2mem #(
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidthSlave ),
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH               )
+  ) i_axi2mem (
+    .clk_i  ( clk_i        ),
+    .rst_ni ( ndmreset_n   ),
+    .slave  ( dram_delayed ),
+    .req_o  ( req          ),
+    .we_o   ( we           ),
+    .addr_o ( addr         ),
+    .be_o   ( be           ),
+    .user_o ( wuser        ),
+    .data_o ( wdata        ),
+    .user_i ( ruser        ),
+    .data_i ( rdata        )
+  );
+
+  sram #(
+    .DATA_WIDTH ( AXI_DATA_WIDTH ),
+    .USER_WIDTH ( AXI_USER_WIDTH ),
+    .USER_EN    ( AXI_USER_EN    ),
+`ifdef VERILATOR
+    .SIM_INIT   ( "none"         ),
+`else
+    .SIM_INIT   ( "zeros"        ),
+`endif
+    .NUM_WORDS  ( NUM_WORDS      )
+  ) i_sram (
+    .clk_i      ( clk_i                                                                       ),
+    .rst_ni     ( rst_ni                                                                      ),
+    .req_i      ( req                                                                         ),
+    .we_i       ( we                                                                          ),
+    .addr_i     ( addr[$clog2(NUM_WORDS)-1+$clog2(AXI_DATA_WIDTH/8):$clog2(AXI_DATA_WIDTH/8)] ),
+    .wuser_i    ( wuser                                                                       ),
+    .wdata_i    ( wdata                                                                       ),
+    .be_i       ( be                                                                          ),
+    .ruser_o    ( ruser                                                                       ),
+    .rdata_o    ( rdata                                                                       )
+  );
+
+  // ---------------
+  // AXI Xbar
+  // ---------------
+
+  axi_pkg::xbar_rule_64_t [ariane_soc::NB_PERIPHERALS-1:0] addr_map;
+
+  assign addr_map = '{
+    '{ idx: ariane_soc::Debug,    start_addr: ariane_soc::DebugBase,    end_addr: ariane_soc::DebugBase + ariane_soc::DebugLength       },
+    '{ idx: ariane_soc::ROM,      start_addr: ariane_soc::ROMBase,      end_addr: ariane_soc::ROMBase + ariane_soc::ROMLength           },
+    '{ idx: ariane_soc::CLINT,    start_addr: ariane_soc::CLINTBase,    end_addr: ariane_soc::CLINTBase + ariane_soc::CLINTLength       },
+    '{ idx: ariane_soc::PLIC,     start_addr: ariane_soc::PLICBase,     end_addr: ariane_soc::PLICBase + ariane_soc::PLICLength         },
+    '{ idx: ariane_soc::UART,     start_addr: ariane_soc::UARTBase,     end_addr: ariane_soc::UARTBase + ariane_soc::UARTLength         },
+    '{ idx: ariane_soc::Timer,    start_addr: ariane_soc::TimerBase,    end_addr: ariane_soc::TimerBase + ariane_soc::TimerLength       },
+    '{ idx: ariane_soc::SPI,      start_addr: ariane_soc::SPIBase,      end_addr: ariane_soc::SPIBase + ariane_soc::SPILength           },
+    '{ idx: ariane_soc::Ethernet, start_addr: ariane_soc::EthernetBase, end_addr: ariane_soc::EthernetBase + ariane_soc::EthernetLength },
+    '{ idx: ariane_soc::GPIO,     start_addr: ariane_soc::GPIOBase,     end_addr: ariane_soc::GPIOBase + ariane_soc::GPIOLength         },
+    '{ idx: ariane_soc::DRAM,     start_addr: ariane_soc::DRAMBase,     end_addr: ariane_soc::DRAMBase + ariane_soc::DRAMLength         }
+  };
+
+  localparam axi_pkg::xbar_cfg_t AXI_XBAR_CFG = '{
+    NoSlvPorts: unsigned'(ariane_soc::NrSlaves),
+    NoMstPorts: unsigned'(ariane_soc::NB_PERIPHERALS),
+    MaxMstTrans: unsigned'(1), // Probably requires update
+    MaxSlvTrans: unsigned'(1), // Probably requires update
+    FallThrough: 1'b0,
+    LatencyMode: axi_pkg::NO_LATENCY,
+    AxiIdWidthSlvPorts: unsigned'(ariane_axi_soc::IdWidth),
+    AxiIdUsedSlvPorts: unsigned'(ariane_axi_soc::IdWidth),
+    UniqueIds: 1'b0,
+    AxiAddrWidth: unsigned'(AXI_ADDRESS_WIDTH),
+    AxiDataWidth: unsigned'(AXI_DATA_WIDTH),
+    NoAddrRules: unsigned'(ariane_soc::NB_PERIPHERALS)
+  };
+
+  axi_xbar_intf #(
+    .AXI_USER_WIDTH ( AXI_USER_WIDTH          ),
+    .Cfg            ( AXI_XBAR_CFG            ),
+    .rule_t         ( axi_pkg::xbar_rule_64_t )
+  ) i_axi_xbar (
+    .clk_i                 ( clk_i      ),
+    .rst_ni                ( ndmreset_n ),
+    .test_i                ( test_en    ),
+    .slv_ports             ( slave      ),
+    .mst_ports             ( master     ),
+    .addr_map_i            ( addr_map   ),
+    .en_default_mst_port_i ( '0         ),
+    .default_mst_port_i    ( '0         )
+  );
+
+  // ---------------
+  // CLINT
+  // ---------------
+  logic ipi;
+  logic timer_irq;
+
+  ariane_axi_soc::req_slv_t  axi_clint_req;
+  ariane_axi_soc::resp_slv_t axi_clint_resp;
+
+  clint #(
+    .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH            ),
+    .AXI_DATA_WIDTH ( AXI_DATA_WIDTH               ),
+    .AXI_ID_WIDTH   ( ariane_axi_soc::IdWidthSlave ),
+    .NR_CORES       ( 1                            ),
+    .axi_req_t      ( ariane_axi_soc::req_slv_t    ),
+    .axi_resp_t     ( ariane_axi_soc::resp_slv_t   )
+  ) i_clint (
+    .clk_i       ( clk_i          ),
+    .rst_ni      ( ndmreset_n     ),
+    .testmode_i  ( test_en        ),
+    .axi_req_i   ( axi_clint_req  ),
+    .axi_resp_o  ( axi_clint_resp ),
+    .rtc_i       ( rtc_i          ),
+    .timer_irq_o ( timer_irq      ),
+    .ipi_o       ( ipi            )
+  );
+
+  `AXI_ASSIGN_TO_REQ(axi_clint_req, master[ariane_soc::CLINT])
+  `AXI_ASSIGN_FROM_RESP(master[ariane_soc::CLINT], axi_clint_resp)
+
+  // ---------------
+  // Peripherals
+  // ---------------
+  logic tx, rx;
+  logic [1:0] irqs;
+
+  ariane_peripherals #(
+    .AxiAddrWidth ( AXI_ADDRESS_WIDTH            ),
+    .AxiDataWidth ( AXI_DATA_WIDTH               ),
+    .AxiIdWidth   ( ariane_axi_soc::IdWidthSlave ),
+    .AxiUserWidth ( AXI_USER_WIDTH               ),
+`ifndef VERILATOR
+    .InclUART     ( 1'b1                     ),
+`else
+    .InclUART     ( 1'b0                     ),
+`endif
+    .InclSPI      ( 1'b0                     ),
+    .InclEthernet ( 1'b0                     )
+  ) i_ariane_peripherals (
+    .clk_i     ( clk_i                        ),
+    .rst_ni    ( ndmreset_n                   ),
+    .plic      ( master[ariane_soc::PLIC]     ),
+    .uart      ( master[ariane_soc::UART]     ),
+    .spi       ( master[ariane_soc::SPI]      ),
+    .ethernet  ( master[ariane_soc::Ethernet] ),
+    .timer     ( master[ariane_soc::Timer]    ),
+    .irq_o     ( irqs                         ),
+    .rx_i      ( rx                           ),
+    .tx_o      ( tx                           ),
+    .eth_txck  ( ),
+    .eth_rxck  ( ),
+    .eth_rxctl ( ),
+    .eth_rxd   ( ),
+    .eth_rst_n ( ),
+    .eth_tx_en ( ),
+    .eth_txd   ( ),
+    .phy_mdio  ( ),
+    .eth_mdc   ( ),
+    .mdio      ( ),
+    .mdc       ( ),
+    .spi_clk_o ( ),
+    .spi_mosi  ( ),
+    .spi_miso  ( ),
+    .spi_ss    ( )
+  );
+
+  uart_bus #(.BAUD_RATE(115200), .PARITY_EN(0)) i_uart_bus (.rx(tx), .tx(rx), .rx_en(1'b1));
+
+  // ---------------
+  // Core
+  // ---------------
+  ariane_axi::req_t    axi_ariane_req;
+  ariane_axi::resp_t   axi_ariane_resp;
+  rvfi_probes_t rvfi_probes;
+  rvfi_instr_t [CVA6Cfg.NrCommitPorts-1:0]  rvfi_instr;
+  
+  ariane #(
+    .CVA6Cfg              ( CVA6Cfg             ),
+    .IsRVFI               ( IsRVFI              ),
+    .rvfi_probes_t        ( rvfi_probes_t       ),
+    .noc_req_t            ( ariane_axi::req_t   ),
+    .noc_resp_t           ( ariane_axi::resp_t  )
+  ) i_ariane (
+    .clk_i                ( clk_i               ),
+    .rst_ni               ( ndmreset_n          ),
+    .boot_addr_i          ( ariane_soc::ROMBase ), // start fetching from ROM
+    .hart_id_i            ( {56'h0, hart_id}    ),
+    .irq_i                ( irqs                ),
+    .ipi_i                ( ipi                 ),
+    .time_irq_i           ( timer_irq           ),
+    .rvfi_probes_o        ( rvfi_probes         ),
+// Disable Debug when simulating with Spike
+`ifdef SPIKE_TANDEM
+    .debug_req_i          ( 1'b0                ),
+`else
+    .debug_req_i          ( debug_req_core      ),
+`endif
+    .noc_req_o            ( axi_ariane_req      ),
+    .noc_resp_i           ( axi_ariane_resp     )
+  );
+
+  `AXI_ASSIGN_FROM_REQ(slave[0], axi_ariane_req)
+  `AXI_ASSIGN_TO_RESP(axi_ariane_resp, slave[0])
+
+  // -------------
+  // Simulation Helper Functions
+  // -------------
+  // check for response errors
+  always_ff @(posedge clk_i) begin : p_assert
+    if (axi_ariane_req.r_ready &&
+      axi_ariane_resp.r_valid &&
+      axi_ariane_resp.r.resp inside {axi_pkg::RESP_DECERR, axi_pkg::RESP_SLVERR}) begin
+      $warning("R Response Errored");
+    end
+    if (axi_ariane_req.b_ready &&
+      axi_ariane_resp.b_valid &&
+      axi_ariane_resp.b.resp inside {axi_pkg::RESP_DECERR, axi_pkg::RESP_SLVERR}) begin
+      $warning("B Response Errored");
+    end
+  end
+
+  cva6_rvfi #(
+      .CVA6Cfg   (CVA6Cfg),
+      .rvfi_instr_t(rvfi_instr_t),
+      .rvfi_probes_t(rvfi_probes_t)
+  ) i_cva6_rvfi (
+      .clk_i     (clk_i),
+      .rst_ni    (rst_ni),
+      .rvfi_probes_i(rvfi_probes),
+      .rvfi_o(rvfi_instr)
+  );
+
+  rvfi_tracer  #(
+    .CVA6Cfg(CVA6Cfg),
+    .rvfi_instr_t(rvfi_instr_t),
+    //
+    .HART_ID(hart_id),
+    .DEBUG_START(0),
+    .DEBUG_STOP(0)
+  ) i_rvfi_tracer (
+    .clk_i(clk_i),
+    .rst_ni(rst_ni),
+    .rvfi_i(rvfi_instr),
+    .end_of_test_o(rvfi_exit)
+  );
+
+`ifdef SPIKE_TANDEM
+    spike #(
+        .CVA6Cfg ( CVA6Cfg ),
+        .rvfi_instr_t(rvfi_instr_t)
+    ) i_spike (
+        .clk_i,
+        .rst_ni,
+        .clint_tick_i   ( rtc_i    ),
+        .rvfi_i         ( rvfi_instr )
+    );
+    initial begin
+        $display("Running binary in tandem mode");
+    end
+`endif
+
+
+`ifdef AXI_SVA
+  // AXI 4 Assertion IP integration - You will need to get your own copy of this IP if you want
+  // to use it
+  Axi4PC #(
+    .DATA_WIDTH(ariane_axi_soc::DataWidth),
+    .WID_WIDTH(ariane_axi_soc::IdWidthSlave),
+    .RID_WIDTH(ariane_axi_soc::IdWidthSlave),
+    .AWUSER_WIDTH(ariane_axi_soc::UserWidth),
+    .WUSER_WIDTH(ariane_axi_soc::UserWidth),
+    .BUSER_WIDTH(ariane_axi_soc::UserWidth),
+    .ARUSER_WIDTH(ariane_axi_soc::UserWidth),
+    .RUSER_WIDTH(ariane_axi_soc::UserWidth),
+    .ADDR_WIDTH(ariane_axi_soc::AddrWidth)
+  ) i_Axi4PC (
+    .ACLK(clk_i),
+    .ARESETn(ndmreset_n),
+    .AWID(dram.aw_id),
+    .AWADDR(dram.aw_addr),
+    .AWLEN(dram.aw_len),
+    .AWSIZE(dram.aw_size),
+    .AWBURST(dram.aw_burst),
+    .AWLOCK(dram.aw_lock),
+    .AWCACHE(dram.aw_cache),
+    .AWPROT(dram.aw_prot),
+    .AWQOS(dram.aw_qos),
+    .AWREGION(dram.aw_region),
+    .AWUSER(dram.aw_user),
+    .AWVALID(dram.aw_valid),
+    .AWREADY(dram.aw_ready),
+    .WLAST(dram.w_last),
+    .WDATA(dram.w_data),
+    .WSTRB(dram.w_strb),
+    .WUSER(dram.w_user),
+    .WVALID(dram.w_valid),
+    .WREADY(dram.w_ready),
+    .BID(dram.b_id),
+    .BRESP(dram.b_resp),
+    .BUSER(dram.b_user),
+    .BVALID(dram.b_valid),
+    .BREADY(dram.b_ready),
+    .ARID(dram.ar_id),
+    .ARADDR(dram.ar_addr),
+    .ARLEN(dram.ar_len),
+    .ARSIZE(dram.ar_size),
+    .ARBURST(dram.ar_burst),
+    .ARLOCK(dram.ar_lock),
+    .ARCACHE(dram.ar_cache),
+    .ARPROT(dram.ar_prot),
+    .ARQOS(dram.ar_qos),
+    .ARREGION(dram.ar_region),
+    .ARUSER(dram.ar_user),
+    .ARVALID(dram.ar_valid),
+    .ARREADY(dram.ar_ready),
+    .RID(dram.r_id),
+    .RLAST(dram.r_last),
+    .RDATA(dram.r_data),
+    .RRESP(dram.r_resp),
+    .RUSER(dram.r_user),
+    .RVALID(dram.r_valid),
+    .RREADY(dram.r_ready),
+    .CACTIVE('0),
+    .CSYSREQ('0),
+    .CSYSACK('0)
+  );
+`endif
+endmodule
diff --git a/test/type_param/corev_apu/tb/axi_intf.sv b/test/type_param/corev_apu/tb/axi_intf.sv
new file mode 100644
index 0000000..41d4b16
--- /dev/null
+++ b/test/type_param/corev_apu/tb/axi_intf.sv
@@ -0,0 +1,311 @@
+// Copyright (c) 2014-2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+//
+// This file defines the interfaces we support.
+
+
+
+/// An AXI4 interface.
+interface AXI_BUS #(
+  parameter AXI_ADDR_WIDTH = -1, // width of addr_t (aw_addr, ar_addr)
+  parameter AXI_DATA_WIDTH = -1, // width of data_t (w_data, r_data)
+  parameter AXI_ID_WIDTH   = -1, // width of id_t (aw_id, b_id, ar_id, r_id)
+  parameter AXI_USER_WIDTH = -1  // width of user_t (all *_user signals)
+);
+  // NOTE(review): the -1 defaults look like placeholders every instance must override - confirm.
+  import axi_pkg::*;
+  // One strobe bit per byte of data.
+  localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8;
+  // Payload types sized from the parameters above.
+  typedef logic [AXI_ID_WIDTH-1:0]   id_t;
+  typedef logic [AXI_ADDR_WIDTH-1:0] addr_t;
+  typedef logic [AXI_DATA_WIDTH-1:0] data_t;
+  typedef logic [AXI_STRB_WIDTH-1:0] strb_t;
+  typedef logic [AXI_USER_WIDTH-1:0] user_t;
+  typedef logic [5:0] atop_t;
+  // AW channel. burst_t, cache_t, prot_t, qos_t, region_t and resp_t are not declared here; presumably they come from axi_pkg.
+  id_t        aw_id;
+  addr_t      aw_addr;
+  logic [7:0] aw_len;
+  logic [2:0] aw_size;
+  burst_t     aw_burst;
+  logic       aw_lock;
+  cache_t     aw_cache;
+  prot_t      aw_prot;
+  qos_t       aw_qos;
+  atop_t      aw_atop;   // present on AW only; there is no ar_atop
+  region_t    aw_region;
+  user_t      aw_user;
+  logic       aw_valid;
+  logic       aw_ready;
+  // W channel
+  data_t      w_data;
+  strb_t      w_strb;
+  logic       w_last;
+  user_t      w_user;
+  logic       w_valid;
+  logic       w_ready;
+  // B channel
+  id_t        b_id;
+  resp_t      b_resp;
+  user_t      b_user;
+  logic       b_valid;
+  logic       b_ready;
+  // AR channel
+  id_t        ar_id;
+  addr_t      ar_addr;
+  logic [7:0] ar_len;
+  logic [2:0] ar_size;
+  burst_t     ar_burst;
+  logic       ar_lock;
+  cache_t     ar_cache;
+  prot_t      ar_prot;
+  qos_t       ar_qos;
+  region_t    ar_region;
+  user_t      ar_user;
+  logic       ar_valid;
+  logic       ar_ready;
+  // R channel
+  id_t        r_id;
+  data_t      r_data;
+  resp_t      r_resp;
+  logic       r_last;
+  user_t      r_user;
+  logic       r_valid;
+  logic       r_ready;
+  // Master view: drives AW/W/AR payload and valid plus b_ready/r_ready; receives the ready signals and the B/R payload and valid.
+  modport Master (
+    output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_valid, input aw_ready,
+    output w_data, w_strb, w_last, w_user, w_valid, input w_ready,
+    input b_id, b_resp, b_user, b_valid, output b_ready,
+    output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, input ar_ready,
+    input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready
+  );
+  // Slave view: every direction is the mirror image of the Master modport.
+  modport Slave (
+    input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_valid, output aw_ready,
+    input w_data, w_strb, w_last, w_user, w_valid, output w_ready,
+    output b_id, b_resp, b_user, b_valid, input b_ready,
+    input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, output ar_ready,
+    output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready
+  );
+
+endinterface
+
+
+/// An asynchronous AXI4 interface.
+interface AXI_BUS_ASYNC
+#(
+  parameter AXI_ADDR_WIDTH = -1, // width of aw_addr / ar_addr
+  parameter AXI_DATA_WIDTH = -1, // width of w_data / r_data
+  parameter AXI_ID_WIDTH   = -1, // width of the *_id signals
+  parameter AXI_USER_WIDTH = -1, // width of the *_user signals
+  parameter BUFFER_WIDTH   = -1  // width of every *_writetoken / *_readpointer signal
+);
+  // One strobe bit per byte of data.
+  localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8;
+  // Same payload signals as AXI_BUS, but each channel carries a writetoken/readpointer pair where AXI_BUS has valid/ready.
+  // NOTE(review): the token/pointer pairs presumably serve a clock-domain-crossing FIFO - confirm against the user of this interface.
+  logic [AXI_ID_WIDTH-1:0]    aw_id;
+  logic [AXI_ADDR_WIDTH-1:0]  aw_addr;
+  logic [7:0]                 aw_len;
+  logic [2:0]                 aw_size;
+  logic [1:0]                 aw_burst;
+  logic                       aw_lock;
+  logic [3:0]                 aw_cache;
+  logic [2:0]                 aw_prot;
+  logic [3:0]                 aw_qos;
+  logic [5:0]                 aw_atop;
+  logic [3:0]                 aw_region;
+  logic [AXI_USER_WIDTH-1:0]  aw_user;
+  logic [BUFFER_WIDTH-1:0]    aw_writetoken;
+  logic [BUFFER_WIDTH-1:0]    aw_readpointer;
+  // W channel
+  logic [AXI_DATA_WIDTH-1:0]  w_data;
+  logic [AXI_STRB_WIDTH-1:0]  w_strb;
+  logic                       w_last;
+  logic [AXI_USER_WIDTH-1:0]  w_user;
+  logic [BUFFER_WIDTH-1:0]    w_writetoken;
+  logic [BUFFER_WIDTH-1:0]    w_readpointer;
+  // B channel
+  logic [AXI_ID_WIDTH-1:0]    b_id;
+  logic [1:0]                 b_resp;
+  logic [AXI_USER_WIDTH-1:0]  b_user;
+  logic [BUFFER_WIDTH-1:0]    b_writetoken;
+  logic [BUFFER_WIDTH-1:0]    b_readpointer;
+  // AR channel
+  logic [AXI_ID_WIDTH-1:0]    ar_id;
+  logic [AXI_ADDR_WIDTH-1:0]  ar_addr;
+  logic [7:0]                 ar_len;
+  logic [2:0]                 ar_size;
+  logic [1:0]                 ar_burst;
+  logic                       ar_lock;
+  logic [3:0]                 ar_cache;
+  logic [2:0]                 ar_prot;
+  logic [3:0]                 ar_qos;
+  logic [3:0]                 ar_region;
+  logic [AXI_USER_WIDTH-1:0]  ar_user;
+  logic [BUFFER_WIDTH-1:0]    ar_writetoken;
+  logic [BUFFER_WIDTH-1:0]    ar_readpointer;
+  // R channel
+  logic [AXI_ID_WIDTH-1:0]    r_id;
+  logic [AXI_DATA_WIDTH-1:0]  r_data;
+  logic [1:0]                 r_resp;
+  logic                       r_last;
+  logic [AXI_USER_WIDTH-1:0]  r_user;
+  logic [BUFFER_WIDTH-1:0]    r_writetoken;
+  logic [BUFFER_WIDTH-1:0]    r_readpointer;
+  // Master view: on AW/W/AR it outputs payload and writetoken and reads readpointer; on B/R the directions are reversed.
+  modport Master (
+    output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_writetoken, input aw_readpointer,
+    output w_data, w_strb, w_last, w_user, w_writetoken, input w_readpointer,
+    input b_id, b_resp, b_user, b_writetoken, output b_readpointer,
+    output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_writetoken, input ar_readpointer,
+    input r_id, r_data, r_resp, r_last, r_user, r_writetoken, output r_readpointer
+  );
+  // Slave view: every direction is the mirror image of the Master modport.
+  modport Slave (
+    input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_writetoken, output aw_readpointer,
+    input w_data, w_strb, w_last, w_user, w_writetoken, output w_readpointer,
+    output b_id, b_resp, b_user, b_writetoken, input b_readpointer,
+    input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_writetoken, output ar_readpointer,
+    output r_id, r_data, r_resp, r_last, r_user, r_writetoken, input r_readpointer
+  );
+
+endinterface
+
+
+/// An AXI4-Lite interface.
+interface AXI_LITE #(
+  parameter AXI_ADDR_WIDTH = -1, // width of addr_t (aw_addr, ar_addr)
+  parameter AXI_DATA_WIDTH = -1  // width of data_t (w_data, r_data)
+);
+  // prot_t and resp_t are not declared here; presumably they come from the axi_pkg import below.
+  import axi_pkg::*;
+  // One strobe bit per byte of data.
+  localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8;
+  // Payload types sized from the parameters above.
+  typedef logic [AXI_ADDR_WIDTH-1:0] addr_t;
+  typedef logic [AXI_DATA_WIDTH-1:0] data_t;
+  typedef logic [AXI_STRB_WIDTH-1:0] strb_t;
+
+  // AW channel
+  addr_t aw_addr;
+  prot_t aw_prot;
+  logic  aw_valid;
+  logic  aw_ready;
+  // W channel
+  data_t w_data;
+  strb_t w_strb;
+  logic  w_valid;
+  logic  w_ready;
+  // B channel
+  resp_t b_resp;
+  logic  b_valid;
+  logic  b_ready;
+  // AR channel
+  addr_t ar_addr;
+  prot_t ar_prot;
+  logic  ar_valid;
+  logic  ar_ready;
+  // R channel
+  data_t r_data;
+  resp_t r_resp;
+  logic  r_valid;
+  logic  r_ready;
+  // Master view: drives AW/W/AR payload and valid plus b_ready/r_ready.
+  modport Master (
+    output aw_addr, aw_prot, aw_valid, input aw_ready,
+    output w_data, w_strb, w_valid, input w_ready,
+    input b_resp, b_valid, output b_ready,
+    output ar_addr, ar_prot, ar_valid, input ar_ready,
+    input r_data, r_resp, r_valid, output r_ready
+  );
+  // Slave view: every direction is the mirror image of the Master modport.
+  modport Slave (
+    input aw_addr, aw_prot, aw_valid, output aw_ready,
+    input w_data, w_strb, w_valid, output w_ready,
+    output b_resp, b_valid, input b_ready,
+    input ar_addr, ar_prot, ar_valid, output ar_ready,
+    output r_data, r_resp, r_valid, input r_ready
+  );
+  // NOTE(review): unlike Master/Slave, the out/in modports below do not list aw_prot or ar_prot - confirm this is intended.
+  /// The interface as an output (issuing requests, initiator, master).
+  modport out (
+    output aw_addr, aw_valid, input aw_ready,
+    output w_data, w_strb, w_valid, input w_ready,
+    input b_resp, b_valid, output b_ready,
+    output ar_addr, ar_valid, input ar_ready,
+    input r_data, r_resp, r_valid, output r_ready
+  );
+
+  /// The interface as an input (accepting requests, target, slave).
+  modport in (
+    input aw_addr, aw_valid, output aw_ready,
+    input w_data, w_strb, w_valid, output w_ready,
+    output b_resp, b_valid, input b_ready,
+    input ar_addr, ar_valid, output ar_ready,
+    output r_data, r_resp, r_valid, input r_ready
+  );
+
+endinterface
+
+
+/// An AXI routing table.
+///
+/// For each slave, multiple rules can be defined. Each rule consists of an
+/// address mask and a base. Addresses are masked and then compared against the
+/// base to decide where transfers need to go.
+interface AXI_ROUTING_RULES #(
+  /// The address width (width of each rule's mask and base).
+  parameter int AXI_ADDR_WIDTH = -1,
+  /// The number of slaves in the routing table.
+  parameter int NUM_SLAVE  = -1,
+  /// The number of rules held for each slave.
+  parameter int NUM_RULES  = -1
+);
+  // rules[s][r] is rule r of slave s: an enable bit plus an address mask and base.
+  struct packed {
+    logic enabled;
+    logic [AXI_ADDR_WIDTH-1:0] mask;
+    logic [AXI_ADDR_WIDTH-1:0] base;
+  } [NUM_RULES-1:0] rules [NUM_SLAVE];
+
+  modport xbar(input rules);  // reading side of the table
+  modport cfg(output rules);  // writing side of the table
+
+endinterface
+
+
+/// An AXI arbitration interface.
+interface AXI_ARBITRATION #(
+  /// The number of requestors (width of in_req and in_ack).
+  parameter int NUM_REQ = -1
+);
+
+  // Incoming requests, one bit per requestor, with a matching acknowledge bit each.
+  logic [NUM_REQ-1:0] in_req;
+  logic [NUM_REQ-1:0] in_ack;
+
+  // Outgoing request, its acknowledge, and a $clog2(NUM_REQ)-bit select value.
+  logic out_req;
+  logic out_ack;
+  logic [$clog2(NUM_REQ)-1:0] out_sel;
+
+  // The arbiter side of the interface: reads in_req/out_ack, drives out_req/out_sel/in_ack.
+  modport arb(input  in_req, out_ack, output out_req, out_sel, in_ack);
+
+  // The requestor side of the interface: drives in_req/out_ack, reads out_req/out_sel/in_ack.
+  modport req(output in_req, out_ack, input  out_req, out_sel, in_ack);
+
+endinterface
diff --git a/test/type_param/corev_apu/tb/common/mock_uart.sv b/test/type_param/corev_apu/tb/common/mock_uart.sv
new file mode 100644
index 0000000..6a14904
--- /dev/null
+++ b/test/type_param/corev_apu/tb/common/mock_uart.sv
@@ -0,0 +1,120 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 28/09/2018
+// Description: Mock replacement for UART in testbench (not synthesizable!)
+
+module mock_uart (
+    input  logic          clk_i,
+    input  logic          rst_ni,     // writes are only accepted while this is high
+    input  logic          penable_i,
+    input  logic          pwrite_i,   // 1 = write access, 0 = read access
+    input  logic [31:0]   paddr_i,    // register index is (paddr_i >> 2) & 7
+    input  logic          psel_i,
+    input  logic [31:0]   pwdata_i,   // only bits [7:0] are used
+    output logic [31:0]   prdata_o,   // register value in bits [7:0], upper bits zero
+    output logic          pready_o,   // tied high
+    output logic          pslverr_o   // tied low
+);
+    localparam RBR = 0; // register indices; some share a value and are told apart
+    localparam THR = 0; // by access direction and by bit 7 of lcr
+    localparam IER = 1;
+    localparam IIR = 2;
+    localparam FCR = 2;
+    localparam LCR = 3;
+    localparam MCR = 4;
+    localparam LSR = 5;
+    localparam MSR = 6;
+    localparam SCR = 7;
+    localparam DLL = 0;
+    localparam DLM = 1;
+    // Bit positions inside the LSR read value.
+    localparam THRE = 5; // transmit holding register empty
+    localparam TEMT = 6; // transmitter empty
+    // Register state; all of it starts at zero.
+    byte lcr = 0;
+    byte dlm = 0;
+    byte dll = 0;
+    byte mcr = 0;
+    byte lsr = 0;
+    byte ier = 0;
+    byte msr = 0;
+    byte scr = 0;
+    logic fifo_enabled = 1'b0;
+    // Every access completes immediately and never reports an error.
+    assign pready_o = 1'b1;
+    assign pslverr_o = 1'b0;
+    // "Transmit" a character by printing it to the simulator console.
+    function void uart_tx(byte ch);
+        $write("%c", ch);
+    endfunction : uart_tx
+
+/* verilator lint_off WIDTHTRUNC */
+/* verilator lint_off WIDTHEXPAND */
+/* verilator lint_off WIDTHCONCAT */
+    // Write path: with lcr bit 7 set, indices 0/1 write dll/dlm; otherwise index 0 prints the byte and index 1 writes ier.
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (rst_ni) begin
+            if (psel_i & penable_i & pwrite_i) begin
+                case ((paddr_i >> 'h2) & 'h7)
+                    THR: begin
+                        if (lcr & 'h80) dll <= byte'(pwdata_i[7:0]);
+                        else uart_tx(byte'(pwdata_i[7:0]));
+                    end
+                    IER: begin
+                        if (lcr & 'h80) dlm <= byte'(pwdata_i[7:0]);
+                        else ier <= byte'(pwdata_i[7:0] & 'hF);
+                    end
+                    FCR: begin
+                        if (pwdata_i[0]) fifo_enabled <= 1'b1;
+                        else fifo_enabled <= 1'b0;
+                    end
+                    LCR: lcr <= byte'(pwdata_i[7:0]);
+                    MCR: mcr <= byte'(pwdata_i[7:0] & 'h1F);
+                    LSR: lsr <= byte'(pwdata_i[7:0]);
+                    MSR: msr <= byte'(pwdata_i[7:0]);
+                    SCR: scr <= byte'(pwdata_i[7:0]);
+                    default:;
+                endcase
+            end
+        end
+    end
+    // Read path. LSR always reports THRE and TEMT; the 8-bit cast keeps a sign-extended lsr out of prdata_o[31:8].
+    always_comb begin
+        prdata_o = '0;
+        if (psel_i & penable_i & ~pwrite_i) begin
+            case ((paddr_i >> 'h2) & 'h7)
+                THR: begin
+                    if (lcr & 'h80) prdata_o = {24'b0, dll};
+                end
+                IER: begin
+                    if (lcr & 'h80) prdata_o = {24'b0, dlm};
+                    else prdata_o = {24'b0, ier};
+                end
+                IIR: begin
+                    if (fifo_enabled) prdata_o = {24'b0, 8'hc0};
+                    else prdata_o = {24'b0, 8'b0};
+                end
+                LCR: prdata_o = {24'b0, lcr};
+                MCR: prdata_o = {24'b0, mcr};
+                LSR: prdata_o = {24'b0, 8'(lsr | (1 << THRE) | (1 << TEMT))};
+                MSR: prdata_o = {24'b0, msr};
+                SCR: prdata_o = {24'b0, scr};
+                default:;
+            endcase
+        end
+    end
+
+/* verilator lint_on WIDTHTRUNC */
+/* verilator lint_on WIDTHEXPAND */
+/* verilator lint_on WIDTHCONCAT */
+
+endmodule
diff --git a/test/type_param/corev_apu/tb/common/uart.sv b/test/type_param/corev_apu/tb/common/uart.sv
new file mode 100644
index 0000000..d45f39a
--- /dev/null
+++ b/test/type_param/corev_apu/tb/common/uart.sv
@@ -0,0 +1,104 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Unknown
+// Date: Unknown
+// Description: This module takes data over UART and prints them to the console
+//              A string is printed to the console as soon as a '\n' character is found
+
+interface uart_bus #(
+    parameter int unsigned BAUD_RATE = 115200, // bits per second, sets BIT_PERIOD
+    parameter int unsigned PARITY_EN = 0       // 1 = sample and check a parity bit after the data bits
+)(
+    input  logic rx,    // serial line that is sampled and logged
+    output logic tx,    // serial line driven by send_char
+    input  logic rx_en  // receiver runs only while this is high
+);
+  // Simulation-only model: the body is inside translate_off and is left out entirely under VERILATOR.
+/* pragma translate_off */
+`ifndef VERILATOR
+  localparam time BIT_PERIOD = (1000000000 / BAUD_RATE) * 1ns; // one bit time, integer division
+  // Receive state. stringa and charnum start at zero so the first line is captured even when rx_en is high from time 0.
+  logic [7:0]       character;
+  logic [256*8-1:0] stringa = '0;  // line buffer, first character in the most significant byte
+  logic             parity;
+  integer           charnum = 0;   // number of characters already stored in stringa
+  integer           file;
+
+  initial begin
+    tx   = 1'bZ;
+    file = $fopen("uart", "w");
+  end
+  // Receiver: wait for a falling edge on rx, sample 8 data bits LSB first, then optional parity and the stop bit.
+  always begin
+    if (rx_en) begin
+      @(negedge rx);
+      #(BIT_PERIOD/2);
+      for (int i = 0; i <= 7; i++) begin
+        #BIT_PERIOD character[i] = rx;
+      end
+
+      if (PARITY_EN == 1) begin
+        // check parity
+        #BIT_PERIOD parity = rx;
+
+        for (int i=7;i>=0;i--) begin
+          parity = character[i] ^ parity;
+        end
+
+        if (parity == 1'b1) begin
+          $display("Parity error detected");
+        end
+      end
+
+      // STOP BIT
+      #BIT_PERIOD;
+
+      $fwrite(file, "%c", character);
+      stringa[(255-charnum)*8 +: 8] = character;
+      if (character == 8'h0A || charnum == 254) begin // line feed or max. chars reached
+        if (character == 8'h0A) begin
+          stringa[(255-charnum)*8 +: 8] = 8'h0; // null terminate string, replace line feed
+        end else begin
+          stringa[(255-charnum-1)*8 +: 8] = 8'h0; // null terminate string
+        end
+
+        $write("[UART]: %s\n", stringa);
+        charnum = 0;
+        stringa = "";
+      end else begin
+        charnum = charnum + 1;
+      end
+    end else begin
+      charnum = 0;
+      stringa = "";
+      #10;
+    end
+  end
+  // Drive one character onto tx: start bit (0), 8 data bits LSB first, stop bit (1).
+  task send_char(input logic [7:0] c);
+    int i;
+
+    // start bit
+    tx = 1'b0;
+
+    for (i = 0; i < 8; i++) begin
+      #(BIT_PERIOD);
+      tx = c[i];
+    end
+
+    // stop bit
+    #(BIT_PERIOD);
+    tx = 1'b1;
+    #(BIT_PERIOD);
+  endtask
+`endif
+/* pragma translate_on */
+endinterface
diff --git a/test/type_param/corev_apu/tb/rvfi_tracer.sv b/test/type_param/corev_apu/tb/rvfi_tracer.sv
new file mode 100644
index 0000000..75f68be
--- /dev/null
+++ b/test/type_param/corev_apu/tb/rvfi_tracer.sv
@@ -0,0 +1,134 @@
+// Copyright 2020 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Jean-Roch COULON - Thales
+
+module rvfi_tracer #(
+  parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+  parameter type rvfi_instr_t = logic,
+  // HART_ID names the trace file; DEBUG_START/DEBUG_STOP bound the cycle window of the custom-signal trace
+  parameter logic [7:0] HART_ID      = '0,
+  parameter int unsigned DEBUG_START = 0,
+  parameter int unsigned DEBUG_STOP  = 0
+)(
+  input logic                           clk_i,
+  input logic                           rst_ni,
+  input rvfi_instr_t[CVA6Cfg.NrCommitPorts-1:0] rvfi_i,
+  output logic[31:0]                    end_of_test_o
+);
+  // Plusargs: +tohost_addr=<hex> (default 0, which disables tohost detection) and +time_out=<cycles> (default 2000000)
+  logic[riscv::PLEN-1:0] TOHOST_ADDR;
+  int f;
+  int unsigned SIM_FINISH;
+  initial begin
+    f = $fopen($sformatf("trace_rvfi_hart_%h.dasm", HART_ID), "w");
+    if (!$value$plusargs("time_out=%d", SIM_FINISH)) SIM_FINISH = 2000000;
+    if (!$value$plusargs("tohost_addr=%h", TOHOST_ADDR)) TOHOST_ADDR = '0;
+    if (TOHOST_ADDR == '0) begin
+      $display("*** [rvfi_tracer] WARNING: No valid address of 'tohost' (tohost == 0x%h), termination possible only by timeout or Ctrl-C!\n", TOHOST_ADDR);
+      $fwrite(f, "*** [rvfi_tracer] WARNING No valid address of 'tohost' (tohost == 0x%h), termination possible only by timeout or Ctrl-C!\n", TOHOST_ADDR);
+    end
+  end
+
+  final $fclose(f);
+
+  logic [31:0] cycles;
+  // Generate the trace based on RVFI
+  logic [63:0] pc64;
+  string cause;
+  logic[31:0] end_of_test_q;
+  logic[31:0] end_of_test_d;
+  // end_of_test_q is a per-cycle scratch value (blocking assignments); end_of_test_d is the registered copy that is output.
+  assign end_of_test_o = end_of_test_d;
+  always_ff @(posedge clk_i) begin
+    end_of_test_q = (rst_ni && (end_of_test_d[0] == 1'b1)) ? end_of_test_d : 0;
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      pc64 = {{riscv::XLEN-riscv::VLEN{rvfi_i[i].pc_rdata[riscv::VLEN-1]}}, rvfi_i[i].pc_rdata};
+      // print the instruction information if the instruction is valid or a trap is taken
+      if (rvfi_i[i].valid) begin
+        // Instruction information
+        $fwrite(f, "core   0: 0x%h (0x%h) DASM(%h)\n",
+          pc64, rvfi_i[i].insn, rvfi_i[i].insn);
+        // Destination register information
+        if (rvfi_i[i].insn[1:0] != 2'b11) begin
+          $fwrite(f, "%h 0x%h (0x%h)",
+            rvfi_i[i].mode, pc64, rvfi_i[i].insn[15:0]);
+        end else begin
+          $fwrite(f, "%h 0x%h (0x%h)",
+            rvfi_i[i].mode, pc64, rvfi_i[i].insn);
+        end
+        // Decode instruction to know if destination register is FP register.
+        // Handle both uncompressed and compressed instructions.
+        if ( rvfi_i[i].insn[6:0] == 7'b1001111 ||
+             rvfi_i[i].insn[6:0] == 7'b1001011 ||
+             rvfi_i[i].insn[6:0] == 7'b1000111 ||
+             rvfi_i[i].insn[6:0] == 7'b1000011 ||
+             rvfi_i[i].insn[6:0] == 7'b0000111 ||
+            (rvfi_i[i].insn[6:0] == 7'b1010011 && rvfi_i[i].insn[31:26] != 6'b111000
+                                               && rvfi_i[i].insn[31:26] != 6'b101000
+                                               && rvfi_i[i].insn[31:26] != 6'b110000) ||
+            (rvfi_i[i].insn[0] == 1'b0 && ((rvfi_i[i].insn[15:13] == 3'b001 && riscv::XLEN == 64) ||
+                                           (rvfi_i[i].insn[15:13] == 3'b011 && riscv::XLEN == 32) ))) begin
+          $fwrite(f, " f%d 0x%h", rvfi_i[i].rd_addr, rvfi_i[i].rd_wdata);
+        end else if (rvfi_i[i].rd_addr != 0) begin
+          $fwrite(f, " x%d 0x%h", rvfi_i[i].rd_addr, rvfi_i[i].rd_wdata);
+          if (rvfi_i[i].mem_rmask != 0) begin
+            $fwrite(f, " mem 0x%h", rvfi_i[i].mem_addr);
+          end
+        end else begin
+          if (rvfi_i[i].mem_wmask != 0) begin
+            $fwrite(f, " mem 0x%h 0x%h", rvfi_i[i].mem_addr, rvfi_i[i].mem_wdata);
+            if (TOHOST_ADDR != '0 &&
+                rvfi_i[i].mem_paddr == TOHOST_ADDR &&
+                rvfi_i[i].mem_wdata[0] == 1'b1) begin
+              end_of_test_q = rvfi_i[i].mem_wdata[31:0];
+            end
+          end
+        end
+        $fwrite(f, "\n");
+      end else if (rvfi_i[i].trap) begin
+        // Name the trap cause; codes outside the table print UNKNOWN instead of reusing a stale name
+        case (rvfi_i[i].cause)
+          32'h0: cause = "INSTR_ADDR_MISALIGNED";
+          32'h1: cause = "INSTR_ACCESS_FAULT";
+          32'h2: cause = "ILLEGAL_INSTR";
+          32'h3: cause = "BREAKPOINT";
+          32'h4: cause = "LD_ADDR_MISALIGNED";
+          32'h5: cause = "LD_ACCESS_FAULT";
+          32'h6: cause = "ST_ADDR_MISALIGNED";
+          32'h7: cause = "ST_ACCESS_FAULT";
+          default: cause = "UNKNOWN";
+        endcase
+        $fwrite(f, "%s exception @ 0x%h\n", cause, pc64);
+      end
+    end
+
+    if (~rst_ni)
+      cycles <= 0;
+    else
+      cycles <= cycles+1;
+    if (cycles > SIM_FINISH)
+      end_of_test_q = 32'hffff_ffff;
+    // Register the scratch value; a value with bit 0 set is held on later cycles by the first statement of this block.
+    end_of_test_d <= end_of_test_q;
+  end
+
+
+  // Trace any custom signals
+  // Define signals to be traced by adding them into debug and name arrays
+  string name[0:10];
+  logic[63:0] debug[0:10], debug_previous[0:10];
+  // Log every debug entry that changed since the previous cycle; the loop bound follows the array size.
+  always_ff @(posedge clk_i) begin
+    if (cycles > DEBUG_START && cycles < DEBUG_STOP)
+      for (int index = 0; index < $size(debug); index++)
+        if (debug_previous[index] != debug[index])
+          $fwrite(f, "%d %s %x\n", cycles, name[index], debug[index]);
+    debug_previous <= debug;
+  end
+
+endmodule // rvfi_tracer
diff --git a/test/type_param/sv2v.sh b/test/type_param/sv2v.sh
new file mode 100755
index 0000000..4d739e0
--- /dev/null
+++ b/test/type_param/sv2v.sh
@@ -0,0 +1,249 @@
+sv2v -v --top=ariane_testharness --define=VERILATOR > cva6_nonsys.v \
+--incdir=vendor/pulp-platform/common_cells/include/ \
+--incdir=vendor/pulp-platform/common_cells/src/ \
+--incdir=vendor/pulp-platform/axi/include/ \
+--incdir=common/local/util/ \
+--incdir=corev_apu/axi_node \
+--incdir=core/cache_subsystem/hpdcache/rtl/include \
+--incdir=corev_apu/register_interface/include \
+vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv \
+vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv \
+vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_fma.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_top.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv \
+core/include/config_pkg.sv \
+core/include/cv64a6_imafdc_sv39_config_pkg.sv \
+core/include/riscv_pkg.sv \
+core/include/ariane_pkg.sv \
+vendor/pulp-platform/axi/src/axi_pkg.sv \
+core/include/wt_cache_pkg.sv \
+core/include/std_cache_pkg.sv \
+core/include/instr_tracer_pkg.sv \
+core/include/acc_pkg.sv \
+core/include/cvxif_pkg.sv \
+core/cvxif_example/include/cvxif_instr_pkg.sv \
+core/cvxif_fu.sv \
+core/cvxif_example/cvxif_example_coprocessor.sv \
+core/cvxif_example/instr_decoder.sv \
+vendor/pulp-platform/common_cells/src/cf_math_pkg.sv \
+vendor/pulp-platform/common_cells/src/fifo_v3.sv \
+vendor/pulp-platform/common_cells/src/lfsr.sv \
+vendor/pulp-platform/common_cells/src/lfsr_8bit.sv \
+vendor/pulp-platform/common_cells/src/stream_arbiter.sv \
+vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv \
+vendor/pulp-platform/common_cells/src/stream_mux.sv \
+vendor/pulp-platform/common_cells/src/stream_demux.sv \
+vendor/pulp-platform/common_cells/src/lzc.sv \
+vendor/pulp-platform/common_cells/src/rr_arb_tree.sv \
+vendor/pulp-platform/common_cells/src/shift_reg.sv \
+vendor/pulp-platform/common_cells/src/unread.sv \
+vendor/pulp-platform/common_cells/src/popcount.sv \
+vendor/pulp-platform/common_cells/src/exp_backoff.sv \
+vendor/pulp-platform/common_cells/src/counter.sv \
+vendor/pulp-platform/common_cells/src/delta_counter.sv \
+core/cva6.sv \
+core/cva6_rvfi_probes.sv \
+core/alu.sv \
+core/fpu_wrap.sv \
+core/branch_unit.sv \
+core/compressed_decoder.sv \
+core/controller.sv \
+core/csr_buffer.sv \
+core/csr_regfile.sv \
+core/decoder.sv \
+core/ex_stage.sv \
+core/instr_realign.sv \
+core/id_stage.sv \
+core/issue_read_operands.sv \
+core/issue_stage.sv \
+core/load_unit.sv \
+core/load_store_unit.sv \
+core/lsu_bypass.sv \
+core/mult.sv \
+core/multiplier.sv \
+core/serdiv.sv \
+core/perf_counters.sv \
+core/ariane_regfile_ff.sv \
+core/ariane_regfile_fpga.sv \
+core/scoreboard.sv \
+core/store_buffer.sv \
+core/amo_buffer.sv \
+core/store_unit.sv \
+core/commit_stage.sv \
+core/axi_shim.sv \
+core/cva6_accel_first_pass_decoder_stub.sv \
+core/acc_dispatcher.sv \
+core/frontend/btb.sv \
+core/frontend/bht.sv \
+core/frontend/ras.sv \
+core/frontend/instr_scan.sv \
+core/frontend/instr_queue.sv \
+core/frontend/frontend.sv \
+core/cache_subsystem/wt_dcache_ctrl.sv \
+core/cache_subsystem/wt_dcache_mem.sv \
+core/cache_subsystem/wt_dcache_missunit.sv \
+core/cache_subsystem/wt_dcache_wbuffer.sv \
+core/cache_subsystem/wt_dcache.sv \
+core/cache_subsystem/cva6_icache.sv \
+core/cache_subsystem/wt_cache_subsystem.sv \
+core/cache_subsystem/wt_axi_adapter.sv \
+core/cache_subsystem/tag_cmp.sv \
+core/cache_subsystem/axi_adapter.sv \
+core/cache_subsystem/miss_handler.sv \
+core/cache_subsystem/cache_ctrl.sv \
+core/cache_subsystem/cva6_icache_axi_wrapper.sv \
+core/cache_subsystem/std_cache_subsystem.sv \
+core/cache_subsystem/std_nbdcache.sv \
+core/include/cva6_hpdcache_default_config_pkg.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv \
+core/cache_subsystem/cva6_hpdcache_subsystem.sv \
+core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv \
+core/cache_subsystem/cva6_hpdcache_if_adapter.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv \
+core/pmp/src/pmp.sv \
+core/pmp/src/pmp_entry.sv \
+common/local/util/instr_tracer_if.sv \
+common/local/util/instr_tracer.sv \
+common/local/util/tc_sram_wrapper.sv \
+vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv \
+common/local/util/sram.sv \
+core/mmu_sv39/mmu.sv \
+core/mmu_sv39/ptw.sv \
+core/mmu_sv39/tlb.sv \
+core/mmu_sv32/cva6_mmu_sv32.sv \
+core/mmu_sv32/cva6_ptw_sv32.sv \
+core/mmu_sv32/cva6_tlb_sv32.sv \
+core/mmu_sv32/cva6_shared_tlb_sv32.sv \
+	  core/cva6_rvfi.sv \
+	  corev_apu/tb/ariane_axi_pkg.sv \
+	  corev_apu/tb/axi_intf.sv \
+	  corev_apu/register_interface/src/reg_intf.sv \
+	  corev_apu/tb/ariane_soc_pkg.sv \
+	  corev_apu/riscv-dbg/src/dm_pkg.sv \
+	  corev_apu/tb/ariane_axi_soc_pkg.sv \
+	  corev_apu/src/ariane.sv \
+	  corev_apu/bootrom/bootrom.sv \
+	  corev_apu/clint/axi_lite_interface.sv \
+	  corev_apu/clint/clint.sv \
+	  corev_apu/fpga/src/axi2apb/src/axi2apb.sv \
+	  corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv \
+	  corev_apu/fpga/src/apb_timer/apb_timer.sv \
+	  corev_apu/fpga/src/apb_timer/timer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_slice.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv \
+	  corev_apu/axi_mem_if/src/axi2mem.sv \
+	  corev_apu/rv_plic/rtl/rv_plic_target.sv \
+	  corev_apu/rv_plic/rtl/rv_plic_gateway.sv \
+	  corev_apu/rv_plic/rtl/plic_regmap.sv \
+	  corev_apu/rv_plic/rtl/plic_top.sv \
+	  corev_apu/riscv-dbg/src/dmi_cdc.sv \
+	  corev_apu/riscv-dbg/src/dmi_jtag.sv \
+	  corev_apu/riscv-dbg/src/dmi_jtag_tap.sv \
+	  corev_apu/riscv-dbg/src/dm_csrs.sv \
+	  corev_apu/riscv-dbg/src/dm_mem.sv \
+	  corev_apu/riscv-dbg/src/dm_sba.sv \
+	  corev_apu/riscv-dbg/src/dm_top.sv \
+	  corev_apu/riscv-dbg/debug_rom/debug_rom.sv \
+	  corev_apu/register_interface/src/apb_to_reg.sv \
+	  vendor/pulp-platform/axi/src/axi_multicut.sv \
+	  vendor/pulp-platform/common_cells/src/rstgen_bypass.sv \
+	  vendor/pulp-platform/common_cells/src/rstgen.sv \
+	  vendor/pulp-platform/common_cells/src/addr_decode.sv \
+	  vendor/pulp-platform/common_cells/src/stream_register.sv \
+	  vendor/pulp-platform/axi/src/axi_cut.sv \
+	  vendor/pulp-platform/axi/src/axi_join.sv \
+	  vendor/pulp-platform/axi/src/axi_delayer.sv \
+	  vendor/pulp-platform/axi/src/axi_to_axi_lite.sv \
+	  vendor/pulp-platform/axi/src/axi_id_prepend.sv \
+	  vendor/pulp-platform/axi/src/axi_atop_filter.sv \
+	  vendor/pulp-platform/axi/src/axi_err_slv.sv \
+	  vendor/pulp-platform/axi/src/axi_mux.sv \
+	  vendor/pulp-platform/axi/src/axi_demux.sv \
+	  vendor/pulp-platform/axi/src/axi_xbar.sv \
+	  vendor/pulp-platform/common_cells/src/cdc_2phase.sv \
+	  vendor/pulp-platform/common_cells/src/spill_register_flushable.sv \
+	  vendor/pulp-platform/common_cells/src/spill_register.sv \
+	  vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv \
+	  vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv \
+	  vendor/pulp-platform/common_cells/src/stream_delay.sv \
+	  vendor/pulp-platform/common_cells/src/lfsr_16bit.sv \
+	  vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv \
+	  vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv \
+	  vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv \
+	  corev_apu/tb/ariane_testharness.sv \
+	  corev_apu/tb/ariane_peripherals.sv \
+	  corev_apu/tb/rvfi_tracer.sv \
+	  corev_apu/tb/common/uart.sv \
+	  corev_apu/tb/common/mock_uart.sv \
diff --git a/test/type_param/sv2v_corrected.sh b/test/type_param/sv2v_corrected.sh
new file mode 100755
index 0000000..4eb7acc
--- /dev/null
+++ b/test/type_param/sv2v_corrected.sh
@@ -0,0 +1,249 @@
+sv2v -v --top=ariane_testharness --define=VERILATOR > cva6_nonsys.v \
+--incdir=vendor/pulp-platform/common_cells/include/ \
+--incdir=vendor/pulp-platform/common_cells/src/ \
+--incdir=vendor/pulp-platform/axi/include/ \
+--incdir=common/local/util/ \
+--incdir=corev_apu/axi_node \
+--incdir=core/cache_subsystem/hpdcache/rtl/include \
+--incdir=corev_apu/register_interface/include \
+vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv \
+vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv \
+vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_fma.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_top.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv \
+vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv \
+core/include/config_pkg.sv \
+core/include/cv64a6_imafdc_sv39_config_pkg.sv \
+core/include/riscv_pkg.sv \
+core/include/ariane_pkg.sv \
+vendor/pulp-platform/axi/src/axi_pkg.sv \
+core/include/wt_cache_pkg.sv \
+core/include/std_cache_pkg.sv \
+core/include/instr_tracer_pkg.sv \
+core/include/acc_pkg.sv \
+core/include/cvxif_pkg.sv \
+core/cvxif_example/include/cvxif_instr_pkg.sv \
+core/cvxif_fu.sv \
+core/cvxif_example/cvxif_example_coprocessor.sv \
+core/cvxif_example/instr_decoder.sv \
+vendor/pulp-platform/common_cells/src/cf_math_pkg.sv \
+vendor/pulp-platform/common_cells/src/fifo_v3.sv \
+vendor/pulp-platform/common_cells/src/lfsr.sv \
+vendor/pulp-platform/common_cells/src/lfsr_8bit.sv \
+vendor/pulp-platform/common_cells/src/stream_arbiter.sv \
+vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv \
+vendor/pulp-platform/common_cells/src/stream_mux.sv \
+vendor/pulp-platform/common_cells/src/stream_demux.sv \
+vendor/pulp-platform/common_cells/src/lzc.sv \
+vendor/pulp-platform/common_cells/src/rr_arb_tree.sv \
+vendor/pulp-platform/common_cells/src/shift_reg.sv \
+vendor/pulp-platform/common_cells/src/unread.sv \
+vendor/pulp-platform/common_cells/src/popcount.sv \
+vendor/pulp-platform/common_cells/src/exp_backoff.sv \
+vendor/pulp-platform/common_cells/src/counter.sv \
+vendor/pulp-platform/common_cells/src/delta_counter.sv \
+core/cva6.sv \
+core/cva6_rvfi_probes.sv \
+core/alu.sv \
+core/fpu_wrap.sv \
+core/branch_unit.sv \
+core/compressed_decoder.sv \
+core/controller.sv \
+core/csr_buffer.sv \
+core/csr_regfile.sv \
+core/decoder.sv \
+core/ex_stage.sv \
+core/instr_realign.sv \
+core/id_stage.sv \
+core/issue_read_operands.sv \
+core/issue_stage.sv \
+core/load_unit.sv \
+core/load_store_unit.sv \
+core/lsu_bypass.sv \
+core/mult.sv \
+core/multiplier.sv \
+core/serdiv.sv \
+core/perf_counters.sv \
+core/ariane_regfile_ff.sv \
+core/ariane_regfile_fpga.sv \
+core/scoreboard.sv \
+core/store_buffer.sv \
+core/amo_buffer.sv \
+core/store_unit.sv \
+core/commit_stage.sv \
+core/axi_shim.sv \
+core/cva6_accel_first_pass_decoder_stub.sv \
+core/acc_dispatcher_corrected.sv \
+core/frontend/btb.sv \
+core/frontend/bht.sv \
+core/frontend/ras.sv \
+core/frontend/instr_scan.sv \
+core/frontend/instr_queue.sv \
+core/frontend/frontend.sv \
+core/cache_subsystem/wt_dcache_ctrl.sv \
+core/cache_subsystem/wt_dcache_mem.sv \
+core/cache_subsystem/wt_dcache_missunit.sv \
+core/cache_subsystem/wt_dcache_wbuffer.sv \
+core/cache_subsystem/wt_dcache.sv \
+core/cache_subsystem/cva6_icache.sv \
+core/cache_subsystem/wt_cache_subsystem.sv \
+core/cache_subsystem/wt_axi_adapter.sv \
+core/cache_subsystem/tag_cmp.sv \
+core/cache_subsystem/axi_adapter.sv \
+core/cache_subsystem/miss_handler.sv \
+core/cache_subsystem/cache_ctrl.sv \
+core/cache_subsystem/cva6_icache_axi_wrapper.sv \
+core/cache_subsystem/std_cache_subsystem.sv \
+core/cache_subsystem/std_nbdcache.sv \
+core/include/cva6_hpdcache_default_config_pkg.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv \
+core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv \
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv \
+core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv \
+core/cache_subsystem/cva6_hpdcache_subsystem.sv \
+core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv \
+core/cache_subsystem/cva6_hpdcache_if_adapter.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv \
+core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv \
+core/pmp/src/pmp.sv \
+core/pmp/src/pmp_entry.sv \
+common/local/util/instr_tracer_if.sv \
+common/local/util/instr_tracer.sv \
+common/local/util/tc_sram_wrapper.sv \
+vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv \
+common/local/util/sram.sv \
+core/mmu_sv39/mmu.sv \
+core/mmu_sv39/ptw.sv \
+core/mmu_sv39/tlb.sv \
+core/mmu_sv32/cva6_mmu_sv32.sv \
+core/mmu_sv32/cva6_ptw_sv32.sv \
+core/mmu_sv32/cva6_tlb_sv32.sv \
+core/mmu_sv32/cva6_shared_tlb_sv32.sv \
+	  core/cva6_rvfi.sv \
+	  corev_apu/tb/ariane_axi_pkg.sv \
+	  corev_apu/tb/axi_intf.sv \
+	  corev_apu/register_interface/src/reg_intf.sv \
+	  corev_apu/tb/ariane_soc_pkg.sv \
+	  corev_apu/riscv-dbg/src/dm_pkg.sv \
+	  corev_apu/tb/ariane_axi_soc_pkg.sv \
+	  corev_apu/src/ariane.sv \
+	  corev_apu/bootrom/bootrom.sv \
+	  corev_apu/clint/axi_lite_interface.sv \
+	  corev_apu/clint/clint.sv \
+	  corev_apu/fpga/src/axi2apb/src/axi2apb.sv \
+	  corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv \
+	  corev_apu/fpga/src/apb_timer/apb_timer.sv \
+	  corev_apu/fpga/src/apb_timer/timer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_slice.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv \
+	  corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv \
+	  corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv \
+	  corev_apu/axi_mem_if/src/axi2mem.sv \
+	  corev_apu/rv_plic/rtl/rv_plic_target.sv \
+	  corev_apu/rv_plic/rtl/rv_plic_gateway.sv \
+	  corev_apu/rv_plic/rtl/plic_regmap.sv \
+	  corev_apu/rv_plic/rtl/plic_top.sv \
+	  corev_apu/riscv-dbg/src/dmi_cdc.sv \
+	  corev_apu/riscv-dbg/src/dmi_jtag.sv \
+	  corev_apu/riscv-dbg/src/dmi_jtag_tap.sv \
+	  corev_apu/riscv-dbg/src/dm_csrs.sv \
+	  corev_apu/riscv-dbg/src/dm_mem.sv \
+	  corev_apu/riscv-dbg/src/dm_sba.sv \
+	  corev_apu/riscv-dbg/src/dm_top.sv \
+	  corev_apu/riscv-dbg/debug_rom/debug_rom.sv \
+	  corev_apu/register_interface/src/apb_to_reg.sv \
+	  vendor/pulp-platform/axi/src/axi_multicut.sv \
+	  vendor/pulp-platform/common_cells/src/rstgen_bypass.sv \
+	  vendor/pulp-platform/common_cells/src/rstgen.sv \
+	  vendor/pulp-platform/common_cells/src/addr_decode.sv \
+	  vendor/pulp-platform/common_cells/src/stream_register.sv \
+	  vendor/pulp-platform/axi/src/axi_cut.sv \
+	  vendor/pulp-platform/axi/src/axi_join.sv \
+	  vendor/pulp-platform/axi/src/axi_delayer.sv \
+	  vendor/pulp-platform/axi/src/axi_to_axi_lite.sv \
+	  vendor/pulp-platform/axi/src/axi_id_prepend.sv \
+	  vendor/pulp-platform/axi/src/axi_atop_filter.sv \
+	  vendor/pulp-platform/axi/src/axi_err_slv.sv \
+	  vendor/pulp-platform/axi/src/axi_mux.sv \
+	  vendor/pulp-platform/axi/src/axi_demux.sv \
+	  vendor/pulp-platform/axi/src/axi_xbar.sv \
+	  vendor/pulp-platform/common_cells/src/cdc_2phase.sv \
+	  vendor/pulp-platform/common_cells/src/spill_register_flushable.sv \
+	  vendor/pulp-platform/common_cells/src/spill_register.sv \
+	  vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv \
+	  vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv \
+	  vendor/pulp-platform/common_cells/src/stream_delay.sv \
+	  vendor/pulp-platform/common_cells/src/lfsr_16bit.sv \
+	  vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv \
+	  vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv \
+	  vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv \
+	  corev_apu/tb/ariane_testharness.sv \
+	  corev_apu/tb/ariane_peripherals.sv \
+	  corev_apu/tb/rvfi_tracer.sv \
+	  corev_apu/tb/common/uart.sv \
+	  corev_apu/tb/common/mock_uart.sv \
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv
new file mode 100644
index 0000000..e166d0b
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv
@@ -0,0 +1,794 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_cast_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig  = '1,
+  parameter fpnew_pkg::ifmt_logic_t  IntFmtConfig = '1,
+  // FPU configuration
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change
+  localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig),
+                                                     fpnew_pkg::max_int_width(IntFmtConfig)),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                   clk_i,
+  input  logic                   rst_ni,
+  // Input signals
+  input  logic [WIDTH-1:0]       operands_i, // 1 operand
+  input  logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand
+  input  fpnew_pkg::roundmode_e  rnd_mode_i,
+  input  fpnew_pkg::operation_e  op_i,
+  input  logic                   op_mod_i,
+  input  fpnew_pkg::fp_format_e  src_fmt_i,
+  input  fpnew_pkg::fp_format_e  dst_fmt_i,
+  input  fpnew_pkg::int_format_e int_fmt_i,
+  input  TagType                 tag_i,
+  input  logic                   mask_i,
+  input  AuxType                 aux_i,
+  // Input Handshake
+  input  logic                   in_valid_i,
+  output logic                   in_ready_o,
+  input  logic                   flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]       result_o,
+  output fpnew_pkg::status_t     status_o,
+  output logic                   extension_bit_o,
+  output TagType                 tag_o,
+  output logic                   mask_o,
+  output AuxType                 aux_o,
+  // Output handshake
+  output logic                   out_valid_o,
+  input  logic                   out_ready_i,
+  // Indication of valid data in flight
+  output logic                   busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+  localparam int unsigned MAX_INT_WIDTH   = fpnew_pkg::max_int_width(IntFmtConfig);
+
+  localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
+
+  localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
+  localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
+  localparam int unsigned SUPER_BIAS     = 2**(SUPER_EXP_BITS - 1) - 1;
+
+  // The internal mantissa includes normal bit or an entire integer
+  localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH);
+  // If needed, there will be a LZC for renormalization
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH);
+  // The internal exponent must be able to represent the smallest denormal input value as signed
+  // or the number of bits in an integer
+  localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH),
+      fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1;
+  // Pipelines
+  localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [WIDTH-1:0]       operands_q;
+  logic [NUM_FORMATS-1:0] is_boxed_q;
+  logic                   op_mod_q;
+  fpnew_pkg::fp_format_e  src_fmt_q;
+  fpnew_pkg::fp_format_e  dst_fmt_q;
+  fpnew_pkg::int_format_e int_fmt_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                   [0:NUM_INP_REGS][WIDTH-1:0]       inp_pipe_operands_q;
+  logic                   [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e  [0:NUM_INP_REGS]                  inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e  [0:NUM_INP_REGS]                  inp_pipe_op_q;
+  logic                   [0:NUM_INP_REGS]                  inp_pipe_op_mod_q;
+  fpnew_pkg::fp_format_e  [0:NUM_INP_REGS]                  inp_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e  [0:NUM_INP_REGS]                  inp_pipe_dst_fmt_q;
+  fpnew_pkg::int_format_e [0:NUM_INP_REGS]                  inp_pipe_int_fmt_q;
+  TagType                 [0:NUM_INP_REGS]                  inp_pipe_tag_q;
+  logic                   [0:NUM_INP_REGS]                  inp_pipe_mask_q;
+  AuxType                 [0:NUM_INP_REGS]                  inp_pipe_aux_q;
+  logic                   [0:NUM_INP_REGS]                  inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_src_fmt_q[0]  = src_fmt_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_int_fmt_q[0]  = int_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_mask_q[0]     = mask_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage (gates the data registers below, not the valid bit)
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_src_fmt_q[i+1],  inp_pipe_src_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_int_fmt_q[i+1],  inp_pipe_int_fmt_q[i],  reg_ena, fpnew_pkg::int_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS];
+  assign op_mod_q   = inp_pipe_op_mod_q[NUM_INP_REGS];
+  assign src_fmt_q  = inp_pipe_src_fmt_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+  assign int_fmt_q  = inp_pipe_int_fmt_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic src_is_int, dst_is_int; // if 0, it's a float
+
+  assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F);
+  assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I);
+
+  logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
+
+  logic        [NUM_FORMATS-1:0]                    fmt_sign;
+  logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
+  logic        [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa;
+  logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC
+
+  fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info;
+
+  logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val;
+  logic                                          int_sign;
+  logic [INT_MAN_WIDTH-1:0]                      int_value, int_mantissa;
+
+  // FP Input initialization
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      // Classify input
+      fpnew_classifier #(
+        .FpFormat    ( fpnew_pkg::fp_format_e'(fmt) ),
+        .NumOperands ( 1                            )
+      ) i_fpnew_classifier (
+        .operands_i ( operands_q[FP_WIDTH-1:0] ),
+        .is_boxed_i ( is_boxed_q[fmt]          ),
+        .info_o     ( info[fmt]                )
+      );
+
+      assign fmt_sign[fmt]     = operands_q[FP_WIDTH-1];
+      assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]});
+      assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad
+      // Compensation for the difference in mantissa widths used for leading-zero count
+      assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS);
+    end else begin : inactive_format
+      assign info[fmt]                   = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_sign[fmt]               = fpnew_pkg::DONT_CARE;             // format disabled
+      assign fmt_exponent[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_mantissa[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Sign-extend INT input
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_sign_extend_int
+    // Set up some constants
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format // only active formats
+      always_comb begin : sign_ext_input
+        // sign-extend value only if it's signed
+        ifmt_input_val[ifmt]                = '{default: operands_q[INT_WIDTH-1] & ~op_mod_q};
+        ifmt_input_val[ifmt][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0];
+      end
+    end else begin : inactive_format
+      assign ifmt_input_val[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Construct input mantissa from integer
+  assign int_value    = ifmt_input_val[int_fmt_q];
+  assign int_sign     = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative
+  assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative
+
+  // select mantissa with source format
+  assign encoded_mant = src_is_int ? int_mantissa : fmt_mantissa[src_fmt_q];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic signed [INT_EXP_WIDTH-1:0] src_bias;      // src format bias
+  logic signed [INT_EXP_WIDTH-1:0] src_exp;       // src format exponent (biased)
+  logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal
+  logic signed [INT_EXP_WIDTH-1:0] src_offset;    // src offset within mantissa
+
+  assign src_bias      = signed'(fpnew_pkg::bias(src_fmt_q));
+  assign src_exp       = fmt_exponent[src_fmt_q];
+  assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal});
+  assign src_offset    = fmt_shift_compensation[src_fmt_q];
+
+  logic                            input_sign;   // input sign
+  logic signed [INT_EXP_WIDTH-1:0] input_exp;    // unbiased true exponent
+  logic        [INT_MAN_WIDTH-1:0] input_mant;   // normalized input mantissa
+  logic                            mant_is_zero; // for integer zeroes
+
+  logic signed [INT_EXP_WIDTH-1:0] fp_input_exp;
+  logic signed [INT_EXP_WIDTH-1:0] int_input_exp;
+
+  // Input mantissa needs to be normalized
+  logic [LZC_RESULT_WIDTH-1:0] renorm_shamt;     // renormalization shift amount
+  logic [LZC_RESULT_WIDTH:0]   renorm_shamt_sgn; // signed form for calculations
+
+  // Leading-zero counter is needed for renormalization
+  lzc #(
+    .WIDTH ( INT_MAN_WIDTH ),
+    .MODE  ( 1             ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( encoded_mant ),
+    .cnt_o   ( renorm_shamt ),
+    .empty_o ( mant_is_zero )
+  );
+  assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt});
+
+  // Get the sign from the proper source
+  assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q];
+  // Realign input mantissa, append zeroes if destination is wider
+  assign input_mant = encoded_mant << renorm_shamt;
+  // Unbias exponent and compensate for shift
+  assign fp_input_exp  = signed'(src_exp + src_subnormal - src_bias -
+                                 renorm_shamt_sgn + src_offset); // compensate for shift
+  assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
+
+  assign input_exp     = src_is_int ? int_input_exp : fp_input_exp;
+
+  logic signed [INT_EXP_WIDTH-1:0] destination_exp;  // re-biased exponent for destination
+
+  // Rebias the exponent
+  assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q));
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Pipeline output signals as non-arrays
+  logic                            input_sign_q;
+  logic signed [INT_EXP_WIDTH-1:0] input_exp_q;
+  logic [INT_MAN_WIDTH-1:0]        input_mant_q;
+  logic signed [INT_EXP_WIDTH-1:0] destination_exp_q;
+  logic                            src_is_int_q;
+  logic                            dst_is_int_q;
+  fpnew_pkg::fp_info_t             info_q;
+  logic                            mant_is_zero_q;
+  logic                            op_mod_q2;
+  fpnew_pkg::roundmode_e           rnd_mode_q;
+  fpnew_pkg::fp_format_e           src_fmt_q2;
+  fpnew_pkg::fp_format_e           dst_fmt_q2;
+  fpnew_pkg::int_format_e          int_fmt_q2;
+  // Internal pipeline signals, index i holds signal after i register stages
+
+
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_input_sign_q;
+  logic signed            [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q;
+  logic                   [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q;
+  logic signed            [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_src_is_int_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_dst_is_int_q;
+  fpnew_pkg::fp_info_t    [0:NUM_MID_REGS]                    mid_pipe_info_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_mant_zero_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_op_mod_q;
+  fpnew_pkg::roundmode_e  [0:NUM_MID_REGS]                    mid_pipe_rnd_mode_q;
+  fpnew_pkg::fp_format_e  [0:NUM_MID_REGS]                    mid_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e  [0:NUM_MID_REGS]                    mid_pipe_dst_fmt_q;
+  fpnew_pkg::int_format_e [0:NUM_MID_REGS]                    mid_pipe_int_fmt_q;
+  TagType                 [0:NUM_MID_REGS]                    mid_pipe_tag_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_mask_q;
+  AuxType                 [0:NUM_MID_REGS]                    mid_pipe_aux_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic
+  assign mid_pipe_input_sign_q[0] = input_sign;
+  assign mid_pipe_input_exp_q[0]  = input_exp;
+  assign mid_pipe_input_mant_q[0] = input_mant;
+  assign mid_pipe_dest_exp_q[0]   = destination_exp;
+  assign mid_pipe_src_is_int_q[0] = src_is_int;
+  assign mid_pipe_dst_is_int_q[0] = dst_is_int;
+  assign mid_pipe_info_q[0]       = info[src_fmt_q];
+  assign mid_pipe_mant_zero_q[0]  = mant_is_zero;
+  assign mid_pipe_op_mod_q[0]     = op_mod_q;
+  assign mid_pipe_rnd_mode_q[0]   = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_src_fmt_q[0]    = src_fmt_q;
+  assign mid_pipe_dst_fmt_q[0]    = dst_fmt_q;
+  assign mid_pipe_int_fmt_q[0]    = int_fmt_q;
+  assign mid_pipe_tag_q[0]        = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_mask_q[0]       = inp_pipe_mask_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]        = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]      = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+    // Internal register enable for this stage (gates the data registers below, not the valid bit)
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0)
+    `FFL(mid_pipe_input_exp_q[i+1],  mid_pipe_input_exp_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0)
+    `FFL(mid_pipe_dest_exp_q[i+1],   mid_pipe_dest_exp_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0)
+    `FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0)
+    `FFL(mid_pipe_info_q[i+1],       mid_pipe_info_q[i],       reg_ena, '0)
+    `FFL(mid_pipe_mant_zero_q[i+1],  mid_pipe_mant_zero_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_op_mod_q[i+1],     mid_pipe_op_mod_q[i],     reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1],   mid_pipe_rnd_mode_q[i],   reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_src_fmt_q[i+1],    mid_pipe_src_fmt_q[i],    reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_dst_fmt_q[i+1],    mid_pipe_dst_fmt_q[i],    reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_int_fmt_q[i+1],    mid_pipe_int_fmt_q[i],    reg_ena, fpnew_pkg::int_format_e'(0))
+    `FFL(mid_pipe_tag_q[i+1],        mid_pipe_tag_q[i],        reg_ena, TagType'('0))
+    `FFL(mid_pipe_mask_q[i+1],       mid_pipe_mask_q[i],       reg_ena, '0)
+    `FFL(mid_pipe_aux_q[i+1],        mid_pipe_aux_q[i],        reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign input_sign_q      = mid_pipe_input_sign_q[NUM_MID_REGS];
+  assign input_exp_q       = mid_pipe_input_exp_q[NUM_MID_REGS];
+  assign input_mant_q      = mid_pipe_input_mant_q[NUM_MID_REGS];
+  assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS];
+  assign src_is_int_q      = mid_pipe_src_is_int_q[NUM_MID_REGS];
+  assign dst_is_int_q      = mid_pipe_dst_is_int_q[NUM_MID_REGS];
+  assign info_q            = mid_pipe_info_q[NUM_MID_REGS];
+  assign mant_is_zero_q    = mid_pipe_mant_zero_q[NUM_MID_REGS];
+  assign op_mod_q2         = mid_pipe_op_mod_q[NUM_MID_REGS];
+  assign rnd_mode_q        = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign src_fmt_q2        = mid_pipe_src_fmt_q[NUM_MID_REGS];
+  assign dst_fmt_q2        = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+  assign int_fmt_q2        = mid_pipe_int_fmt_q[NUM_MID_REGS];
+
+  // --------
+  // Casting
+  // --------
+  logic [INT_EXP_WIDTH-1:0] final_exp;        // after eventual adjustments
+
+  logic [2*INT_MAN_WIDTH:0]  preshift_mant;    // mantissa before final shift
+  logic [2*INT_MAN_WIDTH:0]  destination_mant; // mantissa from shifter, with rnd bit
+  logic [SUPER_MAN_BITS-1:0] final_mant;       // mantissa after adjustments
+  logic [MAX_INT_WIDTH-1:0]  final_int;        // integer shifted in position
+
+  logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization
+
+  logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
+  logic       of_before_round, uf_before_round;
+
+
+  // Perform adjustments to mantissa and exponent
+  always_comb begin : cast_value
+    // Default assignment
+    final_exp       = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits
+    preshift_mant   = '0;  // initialize mantissa container with zeroes
+    denorm_shamt    = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa
+    of_before_round = 1'b0;
+    uf_before_round = 1'b0;
+
+    // Place mantissa to the left of the shifter
+    preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1);
+
+    // Handle INT casts
+    if (dst_is_int_q) begin
+      // By default right shift mantissa to be an integer
+      denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q);
+      // overflow: when converting to unsigned the range is larger by one
+      if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin
+        denorm_shamt    = '0; // prevent shifting
+        of_before_round = 1'b1;
+      // underflow
+      end else if (input_exp_q < -1) begin
+        denorm_shamt    = MAX_INT_WIDTH + 1; // all bits go to the sticky
+        uf_before_round = 1'b1;
+      end
+    // Handle FP over-/underflows
+    end else begin
+      // Overflow or infinities (for proper rounding)
+      if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) ||
+          (~src_is_int_q && info_q.is_inf)) begin
+        final_exp       = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value
+        preshift_mant   = '1;                           // largest normal value and RS bits set
+        of_before_round = 1'b1;
+      // Denormalize underflowing values
+      end else if (destination_exp_q < 1 &&
+                   destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
+        final_exp       = '0; // denormal result
+        denorm_shamt    = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting
+        uf_before_round = 1'b1;
+      // Limit the shift to retain sticky bits
+      end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
+        final_exp       = '0; // denormal result
+        denorm_shamt    = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky
+        uf_before_round = 1'b1;
+      end
+    end
+  end
+
+  localparam NUM_FP_STICKY  = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // removed mantissa, 1. and R
+  localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R
+
+  // Mantissa adjustment shift
+  assign destination_mant = preshift_mant >> denorm_shamt;
+  // Extract final mantissa and round bit, discard the normal bit (for FP)
+  assign {final_mant, fp_round_sticky_bits[1]} =
+      destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1];
+  assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1];
+  // Collapse sticky bits
+  assign fp_round_sticky_bits[0]  = (| {destination_mant[NUM_FP_STICKY-1:0]});
+  assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]});
+
+  // select RS bits for destination operation
+  assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic [WIDTH-1:0] pre_round_abs;  // absolute value of result before rnd
+  logic             of_after_round; // overflow
+  logic             uf_after_round; // underflow
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format
+  logic [NUM_FORMATS-1:0]            fmt_of_after_round;
+  logic [NUM_FORMATS-1:0]            fmt_uf_after_round;
+
+  logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format
+  logic [NUM_INT_FORMATS-1:0]            ifmt_of_after_round;
+
+  logic             rounded_sign;
+  logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding
+  logic             result_true_zero;
+
+  logic [WIDTH-1:0] rounded_int_res; // after possible inversion
+  logic             rounded_int_res_zero; // after rounding
+
+
+  // Pack exponent and mantissa into proper rounding form
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
+    // Set up some constants
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : assemble_result
+        fmt_pre_round_abs[fmt] = {final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend
+      end
+    end else begin : inactive_format
+      assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Sign-extend integer result
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_res_sign_ext
+    // Set up some constants
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format
+      always_comb begin : assemble_result
+        // sign-extend result: fill with final_int's bit INT_WIDTH-1, then overlay the low INT_WIDTH bits
+        ifmt_pre_round_abs[ifmt]                = '{default: final_int[INT_WIDTH-1]};
+        ifmt_pre_round_abs[ifmt][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0];
+      end
+    end else begin : inactive_format
+      assign ifmt_pre_round_abs[ifmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Select output with destination format and operation
+  assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2];
+
+  fpnew_rounding #(
+    .AbsWidth ( WIDTH )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs     ),
+    .sign_i                  ( input_sign_q      ), // source format
+    .round_sticky_bits_i     ( round_sticky_bits ),
+    .rnd_mode_i              ( rnd_mode_q        ),
+    .effective_subtraction_i ( 1'b0              ), // no operation happened
+    .abs_rounded_o           ( rounded_abs       ),
+    .sign_o                  ( rounded_sign      ),
+    .exact_zero_o            ( result_true_zero  )
+  );
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
+
+  // Detect overflows and inject sign
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : post_process
+        // detect of / uf from the exponent field of the rounded value
+        fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
+        fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
+
+        // Assemble regular result, NaN-box short ones. Int zeroes need to be detected
+        fmt_result[fmt]               = '1;
+        fmt_result[fmt][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q
+                                        ? '0
+                                        : {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+      end
+    end else begin : inactive_format
+      assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_result[fmt]         = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Negative integer result needs to be brought into two's complement
+  assign rounded_int_res      = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs;
+  assign rounded_int_res_zero = (rounded_int_res == '0);
+
+  // Detect integer overflows after rounding (only positives)
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_overflow
+    // Set up some constants
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format
+      always_comb begin : detect_overflow
+        ifmt_of_after_round[ifmt] = 1'b0;
+        // Int result can overflow if we're at the max exponent
+        if (!rounded_sign && input_exp_q == signed'(INT_WIDTH - 2 + op_mod_q2)) begin
+          // Check whether the rounded MSB differs from unrounded MSB
+          ifmt_of_after_round[ifmt] = ~rounded_int_res[INT_WIDTH-2+op_mod_q2];
+        end
+      end
+    end else begin : inactive_format
+      assign ifmt_of_after_round[ifmt] = fpnew_pkg::DONT_CARE;
+    end
+  end
+
+  // Classification after rounding select by destination format
+  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+  assign of_after_round = dst_is_int_q ? ifmt_of_after_round[int_fmt_q2] : fmt_of_after_round[dst_fmt_q2];
+
+  // -------------------------
+  // FP Special case handling
+  // -------------------------
+  logic [WIDTH-1:0]   fp_special_result;
+  fpnew_pkg::status_t fp_special_status;
+  logic               fp_result_is_special;
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
+
+  // Special result construction
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
+    localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : special_results
+        logic [FP_WIDTH-1:0] special_res;
+        special_res = info_q.is_zero
+                      ? input_sign_q << FP_WIDTH-1 // signed zero
+                      : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+
+        // Initialize special result with ones (NaN-box)
+        fmt_special_result[fmt]               = '1;
+        fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Detect special case from source format, I2F casts don't produce a special result
+  assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero |
+                                                 info_q.is_nan |
+                                                 ~info_q.is_boxed);
+
+  // Signalling input NaNs raise invalid flag, otherwise no flags set
+  assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0};
+
+  // Assemble result according to destination format
+  assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format
+
+  // --------------------------
+  // INT Special case handling
+  // --------------------------
+  logic [WIDTH-1:0]   int_special_result;
+  fpnew_pkg::status_t int_special_status;
+  logic               int_result_is_special;
+
+  logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result;
+
+  // Special result construction
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int
+    // Set up some constants
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format
+      always_comb begin : special_results
+        automatic logic [INT_WIDTH-1:0] special_res;
+
+        // Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1
+        special_res[INT_WIDTH-2:0] = '1;       // alone yields 2**(INT_WIDTH-1)-1
+        special_res[INT_WIDTH-1]   = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1
+
+        // Negative special case (except for nans) tie to -max or 0
+        if (input_sign_q && !info_q.is_nan)
+          special_res = ~special_res;
+
+        // Initialize special result with sign-extension
+        ifmt_special_result[ifmt]                = '{default: special_res[INT_WIDTH-1]};
+        ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
+  assign int_result_is_special = info_q.is_nan | info_q.is_inf |
+                                 of_before_round | of_after_round | ~info_q.is_boxed |
+                                 (input_sign_q & op_mod_q2 & ~rounded_int_res_zero);
+
+  // All integer special cases are invalid
+  assign int_special_status = '{NV: 1'b1, default: 1'b0};
+
+  // Assemble result according to destination format
+  assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format
+
+  // -----------------
+  // Result selection
+  // -----------------
+  fpnew_pkg::status_t int_regular_status, fp_regular_status;
+
+  logic [WIDTH-1:0]   fp_result, int_result;
+  fpnew_pkg::status_t fp_status, int_status;
+
+  assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts
+  assign fp_regular_status.DZ = 1'b0; // no divisions
+  assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF
+  assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX;
+  assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f
+            : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round));
+  assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0};
+
+  assign fp_result  = fp_result_is_special  ? fp_special_result  : fmt_result[dst_fmt_q2];
+  assign fp_status  = fp_result_is_special  ? fp_special_status  : fp_regular_status;
+  assign int_result = int_result_is_special ? int_special_result : rounded_int_res;
+  assign int_status = int_result_is_special ? int_special_status : int_regular_status;
+
+  // Final results for output pipeline
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+  logic               extension_bit;
+
+  // Select output depending on special case detection
+  assign result_d = dst_is_int_q ? int_result : fp_result;
+  assign status_d = dst_is_int_q ? int_status : fp_status;
+
+  // MSB of int result decides extension, otherwise NaN box
+  assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_ext_bit_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_mask_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0]  = result_d;
+  assign out_pipe_status_q[0]  = status_d;
+  assign out_pipe_ext_bit_q[0] = extension_bit;
+  assign out_pipe_tag_q[0]     = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_mask_q[0]    = mid_pipe_mask_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]     = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]   = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1],  out_pipe_result_q[i],  reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1],  out_pipe_status_q[i],  reg_ena, '0)
+    `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],     out_pipe_tag_q[i],     reg_ena, TagType'('0))
+    `FFL(out_pipe_mask_q[i+1],    out_pipe_mask_q[i],    reg_ena, '0)
+    `FFL(out_pipe_aux_q[i+1],     out_pipe_aux_q[i],     reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS];
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv
new file mode 100644
index 0000000..a322946
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv
@@ -0,0 +1,74 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_classifier #(
+  parameter fpnew_pkg::fp_format_e   FpFormat = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumOperands = 1,
+  // Do not change: operand width is derived from FpFormat
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat)
+) (
+  input  logic                [NumOperands-1:0][WIDTH-1:0] operands_i, // raw operand bits
+  input  logic                [NumOperands-1:0]            is_boxed_i, // per-operand boxing flag
+  output fpnew_pkg::fp_info_t [NumOperands-1:0]            info_o      // per-operand class flags
+);
+  // Purely combinational: derives the class flags of every operand from its bit pattern.
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+
+  // Type definition: sign, exponent and mantissa fields overlaid on one operand
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // Iterate through all operands, one classification block per operand
+  for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values
+    // The is_* locals below are computed per operand and then copied into info_o[op]
+    fp_t value;
+    logic is_boxed;
+    logic is_normal;
+    logic is_inf;
+    logic is_nan;
+    logic is_signalling;
+    logic is_quiet;
+    logic is_zero;
+    logic is_subnormal;
+    // Every class except NaN requires is_boxed; an un-boxed operand is reported as a quiet NaN
+    // ---------------
+    // Classify Input
+    // ---------------
+    always_comb begin : classify_input
+      value         = operands_i[op]; // view the operand bits as fp_t
+      is_boxed      = is_boxed_i[op];
+      is_normal     = is_boxed && (value.exponent != '0) && (value.exponent != '1);
+      is_zero       = is_boxed && (value.exponent == '0) && (value.mantissa == '0);
+      is_subnormal  = is_boxed && (value.exponent == '0) && !is_zero;
+      is_inf        = is_boxed && ((value.exponent == '1) && (value.mantissa == '0));
+      is_nan        = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0));
+      is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0); // MSB clear
+      is_quiet      = is_nan && !is_signalling; // includes un-boxed operands
+      // Assign output for current input
+      info_o[op].is_normal     = is_normal;
+      info_o[op].is_subnormal  = is_subnormal;
+      info_o[op].is_zero       = is_zero;
+      info_o[op].is_inf        = is_inf;
+      info_o[op].is_nan        = is_nan;
+      info_o[op].is_signalling = is_signalling;
+      info_o[op].is_quiet      = is_quiet;
+      info_o[op].is_boxed      = is_boxed;
+    end
+  end
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv
new file mode 100644
index 0000000..0f7ea5d
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv
@@ -0,0 +1,366 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_divsqrt_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig  = '1,
+  // FPU configuration
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::AFTER,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change
+  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                        clk_i,
+  input  logic                        rst_ni,
+  // Input signals
+  input  logic [1:0][WIDTH-1:0]       operands_i, // 2 operands
+  input  logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands (not referenced in this module)
+  input  fpnew_pkg::roundmode_e       rnd_mode_i,
+  input  fpnew_pkg::operation_e       op_i,
+  input  fpnew_pkg::fp_format_e       dst_fmt_i,
+  input  TagType                      tag_i,
+  input  logic                        mask_i,
+  input  AuxType                      aux_i,
+  // Input Handshake
+  input  logic                        in_valid_i,
+  output logic                        in_ready_o,
+  output logic                        divsqrt_done_o,    // unit finished now or earlier
+  input  logic                        simd_synch_done_i, // clears done flag, lets BUSY commit
+  output logic                        divsqrt_ready_o,   // FSM-generated ready
+  input  logic                        simd_synch_rdy_i,  // ready for the last input pipe stage
+  input  logic                        flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t          status_o,
+  output logic                        extension_bit_o,
+  output TagType                      tag_o,
+  output logic                        mask_o,
+  output AuxType                      aux_o,
+  // Output handshake
+  output logic                        out_valid_o,
+  input  logic                        out_ready_i,
+  // Indication of valid data in flight
+  output logic                        busy_o
+);
+  // One div_sqrt_top_mvp unit wrapped by input/output pipeline registers and a handshake FSM.
+  // ----------
+  // Constants
+  // ----------
+  // Pipelines
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [1:0][WIDTH-1:0] operands_q;
+  fpnew_pkg::roundmode_e rnd_mode_q;
+  fpnew_pkg::operation_e op_q;
+  fpnew_pkg::fp_format_e dst_fmt_q;
+  logic                  in_valid_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0]       inp_pipe_operands_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                       inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                       inp_pipe_op_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_dst_fmt_q;
+  TagType                [0:NUM_INP_REGS]                       inp_pipe_tag_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_mask_q;
+  AuxType                [0:NUM_INP_REGS]                       inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_mask_q[0]     = mask_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign op_q       = inp_pipe_op_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+  assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic [1:0]       divsqrt_fmt;
+  logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit
+  logic             input_is_fp8;
+
+  // Translate fpnew formats into divsqrt formats
+  always_comb begin : translate_fmt
+    unique case (dst_fmt_q)
+      fpnew_pkg::FP32:    divsqrt_fmt = 2'b00;
+      fpnew_pkg::FP64:    divsqrt_fmt = 2'b01;
+      fpnew_pkg::FP16:    divsqrt_fmt = 2'b10;
+      fpnew_pkg::FP16ALT: divsqrt_fmt = 2'b11;
+      default:            divsqrt_fmt = 2'b10; // maps also FP8 to FP16
+    endcase
+
+    // Only if FP8 is enabled
+    input_is_fp8 = FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8);
+
+    // If FP8 is supported, map it to an FP16 value
+    divsqrt_operands[0] = input_is_fp8 ? operands_q[0] << 8 : operands_q[0];
+    divsqrt_operands[1] = input_is_fp8 ? operands_q[1] << 8 : operands_q[1];
+  end
+
+  // ------------
+  // Control FSM
+  // ------------
+
+  logic in_ready;               // input handshake with upstream
+  logic div_valid, sqrt_valid;  // input signalling with unit
+  logic unit_ready, unit_done, unit_done_q;  // status signals from unit instance
+  logic op_starting;            // high in the cycle a new operation starts
+  logic out_valid, out_ready;   // output handshake with downstream
+  logic unit_busy;              // valid data in flight
+  // FSM states
+  typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
+  fsm_state_e state_q, state_d;
+
+  // Ready synch with other lanes
+  // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
+  assign divsqrt_ready_o = in_ready;
+  // Upstream ready comes from sanitization FSM, and it is synched among all the lanes
+  assign inp_pipe_ready[NUM_INP_REGS] = simd_synch_rdy_i;
+
+  // Valid synch with other lanes
+  // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
+  // As soon as all the lanes are over, we can clear this FF and start with a new operation
+  `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done_i, 1'b0, clk_i, rst_ni)
+  // Tell the other units that this unit has finished now or in the past
+  assign divsqrt_done_o = unit_done_q | unit_done;
+
+  // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
+  assign div_valid   = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign sqrt_valid  = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign op_starting = div_valid | sqrt_valid;
+
+  // FSM to safely apply and receive data from DIVSQRT unit
+  always_comb begin : flag_fsm
+    // Default assignments
+    in_ready     = 1'b0;
+    out_valid    = 1'b0;
+    unit_busy    = 1'b0;
+    state_d      = state_q;
+
+    unique case (state_q)
+      // Waiting for work
+      IDLE: begin
+        in_ready = 1'b1; // we're ready
+        if (in_valid_q && unit_ready) begin // New work arrives
+          state_d = BUSY; // go into processing state
+        end
+      end
+      // Operation in progress
+      BUSY: begin
+        unit_busy = 1'b1; // data in flight
+        // If all the lanes are done with processing
+        if (simd_synch_done_i) begin
+          out_valid = 1'b1; // try to commit result downstream
+          // If downstream accepts our result
+          if (out_ready) begin
+            state_d = IDLE; // we anticipate going back to idling..
+            if (in_valid_q && unit_ready) begin // ..unless new work comes in
+              in_ready = 1'b1; // we acknowledge the instruction
+              state_d  = BUSY; // and stay busy with it
+            end
+          // Otherwise if downstream is not ready for the result
+          end else begin
+            state_d     = HOLD; // wait for the pipeline to take the data
+          end
+        end
+      end
+      // Waiting with valid result for downstream
+      HOLD: begin
+        unit_busy    = 1'b1; // data in flight
+        out_valid    = 1'b1; // try to commit result downstream
+        // If the result is accepted by downstream
+        if (out_ready) begin
+          state_d = IDLE; // go back to idle..
+          if (in_valid_q && unit_ready) begin // ..unless new work comes in
+            in_ready = 1'b1; // acknowledge the new transaction
+            state_d  = BUSY; // will be busy with the next instruction
+          end
+        end
+      end
+      // fall into idle state otherwise
+      default: state_d = IDLE;
+    endcase
+
+    // Flushing overrides the other actions
+    if (flush_i) begin
+      unit_busy = 1'b0; // data is invalidated
+      out_valid = 1'b0; // cancel any valid data
+      state_d   = IDLE; // go to default state
+    end
+  end
+
+  // FSM status register (asynch active low reset)
+  `FF(state_q, state_d, IDLE)
+
+  // Hold additional information while the operation is in progress
+  logic result_is_fp8_q;
+  TagType result_tag_q;
+  logic result_mask_q;
+  AuxType result_aux_q;
+
+  // Fill the registers every time a valid operation arrives (load FF, active low asynch rst)
+  `FFL(result_is_fp8_q, input_is_fp8,                 op_starting, '0)
+  `FFL(result_tag_q,    inp_pipe_tag_q[NUM_INP_REGS], op_starting, TagType'('0)) // typed reset
+  `FFL(result_mask_q,   inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0)
+  `FFL(result_aux_q,    inp_pipe_aux_q[NUM_INP_REGS], op_starting, AuxType'('0)) // typed reset
+
+  // -----------------
+  // DIVSQRT instance
+  // -----------------
+  logic [63:0]        unit_result;
+  logic [WIDTH-1:0]   adjusted_result, held_result_q;
+  fpnew_pkg::status_t unit_status, held_status_q;
+  logic               hold_en;
+
+  div_sqrt_top_mvp i_divsqrt_lei (
+   .Clk_CI           ( clk_i               ),
+   .Rst_RBI          ( rst_ni              ),
+   .Div_start_SI     ( div_valid           ),
+   .Sqrt_start_SI    ( sqrt_valid          ),
+   .Operand_a_DI     ( divsqrt_operands[0] ),
+   .Operand_b_DI     ( divsqrt_operands[1] ),
+   .RM_SI            ( rnd_mode_q          ),
+   .Precision_ctl_SI ( '0                  ),
+   .Format_sel_SI    ( divsqrt_fmt         ),
+   .Kill_SI          ( flush_i             ),
+   .Result_DO        ( unit_result         ),
+   .Fflags_SO        ( unit_status         ),
+   .Ready_SO         ( unit_ready          ),
+   .Done_SO          ( unit_done           )
+  );
+
+  // Adjust result width and fix FP8
+  assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
+
+  // Hold the result when one lane has finished execution, except when all the lanes finish together
+  // and the result can be accepted downstream
+  assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready);
+  // The Hold register (load, no reset)
+  `FFLNR(held_result_q, adjusted_result, hold_en, clk_i)
+  `FFLNR(held_status_q, unit_status,     hold_en, clk_i)
+
+  // --------------
+  // Output Select
+  // --------------
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+  // Prioritize hold register data
+  assign result_d = unit_done_q ? held_result_q : adjusted_result;
+  assign status_d = unit_done_q ? held_status_q : unit_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_mask_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = result_tag_q;
+  assign out_pipe_mask_q[0]   = result_mask_q;
+  assign out_pipe_aux_q[0]    = result_aux_q;
+  assign out_pipe_valid_q[0]  = out_valid;
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign out_ready = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_mask_q[i+1],   out_pipe_mask_q[i],   reg_ena, '0)
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q});
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv
new file mode 100644
index 0000000..c29e7b3
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv
@@ -0,0 +1,690 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_fma #(
+  parameter fpnew_pkg::fp_format_e   FpFormat    = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+  input logic                      clk_i,
+  input logic                      rst_ni,
+  // Input signals
+  input logic [2:0][WIDTH-1:0]     operands_i, // 3 operands
+  input logic [2:0]                is_boxed_i, // 3 operands
+  input fpnew_pkg::roundmode_e     rnd_mode_i,
+  input fpnew_pkg::operation_e     op_i,
+  input logic                      op_mod_i,
+  input TagType                    tag_i,
+  input logic                      mask_i,
+  input AuxType                    aux_i,
+  // Input Handshake
+  input  logic                     in_valid_i,
+  output logic                     in_ready_o,
+  input  logic                     flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]         result_o,
+  output fpnew_pkg::status_t       status_o,
+  output logic                     extension_bit_o,
+  output TagType                   tag_o,
+  output logic                     mask_o,
+  output AuxType                   aux_o,
+  // Output handshake
+  output logic                     out_valid_o,
+  input  logic                     out_ready_i,
+  // Indication of valid data in flight
+  output logic                     busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  localparam int unsigned BIAS     = fpnew_pkg::bias(FpFormat);
+  // Precision bits 'p' include the implicit bit
+  localparam int unsigned PRECISION_BITS = MAN_BITS + 1;
+  // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
+  localparam int unsigned LOWER_SUM_WIDTH  = 2 * PRECISION_BITS + 3;
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
+  // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
+  // datapath leakage. This is either given by the exponent bits or the width of the LZC result.
+  // In most reasonable FP formats the internal exponent will be wider than the LZC result.
+  localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH));
+  // Shift amount width: maximum internal mantissa size is 3p+4 bits
+  localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5);
+  // Pipelines
+  localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][2:0]            inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_op_mod_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_mask_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_mask_q[0]     = mask_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // one iteration per input register stage
+    // Load enable for the payload registers between stage i and stage i+1
+    logic reg_ena;
+    // Stage i is ready (may hand its item onward) when either:
+    // 1. stage i+1 is itself ready to pass its item on, or
+    // 2. stage i+1 only holds a bubble (valid bit low) -> it can be overwritten
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid bit: loaded when stage i is ready, synchronously cleared by flush_i (see FFLARNC in registers.svh)
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Payload is captured only if the pipeline is ready AND a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Payload registers, one FFL per signal; the last macro argument is presumably the reset value - confirm in registers.svh
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+
+  // -----------------
+  // Input processing
+  // -----------------
+  fpnew_pkg::fp_info_t [2:0] info_q;
+
+  // Classify input
+  fpnew_classifier #(
+    .FpFormat    ( FpFormat ),
+    .NumOperands ( 3        )
+    ) i_class_inputs (
+    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+    .info_o     ( info_q                            )
+  );
+
+  fp_t                 operand_a, operand_b, operand_c;
+  fpnew_pkg::fp_info_t info_a,    info_b,    info_c;
+
+  // Operation selection and operand adjustment
+  // | \c op_q  | \c op_mod_q | Operation Adjustment
+  // |:--------:|:-----------:|---------------------
+  // | FMADD    | \c 0        | FMADD: none
+  // | FMADD    | \c 1        | FMSUB: Invert sign of operand C
+  // | FNMSUB   | \c 0        | FNMSUB: Invert sign of operand A
+  // | FNMSUB   | \c 1        | FNMADD: Invert sign of operands A and C
+  // | ADD      | \c 0        | ADD: Set operand A to +1.0
+  // | ADD      | \c 1        | SUB: Set operand A to +1.0, invert sign of operand C
+  // | MUL      | \c 0        | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode
+  // | *others* | \c -        | *invalid*
+  // \note \c op_mod_q always inverts the sign of the addend.
+  always_comb begin : op_select
+    // Maps the requested operation onto a single a*b+c datapath by adjusting operands and their class info.
+    // Default assignments - packing-order-agnostic: operand/info index 0 is A, 1 is B, 2 is C
+    operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+    operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+    operand_c = inp_pipe_operands_q[NUM_INP_REGS][2];
+    info_a    = info_q[0];
+    info_b    = info_q[1];
+    info_c    = info_q[2];
+    // op_mod_q inverts the sign of operand C (the addend). This happens before the case below,
+    // so it has no effect for MUL, where operand_c is overwritten entirely.
+    operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::FMADD:  ; // operands are used as adjusted above, nothing more to do
+      fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // flipping A's sign negates the product
+      fpnew_pkg::ADD: begin // Replace multiplicand A by +1.0 so that a*b+c becomes b+c
+        operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0};
+        info_a    = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; // class info: normal, boxed value
+      end
+      fpnew_pkg::MUL: begin // Replace addend C by a zero: +0 if the rounding mode is RDN, -0 otherwise
+        if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN)
+          operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0};
+        else
+          operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
+        info_c    = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; // class info: zero, boxed value
+      end
+      default: begin // any other operation is invalid here: propagate don't cares on all operands and infos
+        operand_a  = '{default: fpnew_pkg::DONT_CARE};
+        operand_b  = '{default: fpnew_pkg::DONT_CARE};
+        operand_c  = '{default: fpnew_pkg::DONT_CARE};
+        info_a     = '{default: fpnew_pkg::DONT_CARE};
+        info_b     = '{default: fpnew_pkg::DONT_CARE};
+        info_c     = '{default: fpnew_pkg::DONT_CARE};
+      end
+    endcase
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+  logic effective_subtraction;
+  logic tentative_sign;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf,        info_c.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan,        info_c.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+  // Effective subtraction in FMA occurs when product and addend signs differ
+  assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+  // The tentative sign of the FMA shall be the sign of the product
+  assign tentative_sign = operand_a.sign ^ operand_b.sign;
+
+  // ----------------------
+  // Special case handling
+  // ----------------------
+  fp_t                special_result;
+  fpnew_pkg::status_t special_status;
+  logic               result_is_special;
+
+  always_comb begin : special_cases
+    // Defaults: no special case; the prepared result is sign 0, exponent all ones, only the mantissa MSB set
+    special_result    = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+    special_status    = '0;
+    result_is_special = 1'b0;
+
+    // Handle potentially mixed NaN & infinity input first => important for the case where infinity
+    // and zero are multiplied and added to a qNaN.
+    // RISC-V mandates raising the NV exception in these cases:
+    // (inf * 0) + c or (0 * inf) + c is INVALID, no matter c (even quiet NaNs)
+    if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+      result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN
+      special_status.NV = 1'b1; // invalid operation
+    // NaN inputs cause the canonical quiet NaN at the output, NV only if any input is signalling
+    end else if (any_operand_nan) begin
+      result_is_special = 1'b1;           // bypass FMA, output is the canonical qNaN
+      special_status.NV = signalling_nan; // raise the invalid operation flag if signalling
+    // Special cases involving infinity (no NaN present, product is not inf * 0)
+    end else if (any_operand_inf) begin
+      result_is_special = 1'b1; // bypass FMA
+      // Infinite product and infinite addend of opposite sign (inf - inf): invalid, result stays the default qNaN
+      if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+        special_status.NV = 1'b1; // invalid operation
+      // Handle cases where output will be inf because of inf product input
+      else if (info_a.is_inf || info_b.is_inf) begin
+        // Result is infinity with the sign of the product
+        special_result    = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0};
+      // Handle cases where only the addend is inf
+      end else if (info_c.is_inf) begin
+        // Result is infinity with sign of the addend (= operand_c)
+        special_result    = '{sign: operand_c.sign, exponent: '1, mantissa: '0};
+      end
+    end
+  end
+
+  // ---------------------------
+  // Initial exponent data path
+  // ---------------------------
+  logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+  logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+  logic signed [EXP_WIDTH-1:0] tentative_exponent;
+
+  // Zero-extend exponents into signed container - implicit width extension
+  assign exponent_a = signed'({1'b0, operand_a.exponent});
+  assign exponent_b = signed'({1'b0, operand_b.exponent});
+  assign exponent_c = signed'({1'b0, operand_c.exponent});
+
+  // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
+  // with Ex the encoded exponent and nx the implicit bit. Internal exponents stay biased.
+  assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
+  // Biased product exponent is the sum of encoded exponents minus the bias.
+  assign exponent_product = (info_a.is_zero || info_b.is_zero)
+                            ? 2 - signed'(BIAS) // in case the product is zero, set minimum exp.
+                            : signed'(exponent_a + info_a.is_subnormal
+                                      + exponent_b + info_b.is_subnormal
+                                      - signed'(BIAS));
+  // Exponent difference is the addend exponent minus the product exponent
+  assign exponent_difference = exponent_addend - exponent_product;
+  // The tentative exponent will be the larger of the product or addend exponent
+  assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+
+  // Shift amount for addend based on exponents (unsigned as only right shifts)
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+
+  always_comb begin : addend_shift_amount
+    // Product-anchored case (difference <= -2p-1): saturate the shift at 3p+4, addend only feeds the sticky bit
+    if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1))
+      addend_shamt = 3 * PRECISION_BITS + 4;
+    // Overlap case (-2p-1 < difference <= p+2): addend and product share bits, shift is p+3-difference
+    else if (exponent_difference <= signed'(PRECISION_BITS + 2))
+      addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
+    // Addend-anchored case (difference > p+2): addend is not shifted (product is only in the sticky bit)
+    else
+      addend_shamt = 0;
+  end
+
+  // ------------------
+  // Product data path
+  // ------------------
+  logic [PRECISION_BITS-1:0]   mantissa_a, mantissa_b, mantissa_c;
+  logic [2*PRECISION_BITS-1:0] product;             // the p*p product is 2p bits wide
+  logic [3*PRECISION_BITS+3:0] product_shifted;     // addends are 3p+4 bit wide (including G/R)
+
+  // Add implicit bits to mantissae
+  assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+  assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+  assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+
+  // Mantissa multiplier (a*b)
+  assign product = mantissa_a * mantissa_b;
+
+  // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+  // | 000...000 | product | RS |
+  //  <-  p+2  -> <-  2p -> < 2>
+  assign product_shifted = product << 2; // constant shift
+
+  // -----------------
+  // Addend data path
+  // -----------------
+  logic [3*PRECISION_BITS+3:0] addend_after_shift;  // upper 3p+4 bits are needed to go on
+  logic [PRECISION_BITS-1:0]   addend_sticky_bits;  // up to p bit of shifted addend are sticky
+  logic                        sticky_before_add;   // they are compressed into a single sticky bit
+  logic [3*PRECISION_BITS+3:0] addend_shifted;      // addends are 3p+4 bit wide (including G/R)
+  logic                        inject_carry_in;     // inject carry for subtractions if needed
+
+  // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits
+  // are shifted out and compressed into a sticky bit.
+  // BEFORE THE SHIFT:
+  // | mantissa_c | 000..000 |
+  //  <-    p   -> <- 3p+4 ->
+  // AFTER THE SHIFT:
+  // | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
+  //  <- addend_shamt -> <-    p   -> <- 2p+4-addend_shamt -> <-  up to p  ->
+  assign {addend_after_shift, addend_sticky_bits} =
+      (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+
+  assign sticky_before_add     = (| addend_sticky_bits);
+  // assign addend_after_shift[0] = sticky_before_add;
+
+  // In case of a subtraction, the addend is inverted
+  assign addend_shifted  = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+  assign inject_carry_in = effective_subtraction & ~sticky_before_add;
+
+  // ------
+  // Adder
+  // ------
+  logic [3*PRECISION_BITS+4:0] sum_raw;   // added one bit for the carry
+  logic                        sum_carry; // observe carry bit from sum for sign fixing
+  logic [3*PRECISION_BITS+3:0] sum;       // discard carry as sum won't overflow
+  logic                        final_sign;
+
+  //Mantissa adder (ab+c). In normal addition, it cannot overflow.
+  assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+  assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+
+  // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+  assign sum        = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+
+  // In case of a mispredicted subtraction result, do a sign flip
+  assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+                      ? 1'b1
+                      : (effective_subtraction ? 1'b0 : tentative_sign);
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Pipeline output signals as non-arrays
+  logic                          effective_subtraction_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_product_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_difference_q;
+  logic signed [EXP_WIDTH-1:0]   tentative_exponent_q;
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+  logic                          sticky_before_add_q;
+  logic [3*PRECISION_BITS+3:0]   sum_q;
+  logic                          final_sign_q;
+  fpnew_pkg::roundmode_e         rnd_mode_q;
+  logic                          result_is_special_q;
+  fp_t                           special_result_q;
+  fpnew_pkg::status_t            special_status_q;
+  // Internal pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_eff_sub_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_prod_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_diff_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_tent_exp_q;
+  logic                  [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_sticky_q;
+  logic                  [0:NUM_MID_REGS][3*PRECISION_BITS+3:0]   mid_pipe_sum_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_final_sign_q;
+  fpnew_pkg::roundmode_e [0:NUM_MID_REGS]                         mid_pipe_rnd_mode_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_res_is_spec_q;
+  fp_t                   [0:NUM_MID_REGS]                         mid_pipe_spec_res_q;
+  fpnew_pkg::status_t    [0:NUM_MID_REGS]                         mid_pipe_spec_stat_q;
+  TagType                [0:NUM_MID_REGS]                         mid_pipe_tag_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_mask_q;
+  AuxType                [0:NUM_MID_REGS]                         mid_pipe_aux_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic
+  assign mid_pipe_eff_sub_q[0]     = effective_subtraction;
+  assign mid_pipe_exp_prod_q[0]    = exponent_product;
+  assign mid_pipe_exp_diff_q[0]    = exponent_difference;
+  assign mid_pipe_tent_exp_q[0]    = tentative_exponent;
+  assign mid_pipe_add_shamt_q[0]   = addend_shamt;
+  assign mid_pipe_sticky_q[0]      = sticky_before_add;
+  assign mid_pipe_sum_q[0]         = sum;
+  assign mid_pipe_final_sign_q[0]  = final_sign;
+  assign mid_pipe_rnd_mode_q[0]    = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_res_is_spec_q[0] = result_is_special;
+  assign mid_pipe_spec_res_q[0]    = special_result;
+  assign mid_pipe_spec_stat_q[0]   = special_status;
+  assign mid_pipe_tag_q[0]         = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_mask_q[0]        = inp_pipe_mask_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]         = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]       = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // one iteration per internal register stage
+    // Load enable for the payload registers between stage i and stage i+1
+    logic reg_ena;
+    // Stage i is ready (may hand its item onward) when either:
+    // 1. stage i+1 is itself ready to pass its item on, or
+    // 2. stage i+1 only holds a bubble (valid bit low) -> it can be overwritten
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid bit: loaded when stage i is ready, synchronously cleared by flush_i (see FFLARNC in registers.svh)
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Payload is captured only if the pipeline is ready AND a valid data item is present
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Payload registers, one FFL per signal; the last macro argument is presumably the reset value - confirm in registers.svh
+    `FFL(mid_pipe_eff_sub_q[i+1],     mid_pipe_eff_sub_q[i],     reg_ena, '0)
+    `FFL(mid_pipe_exp_prod_q[i+1],    mid_pipe_exp_prod_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_exp_diff_q[i+1],    mid_pipe_exp_diff_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_tent_exp_q[i+1],    mid_pipe_tent_exp_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_add_shamt_q[i+1],   mid_pipe_add_shamt_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_sticky_q[i+1],      mid_pipe_sticky_q[i],      reg_ena, '0)
+    `FFL(mid_pipe_sum_q[i+1],         mid_pipe_sum_q[i],         reg_ena, '0)
+    `FFL(mid_pipe_final_sign_q[i+1],  mid_pipe_final_sign_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1],    mid_pipe_rnd_mode_q[i],    reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+    `FFL(mid_pipe_spec_res_q[i+1],    mid_pipe_spec_res_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_spec_stat_q[i+1],   mid_pipe_spec_stat_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_tag_q[i+1],         mid_pipe_tag_q[i],         reg_ena, TagType'('0))
+    `FFL(mid_pipe_mask_q[i+1],        mid_pipe_mask_q[i],        reg_ena, '0)
+    `FFL(mid_pipe_aux_q[i+1],         mid_pipe_aux_q[i],         reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+  assign exponent_product_q      = mid_pipe_exp_prod_q[NUM_MID_REGS];
+  assign exponent_difference_q   = mid_pipe_exp_diff_q[NUM_MID_REGS];
+  assign tentative_exponent_q    = mid_pipe_tent_exp_q[NUM_MID_REGS];
+  assign addend_shamt_q          = mid_pipe_add_shamt_q[NUM_MID_REGS];
+  assign sticky_before_add_q     = mid_pipe_sticky_q[NUM_MID_REGS];
+  assign sum_q                   = mid_pipe_sum_q[NUM_MID_REGS];
+  assign final_sign_q            = mid_pipe_final_sign_q[NUM_MID_REGS];
+  assign rnd_mode_q              = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign result_is_special_q     = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+  assign special_result_q        = mid_pipe_spec_res_q[NUM_MID_REGS];
+  assign special_status_q        = mid_pipe_spec_stat_q[NUM_MID_REGS];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic        [LOWER_SUM_WIDTH-1:0]  sum_lower;              // lower 2p+3 bits of sum are searched
+  logic        [LZC_RESULT_WIDTH-1:0] leading_zero_count;     // the number of leading zeroes
+  logic signed [LZC_RESULT_WIDTH:0]   leading_zero_count_sgn; // signed leading-zero count
+  logic                               lzc_zeroes;             // in case only zeroes found
+
+  logic        [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+  logic signed [EXP_WIDTH-1:0]          normalized_exponent;
+
+  logic [3*PRECISION_BITS+4:0] sum_shifted;       // result after first normalization shift
+  logic [PRECISION_BITS:0]     final_mantissa;    // final mantissa before rounding with round bit
+  logic [2*PRECISION_BITS+2:0] sum_sticky_bits;   // remaining 2p+3 sticky bits after normalization
+  logic                        sticky_after_norm; // sticky bit after normalization
+
+  logic signed [EXP_WIDTH-1:0] final_exponent;
+
+  assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+
+  // Leading zero counter for cancellations
+  lzc #(
+    .WIDTH ( LOWER_SUM_WIDTH ),
+    .MODE  ( 1               ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( sum_lower          ),
+    .cnt_o   ( leading_zero_count ),
+    .empty_o ( lzc_zeroes         )
+  );
+
+  assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
+
+  // Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
+  always_comb begin : norm_shift_amount
+    // Product-anchored case (difference <= 0) or possible cancellation (subtraction, difference <= 2): use the LZC
+    if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+      // Normal result: the LZC-corrected exponent is non-negative and the searched sum bits are not all zero
+      if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+        // Shift by p+2 to undo the initial product placement, plus the counted leading zeroes
+        norm_shamt          = PRECISION_BITS + 2 + leading_zero_count;
+        normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
+      // Otherwise: subnormal result (corrected exponent negative) or no set bit found in sum_lower
+      end else begin
+        // Cap the shift distance at p+2+exponent_product_q to align the mantissa with the minimum exponent
+        norm_shamt          = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q);
+        normalized_exponent = 0; // subnormals encoded as 0
+      end
+    // Addend-anchored case: no leading-zero search needed
+    end else begin
+      norm_shamt          = addend_shamt_q; // Undo the initial addend alignment shift
+      normalized_exponent = tentative_exponent_q;
+    end
+  end
+
+  // Do the large normalization shift
+  assign sum_shifted       = sum_q << norm_shamt;
+
+  // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+  // or right of the (non-carry) MSB of the sum.
+  always_comb begin : small_norm
+    // Default assignment: split into mantissa (with round bit) and sticky bits, discarding the carry bit
+    {final_mantissa, sum_sticky_bits} = sum_shifted;
+    final_exponent                    = normalized_exponent;
+
+    // Carry bit set: the normalized sum has overflown, align right by one and increment the exponent
+    if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
+      {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+      final_exponent                    = normalized_exponent + 1;
+    // MSB below the carry set: the normalized sum is normal, keep the defaults
+    end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
+      // do nothing
+    // Leading one is still further right: align left by one, but only while the exponent stays above 1
+    end else if (normalized_exponent > 1) begin
+      {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+      final_exponent                    = normalized_exponent - 1;
+    // Otherwise the result is denormal: keep the default mantissa split and force the exponent to zero
+    end else begin
+      final_exponent = '0;
+    end
+  end
+
+  // Update the sticky bit with the shifted-out bits
+  assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic                         pre_round_sign;
+  logic [EXP_BITS-1:0]          pre_round_exponent;
+  logic [MAN_BITS-1:0]          pre_round_mantissa;
+  logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+  logic [1:0]                   round_sticky_bits;
+
+  logic of_before_round, of_after_round; // overflow
+  logic uf_before_round, uf_after_round; // underflow
+  logic result_zero;
+
+  logic                         rounded_sign;
+  logic [EXP_BITS+MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+
+  // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+  assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // infinity exponent is all ones
+  assign uf_before_round = final_exponent == 0;               // exponent for subnormals capped to 0
+
+  // Assemble result before rounding. In case of overflow, the largest normal value is set.
+  assign pre_round_sign     = final_sign_q;
+  assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]);
+  assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit
+  assign pre_round_abs      = {pre_round_exponent, pre_round_mantissa};
+
+  // In case of overflow, the round and sticky bits are set for proper rounding
+  assign round_sticky_bits  = (of_before_round) ? 2'b11 : {final_mantissa[0], sticky_after_norm};
+
+  // Perform the rounding
+  fpnew_rounding #(
+    .AbsWidth ( EXP_BITS + MAN_BITS )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs           ),
+    .sign_i                  ( pre_round_sign          ),
+    .round_sticky_bits_i     ( round_sticky_bits       ),
+    .rnd_mode_i              ( rnd_mode_q              ),
+    .effective_subtraction_i ( effective_subtraction_q ),
+    .abs_rounded_o           ( rounded_abs             ),
+    .sign_o                  ( rounded_sign            ),
+    .exact_zero_o            ( result_zero             )
+  );
+
+  // Classification after rounding
+  assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0
+  assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones
+
+  // -----------------
+  // Result selection
+  // -----------------
+  logic [WIDTH-1:0]     regular_result;
+  fpnew_pkg::status_t   regular_status;
+
+  // Assemble regular result
+  assign regular_result    = {rounded_sign, rounded_abs};
+  assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+  assign regular_status.DZ = 1'b0; // no divisions
+  assign regular_status.OF = of_before_round | of_after_round;   // rounding can introduce overflow
+  assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+  assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+
+  // Final results for output pipeline
+  fp_t                result_d;
+  fpnew_pkg::status_t status_d;
+
+  // Select output depending on special case detection
+  assign result_d = result_is_special_q ? special_result_q : regular_result;
+  assign status_d = result_is_special_q ? special_status_q : regular_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  fp_t                [0:NUM_OUT_REGS] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS] out_pipe_tag_q;
+  logic               [0:NUM_OUT_REGS] out_pipe_mask_q;
+  AuxType             [0:NUM_OUT_REGS] out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS] out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_mask_q[0]   = mid_pipe_mask_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]    = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]  = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // one iteration per output register stage
+    // Load enable for the payload registers of this stage (result, status, tag, mask, aux)
+    logic reg_ena;
+    // Ready of stage i is purely combinational - the stage may advance:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can overwrite it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid bit: loaded whenever the stage is ready, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable payload registers only if the pipeline is ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Payload registers of stage i+1: enable-registers (macros from common_cells/registers.svh)
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_mask_q[i+1],   out_pipe_mask_q[i],   reg_ena, '0)
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv
new file mode 100644
index 0000000..cceeae3
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv
@@ -0,0 +1,839 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_fma_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig = '1,
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change
+  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                        clk_i,
+  input  logic                        rst_ni,
+  // Input signals
+  input  logic [2:0][WIDTH-1:0]       operands_i, // 3 operands
+  input  logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands
+  input  fpnew_pkg::roundmode_e       rnd_mode_i,
+  input  fpnew_pkg::operation_e       op_i,
+  input  logic                        op_mod_i,
+  input  fpnew_pkg::fp_format_e       src_fmt_i, // format of the multiplicands
+  input  fpnew_pkg::fp_format_e       dst_fmt_i, // format of the addend and result
+  input  TagType                      tag_i,
+  input  logic                        mask_i,
+  input  AuxType                      aux_i,
+  // Input Handshake
+  input  logic                        in_valid_i,
+  output logic                        in_ready_o,
+  input  logic                        flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t          status_o,
+  output logic                        extension_bit_o,
+  output TagType                      tag_o,
+  output logic                        mask_o,
+  output AuxType                      aux_o,
+  // Output handshake
+  output logic                        out_valid_o,
+  input  logic                        out_ready_i,
+  // Indication of valid data in flight
+  output logic                        busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  // The super-format that can hold all formats
+  localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
+
+  localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
+  localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
+
+  // Precision bits 'p' include the implicit bit
+  localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1;
+  // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
+  localparam int unsigned LOWER_SUM_WIDTH  = 2 * PRECISION_BITS + 3;
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
+  // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
+  // datapath leakage. This is either given by the exponent bits or the width of the LZC result.
+  // In most reasonable FP formats the internal exponent will be wider than the LZC result.
+  localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH);
+  // Shift amount width: maximum internal mantissa size is 3p+4 bits
+  localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5);
+  // Pipelines
+  localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic                      sign;
+    logic [SUPER_EXP_BITS-1:0] exponent;
+    logic [SUPER_MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [2:0][WIDTH-1:0] operands_q;
+  fpnew_pkg::fp_format_e src_fmt_q;
+  fpnew_pkg::fp_format_e dst_fmt_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][2:0][WIDTH-1:0]       inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                       inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                       inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_op_mod_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_dst_fmt_q;
+  TagType                [0:NUM_INP_REGS]                       inp_pipe_tag_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_mask_q;
+  AuxType                [0:NUM_INP_REGS]                       inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_src_fmt_q[0]  = src_fmt_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_mask_q[0]     = mask_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // one iteration per input register stage
+    // Load enable for the payload registers of this stage (operands, formats, op, tag, ...)
+    logic reg_ena;
+    // Ready of stage i is purely combinational - the stage may advance:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can overwrite it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid bit: loaded whenever the stage is ready, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable payload registers only if the pipeline is ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Payload registers of stage i+1: enable-registers (macros from common_cells/registers.svh)
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_src_fmt_q[i+1],  inp_pipe_src_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign src_fmt_q  = inp_pipe_src_fmt_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic        [NUM_FORMATS-1:0][2:0]                     fmt_sign;
+  logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent;
+  logic        [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa;
+
+  fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q;
+
+  // FP Input initialization
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+    // Per-format geometry: total width, exponent bits and mantissa bits of format 'fmt'
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      logic [2:0][FP_WIDTH-1:0] trimmed_ops; // operands cut down to the low FP_WIDTH bits
+
+      // Classify the three trimmed operands of this format, result lands in info_q[fmt]
+      fpnew_classifier #(
+        .FpFormat    ( fpnew_pkg::fp_format_e'(fmt) ),
+        .NumOperands ( 3                            )
+      ) i_fpnew_classifier (
+        .operands_i ( trimmed_ops                            ),
+        .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ),
+        .info_o     ( info_q[fmt]                            )
+      );
+      for (genvar op = 0; op < 3; op++) begin : gen_operands // split each operand into its fields
+        assign trimmed_ops[op]       = operands_q[op][FP_WIDTH-1:0];
+        assign fmt_sign[fmt][op]     = operands_q[op][FP_WIDTH-1];
+        assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]});
+        assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} <<
+                                       (SUPER_MAN_BITS - MAN_BITS); // left-align in the super-format mantissa
+      end
+    end else begin : inactive_format
+      assign info_q[fmt]                 = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_sign[fmt]               = fpnew_pkg::DONT_CARE;             // format disabled
+      assign fmt_exponent[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_mantissa[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  fp_t                 operand_a, operand_b, operand_c;
+  fpnew_pkg::fp_info_t info_a,    info_b,    info_c;
+
+  // Operation selection and operand adjustment
+  // | \c op_q  | \c op_mod_q | Operation Adjustment
+  // |:--------:|:-----------:|---------------------
+  // | FMADD    | \c 0        | FMADD: none
+  // | FMADD    | \c 1        | FMSUB: Invert sign of operand C
+  // | FNMSUB   | \c 0        | FNMSUB: Invert sign of operand A
+  // | FNMSUB   | \c 1        | FNMADD: Invert sign of operands A and C
+  // | ADD      | \c 0        | ADD: Set operand A to +1.0
+  // | ADD      | \c 1        | SUB: Set operand A to +1.0, invert sign of operand C
+  // | MUL      | \c 0        | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode
+  // | *others* | \c -        | *invalid*
+  // \note \c op_mod_q always inverts the sign of the addend.
+  always_comb begin : op_select
+
+    // Defaults: multiplicands A/B are taken in the source format, addend C in the destination format
+    operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]};
+    operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]};
+    operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]};
+    info_a    = info_q[src_fmt_q][0];
+    info_b    = info_q[src_fmt_q][1];
+    info_c    = info_q[dst_fmt_q][2];
+
+    // op_mod_q inverts sign of operand C (applied before the per-operation adjustments below)
+    operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::FMADD:  ; // do nothing
+      fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
+      fpnew_pkg::ADD: begin // Set multiplicand A to +1.0: exponent = bias of src_fmt_q, zero mantissa
+        operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0};
+        info_a    = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
+      end
+      fpnew_pkg::MUL: begin // Addend becomes +0 for RDN and -0 otherwise; replaces the op_mod_q flip above
+        if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN)
+          operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0};
+        else
+          operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
+        info_c    = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
+      end
+      default: begin // any other operation: propagate don't cares on all operands and infos
+        operand_a  = '{default: fpnew_pkg::DONT_CARE};
+        operand_b  = '{default: fpnew_pkg::DONT_CARE};
+        operand_c  = '{default: fpnew_pkg::DONT_CARE};
+        info_a     = '{default: fpnew_pkg::DONT_CARE};
+        info_b     = '{default: fpnew_pkg::DONT_CARE};
+        info_c     = '{default: fpnew_pkg::DONT_CARE};
+      end
+    endcase
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+  logic effective_subtraction;
+  logic tentative_sign;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf,        info_c.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan,        info_c.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+  // Effective subtraction in FMA occurs when product and addend signs differ
+  assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+  // The tentative sign of the FMA shall be the sign of the product
+  assign tentative_sign = operand_a.sign ^ operand_b.sign;
+
+  // ----------------------
+  // Special case handling
+  // ----------------------
+  logic [WIDTH-1:0]   special_result;
+  fpnew_pkg::status_t special_status;
+  logic               result_is_special;
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0]    fmt_special_result;
+  fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status;
+  logic [NUM_FORMATS-1:0]               fmt_result_is_special;
+
+
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+    // Per-format geometry and the bit patterns used to assemble qNaN and infinity
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
+    localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
+    localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0;
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : special_results
+        logic [FP_WIDTH-1:0] special_res;
+
+        // Defaults: positive qNaN as result, no flags, regular (non-special) path selected
+        special_res                = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+        fmt_special_status[fmt]    = '0;
+        fmt_result_is_special[fmt] = 1'b0;
+
+        // Handle potentially mixed NaN & infinity input => important for the case where infinity and
+        // zero are multiplied and added to a qNaN.
+        // RISC-V mandates raising the NV exception in these cases:
+        // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
+        if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
+          fmt_special_status[fmt].NV = 1'b1; // invalid operation
+        // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
+        end else if (any_operand_nan) begin
+          fmt_result_is_special[fmt] = 1'b1;           // bypass FMA, output is the canonical qNaN
+          fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling
+        // Special cases involving infinity
+        end else if (any_operand_inf) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA
+          // Effective subtraction of infinities (inf product and inf addend) is invalid, qNaN is kept
+          if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+            fmt_special_status[fmt].NV = 1'b1; // invalid operation
+          // Handle cases where output will be inf because of inf product input
+          else if (info_a.is_inf || info_b.is_inf) begin
+            // Result is infinity with the sign of the product
+            special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+          // Handle cases where the addend is inf
+          end else if (info_c.is_inf) begin
+            // Result is infinity with sign of the addend (= operand_c)
+            special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+          end
+        end
+        // NaN-box: fill the WIDTH-wide slot with ones, then overlay the FP_WIDTH-wide result
+        fmt_special_result[fmt]               = '1;
+        fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_special_status[fmt] = '0;
+      assign fmt_result_is_special[fmt] = 1'b0;
+    end
+  end
+
+  // Detect special case: pick the flag of the destination format (every active format computes the same flag)
+  assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same
+  // Special-case status: only NV can be set (inf * 0, signalling NaN input, or inf - inf), otherwise no flags set
+  assign special_status = fmt_special_status[dst_fmt_q];
+  // Assemble result according to destination format
+  assign special_result = fmt_special_result[dst_fmt_q]; // destination format
+
+  // ---------------------------
+  // Initial exponent data path
+  // ---------------------------
+  logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+  logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+  logic signed [EXP_WIDTH-1:0] tentative_exponent;
+
+  // Zero-extend exponents into signed container - implicit width extension
+  assign exponent_a = signed'({1'b0, operand_a.exponent});
+  assign exponent_b = signed'({1'b0, operand_b.exponent});
+  assign exponent_c = signed'({1'b0, operand_c.exponent});
+
+  // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
+  // with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt.
+  assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
+  // Biased product exponent is the sum of encoded exponents minus the bias.
+  assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp.
+                            ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q))
+                            : signed'(exponent_a + info_a.is_subnormal
+                                      + exponent_b + info_b.is_subnormal
+                                      - 2*signed'(fpnew_pkg::bias(src_fmt_q))
+                                      + signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt
+  // Exponent difference is the addend exponent minus the product exponent
+  assign exponent_difference = exponent_addend - exponent_product;
+  // The tentative exponent will be the larger of the product or addend exponent
+  assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+
+  // Shift amount for addend based on exponents (unsigned as only right shifts)
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+
+  always_comb begin : addend_shift_amount // right-shift amount for the addend, from exponent_difference
+    // Product-anchored case (difference <= -2p-1): saturated shift of 3p+4, addend is only in the sticky bit
+    if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1))
+      addend_shamt = 3 * PRECISION_BITS + 4;
+    // Addend and product will have mutual bits to add (difference <= p+2): shift by p+3-difference
+    else if (exponent_difference <= signed'(PRECISION_BITS + 2))
+      addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
+    // Addend-anchored case (difference > p+2): no shift, product is only in the sticky bit
+    else
+      addend_shamt = 0;
+  end
+
+  // ------------------
+  // Product data path
+  // ------------------
+  logic [PRECISION_BITS-1:0]   mantissa_a, mantissa_b, mantissa_c;
+  logic [2*PRECISION_BITS-1:0] product;             // the p*p product is 2p bits wide
+  logic [3*PRECISION_BITS+3:0] product_shifted;     // addends are 3p+4 bit wide (including G/R)
+
+  // Add implicit bits to mantissae
+  assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+  assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+  assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+
+  // Mantissa multiplier (a*b)
+  assign product = mantissa_a * mantissa_b;
+
+  // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+  // | 000...000 | product | RS |
+  //  <-  p+2  -> <-  2p -> < 2>
+  assign product_shifted = product << 2; // constant shift
+
+  // -----------------
+  // Addend data path
+  // -----------------
+  logic [3*PRECISION_BITS+3:0] addend_after_shift;  // upper 3p+4 bits are needed to go on
+  logic [PRECISION_BITS-1:0]   addend_sticky_bits;  // up to p bit of shifted addend are sticky
+  logic                        sticky_before_add;   // they are compressed into a single sticky bit
+  logic [3*PRECISION_BITS+3:0] addend_shifted;      // addends are 3p+4 bit wide (including G/R)
+  logic                        inject_carry_in;     // inject carry for subtractions if needed
+
+  // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are
+  // shifted out and compressed into a sticky bit.
+  // BEFORE THE SHIFT:
+  // | mantissa_c | 000..000 |
+  //  <-    p   -> <- 3p+4 ->
+  // AFTER THE SHIFT:
+  // | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
+  //  <- addend_shamt -> <-    p   -> <- 2p+4-addend_shamt -> <-  up to p  ->
+  assign {addend_after_shift, addend_sticky_bits} =
+      (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+
+  assign sticky_before_add     = (| addend_sticky_bits);
+
+  // In case of a subtraction, the addend is inverted
+  assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+  assign inject_carry_in = effective_subtraction & ~sticky_before_add;
+
+  // ------
+  // Adder
+  // ------
+  logic [3*PRECISION_BITS+4:0] sum_raw;   // added one bit for the carry
+  logic                        sum_carry; // observe carry bit from sum for sign fixing
+  logic [3*PRECISION_BITS+3:0] sum;       // discard carry as sum won't overflow
+  logic                        final_sign;
+
+  //Mantissa adder (ab+c). In normal addition, it cannot overflow.
+  assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+  assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+
+  // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+  assign sum        = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+
+  // In case of a mispredicted subtraction result, do a sign flip
+  assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+                      ? 1'b1
+                      : (effective_subtraction ? 1'b0 : tentative_sign);
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Pipeline output signals as non-arrays
+  logic                          effective_subtraction_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_product_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_difference_q;
+  logic signed [EXP_WIDTH-1:0]   tentative_exponent_q;
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+  logic                          sticky_before_add_q;
+  logic [3*PRECISION_BITS+3:0]   sum_q;
+  logic                          final_sign_q;
+  fpnew_pkg::fp_format_e         dst_fmt_q2;
+  fpnew_pkg::roundmode_e         rnd_mode_q;
+  logic                          result_is_special_q;
+  fp_t                           special_result_q;
+  fpnew_pkg::status_t            special_status_q;
+  // Internal pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_eff_sub_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_prod_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_diff_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_tent_exp_q;
+  logic                  [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_sticky_q;
+  logic                  [0:NUM_MID_REGS][3*PRECISION_BITS+3:0]   mid_pipe_sum_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_final_sign_q;
+  fpnew_pkg::roundmode_e [0:NUM_MID_REGS]                         mid_pipe_rnd_mode_q;
+  fpnew_pkg::fp_format_e [0:NUM_MID_REGS]                         mid_pipe_dst_fmt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_res_is_spec_q;
+  fp_t                   [0:NUM_MID_REGS]                         mid_pipe_spec_res_q;
+  fpnew_pkg::status_t    [0:NUM_MID_REGS]                         mid_pipe_spec_stat_q;
+  TagType                [0:NUM_MID_REGS]                         mid_pipe_tag_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_mask_q;
+  AuxType                [0:NUM_MID_REGS]                         mid_pipe_aux_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic
+  assign mid_pipe_eff_sub_q[0]     = effective_subtraction;
+  assign mid_pipe_exp_prod_q[0]    = exponent_product;
+  assign mid_pipe_exp_diff_q[0]    = exponent_difference;
+  assign mid_pipe_tent_exp_q[0]    = tentative_exponent;
+  assign mid_pipe_add_shamt_q[0]   = addend_shamt;
+  assign mid_pipe_sticky_q[0]      = sticky_before_add;
+  assign mid_pipe_sum_q[0]         = sum;
+  assign mid_pipe_final_sign_q[0]  = final_sign;
+  assign mid_pipe_rnd_mode_q[0]    = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_dst_fmt_q[0]     = dst_fmt_q;
+  assign mid_pipe_res_is_spec_q[0] = result_is_special;
+  assign mid_pipe_spec_res_q[0]    = special_result;
+  assign mid_pipe_spec_stat_q[0]   = special_status;
+  assign mid_pipe_tag_q[0]         = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_mask_q[0]        = inp_pipe_mask_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]         = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]       = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+    // Internal register enable for this stage (stage i feeds the registers at index i+1)
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers (gated by reg_ena)
+    `FFL(mid_pipe_eff_sub_q[i+1],     mid_pipe_eff_sub_q[i],     reg_ena, '0)
+    `FFL(mid_pipe_exp_prod_q[i+1],    mid_pipe_exp_prod_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_exp_diff_q[i+1],    mid_pipe_exp_diff_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_tent_exp_q[i+1],    mid_pipe_tent_exp_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_add_shamt_q[i+1],   mid_pipe_add_shamt_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_sticky_q[i+1],      mid_pipe_sticky_q[i],      reg_ena, '0)
+    `FFL(mid_pipe_sum_q[i+1],         mid_pipe_sum_q[i],         reg_ena, '0)
+    `FFL(mid_pipe_final_sign_q[i+1],  mid_pipe_final_sign_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1],    mid_pipe_rnd_mode_q[i],    reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_dst_fmt_q[i+1],     mid_pipe_dst_fmt_q[i],     reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+    `FFL(mid_pipe_spec_res_q[i+1],    mid_pipe_spec_res_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_spec_stat_q[i+1],   mid_pipe_spec_stat_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_tag_q[i+1],         mid_pipe_tag_q[i],         reg_ena, TagType'('0))
+    `FFL(mid_pipe_mask_q[i+1],        mid_pipe_mask_q[i],        reg_ena, '0)
+    `FFL(mid_pipe_aux_q[i+1],         mid_pipe_aux_q[i],         reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+  assign exponent_product_q      = mid_pipe_exp_prod_q[NUM_MID_REGS];
+  assign exponent_difference_q   = mid_pipe_exp_diff_q[NUM_MID_REGS];
+  assign tentative_exponent_q    = mid_pipe_tent_exp_q[NUM_MID_REGS];
+  assign addend_shamt_q          = mid_pipe_add_shamt_q[NUM_MID_REGS];
+  assign sticky_before_add_q     = mid_pipe_sticky_q[NUM_MID_REGS];
+  assign sum_q                   = mid_pipe_sum_q[NUM_MID_REGS];
+  assign final_sign_q            = mid_pipe_final_sign_q[NUM_MID_REGS];
+  assign rnd_mode_q              = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign dst_fmt_q2              = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+  assign result_is_special_q     = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+  assign special_result_q        = mid_pipe_spec_res_q[NUM_MID_REGS];
+  assign special_status_q        = mid_pipe_spec_stat_q[NUM_MID_REGS];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic        [LOWER_SUM_WIDTH-1:0]  sum_lower;              // lower 2p+3 bits of sum are searched
+  logic        [LZC_RESULT_WIDTH-1:0] leading_zero_count;     // the number of leading zeroes
+  logic signed [LZC_RESULT_WIDTH:0]   leading_zero_count_sgn; // signed leading-zero count
+  logic                               lzc_zeroes;             // in case only zeroes found
+
+  logic        [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+  logic signed [EXP_WIDTH-1:0]          normalized_exponent;
+
+  logic [3*PRECISION_BITS+4:0] sum_shifted;       // result after first normalization shift
+  logic [PRECISION_BITS:0]     final_mantissa;    // final mantissa before rounding with round bit
+  logic [2*PRECISION_BITS+2:0] sum_sticky_bits;   // remaining 2p+3 sticky bits after normalization
+  logic                        sticky_after_norm; // sticky bit after normalization
+
+  logic signed [EXP_WIDTH-1:0] final_exponent;
+
+  assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+
+  // Leading zero counter for cancellations
+  lzc #(
+    .WIDTH ( LOWER_SUM_WIDTH ),
+    .MODE  ( 1               ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( sum_lower          ),
+    .cnt_o   ( leading_zero_count ),
+    .empty_o ( lzc_zeroes         )
+  );
+
+  assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
+
+  // Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
+  always_comb begin : norm_shift_amount
+    // Product-anchored case (exponent difference <= 0) or cancellation (effective subtraction with difference <= 2): both need the LZC
+    if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+      // Normal result (exponent after removing the counted zeroes is >= 0 and sum_lower is not all zeroes)
+      if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+        // Undo initial product shift, remove the counted zeroes
+        norm_shamt          = PRECISION_BITS + 2 + leading_zero_count;
+        normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
+      // Subnormal result (also taken when the LZC reports only zeroes)
+      end else begin
+        // Cap the shift distance to align mantissa with minimum exponent
+        norm_shamt          = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q));
+        normalized_exponent = 0; // subnormals encoded as 0
+      end
+    // Addend-anchored case
+    end else begin
+      norm_shamt          = addend_shamt_q; // Undo the initial shift
+      normalized_exponent = tentative_exponent_q;
+    end
+  end
+
+  // Do the large normalization shift
+  assign sum_shifted       = sum_q << norm_shamt;
+
+  // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+  // or right of the (non-carry) MSB of the sum.
+  always_comb begin : small_norm
+    // Default assignment, discarding carry bit (sum_shifted is one bit wider than the target concatenation)
+    {final_mantissa, sum_sticky_bits} = sum_shifted;
+    final_exponent                    = normalized_exponent;
+
+    // The normalized sum has overflowed, align right and fix exponent
+    if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
+      {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+      final_exponent                    = normalized_exponent + 1;
+    // The normalized sum is normal, nothing to do
+    end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
+      // do nothing
+    // The normalized sum is still denormal, align left - unless the result is already subnormal
+    end else if (normalized_exponent > 1) begin
+      {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+      final_exponent                    = normalized_exponent - 1;
+    // Otherwise the result is denormal: keep the default mantissa and encode the exponent as 0
+    end else begin
+      final_exponent = '0;
+    end
+  end
+
+  // Update the sticky bit with the shifted-out bits
+  assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic                                     pre_round_sign;
+  logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+  logic [1:0]                               round_sticky_bits;
+
+  logic of_before_round, of_after_round; // overflow
+  logic uf_before_round, uf_after_round; // underflow
+
+  logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; // per format
+  logic [NUM_FORMATS-1:0][1:0]                               fmt_round_sticky_bits;
+
+  logic [NUM_FORMATS-1:0]                                    fmt_of_after_round;
+  logic [NUM_FORMATS-1:0]                                    fmt_uf_after_round;
+
+  logic                                     rounded_sign;
+  logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+  logic                                     result_zero;
+
+  // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+  assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones
+  assign uf_before_round = final_exponent == 0;               // exponent for subnormals capped to 0
+
+  // Pack exponent and mantissa into proper rounding form
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
+    // Exponent and mantissa widths of the format with index fmt, as returned by fpnew_pkg
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    logic [EXP_BITS-1:0] pre_round_exponent;
+    logic [MAN_BITS-1:0] pre_round_mantissa;
+
+    if (FpFmtConfig[fmt]) begin : active_format
+
+      assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0];
+      assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS];
+      // Assemble result before rounding. In case of overflow, the largest normal value is set.
+      assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend
+
+      // Round bit is after mantissa (1 in case of overflow for rounding)
+      assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] |
+                                             of_before_round;
+
+      // Remaining lower mantissa bits are ORed into the sticky bit (1 in case of overflow for rounding)
+      if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky
+        assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) |
+                                               sticky_after_norm | of_before_round;
+      end else begin : normal_sticky // MAN_BITS >= SUPER_MAN_BITS: no mantissa bits below the round bit
+        assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round;
+      end
+    end else begin : inactive_format // formats not enabled in FpFmtConfig are driven with don't-cares
+      assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Assemble result before rounding. In case of overflow, the largest normal value is set.
+  assign pre_round_sign     = final_sign_q;
+  assign pre_round_abs      = fmt_pre_round_abs[dst_fmt_q2];
+
+  // In case of overflow, the round and sticky bits are set for proper rounding
+  assign round_sticky_bits  = fmt_round_sticky_bits[dst_fmt_q2];
+
+  // Perform the rounding
+  fpnew_rounding #(
+    .AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs           ),
+    .sign_i                  ( pre_round_sign          ),
+    .round_sticky_bits_i     ( round_sticky_bits       ),
+    .rnd_mode_i              ( rnd_mode_q              ),
+    .effective_subtraction_i ( effective_subtraction_q ),
+    .abs_rounded_o           ( rounded_abs             ),
+    .sign_o                  ( rounded_sign            ),
+    .exact_zero_o            ( result_zero             )
+  );
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
+
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
+    // Total, exponent and mantissa widths of the format with index fmt, as returned by fpnew_pkg
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : post_process
+        // Detect underflow / overflow from the rounded exponent field (all zeroes / all ones)
+        fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
+        fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
+
+        // Assemble regular result; preset all bits to 1 so formats narrower than WIDTH are NaN-boxed
+        fmt_result[fmt]               = '1;
+        fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+      end
+    end else begin : inactive_format // formats not enabled in FpFmtConfig are driven with don't-cares
+      assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_result[fmt]         = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Classification after rounding select by destination format
+  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+  assign of_after_round = fmt_of_after_round[dst_fmt_q2];
+
+
+  // -----------------
+  // Result selection
+  // -----------------
+  logic [WIDTH-1:0]     regular_result;
+  fpnew_pkg::status_t   regular_status;
+
+  // Assemble regular result
+  assign regular_result = fmt_result[dst_fmt_q2];
+  assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+  assign regular_status.DZ = 1'b0; // no divisions
+  assign regular_status.OF = of_before_round | of_after_round;   // rounding can introduce overflow
+  assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+  assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+
+  // Final results for output pipeline
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+
+  // Select output depending on special case detection
+  assign result_d = result_is_special_q ? special_result_q : regular_result;
+  assign status_d = result_is_special_q ? special_status_q : regular_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_mask_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_mask_q[0]   = mid_pipe_mask_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]    = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]  = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage (stage i feeds the registers at index i+1)
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers (gated by reg_ena)
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_mask_q[i+1],   out_pipe_mask_q[i],   reg_ena, '0)
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
new file mode 100644
index 0000000..8a18261
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
@@ -0,0 +1,415 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_noncomp #(
+  parameter fpnew_pkg::fp_format_e   FpFormat    = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+  input logic                  clk_i,
+  input logic                  rst_ni,
+  // Input signals
+  input logic [1:0][WIDTH-1:0]     operands_i, // 2 operands
+  input logic [1:0]                is_boxed_i, // 2 operands
+  input fpnew_pkg::roundmode_e     rnd_mode_i,
+  input fpnew_pkg::operation_e     op_i,
+  input logic                      op_mod_i,
+  input TagType                    tag_i,
+  input logic                      mask_i,
+  input AuxType                    aux_i,
+  // Input Handshake
+  input  logic                     in_valid_i,
+  output logic                     in_ready_o,
+  input  logic                     flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]         result_o,
+  output fpnew_pkg::status_t       status_o,
+  output logic                     extension_bit_o,
+  output fpnew_pkg::classmask_e    class_mask_o,
+  output logic                     is_class_o,
+  output TagType                   tag_o,
+  output logic                     mask_o,
+  output AuxType                   aux_o,
+  // Output handshake
+  output logic                     out_valid_o,
+  input  logic                     out_ready_i,
+  // Indication of valid data in flight
+  output logic                     busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  // Pipelines
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][1:0]            inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_op_mod_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_mask_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_mask_q[0]     = mask_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage (stage i feeds the registers at index i+1)
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers (gated by reg_ena)
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  fpnew_pkg::fp_info_t [1:0] info_q;
+
+  // Classify input
+  fpnew_classifier #(
+    .FpFormat    ( FpFormat ),
+    .NumOperands ( 2        )
+    ) i_class_a (
+    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+    .info_o     ( info_q                            )
+  );
+
+  fp_t                 operand_a, operand_b;
+  fpnew_pkg::fp_info_t info_a,    info_b;
+
+  // Packing-order-agnostic assignments
+  assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+  assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+  assign info_a    = info_q[0];
+  assign info_b    = info_q[1];
+
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling});
+
+  logic operands_equal, operand_a_smaller;
+
+  // Equality checks for zeroes too
+  assign operands_equal    = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero);
+  // Invert result if non-zero signs involved (unsigned comparison)
+  assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign);
+
+  // ---------------
+  // Sign Injection
+  // ---------------
+  fp_t                sgnj_result;
+  fpnew_pkg::status_t sgnj_status;
+  logic               sgnj_extension_bit;
+
+  // Sign Injection - operation is encoded in rnd_mode_q:
+  // RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = Passthrough (no NaN-box check)
+  always_comb begin : sign_injections
+    logic sign_a, sign_b; // internal signs
+    // Default assignment
+    sgnj_result = operand_a; // result based on operand a
+
+    // NaN-boxing check: an operand a that is not properly boxed is replaced by the canonical NaN
+    if (!info_a.is_boxed) sgnj_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};
+
+    // Internal signs are treated as positive in case of non-NaN-boxed values
+    sign_a = operand_a.sign & info_a.is_boxed;
+    sign_b = operand_b.sign & info_b.is_boxed;
+
+    // Do the sign injection based on rm field (only the sign of the result is overwritten)
+    unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+      fpnew_pkg::RNE: sgnj_result.sign = sign_b;          // SGNJ
+      fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b;         // SGNJN
+      fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX
+      fpnew_pkg::RUP: sgnj_result      = operand_a;       // passthrough, overrides the NaN-box substitution
+      default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+    endcase
+  end
+
+  assign sgnj_status = '0;        // sign injections never raise exceptions
+
+  // op_mod_q enables integer sign-extension of result (for storing to integer regfile)
+  assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1;
+
+  // ------------------
+  // Minimum / Maximum
+  // ------------------
+  fp_t                minmax_result;
+  fpnew_pkg::status_t minmax_status;
+  logic               minmax_extension_bit;
+
+  // Minimum/Maximum - operation is encoded in rnd_mode_q:
+  // RNE = MIN, RTZ = MAX
+  always_comb begin : min_max
+    // Default assignment: no flags raised
+    minmax_status = '0;
+
+    // Min/Max use quiet comparisons - only sNaN are invalid
+    minmax_status.NV = signalling_nan;
+
+    // Both inputs NaN: the result is the canonical quiet NaN
+    if (info_a.is_nan && info_b.is_nan)
+      minmax_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+    // If one operand is NaN, the non-NaN operand is returned
+    else if (info_a.is_nan) minmax_result = operand_b;
+    else if (info_b.is_nan) minmax_result = operand_a;
+    // Otherwise decide according to the operation (selected by the rounding mode field)
+    else begin
+      unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+        fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN
+        fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? operand_b : operand_a; // MAX
+        default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+      endcase
+    end
+  end
+
+  assign minmax_extension_bit = 1'b1; // NaN-box as result is always a float value
+
+  // ------------
+  // Comparisons
+  // ------------
+  fp_t                cmp_result;
+  fpnew_pkg::status_t cmp_status;
+  logic               cmp_extension_bit;
+
+  // Comparisons - operation is encoded in rnd_mode_q:
+  // RNE = LE, RTZ = LT, RDN = EQ
+  // op_mod_q inverts boolean outputs
+  always_comb begin : comparisons
+    // Default assignment
+    cmp_result = '0; // false
+    cmp_status = '0; // no flags
+
+    // Signalling NaNs are illegal; the result stays at its default 0 (op_mod inversion is not applied)
+    if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation
+    // Otherwise do comparisons
+    else begin
+      unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+        fpnew_pkg::RNE: begin // Less than or equal
+          if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+          else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        fpnew_pkg::RTZ: begin // Less than
+          if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+          else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        fpnew_pkg::RDN: begin // Equal (quiet: a non-signalling NaN operand raises no flag)
+          if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal
+          else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+      endcase
+    end
+  end
+
+  assign cmp_extension_bit = 1'b0; // Comparisons always produce booleans in integer registers
+
+  // ---------------
+  // Classification
+  // ---------------
+  fpnew_pkg::status_t    class_status;
+  logic                  class_extension_bit;
+  fpnew_pkg::classmask_e class_mask_d; // the result is actually here
+
+  // Classification - always return the classification mask on the dedicated port
+  always_comb begin : classify // first matching info_a flag selects the class of operand a
+    if (info_a.is_normal) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGNORM    : fpnew_pkg::POSNORM;
+    end else if (info_a.is_subnormal) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM;
+    end else if (info_a.is_zero) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGZERO    : fpnew_pkg::POSZERO;
+    end else if (info_a.is_inf) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGINF     : fpnew_pkg::POSINF;
+    end else if (info_a.is_nan) begin
+      class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN       : fpnew_pkg::QNAN;
+    end else begin
+      class_mask_d = fpnew_pkg::QNAN; // default value when none of the info_a flags above is set
+    end
+  end
+
+  assign class_status        = '0;   // classification does not set flags
+  assign class_extension_bit = 1'b0; // classification always produces results in integer registers
+
+  // -----------------
+  // Result selection
+  // -----------------
+  fp_t                   result_d;
+  fpnew_pkg::status_t    status_d;
+  logic                  extension_bit_d;
+  logic                  is_class_d;
+
+  // Select result
+  always_comb begin : select_result // mux result, status and extension bit by the operation at the input pipe end
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::SGNJ: begin
+        result_d        = sgnj_result;
+        status_d        = sgnj_status;
+        extension_bit_d = sgnj_extension_bit;
+      end
+      fpnew_pkg::MINMAX: begin
+        result_d        = minmax_result;
+        status_d        = minmax_status;
+        extension_bit_d = minmax_extension_bit;
+      end
+      fpnew_pkg::CMP: begin
+        result_d        = cmp_result;
+        status_d        = cmp_status;
+        extension_bit_d = cmp_extension_bit;
+      end
+      fpnew_pkg::CLASSIFY: begin
+        result_d        = '{default: fpnew_pkg::DONT_CARE}; // unused, the class mask is carried in class_mask_d
+        status_d        = class_status;
+        extension_bit_d = class_extension_bit;
+      end
+      default: begin
+        result_d        = '{default: fpnew_pkg::DONT_CARE}; // dont care
+        status_d        = '{default: fpnew_pkg::DONT_CARE}; // dont care
+        extension_bit_d = fpnew_pkg::DONT_CARE;             // dont care
+      end
+    endcase
+  end
+
+  assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY);
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  fp_t                   [0:NUM_OUT_REGS] out_pipe_result_q;
+  fpnew_pkg::status_t    [0:NUM_OUT_REGS] out_pipe_status_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_extension_bit_q;
+  fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_is_class_q;
+  TagType                [0:NUM_OUT_REGS] out_pipe_tag_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_mask_q;
+  AuxType                [0:NUM_OUT_REGS] out_pipe_aux_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0]        = result_d;
+  assign out_pipe_status_q[0]        = status_d;
+  assign out_pipe_extension_bit_q[0] = extension_bit_d;
+  assign out_pipe_class_mask_q[0]    = class_mask_d;
+  assign out_pipe_is_class_q[0]      = is_class_d;
+  assign out_pipe_tag_q[0]           = inp_pipe_tag_q[NUM_INP_REGS];
+  assign out_pipe_mask_q[0]          = inp_pipe_mask_q[NUM_INP_REGS];
+  assign out_pipe_aux_q[0]           = inp_pipe_aux_q[NUM_INP_REGS];
+  assign out_pipe_valid_q[0]         = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1],        out_pipe_result_q[i],        reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1],        out_pipe_status_q[i],        reg_ena, '0)
+    `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0)
+    `FFL(out_pipe_class_mask_q[i+1],    out_pipe_class_mask_q[i],    reg_ena, fpnew_pkg::QNAN)
+    `FFL(out_pipe_is_class_q[i+1],      out_pipe_is_class_q[i],      reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],           out_pipe_tag_q[i],           reg_ena, TagType'('0))
+    `FFL(out_pipe_mask_q[i+1],          out_pipe_mask_q[i],          reg_ena, '0)
+    `FFL(out_pipe_aux_q[i+1],           out_pipe_aux_q[i],           reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS];
+  assign class_mask_o    = out_pipe_class_mask_q[NUM_OUT_REGS];
+  assign is_class_o      = out_pipe_is_class_q[NUM_OUT_REGS];
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
new file mode 100644
index 0000000..2633406
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
@@ -0,0 +1,244 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_opgroup_block #( // Opgroup wrapper: per-format parallel slices plus one merged slice behind a single output arbiter
+  parameter fpnew_pkg::opgroup_e        OpGroup       = fpnew_pkg::ADDMUL,
+  // FPU configuration
+  parameter int unsigned                Width         = 32, // bit width of each operand and of result_o
+  parameter logic                       EnableVectors = 1'b1,
+  parameter fpnew_pkg::fmt_logic_t      FpFmtMask     = '1, // per-format enable bits
+  parameter fpnew_pkg::ifmt_logic_t     IntFmtMask    = '1, // only forwarded to the merged slice
+  parameter fpnew_pkg::fmt_unsigned_t   FmtPipeRegs   = '{default: 0}, // per-format pipeline register count
+  parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes  = '{default: fpnew_pkg::PARALLEL}, // per-format PARALLEL / MERGED / DISABLED
+  parameter fpnew_pkg::pipe_config_t    PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                        TagType       = logic, // type of tag_i / tag_o
+  parameter int unsigned                TrueSIMDClass = 0, // only forwarded to the parallel format slices
+  // Do not change
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS,
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_LANES    = fpnew_pkg::max_num_lanes(Width, FpFmtMask, EnableVectors),
+  localparam type         MaskType     = logic [NUM_LANES-1:0] // one SIMD mask bit per lane
+) (
+  input logic                                     clk_i,
+  input logic                                     rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i, // row [fmt] is handed to that format's slice
+  input fpnew_pkg::roundmode_e                    rnd_mode_i,
+  input fpnew_pkg::operation_e                    op_i,
+  input logic                                     op_mod_i,
+  input fpnew_pkg::fp_format_e                    src_fmt_i, // only consumed by the merged slice
+  input fpnew_pkg::fp_format_e                    dst_fmt_i, // selects which slice receives the operation
+  input fpnew_pkg::int_format_e                   int_fmt_i, // only consumed by the merged slice
+  input logic                                     vectorial_op_i,
+  input TagType                                   tag_i,
+  input MaskType                                  simd_mask_i,
+  // Input Handshake
+  input  logic                                    in_valid_i,
+  output logic                                    in_ready_o,
+  input  logic                                    flush_i,
+  // Output signals
+  output logic [Width-1:0]                        result_o,
+  output fpnew_pkg::status_t                      status_o,
+  output logic                                    extension_bit_o,
+  output TagType                                  tag_o,
+  // Output handshake
+  output logic                                    out_valid_o,
+  input  logic                                    out_ready_i,
+  // Indication of valid data in flight
+  output logic                                    busy_o
+);
+
+  // ----------------
+  // Type Definition
+  // ----------------
+  typedef struct packed { // everything a slice hands to the output arbiter
+    logic [Width-1:0]   result;
+    fpnew_pkg::status_t status;
+    logic               ext_bit;
+    TagType             tag;
+  } output_t;
+
+  // Handshake signals for the slices, one bit per format
+  logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy;
+  output_t [NUM_FORMATS-1:0] fmt_outputs; // per-format result bundles, arbitrated below
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i]; // Ready comes from the selected format and is only raised while in_valid_i is set
+
+  // -------------------------
+  // Generate Parallel Slices
+  // -------------------------
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices
+    // Some constants for this format
+    localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam logic IS_FIRST_MERGED =
+        fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask);
+
+    // Generate slice only if format enabled and configured as a PARALLEL unit
+    if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format
+
+      logic in_valid;
+
+      assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format
+
+      // Pick one simd_mask_i bit per internal lane, striding by NUM_LANES/INTERNAL_LANES
+      localparam int unsigned INTERNAL_LANES = fpnew_pkg::num_lanes(Width, fpnew_pkg::fp_format_e'(fmt), EnableVectors);
+      logic [INTERNAL_LANES-1:0] mask_slice;
+      always_comb for (int b = 0; b < INTERNAL_LANES; b++) mask_slice[b] = simd_mask_i[(NUM_LANES/INTERNAL_LANES)*b];
+
+      fpnew_opgroup_fmt_slice #(
+        .OpGroup       ( OpGroup                      ),
+        .FpFormat      ( fpnew_pkg::fp_format_e'(fmt) ),
+        .Width         ( Width                        ),
+        .EnableVectors ( EnableVectors                ),
+        .NumPipeRegs   ( FmtPipeRegs[fmt]             ),
+        .PipeConfig    ( PipeConfig                   ),
+        .TagType       ( TagType                      ),
+        .TrueSIMDClass ( TrueSIMDClass                )
+      ) i_fmt_slice (
+        .clk_i,
+        .rst_ni,
+        .operands_i     ( operands_i               ),
+        .is_boxed_i     ( is_boxed_i[fmt]          ),
+        .rnd_mode_i,
+        .op_i,
+        .op_mod_i,
+        .vectorial_op_i,
+        .tag_i,
+        .simd_mask_i    ( mask_slice               ),
+        .in_valid_i     ( in_valid                 ),
+        .in_ready_o     ( fmt_in_ready[fmt]        ),
+        .flush_i,
+        .result_o       ( fmt_outputs[fmt].result  ),
+        .status_o       ( fmt_outputs[fmt].status  ),
+        .extension_bit_o( fmt_outputs[fmt].ext_bit ),
+        .tag_o          ( fmt_outputs[fmt].tag     ),
+        .out_valid_o    ( fmt_out_valid[fmt]       ),
+        .out_ready_i    ( fmt_out_ready[fmt]       ),
+        .busy_o         ( fmt_busy[fmt]            )
+      );
+    // Enabled formats other than the first merged one, when a merged slice exists: tie off outputs, mirror its ready
+    end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused
+
+      localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+      // Ready is shared: this format mirrors the ready bit at index FMT
+      assign fmt_in_ready[fmt]  = fmt_in_ready[int'(FMT)];
+
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt]      = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
+
+    // Tie off disabled formats
+    end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt
+      assign fmt_in_ready[fmt]  = 1'b0; // don't accept operations
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt]      = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
+    end
+  end
+  // An enabled, first-merged format matches no branch above; index FMT is driven by gen_merged_slice below (assumes IS_FIRST_MERGED and FMT agree - confirm in fpnew_pkg)
+  // ----------------------
+  // Generate Merged Slice
+  // ----------------------
+  if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice
+
+    localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask);
+
+    logic in_valid;
+
+    assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED); // only destinations mapped to MERGED
+
+    fpnew_opgroup_multifmt_slice #(
+      .OpGroup       ( OpGroup          ),
+      .Width         ( Width            ),
+      .FpFmtConfig   ( FpFmtMask        ),
+      .IntFmtConfig  ( IntFmtMask       ),
+      .EnableVectors ( EnableVectors    ),
+      .NumPipeRegs   ( REG              ),
+      .PipeConfig    ( PipeConfig       ),
+      .TagType       ( TagType          )
+    ) i_multifmt_slice (
+      .clk_i,
+      .rst_ni,
+      .operands_i,
+      .is_boxed_i,
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .simd_mask_i     ( simd_mask_i              ),
+      .in_valid_i      ( in_valid                 ),
+      .in_ready_o      ( fmt_in_ready[FMT]        ),
+      .flush_i,
+      .result_o        ( fmt_outputs[FMT].result  ),
+      .status_o        ( fmt_outputs[FMT].status  ),
+      .extension_bit_o ( fmt_outputs[FMT].ext_bit ),
+      .tag_o           ( fmt_outputs[FMT].tag     ),
+      .out_valid_o     ( fmt_out_valid[FMT]       ),
+      .out_ready_i     ( fmt_out_ready[FMT]       ),
+      .busy_o          ( fmt_busy[FMT]            )
+    );
+
+  end
+
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+
+  // Round-Robin arbiter to decide which result to use
+  rr_arb_tree #(
+    .NumIn     ( NUM_FORMATS ),
+    .DataType  ( output_t    ),
+    .AxiVldRdy ( 1'b1        )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i   ( '0             ), // NOTE(review): tied off; presumably only used with external priority - confirm in rr_arb_tree
+    .req_i  ( fmt_out_valid  ),
+    .gnt_o  ( fmt_out_ready  ),
+    .data_i ( fmt_outputs    ),
+    .gnt_i  ( out_ready_i    ),
+    .req_o  ( out_valid_o    ),
+    .data_o ( arbiter_output ),
+    .idx_o  ( /* unused */   )
+  );
+
+  // Unpack output
+  assign result_o        = arbiter_output.result;
+  assign status_o        = arbiter_output.status;
+  assign extension_bit_o = arbiter_output.ext_bit;
+  assign tag_o           = arbiter_output.tag;
+
+  assign busy_o = (| fmt_busy); // busy while any format slice reports busy
+
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv
new file mode 100644
index 0000000..35fbe48
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv
@@ -0,0 +1,292 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_opgroup_fmt_slice #( // Single-format slice: NUM_LANES lanes of one opgroup unit packed into a Width-bit result
+  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::ADDMUL, // selects which unit is instantiated per lane
+  parameter fpnew_pkg::fp_format_e   FpFormat      = fpnew_pkg::fp_format_e'(0),
+  // FPU configuration
+  parameter int unsigned             Width         = 32, // bit width of each operand and of result_o
+  parameter logic                    EnableVectors = 1'b1, // when 0, only lane 0 is instantiated
+  parameter int unsigned             NumPipeRegs   = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                     TagType       = logic,
+  parameter int unsigned             TrueSIMDClass = 0, // non-zero: 10-bit per-lane class blocks when SIMD_WIDTH >= 10
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_LANES    = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors),
+  localparam type         MaskType     = logic [NUM_LANES-1:0]
+) (
+  input logic                               clk_i,
+  input logic                               rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_OPERANDS-1:0]            is_boxed_i,
+  input fpnew_pkg::roundmode_e              rnd_mode_i,
+  input fpnew_pkg::operation_e              op_i,
+  input logic                               op_mod_i,
+  input logic                               vectorial_op_i,
+  input TagType                             tag_i,
+  input MaskType                            simd_mask_i,
+  // Input Handshake
+  input  logic                              in_valid_i,
+  output logic                              in_ready_o,
+  input  logic                              flush_i,
+  // Output signals
+  output logic [Width-1:0]                  result_o,
+  output fpnew_pkg::status_t                status_o,
+  output logic                              extension_bit_o,
+  output TagType                            tag_o,
+  // Output handshake
+  output logic                              out_valid_o,
+  input  logic                              out_ready_i,
+  // Indication of valid data in flight
+  output logic                              busy_o
+);
+
+  localparam int unsigned FP_WIDTH  = fpnew_pkg::fp_width(FpFormat);
+  localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); // result bits available per lane
+
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic                 vectorial_op;
+
+  logic [NUM_LANES*FP_WIDTH-1:0] slice_result;
+  logic [Width-1:0]              slice_regular_result, slice_class_result, slice_vec_class_result;
+
+  fpnew_pkg::status_t    [NUM_LANES-1:0] lane_status;
+  logic                  [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
+  fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask;
+  TagType                [NUM_LANES-1:0] lane_tags; // only the first one is actually used
+  logic                  [NUM_LANES-1:0] lane_masks;
+  logic                  [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // only [0] of vectorial/is_class is read; lane_busy is OR-reduced
+
+  logic result_is_vector, result_is_class;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    logic [FP_WIDTH-1:0] local_result; // lane-local results
+    logic                local_sign;
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands
+      logic [FP_WIDTH-1:0]                   op_result;      // lane-local results
+      fpnew_pkg::status_t                    op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+      // Slice out the operands for this lane
+      always_comb begin : prepare_input
+        for (int i = 0; i < int'(NUM_OPERANDS); i++) begin
+          local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH];
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma #(
+          .FpFormat    ( FpFormat    ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig  ( PipeConfig  ),
+          .TagType     ( TagType     ),
+          .AuxType     ( logic       )
+        ) i_fma (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands               ),
+          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .mask_i          ( simd_mask_i[lane]    ),
+          .aux_i           ( vectorial_op         ), // Remember whether operation was vectorial
+          .in_valid_i      ( in_valid             ),
+          .in_ready_o      ( lane_in_ready[lane]  ),
+          .flush_i,
+          .result_o        ( op_result            ),
+          .status_o        ( op_status            ),
+          .extension_bit_o ( lane_ext_bit[lane]   ),
+          .tag_o           ( lane_tags[lane]      ),
+          .mask_o          ( lane_masks[lane]     ),
+          .aux_o           ( lane_vectorial[lane] ),
+          .out_valid_o     ( out_valid            ),
+          .out_ready_i     ( out_ready            ),
+          .busy_o          ( lane_busy[lane]      )
+        );
+        assign lane_is_class[lane]   = 1'b0; // this branch never reports a classification result
+        assign lane_class_mask[lane] = fpnew_pkg::NEGINF; // constant tie-off; not selected for result_o while lane_is_class is 0
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance // NOTE(review): instance below is commented out, so this branch drives none of the lane signals
+        // fpnew_divsqrt #(
+        //   .FpFormat   (FpFormat),
+        //   .NumPipeRegs(NumPipeRegs),
+        //   .PipeConfig (PipeConfig),
+        //   .TagType    (TagType),
+        //   .AuxType    (logic)
+        // ) i_divsqrt (
+        //   .clk_i,
+        //   .rst_ni,
+        //   .operands_i      ( local_operands               ),
+        //   .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+        //   .rnd_mode_i,
+        //   .op_i,
+        //   .op_mod_i,
+        //   .tag_i,
+        //   .aux_i           ( vectorial_op         ), // Remember whether operation was vectorial
+        //   .in_valid_i      ( in_valid             ),
+        //   .in_ready_o      ( lane_in_ready[lane]  ),
+        //   .flush_i,
+        //   .result_o        ( op_result            ),
+        //   .status_o        ( op_status            ),
+        //   .extension_bit_o ( lane_ext_bit[lane]   ),
+        //   .tag_o           ( lane_tags[lane]      ),
+        //   .aux_o           ( lane_vectorial[lane] ),
+        //   .out_valid_o     ( out_valid            ),
+        //   .out_ready_i     ( out_ready            ),
+        //   .busy_o          ( lane_busy[lane]      )
+        // );
+        // assign lane_is_class[lane] = 1'b0;
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        fpnew_noncomp #(
+          .FpFormat   (FpFormat),
+          .NumPipeRegs(NumPipeRegs),
+          .PipeConfig (PipeConfig),
+          .TagType    (TagType),
+          .AuxType    (logic)
+        ) i_noncomp (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands               ),
+          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .mask_i          ( simd_mask_i[lane]     ),
+          .aux_i           ( vectorial_op          ), // Remember whether operation was vectorial
+          .in_valid_i      ( in_valid              ),
+          .in_ready_o      ( lane_in_ready[lane]   ),
+          .flush_i,
+          .result_o        ( op_result             ),
+          .status_o        ( op_status             ),
+          .extension_bit_o ( lane_ext_bit[lane]    ),
+          .class_mask_o    ( lane_class_mask[lane] ),
+          .is_class_o      ( lane_is_class[lane]   ),
+          .tag_o           ( lane_tags[lane]       ),
+          .mask_o          ( lane_masks[lane]      ),
+          .aux_o           ( lane_vectorial[lane]  ),
+          .out_valid_o     ( out_valid             ),
+          .out_ready_i     ( out_ready             ),
+          .busy_o          ( lane_busy[lane]       )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid   & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane]  = 1'b0; // unused lane
+      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane]    = '0;
+      assign lane_busy[lane]      = 1'b0;
+      assign lane_is_class[lane]  = 1'b0;
+    end
+
+    // Insert lane result into slice result
+    assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result;
+
+    // Create Classification results
+    if (TrueSIMDClass && SIMD_WIDTH >= 10) begin : vectorial_true_class // true vectorial class blocks are 10bits in size
+      assign slice_vec_class_result[lane*SIMD_WIDTH +: 10] = lane_class_mask[lane];
+      assign slice_vec_class_result[(lane+1)*SIMD_WIDTH-1 -: SIMD_WIDTH-10] = '0; // zero the remaining upper bits of this lane
+    end else if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size
+      assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGZERO);
+      // Write the current block segment
+      assign slice_vec_class_result[(lane+1)*8-1:lane*8] = {
+        local_sign,  // BIT 7
+        ~local_sign, // BIT 6
+        lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5
+        lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4
+        lane_class_mask[lane] == fpnew_pkg::POSZERO
+            || lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3
+        lane_class_mask[lane] == fpnew_pkg::POSSUBNORM
+            || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2
+        lane_class_mask[lane] == fpnew_pkg::POSNORM
+            || lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1
+        lane_class_mask[lane] == fpnew_pkg::POSINF
+            || lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0
+      };
+    end
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  assign result_is_vector = lane_vectorial[0];
+  assign result_is_class  = lane_is_class[0];
+
+  assign slice_regular_result = $signed({extension_bit_o, slice_result}); // signed extension to Width using the extension bit
+  // Result bits covered by whole 8-bit class blocks, capped at the largest multiple of 8 within Width
+  localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8;
+
+  // Pad out unused vec_class bits if each classify result is on 8 bits
+  if (!(TrueSIMDClass && SIMD_WIDTH >= 10)) begin
+    if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class
+      assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0;
+    end
+  end
+
+  // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1;
+
+  assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0]; // scalar case: class mask of lane 0
+
+  // Select the proper result
+  assign result_o = result_is_class ? slice_class_result : slice_regular_result;
+
+  assign extension_bit_o                              = lane_ext_bit[0]; // upper lanes unused
+  assign tag_o                                        = lane_tags[0];    // upper lanes unused
+  assign busy_o                                       = (| lane_busy);
+  assign out_valid_o                                  = lane_out_valid[0]; // upper lanes unused
+
+
+  // Collapse the lane status
+  always_comb begin : output_processing
+    // OR together the status of every lane whose output mask bit is set
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i] & {5{lane_masks[i]}}; // NOTE(review): {5{..}} assumes status_t is 5 bits wide - confirm in fpnew_pkg
+    status_o = temp_status;
+  end
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
new file mode 100644
index 0000000..08facb8
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
@@ -0,0 +1,449 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_opgroup_multifmt_slice #( // one multi-format unit per SIMD lane, results re-packed per format
+  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::CONV,
+  parameter int unsigned             Width         = 64,
+  // FPU configuration
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig   = '1,
+  parameter fpnew_pkg::ifmt_logic_t  IntFmtConfig  = '1,
+  parameter logic                    EnableVectors = 1'b1,
+  parameter int unsigned             NumPipeRegs   = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                     TagType       = logic,
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS,
+  localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors),
+  localparam type         MaskType     = logic [NUM_SIMD_LANES-1:0]
+) (
+  input logic                                     clk_i,
+  input logic                                     rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e                    rnd_mode_i,
+  input fpnew_pkg::operation_e                    op_i,
+  input logic                                     op_mod_i,
+  input fpnew_pkg::fp_format_e                    src_fmt_i,
+  input fpnew_pkg::fp_format_e                    dst_fmt_i,
+  input fpnew_pkg::int_format_e                   int_fmt_i,
+  input logic                                     vectorial_op_i,
+  input TagType                                   tag_i,
+  input MaskType                                  simd_mask_i,
+  // Input Handshake
+  input  logic                                    in_valid_i,
+  output logic                                    in_ready_o,
+  input  logic                                    flush_i,
+  // Output signals
+  output logic [Width-1:0]                        result_o,
+  output fpnew_pkg::status_t                      status_o,
+  output logic                                    extension_bit_o,
+  output TagType                                  tag_o,
+  // Output handshake
+  output logic                                    out_valid_o,
+  input  logic                                    out_ready_i,
+  // Indication of valid data in flight
+  output logic                                    busy_o
+);
+
+  localparam int unsigned MAX_FP_WIDTH   = fpnew_pkg::max_fp_width(FpFmtConfig);
+  localparam int unsigned MAX_INT_WIDTH  = fpnew_pkg::max_int_width(IntFmtConfig);
+  localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1); // vectors forced on here
+  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+  // We will send the format information along with the data
+  localparam int unsigned FMT_BITS =
+      fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
+  localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes
+  logic                 vectorial_op;
+  logic [FMT_BITS-1:0]  dst_fmt; // destination format to pass along with operation
+  logic [AUX_BITS-1:0]  aux_data;
+
+  // additional flags for CONV
+  logic       dst_fmt_is_int, dst_is_cpk;
+  logic [1:0] dst_vec_op; // info for vectorial results (for packing)
+  logic [2:0] target_aux_d, target_aux_q;
+  logic       is_up_cast, is_down_cast;
+
+  logic [NUM_FORMATS-1:0][Width-1:0]     fmt_slice_result;
+  logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;
+  logic [Width-1:0]                      conv_slice_result;
+
+
+  logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register
+
+  fpnew_pkg::status_t [NUM_LANES-1:0]   lane_status;
+  logic   [NUM_LANES-1:0]               lane_ext_bit; // only the first one is actually used
+  TagType [NUM_LANES-1:0]               lane_tags; // only the first one is actually used
+  logic   [NUM_LANES-1:0]               lane_masks;
+  logic   [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
+  logic   [NUM_LANES-1:0]               lane_busy; // OR-reduced over all lanes into busy_o
+
+  logic                result_is_vector;
+  logic [FMT_BITS-1:0] result_fmt;
+  logic                result_fmt_is_int, result_is_cpk;
+  logic [1:0]          result_vec_op; // info for vectorial results (for packing)
+
+  logic simd_synch_rdy, simd_synch_done;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // Cast-and-Pack ops are encoded in operation and modifier
+  assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
+  assign dst_is_cpk     = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
+                                                          op_i == fpnew_pkg::CPKCD);
+  assign dst_vec_op     = {2{OpGroup == fpnew_pkg::CONV}} & {(op_i == fpnew_pkg::CPKCD), op_mod_i}; // 1-bit flag would zero bit 1
+
+  assign is_up_cast   = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
+  assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));
+
+  // The destination format is the int format for F2I casts
+  assign dst_fmt    = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;
+
+  // The data sent along consists of the vectorial flag and format bits
+  assign aux_data      = {dst_fmt_is_int, vectorial_op, dst_fmt};
+  assign target_aux_d  = {dst_vec_op, dst_is_cpk};
+
+  // CONV passes one operand for assembly after the unit: opC for cpk, opB for others
+  if (OpGroup == fpnew_pkg::CONV) begin : conv_target
+    assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
+  end
+
+  // For 2-operand units, prepare boxing info
+  logic [NUM_FORMATS-1:0]      is_boxed_1op;
+  logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;
+
+  always_comb begin : boxed_2op
+    for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin
+      is_boxed_1op[fmt] = is_boxed_i[fmt][0];
+      is_boxed_2op[fmt] = is_boxed_i[fmt][1:0];
+    end
+  end
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
+    // Get a mask of active formats for this lane
+    localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
+        fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
+        fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);
+
+    // Cast-specific parameters
+    localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
+        fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
+        fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);
+
+    // Lane parameters from Opgroup
+    localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
+                                                     ? CONV_FORMATS : ACTIVE_FORMATS;
+    localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;
+
+    logic [LANE_WIDTH-1:0] local_result; // lane-local results
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands;  // lane-local operands
+      logic [LANE_WIDTH-1:0]                   op_result;       // lane-local results
+      fpnew_pkg::status_t                      op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+
+      // Slice out the operands for this lane, upper bits are ignored in the unit
+      always_comb begin : prepare_input
+        for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
+          local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
+        end
+
+        // override operand 0 for some conversions
+        if (OpGroup == fpnew_pkg::CONV) begin
+          // Source is an integer
+          if (op_i == fpnew_pkg::I2F) begin
+            local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
+          // vectorial F2F up casts
+          end else if (op_i == fpnew_pkg::F2F) begin
+            if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
+              local_operands[0] = operands_i[0] >> (LANE*fpnew_pkg::fp_width(src_fmt_i) +
+                                                    MAX_FP_WIDTH/2);
+            end
+          // CPK
+          end else if (dst_is_cpk) begin
+            if (lane == 1) begin
+              local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
+            end
+          end
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma_multi #(
+          .FpFmtConfig ( LANE_FORMATS         ),
+          .NumPipeRegs ( NumPipeRegs          ),
+          .PipeConfig  ( PipeConfig           ),
+          .TagType     ( TagType              ),
+          .AuxType     ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_fma_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands  ),
+          .is_boxed_i,
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .tag_i,
+          .mask_i          ( simd_mask_i[lane]   ),
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .mask_o          ( lane_masks[lane]    ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
+        fpnew_divsqrt_multi #(
+          .FpFmtConfig ( LANE_FORMATS         ),
+          .NumPipeRegs ( NumPipeRegs          ),
+          .PipeConfig  ( PipeConfig           ),
+          .TagType     ( TagType              ),
+          .AuxType     ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_divsqrt_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands[1:0] ), // 2 operands
+          .is_boxed_i      ( is_boxed_2op        ), // 2 operands
+          .rnd_mode_i,
+          .op_i,
+          .dst_fmt_i,
+          .tag_i,
+          .mask_i          ( simd_mask_i[lane]   ),
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .divsqrt_done_o   ( divsqrt_done[lane] ),
+          .simd_synch_done_i( simd_synch_done    ),
+          .divsqrt_ready_o  ( divsqrt_ready[lane]),
+          .simd_synch_rdy_i( simd_synch_rdy    ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .mask_o          ( lane_masks[lane]    ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance // empty: no unit is instantiated for NONCOMP
+
+      end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
+        fpnew_cast_multi #(
+          .FpFmtConfig  ( LANE_FORMATS         ),
+          .IntFmtConfig ( CONV_INT_FORMATS     ),
+          .NumPipeRegs  ( NumPipeRegs          ),
+          .PipeConfig   ( PipeConfig           ),
+          .TagType      ( TagType              ),
+          .AuxType      ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_cast_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands[0]   ),
+          .is_boxed_i      ( is_boxed_1op        ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .int_fmt_i,
+          .tag_i,
+          .mask_i          ( simd_mask_i[lane]   ),
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .mask_o          ( lane_masks[lane]    ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin : inactive_lane
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane]  = 1'b0; // unused lane
+      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane]    = '0;
+      assign lane_busy[lane]      = 1'b0;
+    end
+
+    // Generate result packing depending on float format
+    for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
+      // Set up some constants
+      localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+      // only for active formats within the lane
+      if (ACTIVE_FORMATS[fmt]) begin
+        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
+            local_result[FP_WIDTH-1:0];
+      end else if ((LANE+1)*FP_WIDTH <= Width) begin
+        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
+            '{default: lane_ext_bit[LANE]};
+      end else if (LANE*FP_WIDTH < Width) begin
+        assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] =
+            '{default: lane_ext_bit[LANE]};
+      end
+    end
+
+    // Generate result packing depending on integer format
+    if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
+      for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
+        // Set up some constants
+        localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+        if (ACTIVE_INT_FORMATS[ifmt]) begin
+          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
+            local_result[INT_WIDTH-1:0];
+        end else if ((LANE+1)*INT_WIDTH <= Width) begin
+          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0;
+        end else if (LANE*INT_WIDTH < Width) begin
+          assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0;
+        end
+      end
+    end
+  end
+
+  // Extend slice result if needed
+  for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    if (NUM_LANES*FP_WIDTH < Width)
+      assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]};
+  end
+
+  // Mute int results if unused
+  for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled
+    if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
+      assign ifmt_slice_result[ifmt] = '0;
+    end
+  end
+
+  // Bypass lanes with target operand for vectorial casts
+  if (OpGroup == fpnew_pkg::CONV) begin : target_regs
+    // Bypass pipeline signals, index i holds signal after i register stages
+    logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
+    logic [0:NumPipeRegs][2:0]       byp_pipe_aux_q;
+    logic [0:NumPipeRegs]            byp_pipe_valid_q;
+    // Ready signal is combinatorial for all stages
+    logic [0:NumPipeRegs] byp_pipe_ready;
+
+    // Input stage: First element of pipeline is taken from inputs
+    assign byp_pipe_target_q[0]  = conv_target_d;
+    assign byp_pipe_aux_q[0]     = target_aux_d;
+    assign byp_pipe_valid_q[0]   = in_valid_i & vectorial_op;
+    // Generate the register stages
+    for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline
+      // Internal register enable for this stage
+      logic reg_ena;
+      // Determine the ready signal of the current stage - advance the pipeline:
+      // 1. if the next stage is ready for our data
+      // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+      assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1];
+      // Valid: enabled by ready signal, synchronous clear with the flush signal
+      `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+      // Enable register if pipeline ready and a valid data item is present
+      assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i];
+      // Generate the pipeline registers within the stages, use enable-registers
+      `FFL(byp_pipe_target_q[i+1],  byp_pipe_target_q[i],  reg_ena, '0)
+      `FFL(byp_pipe_aux_q[i+1],     byp_pipe_aux_q[i],     reg_ena, '0)
+    end
+    // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+    assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;
+    // Output stage: assign module outputs
+    assign conv_target_q = byp_pipe_target_q[NumPipeRegs];
+
+    // decode the aux data
+    assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
+  end else begin : no_conv
+    assign {result_vec_op, result_is_cpk} = '0;
+  end
+
+  // Synch lanes if there is more than one
+  assign simd_synch_rdy  = EnableVectors ? &divsqrt_ready : divsqrt_ready[0];
+  assign simd_synch_done = EnableVectors ? &divsqrt_done  : divsqrt_done[0];
+
+  // ------------
+  // Output Side
+  // ------------
+  assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];
+
+  assign result_o = result_fmt_is_int
+                    ? ifmt_slice_result[result_fmt]
+                    : fmt_slice_result[result_fmt];
+
+  assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
+  assign tag_o           = lane_tags[0];    // don't care about upper ones
+  assign busy_o          = (| lane_busy);
+
+  assign out_valid_o     = lane_out_valid[0]; // don't care about upper ones
+
+  // Collapse the status
+  always_comb begin : output_processing
+    // Collapse the status
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i] & {5{lane_masks[i]}};
+    status_o = temp_status;
+  end
+
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv
new file mode 100644
index 0000000..7addc3e
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv
@@ -0,0 +1,495 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+package fpnew_pkg;
+
+  // ---------
+  // FP TYPES
+  // ---------
+  // | Enumerator | Format           | Width  | EXP_BITS | MAN_BITS
+  // |:----------:|------------------|-------:|:--------:|:--------:
+  // | FP32       | IEEE binary32    | 32 bit | 8        | 23
+  // | FP64       | IEEE binary64    | 64 bit | 11       | 52
+  // | FP16       | IEEE binary16    | 16 bit | 5        | 10
+  // | FP8        | binary8          |  8 bit | 5        | 2
+  // | FP16ALT    | binary16alt      | 16 bit | 8        | 7
+  // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+
+  // Encoding for a format
+  typedef struct packed {
+    int unsigned exp_bits; // number of exponent bits
+    int unsigned man_bits; // number of mantissa bits
+  } fp_encoding_t;
+
+  localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats
+  localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS); // bits needed to encode a format index
+
+  // FP formats
+  typedef enum logic [FP_FORMAT_BITS-1:0] {
+    FP32    = 'd0,
+    FP64    = 'd1,
+    FP16    = 'd2,
+    FP8     = 'd3,
+    FP16ALT = 'd4
+    // add new formats here
+  } fp_format_e;
+
+  // Encodings for supported FP formats, listed in fp_format_e order: '{exp_bits, man_bits}
+  localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS  = '{
+    '{8,  23}, // IEEE binary32 (single)
+    '{11, 52}, // IEEE binary64 (double)
+    '{5,  10}, // IEEE binary16 (half)
+    '{5,  2},  // custom binary8
+    '{8,  7}   // custom binary16alt
+    // add new formats here
+  };
+
+  typedef logic [0:NUM_FP_FORMATS-1]       fmt_logic_t;    // Logic indexed by FP format (for masks); leftmost bit is format 0
+  typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format
+
+  localparam fmt_logic_t CPK_FORMATS = 5'b11000; // FP32 and FP64 can provide CPK only
+
+  // ---------
+  // INT TYPES
+  // ---------
+  // | Enumerator | Width  |
+  // |:----------:|-------:|
+  // | INT8       |  8 bit |
+  // | INT16      | 16 bit |
+  // | INT32      | 32 bit |
+  // | INT64      | 64 bit |
+  // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+
+  localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats
+  localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS); // bits needed to encode a format index
+
+  // Int formats (no explicit values: enumerators are numbered from 0 in declaration order)
+  typedef enum logic [INT_FORMAT_BITS-1:0] {
+    INT8,
+    INT16,
+    INT32,
+    INT64
+    // add new formats here
+  } int_format_e;
+
+  // Returns the width in bits of an INT format
+  function automatic int unsigned int_width(int_format_e ifmt);
+    unique case (ifmt)
+      INT8:  return 8;
+      INT16: return 16;
+      INT32: return 32;
+      INT64: return 64;
+      default: begin
+        // pragma translate_off
+        $fatal(1, "Invalid INT format supplied");
+        // pragma translate_on
+        // return a real width (that of INT8) rather than the enumerator INT8,
+        // whose value 0 is not a usable width; the error is flagged in simulation
+        return 8;
+      end
+    endcase
+  endfunction
+
+  typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks); leftmost bit is format 0
+
+  // --------------
+  // FP OPERATIONS
+  // --------------
+  localparam int unsigned NUM_OPGROUPS = 4; // matches the number of opgroup_e enumerators
+
+  // Each FP operation belongs to an operation group
+  typedef enum logic [1:0] {
+    ADDMUL, DIVSQRT, NONCOMP, CONV
+  } opgroup_e;
+
+  localparam int unsigned OP_BITS = 4; // encoding width of operation_e (15 operations below)
+
+  typedef enum logic [OP_BITS-1:0] {
+    FMADD, FNMSUB, ADD, MUL,     // ADDMUL operation group
+    DIV, SQRT,                   // DIVSQRT operation group
+    SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group
+    F2F, F2I, I2F, CPKAB, CPKCD  // CONV operation group
+  } operation_e;
+
+  // -------------------
+  // RISC-V FP-SPECIFIC
+  // -------------------
+  // Rounding modes (3'b110 is not assigned to any enumerator)
+  typedef enum logic [2:0] {
+    RNE = 3'b000,
+    RTZ = 3'b001,
+    RDN = 3'b010,
+    RUP = 3'b011,
+    RMM = 3'b100,
+    ROD = 3'b101,  // This mode is not defined in RISC-V FP-SPEC
+    DYN = 3'b111
+  } roundmode_e;
+
+  // Status flags (packed, so NV is the most significant of the 5 bits)
+  typedef struct packed {
+    logic NV; // Invalid
+    logic DZ; // Divide by zero
+    logic OF; // Overflow
+    logic UF; // Underflow
+    logic NX; // Inexact
+  } status_t;
+
+  // Information about a floating point value
+  typedef struct packed {
+    logic is_normal;     // is the value normal
+    logic is_subnormal;  // is the value subnormal
+    logic is_zero;       // is the value zero
+    logic is_inf;        // is the value infinity
+    logic is_nan;        // is the value NaN
+    logic is_signalling; // is the value a signalling NaN
+    logic is_quiet;      // is the value a quiet NaN
+    logic is_boxed;      // is the value properly NaN-boxed (RISC-V specific)
+  } fp_info_t;
+
+  // Classification mask (one-hot, 10 bits)
+  typedef enum logic [9:0] {
+    NEGINF     = 10'b00_0000_0001,
+    NEGNORM    = 10'b00_0000_0010,
+    NEGSUBNORM = 10'b00_0000_0100,
+    NEGZERO    = 10'b00_0000_1000,
+    POSZERO    = 10'b00_0001_0000,
+    POSSUBNORM = 10'b00_0010_0000,
+    POSNORM    = 10'b00_0100_0000,
+    POSINF     = 10'b00_1000_0000,
+    SNAN       = 10'b01_0000_0000,
+    QNAN       = 10'b10_0000_0000
+  } classmask_e;
+
+  // ------------------
+  // FPU configuration
+  // ------------------
+  // Pipelining registers can be inserted (at elaboration time) into operational units
+  typedef enum logic [1:0] {
+    BEFORE,     // registers are inserted at the inputs of the unit
+    AFTER,      // registers are inserted at the outputs of the unit
+    INSIDE,     // registers are inserted at predetermined (suboptimal) locations in the unit
+    DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE
+  } pipe_config_t;
+
+  // Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all.
+  typedef enum logic [1:0] {
+    DISABLED, // arithmetic units are not generated
+    PARALLEL, // arithmetic units are generated in parallel slices, one for each format
+    MERGED    // arithmetic units are contained within a merged unit holding multiple formats
+  } unit_type_t;
+
+  // Array of unit types indexed by format
+  typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t;
+
+  // Array of format-specific unit types by opgroup
+  typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t;
+  // same with unsigned (one 32-bit value per format, per opgroup)
+  typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t;
+
+  // FPU configuration: features
+  typedef struct packed {
+    int unsigned Width;
+    logic        EnableVectors;
+    logic        EnableNanBox;
+    fmt_logic_t  FpFmtMask;  // one bit per fp_format_e, leftmost bit is FP32
+    ifmt_logic_t IntFmtMask; // one bit per int_format_e, leftmost bit is INT8
+  } fpu_features_t;
+
+  localparam fpu_features_t RV64D = '{
+    Width:         64,
+    EnableVectors: 1'b0,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11000, // FP32, FP64
+    IntFmtMask:    4'b0011   // INT32, INT64
+  };
+
+  localparam fpu_features_t RV32D = '{
+    Width:         64,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11000, // FP32, FP64
+    IntFmtMask:    4'b0010   // INT32
+  };
+
+  localparam fpu_features_t RV32F = '{
+    Width:         32,
+    EnableVectors: 1'b0,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10000, // FP32
+    IntFmtMask:    4'b0010   // INT32
+  };
+
+  localparam fpu_features_t RV64D_Xsflt = '{
+    Width:         64,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11111, // all FP formats
+    IntFmtMask:    4'b1111   // all INT formats
+  };
+
+  localparam fpu_features_t RV32F_Xsflt = '{
+    Width:         32,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10111, // FP32, FP16, FP8, FP16ALT
+    IntFmtMask:    4'b1110   // INT8, INT16, INT32
+  };
+
+  localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{
+    Width:         32,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10001, // FP32, FP16ALT
+    IntFmtMask:    4'b0110   // INT16, INT32
+  };
+
+
+  // FPU configuration: implementation
+  typedef struct packed {
+    opgrp_fmt_unsigned_t   PipeRegs;   // unsigned value per opgroup and format
+    opgrp_fmt_unit_types_t UnitTypes;  // unit_type_t per opgroup and format
+    pipe_config_t          PipeConfig;
+  } fpu_implementation_t;
+
+  localparam fpu_implementation_t DEFAULT_NOREGS = '{
+    PipeRegs:   '{default: 0},
+    UnitTypes:  '{'{default: PARALLEL}, // ADDMUL
+                  '{default: MERGED},   // DIVSQRT
+                  '{default: PARALLEL}, // NONCOMP
+                  '{default: MERGED}},  // CONV
+    PipeConfig: BEFORE
+  };
+
+  localparam fpu_implementation_t DEFAULT_SNITCH = '{
+    PipeRegs:   '{default: 1},
+    UnitTypes:  '{'{default: PARALLEL}, // ADDMUL
+                  '{default: DISABLED}, // DIVSQRT
+                  '{default: PARALLEL}, // NONCOMP
+                  '{default: MERGED}},  // CONV
+    PipeConfig: BEFORE
+  };
+
+  // -----------------------
+  // Synthesis optimization
+  // -----------------------
+  localparam logic DONT_CARE = 1'b1; // the value to assign as don't care
+
+  // -------------------------
+  // General helper functions
+  // -------------------------
+  function automatic int minimum(int a, int b); // smaller of two signed ints
+    return (b < a) ? b : a; // on a tie both operands are equal, so either is correct
+  endfunction
+
+  function automatic int maximum(int a, int b); // larger of two signed ints
+    return (b > a) ? b : a; // on a tie both operands are equal, so either is correct
+  endfunction
+
+  // -------------------------------------------
+  // Helper functions for FP formats and values
+  // -------------------------------------------
+  function automatic int unsigned fp_width(fp_format_e fmt); // total width: exponent + mantissa + 1 bits
+    automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
+    return enc.exp_bits + enc.man_bits + 1;
+  endfunction
+
+  // Width of the widest enabled FP format (0 when cfg enables none)
+  function automatic int unsigned max_fp_width(fmt_logic_t cfg);
+    automatic int unsigned widest = 0;
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) begin
+      if (cfg[fmt]) widest = unsigned'(maximum(widest, fp_width(fp_format_e'(fmt))));
+    end
+    return widest;
+  endfunction
+
+  // Width of the narrowest enabled FP format (0 when cfg enables none)
+  function automatic int unsigned min_fp_width(fmt_logic_t cfg);
+    automatic int unsigned narrowest = max_fp_width(cfg); // start from the upper bound
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) begin
+      if (cfg[fmt]) narrowest = unsigned'(minimum(narrowest, fp_width(fp_format_e'(fmt))));
+    end
+    return narrowest;
+  endfunction
+
+  // Returns the number of exponent bits for a format
+  function automatic int unsigned exp_bits(fp_format_e fmt);
+    exp_bits = FP_ENCODINGS[fmt].exp_bits; // plain lookup in the encoding table
+  endfunction
+
+  // Returns the number of mantissa bits for a format
+  function automatic int unsigned man_bits(fp_format_e fmt);
+    man_bits = FP_ENCODINGS[fmt].man_bits; // plain lookup in the encoding table
+  endfunction
+
+  // Returns the bias value for a given format (as per IEEE 754-2008)
+  function automatic int unsigned bias(fp_format_e fmt);
+    bias = unsigned'(2**(FP_ENCODINGS[fmt].exp_bits-1)-1); // 2^(exp_bits-1) - 1, symmetrical
+  endfunction
+
+  function automatic fp_encoding_t super_format(fmt_logic_t cfg); // field-wise max over cfg
+    automatic fp_encoding_t merged = '0; // all-zero when no format is active
+    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+      if (cfg[f]) begin // inactive formats contribute nothing
+        merged.exp_bits = unsigned'(maximum(merged.exp_bits, exp_bits(fp_format_e'(f))));
+        merged.man_bits = unsigned'(maximum(merged.man_bits, man_bits(fp_format_e'(f))));
+      end
+    end
+    return merged;
+  endfunction
+
+  // -------------------------------------------
+  // Helper functions for INT formats and values
+  // -------------------------------------------
+  // Returns the widest INT format present
+  function automatic int unsigned max_int_width(ifmt_logic_t cfg);
+    automatic int unsigned widest = 0; // stays 0 if no bit of cfg is set
+    for (int k = 0; k < NUM_INT_FORMATS; k++)
+      if (cfg[k])
+        widest = maximum(widest, int_width(int_format_e'(k)));
+    return widest;
+  endfunction
+
+  // --------------------------------------------------
+  // Helper functions for operations and FPU structure
+  // --------------------------------------------------
+  // Returns the operation group of the given operation
+  function automatic opgroup_e get_opgroup(operation_e op);
+    unique case (op) // result goes to the function name; nothing follows the case
+      FMADD, FNMSUB, ADD, MUL:     get_opgroup = ADDMUL;
+      DIV, SQRT:                   get_opgroup = DIVSQRT;
+      SGNJ, MINMAX, CMP, CLASSIFY: get_opgroup = NONCOMP;
+      F2F, F2I, I2F, CPKAB, CPKCD: get_opgroup = CONV;
+      default:                     get_opgroup = NONCOMP; // any operation not listed above
+    endcase
+  endfunction
+
+  // Returns the number of operands by operation group
+  function automatic int unsigned num_operands(opgroup_e grp);
+    unique case (grp) // result goes to the function name; nothing follows the case
+      ADDMUL:  num_operands = 3;
+      DIVSQRT: num_operands = 2;
+      NONCOMP: num_operands = 2;
+      CONV:    num_operands = 3; // vectorial casts use 3 operands
+      default: num_operands = 0; // any group not listed above
+    endcase
+  endfunction
+
+  // Returns the number of lanes according to width, format and vectors
+  function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec);
+    num_lanes = vec ? width / fp_width(fmt) : 1; // without vectors there is a single lane
+  endfunction
+
+  // Returns the maximum number of lanes in the FPU according to width, format config and vectors
+  function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec);
+    max_num_lanes = vec ? width / min_fp_width(cfg) : 1; // without vectors: a single lane
+  endfunction
+
+  // Returns a mask of active FP formats that are present in lane lane_no of a multiformat slice
+  function automatic fmt_logic_t get_lane_formats(int unsigned width, fmt_logic_t cfg,
+                                                  int unsigned lane_no);
+    automatic fmt_logic_t lane_mask;
+    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+      // A format stays set only if it is active and fits more than lane_no times into width
+      lane_mask[f] = cfg[f] & (lane_no < width / fp_width(fp_format_e'(f)));
+    end
+    return lane_mask;
+  endfunction
+
+  // Returns a mask of active INT formats that are present in lane lane_no of a multiformat slice
+  function automatic ifmt_logic_t get_lane_int_formats(int unsigned width,
+                                                       fmt_logic_t cfg,
+                                                       ifmt_logic_t icfg,
+                                                       int unsigned lane_no);
+    automatic ifmt_logic_t int_mask;
+    automatic fmt_logic_t fp_mask;
+    int_mask = '0;
+    fp_mask = get_lane_formats(width, cfg, lane_no); // FP formats present in this lane
+    // An active int format is kept when an FP format of the same width is present in the lane
+    for (int unsigned i = 0; i < NUM_INT_FORMATS; i++) begin
+      for (int unsigned f = 0; f < NUM_FP_FORMATS; f++)
+        if (int_width(int_format_e'(i)) == fp_width(fp_format_e'(f)))
+          int_mask[i] |= icfg[i] && fp_mask[f];
+    end
+    return int_mask;
+  endfunction
+
+  // Returns a mask of active FP formats that are present in lane lane_no of a CONV slice
+  function automatic fmt_logic_t get_conv_lane_formats(int unsigned width, fmt_logic_t cfg,
+                                                       int unsigned lane_no);
+    automatic fmt_logic_t lane_mask;
+    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+      // Active formats with enough lanes, plus CPK formats in the two lowest lanes
+      lane_mask[f] = cfg[f] && ((lane_no < width / fp_width(fp_format_e'(f))) ||
+                                (CPK_FORMATS[f] && (lane_no < 2)));
+    end
+    return lane_mask;
+  endfunction
+
+  // Returns a mask of active INT formats that are present in lane lane_no of a CONV slice
+  function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width,
+                                                            fmt_logic_t cfg,
+                                                            ifmt_logic_t icfg,
+                                                            int unsigned lane_no);
+    automatic ifmt_logic_t int_mask;
+    automatic fmt_logic_t fp_mask;
+    int_mask = '0;
+    fp_mask = get_conv_lane_formats(width, cfg, lane_no); // FP formats of this CONV lane
+    // An active int format is kept when an FP format of the same width is present in the lane
+    for (int unsigned i = 0; i < NUM_INT_FORMATS; i++) begin
+      for (int unsigned f = 0; f < NUM_FP_FORMATS; f++)
+        int_mask[i] |= (fp_width(fp_format_e'(f)) == int_width(int_format_e'(i))) &&
+                       icfg[i] && fp_mask[f];
+    end
+    return int_mask;
+  endfunction
+
+  // Return whether any active format is set as MERGED
+  function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+      if (cfg[f] && types[f] == MERGED) return 1'b1; // one match is enough
+    end
+    return 1'b0; // reached only after the loop found no active MERGED format
+  endfunction
+
+  // Return whether the given format is the first active one set as MERGED
+  function automatic logic is_first_enabled_multi(fp_format_e fmt,
+                                                  fmt_unit_types_t types,
+                                                  fmt_logic_t cfg);
+    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++)
+      if (cfg[f] && types[f] == MERGED)
+        return (fmt == fp_format_e'(f)); // decided by the lowest matching index only
+    return 1'b0; // no active MERGED format at all
+  endfunction
+
+  // Returns the first format that is active and is set as MERGED
+  function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+      if (cfg[f] && types[f] == MERGED) return fp_format_e'(f); // lowest matching index
+    end
+    return fp_format_e'(0); // fallback when no active format is MERGED
+  endfunction
+
+  // Returns the largest number of regs that is active and is set as MERGED
+  function automatic int unsigned get_num_regs_multi(fmt_unsigned_t regs,
+                                                     fmt_unit_types_t types,
+                                                     fmt_logic_t cfg);
+    automatic int unsigned most = 0; // stays 0 when no active format is MERGED
+    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++)
+      if (cfg[f] && types[f] == MERGED)
+        most = maximum(most, regs[f]);
+    return most;
+  endfunction
+
+endpackage
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv
new file mode 100644
index 0000000..4e67720
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv
@@ -0,0 +1,76 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_rounding #( // Adds 0 or 1 to abs_value_i, as decided from rnd_mode_i and the RS bits
+  parameter int unsigned AbsWidth=2 // Width of the absolute value, without sign bit
+) (
+  // Input value
+  input logic [AbsWidth-1:0]   abs_value_i,             // absolute value without sign
+  input logic                  sign_i,
+  // Rounding information
+  input logic [1:0]            round_sticky_bits_i,     // round and sticky bits {RS}
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input logic                  effective_subtraction_i, // sign of inputs affects rounding of zeroes
+  // Output value
+  output logic [AbsWidth-1:0]  abs_rounded_o,           // absolute value without sign
+  output logic                 sign_o,
+  // Output classification
+  output logic                 exact_zero_o             // output is an exact zero
+);
+
+  logic round_up; // Rounding decision
+
+  // Take the rounding decision according to RISC-V spec
+  // RoundMode | Mnemonic | Meaning
+  // :--------:|:--------:|:-------
+  //    000    |   RNE    | Round to Nearest, ties to Even
+  //    001    |   RTZ    | Round towards Zero
+  //    010    |   RDN    | Round Down (towards -\infty)
+  //    011    |   RUP    | Round Up (towards \infty)
+  //    100    |   RMM    | Round to Nearest, ties to Max Magnitude
+  //    101    |   ROD    | Round towards odd (this mode is not defined in the RISC-V FP spec)
+  //  others   |          | *invalid*
+  always_comb begin : rounding_decision
+    unique case (rnd_mode_i)
+      fpnew_pkg::RNE: // Decide according to round/sticky bits
+        unique case (round_sticky_bits_i)
+          2'b00,
+          2'b01: round_up = 1'b0;           // < ulp/2 away, round down
+          2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result
+          2'b11: round_up = 1'b1;           // > ulp/2 away, round up
+          default: round_up = fpnew_pkg::DONT_CARE;
+        endcase
+      fpnew_pkg::RTZ: round_up = 1'b0; // always round down
+      fpnew_pkg::RDN: round_up = (| round_sticky_bits_i) ? sign_i  : 1'b0; // to 0 if +, away if -
+      fpnew_pkg::RUP: round_up = (| round_sticky_bits_i) ? ~sign_i : 1'b0; // to 0 if -, away if +
+      fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
+      fpnew_pkg::ROD: round_up = ~abs_value_i[0] & (| round_sticky_bits_i);
+      default: round_up = fpnew_pkg::DONT_CARE; // invalid mode: use the package don't-care value
+    endcase
+  end
+
+  // Perform the rounding, exponent change and overflow to inf happens automagically
+  assign abs_rounded_o = abs_value_i + round_up;
+
+  // True zero result is a zero result without dirty round/sticky bits
+  assign exact_zero_o = (abs_value_i == '0) && (round_sticky_bits_i == '0);
+
+  // In case of effective subtraction (thus signs of addition operands must have differed) and a
+  // true zero result, the result sign is '-' in case of RDN and '+' for other modes.
+  assign sign_o = (exact_zero_o && effective_subtraction_i)
+                  ? (rnd_mode_i == fpnew_pkg::RDN)
+                  : sign_i;
+
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv
new file mode 100644
index 0000000..f6116a5
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv
@@ -0,0 +1,185 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_top #( // One opgroup block per operation group, results merged by an arbiter
+  // FPU configuration
+  parameter fpnew_pkg::fpu_features_t       Features       = fpnew_pkg::RV64D_Xsflt,
+  parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS,
+  parameter type                            TagType        = logic,
+  parameter int unsigned                    TrueSIMDClass  = 0,
+  parameter int unsigned                    EnableSIMDMask = 0,
+  // Derived from the parameters above - do not change
+  localparam int unsigned NumLanes     = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors),
+  localparam type         MaskType     = logic [NumLanes-1:0],
+  localparam int unsigned WIDTH        = Features.Width,
+  localparam int unsigned NUM_OPERANDS = 3
+) (
+  input logic                               clk_i,
+  input logic                               rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
+  input fpnew_pkg::roundmode_e              rnd_mode_i,
+  input fpnew_pkg::operation_e              op_i,
+  input logic                               op_mod_i,
+  input fpnew_pkg::fp_format_e              src_fmt_i,
+  input fpnew_pkg::fp_format_e              dst_fmt_i,
+  input fpnew_pkg::int_format_e             int_fmt_i,
+  input logic                               vectorial_op_i,
+  input TagType                             tag_i,
+  input MaskType                            simd_mask_i,
+  // Input Handshake
+  input  logic                              in_valid_i,
+  output logic                              in_ready_o,
+  input  logic                              flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]                  result_o,
+  output fpnew_pkg::status_t                status_o,
+  output TagType                            tag_o,
+  // Output handshake
+  output logic                              out_valid_o,
+  input  logic                              out_ready_i,
+  // Indication of valid data in flight
+  output logic                              busy_o
+);
+
+  localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS;
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS;
+
+  // ----------------
+  // Type Definition
+  // ----------------
+  typedef struct packed {
+    logic [WIDTH-1:0]   result;
+    fpnew_pkg::status_t status;
+    TagType             tag;
+  } output_t;
+
+  // Handshake signals for the blocks
+  logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy;
+  output_t [NUM_OPGROUPS-1:0] opgrp_outputs;
+
+  logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed;
+
+  // -----------
+  // Input Side (ready is gated by in_valid_i and taken from the opgroup selected by op_i)
+  // -----------
+  assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)];
+
+  // NaN-boxing check: for scalar operations the operand bits above FP_WIDTH must all be 1
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    // NaN boxing is only generated if it's enabled and needed
+    if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check
+      for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands
+        assign is_boxed[fmt][op] = (!vectorial_op_i)
+                                   ? operands_i[op][WIDTH-1:FP_WIDTH] == '1
+                                   : 1'b1;
+      end
+    end else begin : no_check
+      assign is_boxed[fmt] = '1;
+    end
+  end
+
+  // Filter out the mask if not used: all lanes read as 1 unless logic'(EnableSIMDMask) is 1
+  MaskType simd_mask;
+  assign simd_mask = simd_mask_i | ~{NumLanes{logic'(EnableSIMDMask)}};
+
+  // -------------------------
+  // Generate Operation Blocks
+  // -------------------------
+  for (genvar opgrp = 0; opgrp < int'(NUM_OPGROUPS); opgrp++) begin : gen_operation_groups
+    localparam int unsigned NUM_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp));
+
+    logic in_valid;
+    logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed;
+
+    assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp));
+
+    // slice out input boxing: keep the NUM_OPS lowest operand flags of every format
+    always_comb begin : slice_inputs
+      for (int unsigned fmt = 0; fmt < NUM_FORMATS; fmt++)
+        input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0];
+    end
+
+    fpnew_opgroup_block #(
+      .OpGroup       ( fpnew_pkg::opgroup_e'(opgrp)    ),
+      .Width         ( WIDTH                           ),
+      .EnableVectors ( Features.EnableVectors          ),
+      .FpFmtMask     ( Features.FpFmtMask              ),
+      .IntFmtMask    ( Features.IntFmtMask             ),
+      .FmtPipeRegs   ( Implementation.PipeRegs[opgrp]  ),
+      .FmtUnitTypes  ( Implementation.UnitTypes[opgrp] ),
+      .PipeConfig    ( Implementation.PipeConfig       ),
+      .TagType       ( TagType                         ),
+      .TrueSIMDClass ( TrueSIMDClass                   )
+    ) i_opgroup_block (
+      .clk_i,
+      .rst_ni,
+      .operands_i      ( operands_i[NUM_OPS-1:0] ),
+      .is_boxed_i      ( input_boxed             ),
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .simd_mask_i     ( simd_mask             ),
+      .in_valid_i      ( in_valid              ),
+      .in_ready_o      ( opgrp_in_ready[opgrp] ),
+      .flush_i,
+      .result_o        ( opgrp_outputs[opgrp].result ),
+      .status_o        ( opgrp_outputs[opgrp].status ),
+      .extension_bit_o ( opgrp_ext[opgrp]            ),
+      .tag_o           ( opgrp_outputs[opgrp].tag    ),
+      .out_valid_o     ( opgrp_out_valid[opgrp]      ),
+      .out_ready_i     ( opgrp_out_ready[opgrp]      ),
+      .busy_o          ( opgrp_busy[opgrp]           )
+    );
+  end
+
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+
+  // Round-Robin arbiter to decide which result to use
+  rr_arb_tree #(
+    .NumIn     ( NUM_OPGROUPS ),
+    .DataType  ( output_t     ),
+    .AxiVldRdy ( 1'b1         )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i   ( '0             ),
+    .req_i  ( opgrp_out_valid ),
+    .gnt_o  ( opgrp_out_ready ),
+    .data_i ( opgrp_outputs   ),
+    .gnt_i  ( out_ready_i     ),
+    .req_o  ( out_valid_o     ),
+    .data_o ( arbiter_output  ),
+    .idx_o  ( /* unused */    )
+  );
+
+  // Unpack output
+  assign result_o        = arbiter_output.result;
+  assign status_o        = arbiter_output.status;
+  assign tag_o           = arbiter_output.tag;
+
+  assign busy_o = (| opgrp_busy); // busy while any opgroup block reports busy
+
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
new file mode 100644
index 0000000..bda9c01
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
@@ -0,0 +1,3413 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li                    lile@iis.ee.ethz.ch              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    04/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    control_mvp.sv                                             //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    the control logic  of div and sqrt                         //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+// Revision Date:  13/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To fix some bug found in Control FSM                       //
+//                 when Iteration_unit_num_S  = 2'b10                         //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module control_mvp
+
+  (//Input
+   input logic                                        Clk_CI,
+   input logic                                        Rst_RBI,
+   input logic                                        Div_start_SI ,
+   input logic                                        Sqrt_start_SI,
+   input logic                                        Start_SI,
+   input logic                                        Kill_SI,
+   input logic                                        Special_case_SBI,
+   input logic                                        Special_case_dly_SBI,
+   input logic [C_PC-1:0]                             Precision_ctl_SI,
+   input logic [1:0]                                  Format_sel_SI,
+   input logic [C_MANT_FP64:0]                        Numerator_DI,
+   input logic [C_EXP_FP64:0]                         Exp_num_DI,
+   input logic [C_MANT_FP64:0]                        Denominator_DI,
+   input logic [C_EXP_FP64:0]                         Exp_den_DI,
+
+
+   output logic                                       Div_start_dly_SO ,
+   output logic                                       Sqrt_start_dly_SO,
+   output logic                                       Div_enable_SO,
+   output logic                                       Sqrt_enable_SO,
+
+
+   //To next stage
+   output logic                                       Full_precision_SO,
+   output logic                                       FP32_SO,
+   output logic                                       FP64_SO,
+   output logic                                       FP16_SO,
+   output logic                                       FP16ALT_SO,
+
+   output logic                                       Ready_SO,
+   output logic                                       Done_SO,
+
+   output logic [C_MANT_FP64+4:0]                     Mant_result_prenorm_DO,
+ //  output logic [3:0]                                 Round_bit_DO,
+   output logic [C_EXP_FP64+1:0]                      Exp_result_prenorm_DO
+ );
+
+   logic  [C_MANT_FP64+1+4:0]                         Partial_remainder_DN,Partial_remainder_DP; //58bits,r=q+2
+   logic  [C_MANT_FP64+4:0]                           Quotient_DP; //57bits
+   /////////////////////////////////////////////////////////////////////////////
+   // Assign Inputs                                                          //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [C_MANT_FP64+1:0]                            Numerator_se_D;  //sign extension and hidden bit
+   logic [C_MANT_FP64+1:0]                            Denominator_se_D; //signa extension and hidden bit
+   logic [C_MANT_FP64+1:0]                            Denominator_se_DB;  //1's complement
+
+   assign  Numerator_se_D={1'b0,Numerator_DI};
+
+   assign  Denominator_se_D={1'b0,Denominator_DI};
+
+  always_comb
+   begin
+     if(FP32_SO)
+       begin
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
+       end
+     else if(FP64_SO) begin
+         Denominator_se_DB=~Denominator_se_D;
+     end
+     else if(FP16_SO) begin
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+     end
+     else begin
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+     end
+   end
+
+
+   logic [C_MANT_FP64+1:0]                            Mant_D_sqrt_Norm;
+
+   assign Mant_D_sqrt_Norm=Exp_num_DI[0]?{1'b0,Numerator_DI}:{Numerator_DI,1'b0}; //for sqrt
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Format Selection                                                       //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [1:0]                                      Format_sel_S;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Format_sel_S<='b0;
+          end
+        else if(Start_SI&&Ready_SO)
+          begin
+            Format_sel_S<=Format_sel_SI;
+          end
+        else
+          begin
+            Format_sel_S<=Format_sel_S;
+          end
+    end
+
+   assign FP32_SO = (Format_sel_S==2'b00);
+   assign FP64_SO = (Format_sel_S==2'b01);
+   assign FP16_SO = (Format_sel_S==2'b10);
+   assign FP16ALT_SO = (Format_sel_S==2'b11);
+
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Precision Control                                                       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic [C_PC-1:0]                                   Precision_ctl_S;
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Precision_ctl_S<='b0;
+          end
+        else if(Start_SI&&Ready_SO)
+          begin
+            Precision_ctl_S<=Precision_ctl_SI;
+          end
+        else
+          begin
+            Precision_ctl_S<=Precision_ctl_S;
+          end
+    end
+  assign Full_precision_SO = (Precision_ctl_S==6'h00);
+
+
+
+     logic [5:0]                                     State_ctl_S;
+     logic [5:0]                                     State_Two_iteration_unit_S;
+     logic [5:0]                                     State_Four_iteration_unit_S;
+
+    assign State_Two_iteration_unit_S = Precision_ctl_S[C_PC-1:1];  //Two iteration units
+    assign State_Four_iteration_unit_S = Precision_ctl_S[C_PC-1:2];  //Four iteration units
+     always_comb
+       begin
+         case(Iteration_unit_num_S)
+//////////////////////one iteration unit, start///////////////////////////////////////
+           2'b00:  //one iteration unit
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h1b;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h38;  //53+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0e;  //11+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0b;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////one iteration unit, end///////////////////////////////////////
+
+//////////////////////two iteration units, start///////////////////////////////////////
+           2'b01:  //two iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0d;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h1b;  //53+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h06;  //11+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h05;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////two iteration units, end///////////////////////////////////////
+
+//////////////////////three iteration units, start///////////////////////////////////////
+           2'b10:  //three iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h08;  //24+3 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       6'h0c,6'h0d,6'h0e:
+                         begin
+                           State_ctl_S = 6'h04;
+                         end
+                       6'h0f,6'h10,6'h11:
+                         begin
+                           State_ctl_S = 6'h05;
+                         end
+                       6'h12,6'h13,6'h14:
+                         begin
+                           State_ctl_S = 6'h06;
+                         end
+                       6'h15,6'h16,6'h17:
+                         begin
+                           State_ctl_S = 6'h07;
+                         end
+                       default:
+                         begin
+                           State_ctl_S = 6'h08;  //24+3 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b01: //FP64
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h12;  //53+4 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       6'h0c,6'h0d,6'h0e:
+                         begin
+                           State_ctl_S = 6'h04;
+                         end
+                       6'h0f,6'h10,6'h11:
+                         begin
+                           State_ctl_S = 6'h05;
+                         end
+                       6'h12,6'h13,6'h14:
+                         begin
+                           State_ctl_S = 6'h06;
+                         end
+                       6'h15,6'h16,6'h17:
+                         begin
+                           State_ctl_S = 6'h07;
+                         end
+                       6'h18,6'h19,6'h1a:
+                         begin
+                           State_ctl_S = 6'h08;
+                         end
+                       6'h1b,6'h1c,6'h1d:
+                         begin
+                           State_ctl_S = 6'h09;
+                         end
+                       6'h1e,6'h1f,6'h20:
+                         begin
+                           State_ctl_S = 6'h0a;
+                         end
+                       6'h21,6'h22,6'h23:
+                         begin
+                           State_ctl_S = 6'h0b;
+                         end
+                       6'h24,6'h25,6'h26:
+                         begin
+                           State_ctl_S = 6'h0c;
+                         end
+                       6'h27,6'h28,6'h29:
+                         begin
+                           State_ctl_S = 6'h0d;
+                         end
+                       6'h2a,6'h2b,6'h2c:
+                         begin
+                           State_ctl_S = 6'h0e;
+                         end
+                       6'h2d,6'h2e,6'h2f:
+                         begin
+                           State_ctl_S = 6'h0f;
+                         end
+                       6'h30,6'h31,6'h32:
+                         begin
+                           State_ctl_S = 6'h10;
+                         end
+                       6'h33,6'h34,6'h35:
+                         begin
+                           State_ctl_S = 6'h11;
+                         end
+                       default:
+                         begin
+                           State_ctl_S = 6'h12;  //53+4 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b10: //FP16
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h04;  //12+3 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       default:
+                         begin
+                           State_ctl_S = 6'h04;  //12+3 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h03;  //8+4 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       default:
+                         begin
+                           State_ctl_S = 6'h03;  //8+4 more iterations for rounding bits
+                         end
+                     endcase
+                  end
+                endcase
+              end
+//////////////////////three iteration units, end///////////////////////////////////////
+
+//////////////////////four iteration units, start///////////////////////////////////////
+           2'b11:  //four iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h06;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0d;  //53+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h03;  //11+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h02;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////four iteration units, end///////////////////////////////////////
+
+           endcase
+        end
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // control logic                                                           //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic                                               Div_start_dly_S;   // registered flag: a division start was accepted in the previous cycle
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)   //  generate Div_start_dly_S signal
+     begin
+        if(~Rst_RBI)   // asynchronous, active-low reset
+          begin
+            Div_start_dly_S<=1'b0;
+          end
+        else if(Div_start_SI&&Ready_SO)   // division start requested while Ready_SO is high
+         begin
+           Div_start_dly_S<=1'b1;
+         end
+        else   // in every other cycle the flag is cleared, so it never holds by itself
+          begin
+            Div_start_dly_S<=1'b0;
+          end
+    end
+
+   assign Div_start_dly_SO=Div_start_dly_S;   // drive the output from the registered flag
+
+  always_ff @(posedge Clk_CI, negedge Rst_RBI) begin  //  generate Div_enable_SO signal
+    if(~Rst_RBI)   // asynchronous, active-low reset
+      Div_enable_SO<=1'b0;
+    // Synchronous clear on Kill_SI; checked first, so it wins over a start in the same cycle
+    else if (Kill_SI)
+      Div_enable_SO <= 1'b0;
+    else if(Div_start_SI&&Ready_SO)   // set when a division start is requested while Ready_SO is high
+      Div_enable_SO<=1'b1;
+    else if(Done_SO)   // cleared in the cycle after Done_SO is high
+      Div_enable_SO<=1'b0;
+    else   // no event: hold the current value
+      Div_enable_SO<=Div_enable_SO;
+  end
+
+   logic                                                Sqrt_start_dly_S;   // registered flag: a square-root start was accepted in the previous cycle
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)   //  generate Sqrt_start_dly_S signal
+     begin
+        if(~Rst_RBI)   // asynchronous, active-low reset
+          begin
+            Sqrt_start_dly_S<=1'b0;
+          end
+        else if(Sqrt_start_SI&&Ready_SO)   // square-root start requested while Ready_SO is high
+         begin
+           Sqrt_start_dly_S<=1'b1;
+         end
+        else   // in every other cycle the flag is cleared, so it never holds by itself
+          begin
+            Sqrt_start_dly_S<=1'b0;
+          end
+      end
+    assign Sqrt_start_dly_SO=Sqrt_start_dly_S;   // drive the output from the registered flag
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) begin   //  generate Sqrt_enable_SO signal
+    if(~Rst_RBI)   // asynchronous, active-low reset
+      Sqrt_enable_SO<=1'b0;
+    else if (Kill_SI)   // synchronous clear; checked first, so it wins over a start in the same cycle
+      Sqrt_enable_SO <= 1'b0;
+    else if(Sqrt_start_SI&&Ready_SO)   // set when a square-root start is requested while Ready_SO is high
+      Sqrt_enable_SO<=1'b1;
+    else if(Done_SO)   // cleared in the cycle after Done_SO is high
+      Sqrt_enable_SO<=1'b0;
+    else   // no event: hold the current value
+      Sqrt_enable_SO<=Sqrt_enable_SO;
+  end
+
+   logic [5:0]                                                  Crtl_cnt_S;   // 6-bit control counter, compared against State_ctl_S below
+   logic                                                        Start_dly_S;   // delayed start of either operation
+
+   assign   Start_dly_S=Div_start_dly_S |Sqrt_start_dly_S;   // OR of the division and square-root delayed start flags
+
+   logic       Fsm_enable_S;   // high when: (delayed start OR counter non-zero) AND no kill AND Special_case_dly_SBI high
+   assign      Fsm_enable_S=( (Start_dly_S | (| Crtl_cnt_S)) && (~Kill_SI) && Special_case_dly_SBI);
+
+   logic                                                        Final_state_S;   // counter has reached the target value State_ctl_S
+   assign     Final_state_S= (Crtl_cnt_S==State_ctl_S);
+
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) //control_FSM: Crtl_cnt_S counts up until it equals State_ctl_S
+     begin
+        if (~Rst_RBI)   // asynchronous, active-low reset
+          begin
+             Crtl_cnt_S    <= '0;
+          end
+          else if (Final_state_S | Kill_SI)   // back to zero at the final count or on a kill
+            begin
+              Crtl_cnt_S    <= '0;
+            end
+          else if(Fsm_enable_S) // one cycle Start_SI; the counter advances by one while Fsm_enable_S is high
+            begin
+              Crtl_cnt_S    <= Crtl_cnt_S+1;
+            end
+          else   // not enabled: the counter is forced to zero rather than held
+            begin
+              Crtl_cnt_S    <= '0;
+            end
+     end // always_ff
+
+
+
+    always_ff @(posedge Clk_CI, negedge Rst_RBI) //Generate Done_SO; one flag for both operations (Div_enable_SO and Sqrt_enable_SO both clear on it)
+      begin
+        if(~Rst_RBI)   // asynchronous, active-low reset
+          begin
+            Done_SO<=1'b0;
+          end
+        else if(Start_SI&&Ready_SO)   // start requested while Ready_SO is high
+          begin
+            if(~Special_case_SBI)   // Special_case_SBI low: Done_SO is set right away (presumably a special operand case, active low -- confirm)
+              begin
+                Done_SO<=1'b1;
+              end
+            else   // Special_case_SBI high: Done_SO stays low for now
+              begin
+                Done_SO<=1'b0;
+              end
+          end
+        else if(Final_state_S)   // counter reached State_ctl_S
+          begin
+            Done_SO<=1'b1;
+          end
+        else   // Done_SO is never held: it drops again unless one of the conditions above repeats
+          begin
+            Done_SO<=1'b0;
+          end
+       end
+
+
+
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) //Generate  Ready_SO
+     begin
+       if(~Rst_RBI)   // asynchronous, active-low reset; Ready_SO comes out of reset high
+         begin
+           Ready_SO<=1'b1;
+         end
+
+       else if(Start_SI&&Ready_SO)   // start requested while Ready_SO is high
+         begin
+            if(~Special_case_SBI)   // Special_case_SBI low: stay ready (Done_SO is set on this same condition)
+              begin
+                Ready_SO<=1'b1;
+              end
+            else   // Special_case_SBI high: drop Ready_SO until the final count or a kill
+              begin
+                Ready_SO<=1'b0;
+              end
+         end
+       else if(Final_state_S | Kill_SI)   // counter reached State_ctl_S, or the operation was killed
+         begin
+           Ready_SO<=1'b1;
+         end
+       else   // no event: hold the current value
+         begin
+           Ready_SO<=Ready_SO;
+         end
+     end
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b00, start  //
+   ////////////////////////////////////////////////////////////////////////////
+
+  logic                                    Qcnt_one_0;   // Qcnt_one_N is N bits wide for N>=1; Qcnt_one_0 is a single bit
+  logic                                    Qcnt_one_1;
+  logic [1:0]                              Qcnt_one_2;
+  logic [2:0]                              Qcnt_one_3;
+  logic [3:0]                              Qcnt_one_4;
+  logic [4:0]                              Qcnt_one_5;
+  logic [5:0]                              Qcnt_one_6;
+  logic [6:0]                              Qcnt_one_7;
+  logic [7:0]                              Qcnt_one_8;
+  logic [8:0]                              Qcnt_one_9;
+  logic [9:0]                              Qcnt_one_10;
+  logic [10:0]                             Qcnt_one_11;
+  logic [11:0]                             Qcnt_one_12;
+  logic [12:0]                             Qcnt_one_13;
+  logic [13:0]                             Qcnt_one_14;
+  logic [14:0]                             Qcnt_one_15;
+  logic [15:0]                             Qcnt_one_16;
+  logic [16:0]                             Qcnt_one_17;
+  logic [17:0]                             Qcnt_one_18;
+  logic [18:0]                             Qcnt_one_19;
+  logic [19:0]                             Qcnt_one_20;
+  logic [20:0]                             Qcnt_one_21;
+  logic [21:0]                             Qcnt_one_22;
+  logic [22:0]                             Qcnt_one_23;
+  logic [23:0]                             Qcnt_one_24;
+  logic [24:0]                             Qcnt_one_25;
+  logic [25:0]                             Qcnt_one_26;
+  logic [26:0]                             Qcnt_one_27;
+  logic [27:0]                             Qcnt_one_28;
+  logic [28:0]                             Qcnt_one_29;
+  logic [29:0]                             Qcnt_one_30;
+  logic [30:0]                             Qcnt_one_31;
+  logic [31:0]                             Qcnt_one_32;
+  logic [32:0]                             Qcnt_one_33;
+  logic [33:0]                             Qcnt_one_34;
+  logic [34:0]                             Qcnt_one_35;
+  logic [35:0]                             Qcnt_one_36;
+  logic [36:0]                             Qcnt_one_37;
+  logic [37:0]                             Qcnt_one_38;
+  logic [38:0]                             Qcnt_one_39;
+  logic [39:0]                             Qcnt_one_40;
+  logic [40:0]                             Qcnt_one_41;
+  logic [41:0]                             Qcnt_one_42;
+  logic [42:0]                             Qcnt_one_43;
+  logic [43:0]                             Qcnt_one_44;
+  logic [44:0]                             Qcnt_one_45;
+  logic [45:0]                             Qcnt_one_46;
+  logic [46:0]                             Qcnt_one_47;
+  logic [47:0]                             Qcnt_one_48;
+  logic [48:0]                             Qcnt_one_49;
+  logic [49:0]                             Qcnt_one_50;
+  logic [50:0]                             Qcnt_one_51;
+  logic [51:0]                             Qcnt_one_52;
+  logic [52:0]                             Qcnt_one_53;
+  logic [53:0]                             Qcnt_one_54;
+  logic [54:0]                             Qcnt_one_55;
+  logic [55:0]                             Qcnt_one_56;
+  logic [56:0]                             Qcnt_one_57;
+  logic [57:0]                             Qcnt_one_58;   // NOTE(review): no assign for Qcnt_one_58..60 sits with the Qcnt_one_0..57 assigns -- confirm they are unused
+  logic [58:0]                             Qcnt_one_59;
+  logic [59:0]                             Qcnt_one_60;
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b00, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b01, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [1:0]                              Qcnt_two_0;   // Qcnt_two_k is 2k+1 bits wide for k>=1; Qcnt_two_0 is 2 bits
+  logic [2:0]                              Qcnt_two_1;
+  logic [4:0]                              Qcnt_two_2;
+  logic [6:0]                              Qcnt_two_3;
+  logic [8:0]                              Qcnt_two_4;
+  logic [10:0]                             Qcnt_two_5;
+  logic [12:0]                             Qcnt_two_6;
+  logic [14:0]                             Qcnt_two_7;
+  logic [16:0]                             Qcnt_two_8;
+  logic [18:0]                             Qcnt_two_9;
+  logic [20:0]                             Qcnt_two_10;
+  logic [22:0]                             Qcnt_two_11;
+  logic [24:0]                             Qcnt_two_12;
+  logic [26:0]                             Qcnt_two_13;
+  logic [28:0]                             Qcnt_two_14;
+  logic [30:0]                             Qcnt_two_15;
+  logic [32:0]                             Qcnt_two_16;
+  logic [34:0]                             Qcnt_two_17;
+  logic [36:0]                             Qcnt_two_18;
+  logic [38:0]                             Qcnt_two_19;
+  logic [40:0]                             Qcnt_two_20;
+  logic [42:0]                             Qcnt_two_21;
+  logic [44:0]                             Qcnt_two_22;
+  logic [46:0]                             Qcnt_two_23;
+  logic [48:0]                             Qcnt_two_24;
+  logic [50:0]                             Qcnt_two_25;
+  logic [52:0]                             Qcnt_two_26;
+  logic [54:0]                             Qcnt_two_27;
+  logic [56:0]                             Qcnt_two_28;
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b01, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b10, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [2:0]                              Qcnt_three_0;   // Qcnt_three_k is 3k+2 bits wide for k>=1; Qcnt_three_0 is 3 bits
+  logic [4:0]                              Qcnt_three_1;
+  logic [7:0]                              Qcnt_three_2;
+  logic [10:0]                             Qcnt_three_3;
+  logic [13:0]                             Qcnt_three_4;
+  logic [16:0]                             Qcnt_three_5;
+  logic [19:0]                             Qcnt_three_6;
+  logic [22:0]                             Qcnt_three_7;
+  logic [25:0]                             Qcnt_three_8;
+  logic [28:0]                             Qcnt_three_9;
+  logic [31:0]                             Qcnt_three_10;
+  logic [34:0]                             Qcnt_three_11;
+  logic [37:0]                             Qcnt_three_12;
+  logic [40:0]                             Qcnt_three_13;
+  logic [43:0]                             Qcnt_three_14;
+  logic [46:0]                             Qcnt_three_15;
+  logic [49:0]                             Qcnt_three_16;
+  logic [52:0]                             Qcnt_three_17;
+  logic [55:0]                             Qcnt_three_18;
+  logic [58:0]                             Qcnt_three_19;
+  logic [61:0]                             Qcnt_three_20;   // NOTE(review): no assign for Qcnt_three_20 sits with the Qcnt_three_0..19 assigns -- confirm it is unused
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b10, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b11, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [3:0]                              Qcnt_four_0;   // Qcnt_four_k is 4k+3 bits wide for k>=1; Qcnt_four_0 is 4 bits
+  logic [6:0]                              Qcnt_four_1;
+  logic [10:0]                             Qcnt_four_2;
+  logic [14:0]                             Qcnt_four_3;
+  logic [18:0]                             Qcnt_four_4;
+  logic [22:0]                             Qcnt_four_5;
+  logic [26:0]                             Qcnt_four_6;
+  logic [30:0]                             Qcnt_four_7;
+  logic [34:0]                             Qcnt_four_8;
+  logic [38:0]                             Qcnt_four_9;
+  logic [42:0]                             Qcnt_four_10;
+  logic [46:0]                             Qcnt_four_11;
+  logic [50:0]                             Qcnt_four_12;
+  logic [54:0]                             Qcnt_four_13;
+  logic [58:0]                             Qcnt_four_14;
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b11, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R0,Sqrt_Q0,Q_sqrt0,Q_sqrt_com_0;   // all C_MANT_FP64+6 bits wide; Q_sqrt_com_n is derived from ~Q_sqrtn
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R1,Sqrt_Q1,Q_sqrt1,Q_sqrt_com_1;
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R2,Sqrt_Q2,Q_sqrt2,Q_sqrt_com_2;
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R3,Sqrt_Q3,Q_sqrt3,Q_sqrt_com_3,Sqrt_R4; //Sqrt_Q4;
+
+
+   logic [1:0]                                                    Sqrt_DI  [3:0];   // four 2-bit entries
+   logic [1:0]                                                    Sqrt_DO  [3:0];   // four 2-bit entries
+   logic                                                          Sqrt_carry_DO;
+
+
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_a_D [3:0];   // four entries each -- presumably one per iteration cell, confirm
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_b_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_a_BMASK_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_b_BMASK_D [3:0];
+  logic                                                           Iteration_cell_carry_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_sum_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_sum_AMASK_D [3:0];
+
+
+  logic [3:0]                                                     Sqrt_quotinent_S;   // bit 3 comes from cell [0], bit 0 from cell [3]
+
+
+   always_comb
+    begin  // per Format_sel_S: build Sqrt_quotinent_S (cell [0] in bit 3 ... cell [3] in bit 0) and the complemented Q_sqrt_com_n
+      case (Format_sel_S)
+        2'b00:   // FP32: inverted bit C_MANT_FP32+5 of each masked sum; complement of the low C_MANT_FP32+6 bits, zero-extended
+          begin
+            Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP32+5])};
+            Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt0[C_MANT_FP32+5:0] };
+            Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt1[C_MANT_FP32+5:0] };
+            Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt2[C_MANT_FP32+5:0] };
+            Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt3[C_MANT_FP32+5:0] };
+          end
+        2'b01:   // FP64: the carry outputs are used directly (not inverted); full-width complement
+          begin
+            Sqrt_quotinent_S = {Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]};
+            Q_sqrt_com_0=~Q_sqrt0;
+            Q_sqrt_com_1=~Q_sqrt1;
+            Q_sqrt_com_2=~Q_sqrt2;
+            Q_sqrt_com_3=~Q_sqrt3;
+          end
+        2'b10:   // FP16: same scheme as FP32, using C_MANT_FP16
+          begin
+            Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16+5])};
+            Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt0[C_MANT_FP16+5:0] };
+            Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt1[C_MANT_FP16+5:0] };
+            Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt2[C_MANT_FP16+5:0] };
+            Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt3[C_MANT_FP16+5:0] };
+          end
+        2'b11:   // FP16ALT: same scheme as FP32, using C_MANT_FP16ALT
+          begin
+            Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16ALT+5])};
+            Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt0[C_MANT_FP16ALT+5:0] };
+            Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt1[C_MANT_FP16ALT+5:0] };
+            Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt2[C_MANT_FP16ALT+5:0] };
+            Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt3[C_MANT_FP16ALT+5:0] };
+          end
+        endcase
+    end
+
+
+
+  assign  Qcnt_one_0=    {1'b0};  //qk for each feedback; Qcnt_one_N takes the low N bits of Quotient_DP, Qcnt_one_0 is constant 0
+  assign  Qcnt_one_1=    {Quotient_DP[0]};
+  assign  Qcnt_one_2=    {Quotient_DP[1:0]};
+  assign  Qcnt_one_3=    {Quotient_DP[2:0]};
+  assign  Qcnt_one_4=    {Quotient_DP[3:0]};
+  assign  Qcnt_one_5=    {Quotient_DP[4:0]};
+  assign  Qcnt_one_6=    {Quotient_DP[5:0]};
+  assign  Qcnt_one_7=    {Quotient_DP[6:0]};
+  assign  Qcnt_one_8=    {Quotient_DP[7:0]};
+  assign  Qcnt_one_9=    {Quotient_DP[8:0]};
+  assign  Qcnt_one_10=    {Quotient_DP[9:0]};
+  assign  Qcnt_one_11=    {Quotient_DP[10:0]};
+  assign  Qcnt_one_12=    {Quotient_DP[11:0]};
+  assign  Qcnt_one_13=    {Quotient_DP[12:0]};
+  assign  Qcnt_one_14=    {Quotient_DP[13:0]};
+  assign  Qcnt_one_15=    {Quotient_DP[14:0]};
+  assign  Qcnt_one_16=    {Quotient_DP[15:0]};
+  assign  Qcnt_one_17=    {Quotient_DP[16:0]};
+  assign  Qcnt_one_18=    {Quotient_DP[17:0]};
+  assign  Qcnt_one_19=    {Quotient_DP[18:0]};
+  assign  Qcnt_one_20=    {Quotient_DP[19:0]};
+  assign  Qcnt_one_21=    {Quotient_DP[20:0]};
+  assign  Qcnt_one_22=    {Quotient_DP[21:0]};
+  assign  Qcnt_one_23=    {Quotient_DP[22:0]};
+  assign  Qcnt_one_24=    {Quotient_DP[23:0]};
+  assign  Qcnt_one_25=    {Quotient_DP[24:0]};
+  assign  Qcnt_one_26=    {Quotient_DP[25:0]};
+  assign  Qcnt_one_27=    {Quotient_DP[26:0]};
+  assign  Qcnt_one_28=    {Quotient_DP[27:0]};
+  assign  Qcnt_one_29=    {Quotient_DP[28:0]};
+  assign  Qcnt_one_30=    {Quotient_DP[29:0]};
+  assign  Qcnt_one_31=    {Quotient_DP[30:0]};
+  assign  Qcnt_one_32=    {Quotient_DP[31:0]};
+  assign  Qcnt_one_33=    {Quotient_DP[32:0]};
+  assign  Qcnt_one_34=    {Quotient_DP[33:0]};
+  assign  Qcnt_one_35=    {Quotient_DP[34:0]};
+  assign  Qcnt_one_36=    {Quotient_DP[35:0]};
+  assign  Qcnt_one_37=    {Quotient_DP[36:0]};
+  assign  Qcnt_one_38=    {Quotient_DP[37:0]};
+  assign  Qcnt_one_39=    {Quotient_DP[38:0]};
+  assign  Qcnt_one_40=    {Quotient_DP[39:0]};
+  assign  Qcnt_one_41=    {Quotient_DP[40:0]};
+  assign  Qcnt_one_42=    {Quotient_DP[41:0]};
+  assign  Qcnt_one_43=    {Quotient_DP[42:0]};
+  assign  Qcnt_one_44=    {Quotient_DP[43:0]};
+  assign  Qcnt_one_45=    {Quotient_DP[44:0]};
+  assign  Qcnt_one_46=    {Quotient_DP[45:0]};
+  assign  Qcnt_one_47=    {Quotient_DP[46:0]};
+  assign  Qcnt_one_48=    {Quotient_DP[47:0]};
+  assign  Qcnt_one_49=    {Quotient_DP[48:0]};
+  assign  Qcnt_one_50=    {Quotient_DP[49:0]};
+  assign  Qcnt_one_51=    {Quotient_DP[50:0]};
+  assign  Qcnt_one_52=    {Quotient_DP[51:0]};
+  assign  Qcnt_one_53=    {Quotient_DP[52:0]};
+  assign  Qcnt_one_54=    {Quotient_DP[53:0]};
+  assign  Qcnt_one_55=    {Quotient_DP[54:0]};
+  assign  Qcnt_one_56=    {Quotient_DP[55:0]};
+  assign  Qcnt_one_57=    {Quotient_DP[56:0]};
+
+
+  assign  Qcnt_two_0 =    {1'b0,            Sqrt_quotinent_S[3]};  //qk for each feedback; Qcnt_two_k = low 2k bits of Quotient_DP followed by Sqrt_quotinent_S[3]
+  assign  Qcnt_two_1 =    {Quotient_DP[1:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_2 =    {Quotient_DP[3:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_3 =    {Quotient_DP[5:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_4 =    {Quotient_DP[7:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_5 =    {Quotient_DP[9:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_6 =    {Quotient_DP[11:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_7 =    {Quotient_DP[13:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_8 =    {Quotient_DP[15:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_9 =    {Quotient_DP[17:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_10 =    {Quotient_DP[19:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_11 =    {Quotient_DP[21:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_12 =    {Quotient_DP[23:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_13 =    {Quotient_DP[25:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_14 =    {Quotient_DP[27:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_15 =    {Quotient_DP[29:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_16 =    {Quotient_DP[31:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_17 =    {Quotient_DP[33:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_18 =    {Quotient_DP[35:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_19 =    {Quotient_DP[37:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_20 =    {Quotient_DP[39:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_21 =    {Quotient_DP[41:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_22 =    {Quotient_DP[43:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_23 =    {Quotient_DP[45:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_24 =    {Quotient_DP[47:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_25 =    {Quotient_DP[49:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_26 =    {Quotient_DP[51:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_27 =    {Quotient_DP[53:0],Sqrt_quotinent_S[3]};
+  assign  Qcnt_two_28 =    {Quotient_DP[55:0],Sqrt_quotinent_S[3]};
+
+
+  assign  Qcnt_three_0 =    {1'b0,            Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};  //qk for each feedback; Qcnt_three_k = low 3k bits of Quotient_DP, then Sqrt_quotinent_S[3] and [2]
+  assign  Qcnt_three_1 =    {Quotient_DP[2:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_2 =    {Quotient_DP[5:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_3 =    {Quotient_DP[8:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_4 =    {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_5 =    {Quotient_DP[14:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_6 =    {Quotient_DP[17:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_7 =    {Quotient_DP[20:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_8 =    {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_9 =    {Quotient_DP[26:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_10 =    {Quotient_DP[29:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_11 =    {Quotient_DP[32:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_12 =    {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_13 =    {Quotient_DP[38:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_14 =    {Quotient_DP[41:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_15 =    {Quotient_DP[44:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_16 =    {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_17 =    {Quotient_DP[50:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_18 =    {Quotient_DP[53:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+  assign  Qcnt_three_19 =    {Quotient_DP[56:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]};
+
+  // Qcnt_four_k = {Quotient_DP[4k-1:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]} (4k+3 bits)
+  assign Qcnt_four_0  = {1'b0,              Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};  // k = 0: a single 1'b0 replaces the Quotient_DP slice (4 bits)
+  assign Qcnt_four_1  = {Quotient_DP[3:0],  Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_2  = {Quotient_DP[7:0],  Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_3  = {Quotient_DP[11:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_4  = {Quotient_DP[15:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_5  = {Quotient_DP[19:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_6  = {Quotient_DP[23:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_7  = {Quotient_DP[27:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_8  = {Quotient_DP[31:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_9  = {Quotient_DP[35:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_10 = {Quotient_DP[39:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_11 = {Quotient_DP[43:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_12 = {Quotient_DP[47:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_13 = {Quotient_DP[51:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+  assign Qcnt_four_14 = {Quotient_DP[55:0], Sqrt_quotinent_S[3], Sqrt_quotinent_S[2], Sqrt_quotinent_S[1]};
+
+
+
+
+  always_comb begin  // the intermediate operands for sqrt
+
+  case(Iteration_unit_num_S)
+    2'b00:
+      begin
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b00, start       //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+
+
+        case(Crtl_cnt_S)
+
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_0};
+              Sqrt_Q0=Q_sqrt_com_0;
+            end
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_1};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_one_2};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt0={{(C_MANT_FP64+3){1'b0}},Qcnt_one_3};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_one_4};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_one_5};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_one_6};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_one_7};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_one_8};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_one_9};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_one_10};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt0={{(C_MANT_FP64-5){1'b0}},Qcnt_one_11};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_one_12};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_one_13};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_one_14};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt0={{(C_MANT_FP64-9){1'b0}},Qcnt_one_15};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_one_16};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_one_17};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_one_18};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_one_19};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_one_20};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_one_21};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_one_22};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt0={{(C_MANT_FP64-17){1'b0}},Qcnt_one_23};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_one_24};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_one_25};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_one_26};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-21){1'b0}},Qcnt_one_27};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_one_28};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_one_29};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_one_30};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_one_31};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_one_32};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_one_33};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_one_34};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-29){1'b0}},Qcnt_one_35};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_one_36};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_one_37};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_one_38};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-33){1'b0}},Qcnt_one_39};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_one_40};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_one_41};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_one_42};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_one_43};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_one_44};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_one_45};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_one_46};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-41){1'b0}},Qcnt_one_47};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_one_48};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_one_49};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_one_50};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-45){1'b0}},Qcnt_one_51};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_one_52};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_one_53};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_one_54};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_one_55};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b111000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_one_56};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+
+          default:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0='0;
+              Sqrt_Q0='0;
+            end
+        endcase
+      end
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b00, end         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b01:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b01, start       //
+   /////////////////////////////////////////////////////////////////////////////
+        case(Crtl_cnt_S)
+
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_two_1[2:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt1={{(C_MANT_FP64+3){1'b0}},Qcnt_two_1[2:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_two_2[4:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_two_2[4:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_two_3[6:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt1={{(C_MANT_FP64-1){1'b0}},Qcnt_two_3[6:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_two_4[8:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt1={{(C_MANT_FP64-3){1'b0}},Qcnt_two_4[8:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+            6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_two_5[10:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_two_5[10:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_two_6[12:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt1={{(C_MANT_FP64-7){1'b0}},Qcnt_two_6[12:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_two_7[14:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt1={{(C_MANT_FP64-9){1'b0}},Qcnt_two_7[14:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_two_8[16:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_two_8[16:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_two_9[18:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt1={{(C_MANT_FP64-13){1'b0}},Qcnt_two_9[18:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_two_10[20:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt1={{(C_MANT_FP64-15){1'b0}},Qcnt_two_10[20:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_two_11[22:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_two_11[22:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_two_12[24:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt1={{(C_MANT_FP64-19){1'b0}},Qcnt_two_12[24:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_two_13[26:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-21){1'b0}},Qcnt_two_13[26:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_two_14[28:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_two_14[28:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_two_15[30:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-25){1'b0}},Qcnt_two_15[30:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_two_16[32:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-27){1'b0}},Qcnt_two_16[32:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_two_17[34:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_two_17[34:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_two_18[36:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-31){1'b0}},Qcnt_two_18[36:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_two_19[38:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-33){1'b0}},Qcnt_two_19[38:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_two_20[40:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_two_20[40:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_two_21[42:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-37){1'b0}},Qcnt_two_21[42:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_two_22[44:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-39){1'b0}},Qcnt_two_22[44:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_two_23[46:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_two_23[46:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_two_24[48:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-43){1'b0}},Qcnt_two_24[48:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_two_25[50:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-45){1'b0}},Qcnt_two_25[50:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_two_26[52:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_two_26[52:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_two_27[54:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-49){1'b0}},Qcnt_two_27[54:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_two_28[56:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-51){1'b0}},Qcnt_two_28[56:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          default:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+        endcase
+      end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b01, end         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b10:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b10, start       //
+   /////////////////////////////////////////////////////////////////////////////
+
+        case(Crtl_cnt_S)
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_three_1[4:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_three_1[4:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt2={{(C_MANT_FP64){1'b0}},Qcnt_three_1[4:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_three_2[7:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt1={{(C_MANT_FP64-2){1'b0}},Qcnt_three_2[7:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt2={{(C_MANT_FP64-3){1'b0}},Qcnt_three_2[7:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_three_3[10:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_three_3[10:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt2={{(C_MANT_FP64-6){1'b0}},Qcnt_three_3[10:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_three_4[13:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_three_4[13:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_three_4[13:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_three_5[16:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_three_5[16:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt2={{(C_MANT_FP64-12){1'b0}},Qcnt_three_5[16:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_three_6[19:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt1={{(C_MANT_FP64-14){1'b0}},Qcnt_three_6[19:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt2={{(C_MANT_FP64-15){1'b0}},Qcnt_three_6[19:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_three_7[22:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_three_7[22:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt2={{(C_MANT_FP64-18){1'b0}},Qcnt_three_7[22:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_three_8[25:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_three_8[25:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_three_8[25:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_three_9[28:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_three_9[28:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-24){1'b0}},Qcnt_three_9[28:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_three_10[31:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-26){1'b0}},Qcnt_three_10[31:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-27){1'b0}},Qcnt_three_10[31:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_three_11[34:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_three_11[34:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-30){1'b0}},Qcnt_three_11[34:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_three_12[37:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_three_12[37:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_three_12[37:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_three_13[40:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_three_13[40:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-36){1'b0}},Qcnt_three_13[40:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_three_14[43:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-38){1'b0}},Qcnt_three_14[43:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-39){1'b0}},Qcnt_three_14[43:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_three_15[46:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_three_15[46:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-42){1'b0}},Qcnt_three_15[46:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_three_16[49:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_three_16[49:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_three_16[49:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_three_17[52:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_three_17[52:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-48){1'b0}},Qcnt_three_17[52:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_three_18[55:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-50){1'b0}},Qcnt_three_18[55:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-51){1'b0}},Qcnt_three_18[55:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          default :
+              begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+        endcase
+
+      end
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b10, end       //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b11:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b11, start       //
+   /////////////////////////////////////////////////////////////////////////////
+
+              case(Crtl_cnt_S)
+
+                6'b000000:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+                    Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]};
+                    Sqrt_Q0=Q_sqrt_com_0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+                    Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+                    Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+                    Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000001:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+                    Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_four_1[6:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+                    Q_sqrt1={{(C_MANT_FP64){1'b0}},Qcnt_four_1[6:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+                    Q_sqrt2={{(C_MANT_FP64-1){1'b0}},Qcnt_four_1[6:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+                    Q_sqrt3={{(C_MANT_FP64-2){1'b0}},Qcnt_four_1[6:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000010:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+                    Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_four_2[10:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+                    Q_sqrt1={{(C_MANT_FP64-4){1'b0}},Qcnt_four_2[10:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+                    Q_sqrt2={{(C_MANT_FP64-5){1'b0}},Qcnt_four_2[10:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+                    Q_sqrt3={{(C_MANT_FP64-6){1'b0}},Qcnt_four_2[10:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000011:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+                    Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_four_3[14:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+                    Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_four_3[14:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+                    Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_four_3[14:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+                    Q_sqrt3={{(C_MANT_FP64-10){1'b0}},Qcnt_four_3[14:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000100:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+                    Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_four_4[18:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+                    Q_sqrt1={{(C_MANT_FP64-12){1'b0}},Qcnt_four_4[18:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+                    Q_sqrt2={{(C_MANT_FP64-13){1'b0}},Qcnt_four_4[18:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+                    Q_sqrt3={{(C_MANT_FP64-14){1'b0}},Qcnt_four_4[18:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000101:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+                    Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_four_5[22:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+                    Q_sqrt1={{(C_MANT_FP64-16){1'b0}},Qcnt_four_5[22:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+                    Q_sqrt2={{(C_MANT_FP64-17){1'b0}},Qcnt_four_5[22:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+                    Q_sqrt3={{(C_MANT_FP64-18){1'b0}},Qcnt_four_5[22:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000110:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+                    Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_four_6[26:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+                    Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_four_6[26:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+                    Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_four_6[26:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-22){1'b0}},Qcnt_four_6[26:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000111:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_four_7[30:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-24){1'b0}},Qcnt_four_7[30:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-25){1'b0}},Qcnt_four_7[30:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-26){1'b0}},Qcnt_four_7[30:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001000:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_four_8[34:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-28){1'b0}},Qcnt_four_8[34:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-29){1'b0}},Qcnt_four_8[34:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-30){1'b0}},Qcnt_four_8[34:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001001:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_four_9[38:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_four_9[38:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_four_9[38:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-34){1'b0}},Qcnt_four_9[38:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001010:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_four_10[42:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-36){1'b0}},Qcnt_four_10[42:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-37){1'b0}},Qcnt_four_10[42:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-38){1'b0}},Qcnt_four_10[42:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001011:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_four_11[46:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-40){1'b0}},Qcnt_four_11[46:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-41){1'b0}},Qcnt_four_11[46:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-42){1'b0}},Qcnt_four_11[46:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001100:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_four_12[50:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_four_12[50:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_four_12[50:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-46){1'b0}},Qcnt_four_12[50:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001101:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_four_13[54:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-48){1'b0}},Qcnt_four_13[54:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-49){1'b0}},Qcnt_four_13[54:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-50){1'b0}},Qcnt_four_13[54:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                default:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+                    Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]};
+                    Sqrt_Q0=Q_sqrt_com_0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+                    Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+                    Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+                    Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+              endcase
+            end
+      endcase
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b11, end         //
+   /////////////////////////////////////////////////////////////////////////////
+ end
+
+
+
+  assign Sqrt_R0= ((Sqrt_start_dly_S)?'0:{Partial_remainder_DP[C_MANT_FP64+5:0]});  // sqrt "R" input of cell 0: all zeros while Sqrt_start_dly_S is set, otherwise Partial_remainder_DP
+  assign Sqrt_R1= {Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+2:0],Sqrt_DO[0]} ;  // R1..R4 share one shape: bit C_MANT_FP64+5 and bits [C_MANT_FP64+2:0] of cell k-1's sum, then Sqrt_DO[k-1]
+  assign Sqrt_R2= {Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+2:0],Sqrt_DO[1]};  // i.e. bits [C_MANT_FP64+4:C_MANT_FP64+3] of the sum are dropped to make room for Sqrt_DO
+  assign Sqrt_R3= {Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+2:0],Sqrt_DO[2]};
+  assign Sqrt_R4= {Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+2:0],Sqrt_DO[3]};  // NOTE(review): no consumer of Sqrt_R4 is visible in this part of the file
+
+  logic [C_MANT_FP64+5:0]                               Denominator_se_format_DB;  // Denominator_se_DB, then FP64_SO, then three zero bits (format-adjusted copy used as the "b" operand for division)
+  // For FP16ALT / FP16 / FP32 the bit at index C_MANT_FP64-C_MANT_<fmt>-1 is forced to 1 when <fmt>_SO is set; otherwise the Denominator_se_DB bit passes through.
+  assign Denominator_se_format_DB={Denominator_se_DB[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} ;
+  // Operands for iteration cell_U0: square root uses Sqrt_R0/Sqrt_Q0, division uses the First_iteration_cell_div_* values built below.
+  logic [C_MANT_FP64+5:0]                           First_iteration_cell_div_a_D,First_iteration_cell_div_b_D;
+  logic                                             Sel_b_for_first_S;
+  // div_a while Div_start_dly_S is set: Numerator_se_D, FP64_SO and three zero bits, with the bit at C_MANT_FP64-C_MANT_<fmt>-1 forced to 1 when <fmt>_SO is set.
+  // div_a otherwise: Partial_remainder_DP[C_MANT_FP64+4:3], then (FP64_SO && Quotient_DP[0]) and three zero bits; the bit at C_MANT_FP64-C_MANT_<fmt>+2 is replaced by Quotient_DP[0] when <fmt>_SO is set.
+  assign First_iteration_cell_div_a_D=(Div_start_dly_S)?{Numerator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP32-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0}
+                                                        :{Partial_remainder_DP[C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Quotient_DP[0],3'b0};
+  assign Sel_b_for_first_S=(Div_start_dly_S)?1:Quotient_DP[0];  // always 1 while Div_start_dly_S is set, otherwise follows Quotient_DP[0]
+  assign First_iteration_cell_div_b_D=Sel_b_for_first_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};  // format-adjusted Denominator_se_DB when selected, else Denominator_se_D padded with four zero bits
+  assign Iteration_cell_a_BMASK_D[0]=Sqrt_enable_SO?Sqrt_R0:{First_iteration_cell_div_a_D};  // Sqrt_enable_SO picks the sqrt operand, otherwise the division operand
+  assign Iteration_cell_b_BMASK_D[0]=Sqrt_enable_SO?Sqrt_Q0:{First_iteration_cell_div_b_D};
+
+
+
+  // Operands for iteration cell_U1, derived from cell 0's sum (Iteration_cell_sum_AMASK_D[0]); only driven when Iteration_unit_num_S is non-zero.
+  logic [C_MANT_FP64+5:0]                          Sec_iteration_cell_div_a_D,Sec_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_sec_S;
+  generate
+    if(|Iteration_unit_num_S)  // generate condition: Iteration_unit_num_S must be an elaboration-time constant
+      begin
+        assign Sel_b_for_sec_S=~Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5];  // inverted top bit of cell 0's sum (presumably its sign -- confirm against the iteration cell)
+        assign Sec_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_sec_S,3'b0};
+        assign Sec_iteration_cell_div_b_D=Sel_b_for_sec_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};  // same b-operand choice as cell_U0, keyed on Sel_b_for_sec_S
+        assign Iteration_cell_a_BMASK_D[1]=Sqrt_enable_SO?Sqrt_R1:{Sec_iteration_cell_div_a_D};  // Sqrt_enable_SO picks the sqrt operand, otherwise the division operand
+        assign Iteration_cell_b_BMASK_D[1]=Sqrt_enable_SO?Sqrt_Q1:{Sec_iteration_cell_div_b_D};
+      end
+    endgenerate
+
+  // Operands for iteration cell_U2, derived from cell 1's sum (Iteration_cell_sum_AMASK_D[1]); only driven when Iteration_unit_num_S is 2'b10 or 2'b11.
+  logic [C_MANT_FP64+5:0]                          Thi_iteration_cell_div_a_D,Thi_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_thi_S;
+  generate
+    if((Iteration_unit_num_S==2'b10) | (Iteration_unit_num_S==2'b11))  // generate condition: Iteration_unit_num_S must be an elaboration-time constant
+      begin
+        assign Sel_b_for_thi_S=~Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5];  // inverted top bit of cell 1's sum (presumably its sign -- confirm against the iteration cell)
+        assign Thi_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_thi_S,3'b0};
+        assign Thi_iteration_cell_div_b_D=Sel_b_for_thi_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};  // same b-operand choice as cell_U0, keyed on Sel_b_for_thi_S
+        assign Iteration_cell_a_BMASK_D[2]=Sqrt_enable_SO?Sqrt_R2:{Thi_iteration_cell_div_a_D};  // Sqrt_enable_SO picks the sqrt operand, otherwise the division operand
+        assign Iteration_cell_b_BMASK_D[2]=Sqrt_enable_SO?Sqrt_Q2:{Thi_iteration_cell_div_b_D};
+      end
+  endgenerate
+
+  // Operands for iteration cell_U3, derived from cell 2's sum (Iteration_cell_sum_AMASK_D[2]); only driven when Iteration_unit_num_S is 2'b11.
+  logic [C_MANT_FP64+5:0]                          Fou_iteration_cell_div_a_D,Fou_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_fou_S;
+  // The construction below mirrors the cell_U1 and cell_U2 blocks, one cell index further along.
+  generate
+    if(Iteration_unit_num_S==2'b11)  // generate condition: Iteration_unit_num_S must be an elaboration-time constant
+      begin
+        assign Sel_b_for_fou_S=~Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5];  // inverted top bit of cell 2's sum (presumably its sign -- confirm against the iteration cell)
+        assign Fou_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_fou_S,3'b0};
+        assign Fou_iteration_cell_div_b_D=Sel_b_for_fou_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};  // same b-operand choice as cell_U0, keyed on Sel_b_for_fou_S
+        assign Iteration_cell_a_BMASK_D[3]=Sqrt_enable_SO?Sqrt_R3:{Fou_iteration_cell_div_a_D};  // Sqrt_enable_SO picks the sqrt operand, otherwise the division operand
+        assign Iteration_cell_b_BMASK_D[3]=Sqrt_enable_SO?Sqrt_Q3:{Fou_iteration_cell_div_b_D};
+      end
+  endgenerate
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Masking Control                                                         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+  logic [C_MANT_FP64+1+4:0]                          Mask_bits_ctl_S;  // Bit mask ANDed into the iteration-cell operands and sums (kept for extension)
+
+  assign Mask_bits_ctl_S = '1;   // All ones, so nothing is masked: the format handling is done by the logic above. '1 follows the declared width instead of a fixed 58-bit literal.
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Iteration Instances  with masking control                               //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+  logic                                             Div_enable_SI   [3:0];  // per-cell copy of Div_enable_SO
+  logic                                             Div_start_dly_SI   [3:0];  // per-cell copy of Div_start_dly_S
+  logic                                             Sqrt_enable_SI   [3:0];  // per-cell copy of Sqrt_enable_SO
+  generate
+    genvar i,j;
+      for (i=0; i <= Iteration_unit_num_S ; i++)  // one pass per iteration cell, indices 0..Iteration_unit_num_S
+        begin
+          for (j = 0; j <= C_MANT_FP64+5; j++) begin  // bit-by-bit masking with Mask_bits_ctl_S
+              assign Iteration_cell_a_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_a_BMASK_D[i][j];
+              assign Iteration_cell_b_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_b_BMASK_D[i][j];
+              assign Iteration_cell_sum_AMASK_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_sum_D[i][j];
+          end
+
+          assign  Div_enable_SI[i] = Div_enable_SO;
+          assign  Div_start_dly_SI[i] = Div_start_dly_S;
+          assign  Sqrt_enable_SI[i] = Sqrt_enable_SO;
+          iteration_div_sqrt_mvp #(C_MANT_FP64+6) iteration_div_sqrt  // NOTE(review): parameter is presumably the cell data width - confirm against iteration_div_sqrt_mvp
+          (
+          .A_DI                                    (Iteration_cell_a_D[i]            ),
+          .B_DI                                    (Iteration_cell_b_D[i]            ),
+          .Div_enable_SI                           (Div_enable_SI[i]                 ),
+          .Div_start_dly_SI                        (Div_start_dly_SI[i]              ),
+          .Sqrt_enable_SI                          (Sqrt_enable_SI[i]                ),
+          .D_DI                                    (Sqrt_DI[i]                       ),
+          .D_DO                                    (Sqrt_DO[i]                       ),
+          .Sum_DO                                  (Iteration_cell_sum_D[i]          ),  // unmasked sum; masked into Iteration_cell_sum_AMASK_D[i] above
+          .Carry_out_DO                            (Iteration_cell_carry_D[i]        )
+         );
+
+        end
+
+  endgenerate
+
+
+
+  // Next-state logic for the partial remainder.
+  // Default: hold the registered value; when Fsm_enable_S is set, take the
+  // result of the last active stage (Sqrt_R<n> if Sqrt_enable_SO is set,
+  // otherwise the masked sum of that iteration cell).
+  always_comb
+    begin
+      Partial_remainder_DN = Partial_remainder_DP;
+      if(Fsm_enable_S)
+        begin
+          case (Iteration_unit_num_S)
+            // one iteration cell
+            2'b00:
+              begin
+                Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R1:Iteration_cell_sum_AMASK_D[0];
+              end
+            // two iteration cells
+            2'b01:
+              begin
+                Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R2:Iteration_cell_sum_AMASK_D[1];
+              end
+            // three iteration cells
+            2'b10:
+              begin
+                Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R3:Iteration_cell_sum_AMASK_D[2];
+              end
+            // four iteration cells
+            2'b11:
+              begin
+                Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R4:Iteration_cell_sum_AMASK_D[3];
+              end
+          endcase
+        end
+    end
+
+
+
+   // Partial remainder state register.
+   // Rst_RBI low clears it asynchronously (negedge in the sensitivity
+   // list); otherwise it loads Partial_remainder_DN on each rising edge
+   // of Clk_CI.
+   always_ff @(posedge Clk_CI or negedge Rst_RBI)
+     begin
+        if(!Rst_RBI)
+           Partial_remainder_DP <= '0;
+        else
+           Partial_remainder_DP <= Partial_remainder_DN;
+     end
+
+   logic [C_MANT_FP64+4:0] Quotient_DN;
+
+  // Next-state logic for the quotient shift register: holds by default; when
+  // Fsm_enable_S is set it shifts left by Iteration_unit_num_S+1 bits and fills
+  // the new LSBs from Sqrt_quotinent_S (Sqrt_enable_SO set) or from the
+  // iteration-cell carry-outs, cell 0 first.
+  always_comb
+    begin
+      Quotient_DN= Quotient_DP;
+      if(Fsm_enable_S)
+        begin
+          case (Iteration_unit_num_S)
+            // one new quotient bit per step
+            2'b00:
+              begin
+                Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+3:0],Sqrt_quotinent_S[3]} :{Quotient_DP[C_MANT_FP64+3:0],Iteration_cell_carry_D[0]};
+              end
+            // two new quotient bits per step
+            2'b01:
+              begin
+                Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+2:0],Sqrt_quotinent_S[3:2]} :{Quotient_DP[C_MANT_FP64+2:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1]};
+              end
+            // three new quotient bits per step
+            2'b10:
+              begin
+                Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+1:0],Sqrt_quotinent_S[3:1]} : {Quotient_DP[C_MANT_FP64+1:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2]};
+              end
+            // four new quotient bits per step
+            2'b11:
+              begin
+                Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64:0],Sqrt_quotinent_S } : {Quotient_DP[C_MANT_FP64:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]};
+              end
+          endcase
+        end
+    end
+
+   // Quotient state register: cleared asynchronously while Rst_RBI is low,
+   // otherwise loaded with Quotient_DN on each rising edge of Clk_CI.
+   always_ff @(posedge Clk_CI or negedge Rst_RBI)
+     begin
+        if(!Rst_RBI)
+           Quotient_DP <= '0;
+        else
+           Quotient_DP <= Quotient_DN;
+     end
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Precision Control for outputs                                          //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////one iteration unit, start///////////////////////////////////////
+   generate
+     if(Iteration_unit_num_S==2'b00)
+       begin
+        always_comb  // aligns Quotient_DP into Mant_result_prenorm_DO: keep the low quotient bits, zero-fill the bits below them
+          begin
+            case (Format_sel_S)  // one arm per format; listed Precision_ctl_S values keep fewer quotient bits, 0 and unlisted values use the full-width slice
+              2'b00:  // slices sized by C_MANT_FP32
+                begin
+                  case (Precision_ctl_S)
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-1:0],{(C_MANT_FP64-C_MANT_FP32+4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-5:0],{(C_MANT_FP64-C_MANT_FP32+4+5){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-7:0],{(C_MANT_FP64-C_MANT_FP32+4+7){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-11:0],{(C_MANT_FP64-C_MANT_FP32+4+11){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-13:0],{(C_MANT_FP64-C_MANT_FP32+4+13){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+
+              2'b01:  // slices sized by C_MANT_FP64
+                begin
+                  case (Precision_ctl_S)
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                    6'h34:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64:0],{(4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h33:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h32:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h31:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-4:0],{(4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-6:0],{(4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-10:0],{(4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h29:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-12:0],{(4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h27:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h26:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h25:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-16:0],{(4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h23:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h22:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-18:0],{(4+18){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h21:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-22:0],{(4+22){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-24:0],{(4+24){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h19:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-28:0],{(4+28){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h17:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-30:0],{(4+30){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-34:0],{(4+34){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-36:0],{(4+36){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-40:0],{(4+40){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-42:0],{(4+42){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                  endcase
+                end
+
+              2'b10:  // slices sized by C_MANT_FP16
+                begin
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16:0],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+
+              2'b11:  // slices sized by C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////one iteration unit, end//////////////////////////////////////////
+
+//////////////////////two iteration units, start///////////////////////////////////////
+   generate
+     if(Iteration_unit_num_S==2'b01)
+       begin
+        always_comb
+          begin
+            case (Format_sel_S)
+              2'b00:
+                begin
+                  case (Precision_ctl_S)
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17,6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+              2'b01:
+                begin
+                  case (Precision_ctl_S)
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3
+                      end
+                    6'h34:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h33,6'h32:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h31,6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2d,6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2b,6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h29,6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h27,6'h26:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h21,6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1f,6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1d,6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1b,6'h1a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3
+                      end
+                  endcase
+                end
+
+              2'b10:
+                begin
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} }; //+3
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+
+              2'b11:
+                begin
+
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+       end
+     endgenerate
+//////////////////////two iteration units, end//////////////////////////////////////////
+
+//////////////////////three iteration units, start///////////////////////////////////////
+   generate // Mant_result_prenorm_DO selection for the three-iteration-unit configuration
+     if(Iteration_unit_num_S==2'b10) // defs_div_sqrt_mvp sets this localparam to 2'b10, so this is presumably the active branch - confirm the enclosing module uses that package
+       begin
+        always_comb // places a Quotient_DP slice in the upper bits and zero-fills the LSB side
+          begin
+            case (Format_sel_S) // arms are sized with the FP32 / FP64 / FP16 / FP16ALT mantissa-width constants
+              2'b00: // slices sized with C_MANT_FP32
+                begin
+                  case (Precision_ctl_S) // every arm's concatenation is C_MANT_FP64+5 bits wide
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3
+                      end
+                    6'h17,6'h16,6'h15: // each following group of three values keeps 3 fewer quotient bits and adds 3 padding zeros
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14,6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10,6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default: // any other Precision_ctl_S value: same assignment as the 6'h00 arm
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3
+                      end
+                  endcase
+                end
+
+              2'b01: // slices sized with C_MANT_FP64
+                begin
+                  case (Precision_ctl_S) // every arm's concatenation is C_MANT_FP64+5 bits wide
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                    6'h34,6'h33: // only two values in this first group; it drops bit 0 of the quotient instead of padding from bit 0
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h32,6'h31,6'h30: // from here on each group of three keeps 3 fewer quotient bits than the previous one
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e,6'h2d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2c,6'h2b,6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h29,6'h28,6'h27:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h26,6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22,6'h21:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h20,6'h1f,6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1d,6'h1c,6'h1b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1a,6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16,6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h14,6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10,6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default: // any other Precision_ctl_S value: same assignment as the 6'h00 arm
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                  endcase
+                end
+
+              2'b10: // slices sized with C_MANT_FP16
+                begin
+                  case (Precision_ctl_S) // every arm's concatenation is C_MANT_FP64+5 bits wide
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4
+                      end
+                    6'h0a,6'h09: // drops bit 0 of the quotient
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default : // any other Precision_ctl_S value: same assignment as the 6'b00 arm
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+
+              2'b11: // slices sized with C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S) // every arm's concatenation is C_MANT_FP64+5 bits wide
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07,6'h06: // drops bit 0 of the quotient
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+1:1],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default : // any other Precision_ctl_S value: same assignment as the 6'b00 arm
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////three iteration units, end//////////////////////////////////////////
+
+//////////////////////four iteration units, start///////////////////////////////////////
+   generate // Mant_result_prenorm_DO selection for the four-iteration-unit configuration
+     if(Iteration_unit_num_S==2'b11) // defs_div_sqrt_mvp sets this localparam to 2'b10, so this branch is presumably not elaborated - confirm the enclosing module uses that package
+       begin
+        always_comb // places a Quotient_DP slice in the upper bits and zero-fills the LSB side
+          begin
+            case (Format_sel_S) // arms are sized with the FP32 / FP64 / FP16 / FP16ALT mantissa-width constants
+              2'b00: // slices sized with C_MANT_FP32
+                begin
+                  case (Precision_ctl_S) // every arm's concatenation is C_MANT_FP64+5 bits wide
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17,6'h16,6'h15,6'h14: // each following group of four values keeps 4 fewer quotient bits and adds 4 padding zeros
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12,6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06: // last explicit group lists only two values
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default: // any other Precision_ctl_S value: same assignment as the 6'h00 arm
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+
+              2'b01: // slices sized with C_MANT_FP64
+                begin
+                  case (Precision_ctl_S) // every arm's concatenation is C_MANT_FP64+5 bits wide
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3
+                      end
+                    6'h34: // same assignment as the 6'h00 and default arms
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h33,6'h32,6'h31,6'h30: // from here on each group of four keeps 4 fewer quotient bits than the previous one
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e,6'h2d,6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(9){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2b,6'h2a,6'h29,6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(13){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h27,6'h26,6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22,6'h21,6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(21){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1f,6'h1e,6'h1d,6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(25){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1b,6'h1a,6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16,6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(33){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12,6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(37){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(45){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06: // last explicit group lists only two values
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(49){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default: // any other Precision_ctl_S value: same assignment as the 6'h00 arm
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3
+                      end
+                  endcase
+                end
+
+              2'b10: // slices sized with C_MANT_FP16
+                begin
+                  case (Precision_ctl_S) // every arm's concatenation is C_MANT_FP64+5 bits wide
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5
+                      end
+                    6'h0a,6'h09,6'h08: // drops bit 0 of the quotient
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1-4:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default : // any other Precision_ctl_S value: same assignment as the 6'b00 arm
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5
+                      end
+                  endcase
+                end
+
+              2'b11: // slices sized with C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S) // every arm's concatenation is C_MANT_FP64+5 bits wide
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default : // any other Precision_ctl_S value: same assignment as the 6'b00 arm
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////four iteration units, end///////////////////////////////////////
+
+
+
+
+
+// resultant exponent
+   logic   [C_EXP_FP64+1:0]    Exp_result_prenorm_DN,Exp_result_prenorm_DP; // _DN: next value, _DP: registered value (see always_ff below)
+   // Three addends that are summed into Exp_result_prenorm_DN.
+   logic   [C_EXP_FP64+1:0]                                Exp_add_a_D;
+   logic   [C_EXP_FP64+1:0]                                Exp_add_b_D;
+   logic   [C_EXP_FP64+1:0]                                Exp_add_c_D;
+  // Bias constants for the currently selected format; driven by the always_comb below.
+  integer                                                 C_BIAS_AONE, C_HALF_BIAS;
+  always_comb
+    begin  // pick the *_FP32 / *_FP64 / *_FP16 / *_FP16ALT constants according to Format_sel_S
+      case (Format_sel_S) // all four 2-bit encodings are listed; there is no default arm
+        2'b00:
+          begin
+            C_BIAS_AONE =C_BIAS_AONE_FP32;
+            C_HALF_BIAS =C_HALF_BIAS_FP32;
+          end
+        2'b01:
+          begin
+            C_BIAS_AONE =C_BIAS_AONE_FP64;
+            C_HALF_BIAS =C_HALF_BIAS_FP64;
+          end
+        2'b10:
+          begin
+            C_BIAS_AONE =C_BIAS_AONE_FP16;
+            C_HALF_BIAS =C_HALF_BIAS_FP16;
+          end
+        2'b11:
+          begin
+            C_BIAS_AONE =C_BIAS_AONE_FP16ALT;
+            C_HALF_BIAS =C_HALF_BIAS_FP16ALT;
+          end
+        endcase
+    end
+
+//For division, exponent=(Exp_a_D-LZ1)-(Exp_b_D-LZ2)+BIAS
+//For square root, exponent=(Exp_a_D-LZ1)/2+(Exp_a_D-LZ1)%2+C_HALF_BIAS
+//For the exponents, (Exp_a_D-LZ1) and (Exp_b_D-LZ2) have already been adjusted for denormal numbers in the preprocess module.
+  // Operand a: sqrt -> Exp_num_DI[C_EXP_FP64:1] behind three copies of bit C_EXP_FP64; div -> Exp_num_DI behind two copies of that bit.
+  assign Exp_add_a_D = {Sqrt_start_dly_S?{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64:1]}:{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI}};
+  assign Exp_add_b_D = {Sqrt_start_dly_S?{1'b0,{C_EXP_ZERO_FP64},Exp_num_DI[0]}:{~Exp_den_DI[C_EXP_FP64],~Exp_den_DI[C_EXP_FP64],~Exp_den_DI}}; // sqrt: Exp_num_DI[0] zero-extended; div: bitwise inverse of Exp_den_DI
+  assign Exp_add_c_D = {Div_start_dly_S?{{C_BIAS_AONE}}:{{C_HALF_BIAS}}}; // the integer constant is narrowed to the C_EXP_FP64+2-bit target
+  assign Exp_result_prenorm_DN  = (Start_dly_S)?{Exp_add_a_D + Exp_add_b_D + Exp_add_c_D}:Exp_result_prenorm_DP; // new sum when Start_dly_S is set, otherwise hold
+  // NOTE(review): division adds ~Exp_den_DI without a +1; the C_BIAS_AONE_* constants in defs_div_sqrt_mvp are bias+1, which presumably supplies it - confirm.
+
+  always_ff @(posedge Clk_CI, negedge Rst_RBI) // asynchronous reset, active when Rst_RBI is low
+   begin
+      if(~Rst_RBI)
+        begin
+          Exp_result_prenorm_DP <= '0;
+        end
+      else
+        begin
+          Exp_result_prenorm_DP<=  Exp_result_prenorm_DN;
+        end
+   end
+
+  assign Exp_result_prenorm_DO = Exp_result_prenorm_DP; // output is the registered value
+
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
new file mode 100644
index 0000000..b3f41fe
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// This file contains all div_sqrt_top_mvp parameters
+// Authors    : Lei Li  (lile@iis.ee.ethz.ch)
+
+package defs_div_sqrt_mvp; // shared widths, encodings and per-format constants for the div/sqrt unit
+
+   // op command
+   localparam C_RM                  = 3;    // width of the rounding-mode field (RM_SI is [C_RM-1:0] in div_sqrt_top_mvp)
+   localparam C_RM_NEAREST          = 3'h0; // rounding-mode encodings
+   localparam C_RM_TRUNC            = 3'h1;
+   localparam C_RM_PLUSINF          = 3'h2;
+   localparam C_RM_MINUSINF         = 3'h3;
+   localparam C_PC                  = 6; // Precision Control
+   localparam C_FS                  = 2; // Format Selection
+   localparam C_IUNC                = 2; // Iteration Unit Number Control
+   localparam Iteration_unit_num_S  = 2'b10; // tested against 2'b10 / 2'b11 by generate-if conditions elsewhere in this patch
+
+   // FP64
+   localparam C_OP_FP64             = 64;      // operand width in bits = 1 + C_EXP_FP64 + C_MANT_FP64
+   localparam C_MANT_FP64           = 52;
+   localparam C_EXP_FP64            = 11;
+   localparam C_BIAS_FP64           = 1023;
+   localparam C_BIAS_AONE_FP64      = 11'h400; // 1024 = C_BIAS_FP64 + 1
+   localparam C_HALF_BIAS_FP64      = 511;     // (C_BIAS_FP64 - 1) / 2
+   localparam C_EXP_ZERO_FP64       = 11'h000;
+   localparam C_EXP_ONE_FP64        = 13'h001; // 13 bits = C_EXP_FP64 + 2; original note: bit width agrees with its use in norm
+   localparam C_EXP_INF_FP64        = 11'h7FF; // exponent field all ones
+   localparam C_MANT_ZERO_FP64      = 52'h0;
+   localparam C_MANT_NAN_FP64       = 52'h8_0000_0000_0000; // only the mantissa MSB set
+   localparam C_PZERO_FP64          = 64'h0000_0000_0000_0000;
+   localparam C_MZERO_FP64          = 64'h8000_0000_0000_0000; // only the sign bit set
+   localparam C_QNAN_FP64           = 64'h7FF8_0000_0000_0000; // exponent all ones, mantissa MSB set
+
+   // FP32
+   localparam C_OP_FP32             = 32;      // operand width in bits = 1 + C_EXP_FP32 + C_MANT_FP32
+   localparam C_MANT_FP32           = 23;
+   localparam C_EXP_FP32            = 8;
+   localparam C_BIAS_FP32           = 127;
+   localparam C_BIAS_AONE_FP32      = 8'h80;   // 128 = C_BIAS_FP32 + 1
+   localparam C_HALF_BIAS_FP32      = 63;      // (C_BIAS_FP32 - 1) / 2
+   localparam C_EXP_ZERO_FP32       = 8'h00;
+   localparam C_EXP_INF_FP32        = 8'hFF;   // exponent field all ones
+   localparam C_MANT_ZERO_FP32      = 23'h0;
+   localparam C_PZERO_FP32          = 32'h0000_0000;
+   localparam C_MZERO_FP32          = 32'h8000_0000; // only the sign bit set
+   localparam C_QNAN_FP32           = 32'h7FC0_0000; // exponent all ones, mantissa MSB set
+
+   // FP16
+   localparam C_OP_FP16             = 16;      // operand width in bits = 1 + C_EXP_FP16 + C_MANT_FP16
+   localparam C_MANT_FP16           = 10;
+   localparam C_EXP_FP16            = 5;
+   localparam C_BIAS_FP16           = 15;
+   localparam C_BIAS_AONE_FP16      = 5'h10;   // 16 = C_BIAS_FP16 + 1
+   localparam C_HALF_BIAS_FP16      = 7;       // (C_BIAS_FP16 - 1) / 2
+   localparam C_EXP_ZERO_FP16       = 5'h00;
+   localparam C_EXP_INF_FP16        = 5'h1F;   // exponent field all ones
+   localparam C_MANT_ZERO_FP16      = 10'h0;
+   localparam C_PZERO_FP16          = 16'h0000;
+   localparam C_MZERO_FP16          = 16'h8000; // only the sign bit set
+   localparam C_QNAN_FP16           = 16'h7E00; // exponent all ones, mantissa MSB set
+
+   // FP16alt (same exponent width and bias as FP32; no C_PZERO/C_MZERO constants are defined for it)
+   localparam C_OP_FP16ALT           = 16;     // operand width in bits = 1 + C_EXP_FP16ALT + C_MANT_FP16ALT
+   localparam C_MANT_FP16ALT         = 7;
+   localparam C_EXP_FP16ALT          = 8;
+   localparam C_BIAS_FP16ALT         = 127;
+   localparam C_BIAS_AONE_FP16ALT    = 8'h80;  // 128 = C_BIAS_FP16ALT + 1
+   localparam C_HALF_BIAS_FP16ALT    = 63;     // (C_BIAS_FP16ALT - 1) / 2
+   localparam C_EXP_ZERO_FP16ALT     = 8'h00;
+   localparam C_EXP_INF_FP16ALT      = 8'hFF;  // exponent field all ones
+   localparam C_MANT_ZERO_FP16ALT    = 7'h0;
+   localparam C_QNAN_FP16ALT         = 16'h7FC0; // exponent all ones, mantissa MSB set
+
+endpackage : defs_div_sqrt_mvp
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
new file mode 100644
index 0000000..3af6081
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
@@ -0,0 +1,180 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li -- lile@iis.ee.ethz.ch                              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    03/03/2018                                                 //
+// Design Name:    div_sqrt_top_mvp                                           //
+// Module Name:    div_sqrt_top_mvp.sv                                        //
+// Project Name:   The shared divisor and square root                         //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    The top of div and sqrt                                    //
+//                                                                            //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module div_sqrt_top_mvp // structural top level: wires preprocess_mvp -> nrbd_nrsc_mvp -> norm_div_sqrt_mvp
+
+  (//Input
+   input logic                            Clk_CI,
+   input logic                            Rst_RBI,
+   input logic                            Div_start_SI,   // forwarded to preprocess_U0 and nrbd_nrsc_U0
+   input logic                            Sqrt_start_SI,  // forwarded to preprocess_U0 and nrbd_nrsc_U0
+
+   //Input Operands
+   input logic [C_OP_FP64-1:0]            Operand_a_DI,   // FP64-wide bus regardless of Format_sel_SI
+   input logic [C_OP_FP64-1:0]            Operand_b_DI,
+
+   // Input Control
+   input logic [C_RM-1:0]                 RM_SI,    //Rounding Mode
+   input logic [C_PC-1:0]                 Precision_ctl_SI, // Precision Control
+   input logic [C_FS-1:0]                 Format_sel_SI,  // Format Selection,
+   input logic                            Kill_SI,        // connected to nrbd_nrsc_U0 only
+
+   //Output Result
+   output logic [C_OP_FP64-1:0]           Result_DO,      // driven by fpu_norm_U0
+
+   //Output-Flags
+   output logic [4:0]                     Fflags_SO,      // driven by fpu_norm_U0
+   output logic                           Ready_SO,       // driven by nrbd_nrsc_U0; also fed back to preprocess_U0.Ready_SI
+   output logic                           Done_SO         // driven by nrbd_nrsc_U0
+ );
+
+   // This module contains no logic of its own; it only instantiates and connects:
+   //   preprocess_U0 (preprocess_mvp), nrbd_nrsc_U0 (nrbd_nrsc_mvp), fpu_norm_U0 (norm_div_sqrt_mvp).
+   // The signals declared below are the nets between those three instances.
+
+   //Operand components
+   logic [C_EXP_FP64:0]                 Exp_a_D;   // preprocess_U0 -> nrbd_nrsc_U0
+   logic [C_EXP_FP64:0]                 Exp_b_D;   // preprocess_U0 -> nrbd_nrsc_U0
+   logic [C_MANT_FP64:0]                Mant_a_D;  // preprocess_U0 -> nrbd_nrsc_U0
+   logic [C_MANT_FP64:0]                Mant_b_D;  // preprocess_U0 -> nrbd_nrsc_U0
+
+   logic [C_EXP_FP64+1:0]               Exp_z_D;   // nrbd_nrsc_U0 -> fpu_norm_U0
+   logic [C_MANT_FP64+4:0]              Mant_z_D;  // nrbd_nrsc_U0 -> fpu_norm_U0
+   logic                                Sign_z_D;  // preprocess_U0 -> fpu_norm_U0
+   logic                                Start_S;   // preprocess_U0 -> nrbd_nrsc_U0
+   logic [C_RM-1:0]                     RM_dly_S;  // preprocess_U0 -> fpu_norm_U0 (as its RM_SI)
+   logic                                Div_enable_S;   // nrbd_nrsc_U0 -> fpu_norm_U0
+   logic                                Sqrt_enable_S;  // nrbd_nrsc_U0 -> fpu_norm_U0
+   logic                                Inf_a_S;   // operand classification flags: preprocess_U0 -> fpu_norm_U0
+   logic                                Inf_b_S;
+   logic                                Zero_a_S;
+   logic                                Zero_b_S;
+   logic                                NaN_a_S;
+   logic                                NaN_b_S;
+   logic                                SNaN_S;
+   logic                                Special_case_SB,Special_case_dly_SB; // preprocess_U0 -> nrbd_nrsc_U0
+
+   logic Full_precision_S; // these five: nrbd_nrsc_U0 -> fpu_norm_U0
+   logic FP32_S;
+   logic FP64_S;
+   logic FP16_S;
+   logic FP16ALT_S;
+
+ // Stage 1: takes the raw operands and control inputs; drives the operand components and flags listed above.
+ preprocess_mvp  preprocess_U0
+ (
+   .Clk_CI                (Clk_CI             ),
+   .Rst_RBI               (Rst_RBI            ),
+   .Div_start_SI          (Div_start_SI       ),
+   .Sqrt_start_SI         (Sqrt_start_SI      ),
+   .Ready_SI              (Ready_SO           ),
+   .Operand_a_DI          (Operand_a_DI       ),
+   .Operand_b_DI          (Operand_b_DI       ),
+   .RM_SI                 (RM_SI              ),
+   .Format_sel_SI         (Format_sel_SI      ),
+   .Start_SO              (Start_S            ),
+   .Exp_a_DO_norm         (Exp_a_D            ),
+   .Exp_b_DO_norm         (Exp_b_D            ),
+   .Mant_a_DO_norm        (Mant_a_D           ),
+   .Mant_b_DO_norm        (Mant_b_D           ),
+   .RM_dly_SO             (RM_dly_S           ),
+   .Sign_z_DO             (Sign_z_D           ),
+   .Inf_a_SO              (Inf_a_S            ),
+   .Inf_b_SO              (Inf_b_S            ),
+   .Zero_a_SO             (Zero_a_S           ),
+   .Zero_b_SO             (Zero_b_S           ),
+   .NaN_a_SO              (NaN_a_S            ),
+   .NaN_b_SO              (NaN_b_S            ),
+   .SNaN_SO               (SNaN_S             ),
+   .Special_case_SBO      (Special_case_SB    ),
+   .Special_case_dly_SBO  (Special_case_dly_SB)
+   );
+ // Stage 2: consumes the preprocessed exponents/mantissas; drives Exp_z_D, Mant_z_D, Ready_SO, Done_SO and the format flags.
+ nrbd_nrsc_mvp   nrbd_nrsc_U0
+  (
+   .Clk_CI                (Clk_CI             ),
+   .Rst_RBI               (Rst_RBI            ),
+   .Div_start_SI          (Div_start_SI       ) ,
+   .Sqrt_start_SI         (Sqrt_start_SI      ),
+   .Start_SI              (Start_S            ),
+   .Kill_SI               (Kill_SI            ),
+   .Special_case_SBI      (Special_case_SB    ),
+   .Special_case_dly_SBI  (Special_case_dly_SB),
+   .Div_enable_SO         (Div_enable_S       ),
+   .Sqrt_enable_SO        (Sqrt_enable_S      ),
+   .Precision_ctl_SI      (Precision_ctl_SI   ),
+   .Format_sel_SI         (Format_sel_SI      ),
+   .Exp_a_DI              (Exp_a_D            ),
+   .Exp_b_DI              (Exp_b_D            ),
+   .Mant_a_DI             (Mant_a_D           ),
+   .Mant_b_DI             (Mant_b_D           ),
+   .Full_precision_SO     (Full_precision_S   ),
+   .FP32_SO               (FP32_S             ),
+   .FP64_SO               (FP64_S             ),
+   .FP16_SO               (FP16_S             ),
+   .FP16ALT_SO            (FP16ALT_S          ),
+   .Ready_SO              (Ready_SO           ),
+   .Done_SO               (Done_SO            ),
+   .Exp_z_DO              (Exp_z_D            ),
+   .Mant_z_DO             (Mant_z_D           )
+    );
+
+ // Stage 3: no clock or reset ports are connected here; drives Result_DO and Fflags_SO from stage-2 outputs and the stage-1 flags.
+ norm_div_sqrt_mvp  fpu_norm_U0
+  (
+   .Mant_in_DI            (Mant_z_D           ),
+   .Exp_in_DI             (Exp_z_D            ),
+   .Sign_in_DI            (Sign_z_D           ),
+   .Div_enable_SI         (Div_enable_S       ),
+   .Sqrt_enable_SI        (Sqrt_enable_S      ),
+   .Inf_a_SI              (Inf_a_S            ),
+   .Inf_b_SI              (Inf_b_S            ),
+   .Zero_a_SI             (Zero_a_S           ),
+   .Zero_b_SI             (Zero_b_S           ),
+   .NaN_a_SI              (NaN_a_S            ),
+   .NaN_b_SI              (NaN_b_S            ),
+   .SNaN_SI               (SNaN_S             ),
+   .RM_SI                 (RM_dly_S           ),
+   .Full_precision_SI     (Full_precision_S   ),
+   .FP32_SI               (FP32_S             ),
+   .FP64_SI               (FP64_S             ),
+   .FP16_SI               (FP16_S             ),
+   .FP16ALT_SI            (FP16ALT_S          ),
+   .Result_DO             (Result_DO          ),
+   .Fflags_SO             (Fflags_SO          ) //{NV,DZ,OF,UF,NX}
+   );
+
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
new file mode 100644
index 0000000..0c645e6
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li                  lile@iis.ee.ethz.ch                //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    12/01/2017                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    iteration_div_sqrt_mvp                                     //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    iteration unit for div and sqrt                            //
+//                                                                            //
+//                                                                            //
+// Revision:        03/14/2018                                                //
+//                  For div_sqrt_mvp                                          //
+////////////////////////////////////////////////////////////////////////////////
+
+// Single add stage of the div/sqrt iteration: {carry, sum} = A + B + carry-in.
+module iteration_div_sqrt_mvp
+#(
+   parameter   WIDTH=25
+)
+  (
+   input logic [WIDTH-1:0]      A_DI,             // first adder operand
+   input logic [WIDTH-1:0]      B_DI,             // second adder operand
+   input logic                  Div_enable_SI,    // when 1, forces the carry-in to 0
+   input logic                  Div_start_dly_SI, // not referenced inside this module
+   input logic                  Sqrt_enable_SI,   // when 1 (and div is 0), D_DI drives the carry-in
+   input logic [1:0]            D_DI,
+
+   output logic [1:0]           D_DO,
+   output logic [WIDTH-1:0]     Sum_DO,
+   output logic                 Carry_out_DO
+    );
+
+   logic                        carry_in;
+
+   // D_DO[1] is the XNOR of both D_DI bits, D_DO[0] is D_DI[0] inverted.
+   assign D_DO = {D_DI[1] ~^ D_DI[0], ~D_DI[0]};
+
+   // Carry-in is 1 only when div is off, sqrt is on and D_DI is non-zero.
+   assign carry_in = ~Div_enable_SI & Sqrt_enable_SI & (|D_DI);
+
+   // (WIDTH+1)-bit addition: the MSB is the carry out, the rest is the sum.
+   assign {Carry_out_DO, Sum_DO} = A_DI + B_DI + carry_in;
+
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
new file mode 100644
index 0000000..590abe9
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
@@ -0,0 +1,470 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li    lile@iis.ee.ethz.ch                              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    09/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    norm_div_sqrt_mvp.sv                                       //
+// Project Name:                                                              //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Floating point Normalizer/Rounding unit                    //
+//                 Since this module is designed as combinational logic,      //
+//                 arbitrary register stages can be added in the wrapper      //
+//                 module for different frequencies.                          //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan                     //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module norm_div_sqrt_mvp
+  (//Inputs
+   input logic [C_MANT_FP64+4:0]                Mant_in_DI,  // Include the needed 4-bit for rounding and hidden bit
+   input logic signed [C_EXP_FP64+1:0]          Exp_in_DI,   // signed; a set MSB is handled as a negative exponent (right-shift path)
+   input logic                                  Sign_in_DI,
+   input logic                                  Div_enable_SI,
+   input logic                                  Sqrt_enable_SI,
+   input logic                                  Inf_a_SI,
+   input logic                                  Inf_b_SI,
+   input logic                                  Zero_a_SI,
+   input logic                                  Zero_b_SI,
+   input logic                                  NaN_a_SI,
+   input logic                                  NaN_b_SI,
+   input logic                                  SNaN_SI,     // copied to the NV flag when NaN_a_SI or NaN_b_SI is set
+   input logic [C_RM-1:0]                       RM_SI,       // rounding mode, matched against the C_RM_* constants
+   input logic                                  Full_precision_SI, // 1: output the rounded result, 0: output the unrounded one
+   input logic                                  FP32_SI,     // format selects are tested in the order FP32, FP64, FP16;
+   input logic                                  FP64_SI,     // the final else branches use the FP16ALT layout
+   input logic                                  FP16_SI,
+   input logic                                  FP16ALT_SI,
+   //Outputs
+   output logic [C_EXP_FP64+C_MANT_FP64:0]      Result_DO,   // {sign, exponent, mantissa}; narrower formats are padded with 1s on top
+   output logic [4:0]                           Fflags_SO //{NV,DZ,OF,UF,NX}
+   );
+   // Purely combinational: picks a special-case or normalized mantissa/exponent,
+   // rounds it according to RM_SI, then packs Result_DO and builds Fflags_SO.
+   logic                                        Sign_res_D;
+
+   logic                                        NV_OP_S;
+   logic                                        Exp_OF_S;
+   logic                                        Exp_UF_S;
+   logic                                        Div_Zero_S;
+   logic                                        In_Exact_S;
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Normalization                                                           //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [C_MANT_FP64:0]                        Mant_res_norm_D;
+   logic [C_EXP_FP64-1:0]                       Exp_res_norm_D;
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Right shift operations for negative exponents                           //
+   /////////////////////////////////////////////////////////////////////////////
+
+  logic  [C_EXP_FP64+1:0]                       Exp_Max_RS_FP64_D;
+  logic  [C_EXP_FP32+1:0]                       Exp_Max_RS_FP32_D;
+  logic  [C_EXP_FP16+1:0]                       Exp_Max_RS_FP16_D;
+  logic  [C_EXP_FP16ALT+1:0]                    Exp_Max_RS_FP16ALT_D;
+  // NOTE: the four Exp_Max_RS_*_D signals are assigned below but never read again in this module.
+  assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1; // to check exponent after (C_MANT_FP64+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1; // to check exponent after (C_MANT_FP32+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1; // to check exponent after (C_MANT_FP16+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1; // to check exponent after (C_MANT_FP16ALT+1)-bit >> when Exp_in_DI is negative
+  logic  [C_EXP_FP64+1:0]                       Num_RS_D;
+  assign Num_RS_D=~Exp_in_DI+1+1;            // = -Exp_in_DI+1 (two's complement): number of right shifts (RS) needed to generate a denormal number; >> is needed only when Exp_in_DI is negative
+  logic  [C_MANT_FP64:0]                        Mant_RS_D;
+  logic  [C_MANT_FP64+4:0]                      Mant_forsticky_D;
+  assign  {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D); // bits shifted out below Mant_RS_D are kept in Mant_forsticky_D
+//
+  logic [C_EXP_FP64+1:0]                        Exp_subOne_D;
+  assign Exp_subOne_D = Exp_in_DI -1; // used by the branches that take the mantissa one bit lower (leading bit is 0)
+
+   //normalization
+   logic [1:0]                                  Mant_lower_D;      // two bits just below the kept mantissa, used for rounding
+   logic                                        Mant_sticky_bit_D; // OR-reduction of the bits below Mant_lower_D
+   logic [C_MANT_FP64+4:0]                      Mant_forround_D;   // bits below Mant_res_norm_D; only read by the FP64 rounding branch
+   // Priority chain: the first matching condition sets all eight outputs of this block.
+   always_comb
+     begin
+
+       if(NaN_a_SI)  //  if a is NaN, return NaN
+         begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+           Exp_res_norm_D='1;
+           Mant_forround_D='0;
+           Sign_res_D=1'b0;
+           NV_OP_S = SNaN_SI;
+         end
+
+      else if(NaN_b_SI)   //if b is NaN, return NaN
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=1'b0;
+          NV_OP_S = SNaN_SI;
+        end
+
+      else if(Inf_a_SI)
+        begin
+          if(Div_enable_SI&&Inf_b_SI)                     //Inf/Inf, return NaN
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=1'b0;
+              NV_OP_S = 1'b1;
+            end
+          else if (Sqrt_enable_SI && Sign_in_DI) begin // catch sqrt(-inf)
+            Div_Zero_S=1'b0;
+            Exp_OF_S=1'b0;
+            Exp_UF_S=1'b0;
+            Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+            Exp_res_norm_D='1;
+            Mant_forround_D='0;
+            Sign_res_D=1'b0;
+            NV_OP_S = 1'b1;
+          end else begin // remaining Inf_a cases: all-ones exponent, zero mantissa; NOTE(review): Exp_OF_S is set here - confirm intended
+            Div_Zero_S=1'b0;
+            Exp_OF_S=1'b1;
+            Exp_UF_S=1'b0;
+            Mant_res_norm_D= '0;
+            Exp_res_norm_D='1;
+            Mant_forround_D='0;
+            Sign_res_D=Sign_in_DI;
+            NV_OP_S = 1'b0;
+          end
+        end
+
+      else if(Div_enable_SI&&Inf_b_SI) // division with Inf_b: zero exponent and mantissa; NOTE(review): Exp_OF_S is set here - confirm intended
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b1;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D= '0;
+          Exp_res_norm_D='0;
+          Mant_forround_D='0;
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+     else if(Zero_a_SI)
+       begin
+         if(Div_enable_SI&&Zero_b_SI) // division with Zero_a and Zero_b: NaN pattern, DZ and NV both set
+           begin
+              Div_Zero_S=1'b1;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=1'b0;
+              NV_OP_S = 1'b1;
+           end
+         else // other Zero_a cases: zero exponent and mantissa with the input sign, no flag set in this block
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b0;
+             Mant_res_norm_D='0;
+             Exp_res_norm_D='0;
+             Mant_forround_D='0;
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+       end
+
+     else  if(Div_enable_SI&&(Zero_b_SI))  //div Zero
+       begin
+         Div_Zero_S=1'b1;
+         Exp_OF_S=1'b0;
+         Exp_UF_S=1'b0;
+         Mant_res_norm_D='0;
+         Exp_res_norm_D='1;
+         Mant_forround_D='0;
+         Sign_res_D=Sign_in_DI;
+         NV_OP_S = 1'b0;
+       end
+
+      else if(Sign_in_DI&&Sqrt_enable_SI)   //sqrt(-a)
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=1'b0;
+          NV_OP_S = 1'b1;
+        end
+
+     else if((Exp_in_DI[C_EXP_FP64:0]=='0))
+       begin
+         if(Mant_in_DI!='0)       //Exp=0, Mant!=0, it is denormal
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b1;
+             Mant_res_norm_D={1'b0,Mant_in_DI[C_MANT_FP64+4:5]};
+             Exp_res_norm_D='0;
+             Mant_forround_D={Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} };
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+         else                 // Zero
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b0;
+             Mant_res_norm_D='0;
+             Exp_res_norm_D='0;
+             Mant_forround_D='0;
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+        end
+
+      else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4]))  //denormal
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b1;
+          Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+4:4];
+          Exp_res_norm_D='0;
+          Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+      else if(Exp_in_DI[C_EXP_FP64+1])    //minus              //consider format
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b1;
+          Mant_res_norm_D={Mant_RS_D[C_MANT_FP64:0]};
+          Exp_res_norm_D='0;
+          Mant_forround_D={Mant_forsticky_D[C_MANT_FP64+4:0]};   // shifted-out bits; only consumed by the FP64 rounding branch
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+      else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) )            //OF
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b1;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D='0;
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+      else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) |  ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )//all-ones exponent field of the selected format (e.g. 255)
+        begin
+          if(~Mant_in_DI[C_MANT_FP64+4]) // MSB=0
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3];
+              Exp_res_norm_D=Exp_subOne_D;
+              Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+          else if(Mant_in_DI!='0)         //NaN (original label); always true here since the MSB is 1; assigns the same values as the branch below
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b1;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D= '0;
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+          else                         //infinity (not reachable for known values: the MSB is 1 at this point)
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b1;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D= '0;
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+         end
+
+      else if(Mant_in_DI[C_MANT_FP64+4])  //normal numbers with 1.XXX
+        begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D= Mant_in_DI[C_MANT_FP64+4:4];
+           Exp_res_norm_D=Exp_in_DI[C_EXP_FP64-1:0];
+           Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
+           Sign_res_D=Sign_in_DI;
+           NV_OP_S = 1'b0;
+        end
+
+      else                                   //normal numbers with 0.1XX
+         begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3];
+           Exp_res_norm_D=Exp_subOne_D;
+           Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
+           Sign_res_D=Sign_in_DI;
+           NV_OP_S = 1'b0;
+         end
+
+     end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Rounding enable only for full precision (Full_precision_SI==1'b1)       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic [C_MANT_FP64:0]                   Mant_upper_D;
+   logic [C_MANT_FP64+1:0]                 Mant_upperRounded_D;
+   logic                                   Mant_roundUp_S;
+   logic                                   Mant_rounded_S;
+   // For FP32/FP16/FP16ALT the round bits are taken from Mant_res_norm_D; only FP64 reads Mant_forround_D.
+  always_comb //determine which bits for Mant_lower_D and Mant_sticky_bit_D
+    begin
+      if(FP32_SI)
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0];
+        end
+      else if(FP64_SI)
+        begin
+          Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0];
+          Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3];
+          Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0]; // range includes bit C_MANT_FP64+3, which is also Mant_lower_D[0]
+        end
+      else if(FP16_SI)
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:30]; // NOTE(review): stops at bit 30, bits [29:0] are ignored - confirm they are always zero for this format
+        end
+      else  //FP16ALT
+      begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:30]; // NOTE(review): stops at bit 30, bits [29:0] are ignored - confirm they are always zero for this format
+      end
+    end
+
+   assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D; // 1 when any bit below the kept mantissa is set
+
+
+
+
+   always_comb //determine whether to round up or not
+     begin
+        Mant_roundUp_S = 1'b0;
+        case (RM_SI)
+          C_RM_NEAREST : // round up when above half, or exactly at half with the kept LSB set (ties to even)
+            Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | (FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) );
+          C_RM_TRUNC   :
+            Mant_roundUp_S = 0;
+          C_RM_PLUSINF :
+            Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI;
+          C_RM_MINUSINF:
+            Mant_roundUp_S = Mant_rounded_S & Sign_in_DI;
+          default          :
+            Mant_roundUp_S = 0;
+        endcase // case (RM_SI)
+     end // always_comb begin
+
+  logic                                 Mant_renorm_S;
+  logic  [C_MANT_FP64:0]                Mant_roundUp_Vector_S; // for all the formats
+  // Single increment bit per format; the literal zero widths presumably place it at each format's LSB - TODO confirm against the C_MANT_* values.
+  assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)};
+
+
+  assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S;
+  assign Mant_renorm_S       = Mant_upperRounded_D[C_MANT_FP64+1]; // carry out of the rounding addition
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Renormalization for Rounding                                           //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64-1:0]               Mant_res_round_D;
+  logic [C_EXP_FP64-1:0]                Exp_res_round_D;
+
+
+  assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit
+  assign Exp_res_round_D  = Exp_res_norm_D+Mant_renorm_S;
+
+  /////////////////////////////////////////////////////////////////////////////
+  //  Output Assignments                                                     //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64-1:0]               Mant_before_format_ctl_D;
+  logic [C_EXP_FP64-1:0]                Exp_before_format_ctl_D;
+  assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D; // Mant_res_norm_D is one bit wider than the target, so its MSB is dropped by the assignment
+  assign Exp_before_format_ctl_D = Full_precision_SI ? Exp_res_round_D : Exp_res_norm_D;
+
+  always_comb    //NaN Boxing
+    begin  //
+      if(FP32_SI)
+          begin
+            Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]};
+          end
+       else if(FP64_SI)
+          begin
+            Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]};
+          end
+      else if(FP16_SI)
+          begin
+            Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]};
+          end
+      else
+          begin
+            Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]};
+          end
+    end
+
+assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S; // NX is always set when Full_precision_SI is 0
+assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX}
+
+endmodule // norm_div_sqrt_mvp
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
new file mode 100644
index 0000000..62bd147
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
@@ -0,0 +1,104 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li      lile@iis.ee.ethz.ch                            //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    10/04/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    nrbd_nrsc_mvp.sv                                           //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:   non-restoring binary divider / square root                  //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+// Thin wrapper around control_mvp: every port is wired straight to control_U0.
+module nrbd_nrsc_mvp
+  (//Input
+   input logic                                 Clk_CI,
+   input logic                                 Rst_RBI,
+   input logic                                 Div_start_SI,
+   input logic                                 Sqrt_start_SI,
+   input logic                                 Start_SI,
+   input logic                                 Kill_SI,
+   input logic                                 Special_case_SBI,
+   input logic                                 Special_case_dly_SBI,
+   input logic [C_PC-1:0]                      Precision_ctl_SI,
+   input logic [1:0]                           Format_sel_SI,
+   input logic [C_MANT_FP64:0]                 Mant_a_DI,
+   input logic [C_MANT_FP64:0]                 Mant_b_DI,
+   input logic [C_EXP_FP64:0]                  Exp_a_DI,
+   input logic [C_EXP_FP64:0]                  Exp_b_DI,
+  //output
+   output logic                                Div_enable_SO,
+   output logic                                Sqrt_enable_SO,
+
+   output logic                                Full_precision_SO,
+   output logic                                FP32_SO,
+   output logic                                FP64_SO,
+   output logic                                FP16_SO,
+   output logic                                FP16ALT_SO,
+   output logic                                Ready_SO,
+   output logic                                Done_SO,
+   output logic  [C_MANT_FP64+4:0]             Mant_z_DO,
+   output logic [C_EXP_FP64+1:0]               Exp_z_DO
+    );
+   // Driven by control_U0 but not read anywhere in this wrapper.
+   logic Div_start_dly_S;
+   logic Sqrt_start_dly_S;
+
+   control_mvp control_U0 (
+      // wrapper inputs forwarded under the same port name
+      .Clk_CI                 (Clk_CI),
+      .Rst_RBI                (Rst_RBI),
+      .Start_SI               (Start_SI),
+      .Div_start_SI           (Div_start_SI),
+      .Sqrt_start_SI          (Sqrt_start_SI),
+      .Kill_SI                (Kill_SI),
+      .Special_case_SBI       (Special_case_SBI),
+      .Special_case_dly_SBI   (Special_case_dly_SBI),
+      .Precision_ctl_SI       (Precision_ctl_SI),
+      .Format_sel_SI          (Format_sel_SI),
+      // operand inputs, renamed on the control_mvp side
+      .Numerator_DI           (Mant_a_DI),
+      .Denominator_DI         (Mant_b_DI),
+      .Exp_num_DI             (Exp_a_DI),
+      .Exp_den_DI             (Exp_b_DI),
+      // outputs that stay local to this wrapper
+      .Div_start_dly_SO       (Div_start_dly_S),
+      .Sqrt_start_dly_SO      (Sqrt_start_dly_S),
+      // outputs forwarded to the wrapper ports
+      .Div_enable_SO          (Div_enable_SO),
+      .Sqrt_enable_SO         (Sqrt_enable_SO),
+      .Full_precision_SO      (Full_precision_SO),
+      .FP32_SO                (FP32_SO),
+      .FP64_SO                (FP64_SO),
+      .FP16_SO                (FP16_SO),
+      .FP16ALT_SO             (FP16ALT_SO),
+      .Ready_SO               (Ready_SO),
+      .Done_SO                (Done_SO),
+      .Mant_result_prenorm_DO (Mant_z_DO),
+      .Exp_result_prenorm_DO  (Exp_z_DO)
+   );
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
new file mode 100644
index 0000000..9e0d25f
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
@@ -0,0 +1,425 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:                Lei Li  //lile@iis.ee.ethz.ch                    //
+//		                                                                        //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    01/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    preprocess_mvp.sv                                          //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:           decode and data preparation                         //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module preprocess_mvp // decodes the selected FP format, classifies the operands and registers normalized mantissas/exponents
+  (
+   input logic                   Clk_CI, // clock; all registers update on its rising edge
+   input logic                   Rst_RBI, // asynchronous reset, active low
+   input logic                   Div_start_SI, // start request; result sign is Sign_a ^ Sign_b
+   input logic                   Sqrt_start_SI, // start request; result sign is Sign_a
+   input logic                   Ready_SI, // a start request is only accepted (state captured) while this is high
+   //Input Operands
+   input logic [C_OP_FP64-1:0]   Operand_a_DI,
+   input logic [C_OP_FP64-1:0]   Operand_b_DI,
+   input logic [C_RM-1:0]        RM_SI,    //Rounding Mode, registered to RM_dly_SO on an accepted start
+   input logic [C_FS-1:0]        Format_sel_SI,  // Format Selection: 00=FP32, 01=FP64, 10=FP16, 11=FP16ALT
+
+   // to control
+   output logic                  Start_SO, // Div_start_SI | Sqrt_start_SI (not gated by Ready_SI)
+   output logic [C_EXP_FP64:0]   Exp_a_DO_norm,
+   output logic [C_EXP_FP64:0]   Exp_b_DO_norm,
+   output logic [C_MANT_FP64:0]  Mant_a_DO_norm,
+   output logic [C_MANT_FP64:0]  Mant_b_DO_norm,
+
+   output logic [C_RM-1:0]       RM_dly_SO,
+
+   output logic                  Sign_z_DO,
+   output logic                  Inf_a_SO,
+   output logic                  Inf_b_SO,
+   output logic                  Zero_a_SO,
+   output logic                  Zero_b_SO,
+   output logic                  NaN_a_SO,
+   output logic                  NaN_b_SO,
+   output logic                  SNaN_SO,
+   output logic                  Special_case_SBO, // combinational; see "Low power control" below
+   output logic                  Special_case_dly_SBO // registered copy, held between accepted starts
+   );
+
+   //Hidden Bits
+   logic                         Hb_a_D;
+   logic                         Hb_b_D;
+
+   logic [C_EXP_FP64-1:0]        Exp_a_D;
+   logic [C_EXP_FP64-1:0]        Exp_b_D;
+   logic [C_MANT_FP64-1:0]       Mant_a_NonH_D; // mantissa field without the hidden bit
+   logic [C_MANT_FP64-1:0]       Mant_b_NonH_D; // mantissa field without the hidden bit
+   logic [C_MANT_FP64:0]         Mant_a_D; // {hidden bit, mantissa field}
+   logic [C_MANT_FP64:0]         Mant_b_D; // {hidden bit, mantissa field}
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Disassemble operands
+   /////////////////////////////////////////////////////////////////////////////
+   logic                      Sign_a_D,Sign_b_D;
+   logic                      Start_S;
+
+     always_comb
+       begin
+         case(Format_sel_SI)
+           2'b00: // FP32 fields: exponent zero-extended, mantissa zero-padded on the right
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP32-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP32-1];
+               Exp_a_D  = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]};
+               Exp_b_D  = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP32-1:0],29'h0};
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP32-1:0],29'h0};
+             end
+           2'b01: // FP64 fields: used without extension or padding
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP64-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP64-1];
+               Exp_a_D  = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64];
+               Exp_b_D  = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64];
+               Mant_a_NonH_D = Operand_a_DI[C_MANT_FP64-1:0];
+               Mant_b_NonH_D = Operand_b_DI[C_MANT_FP64-1:0];
+             end
+           2'b10: // FP16 fields: exponent zero-extended, mantissa zero-padded on the right
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP16-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP16-1];
+               Exp_a_D  = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]};
+               Exp_b_D  = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16-1:0],42'h0};
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16-1:0],42'h0};
+             end
+           2'b11: // FP16ALT fields: exponent zero-extended, mantissa zero-padded on the right
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP16ALT-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP16ALT-1];
+               Exp_a_D  = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+               Exp_b_D  = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16ALT-1:0],45'h0};
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16ALT-1:0],45'h0};
+             end
+           endcase
+       end
+
+
+   assign Mant_a_D = {Hb_a_D,Mant_a_NonH_D};
+   assign Mant_b_D = {Hb_b_D,Mant_b_NonH_D};
+
+   assign Hb_a_D = | Exp_a_D; // hidden bit: 1 when any exponent bit is set, 0 when the exponent field is all zeros
+   assign Hb_b_D = | Exp_b_D; // hidden bit: 1 when any exponent bit is set, 0 when the exponent field is all zeros
+
+   assign Start_S= Div_start_SI | Sqrt_start_SI;
+
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // preliminary checks for infinite/zero/NaN operands                       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic               Mant_a_prenorm_zero_S;
+   logic               Mant_b_prenorm_zero_S;
+
+   logic               Exp_a_prenorm_zero_S;
+   logic               Exp_b_prenorm_zero_S;
+   assign Exp_a_prenorm_zero_S = ~Hb_a_D; // exponent field is all zeros
+   assign Exp_b_prenorm_zero_S = ~Hb_b_D; // exponent field is all zeros
+
+   logic               Exp_a_prenorm_Inf_NaN_S;
+   logic               Exp_b_prenorm_Inf_NaN_S;
+
+   logic               Mant_a_prenorm_QNaN_S; // assigned below but not read anywhere else in this module
+   logic               Mant_a_prenorm_SNaN_S;
+   logic               Mant_b_prenorm_QNaN_S; // assigned below but not read anywhere else in this module
+   logic               Mant_b_prenorm_SNaN_S;
+
+   assign Mant_a_prenorm_QNaN_S=Mant_a_NonH_D[C_MANT_FP64-1]&&(~(|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_a_prenorm_SNaN_S=(~Mant_a_NonH_D[C_MANT_FP64-1])&&((|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_b_prenorm_QNaN_S=Mant_b_NonH_D[C_MANT_FP64-1]&&(~(|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_b_prenorm_SNaN_S=(~Mant_b_NonH_D[C_MANT_FP64-1])&&((|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+
+     always_comb
+       begin
+         case(Format_sel_SI) // compare the raw fields of the selected format with the package's C_MANT_ZERO_* / C_EXP_INF_* constants
+           2'b00:
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+             end
+           2'b01:
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+             end
+           2'b10:
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+             end
+           2'b11:
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+             end
+           endcase
+       end
+
+
+
+
+   logic               Zero_a_SN,Zero_a_SP; // *_SN: next value, *_SP: registered value
+   logic               Zero_b_SN,Zero_b_SP;
+   logic               Inf_a_SN,Inf_a_SP;
+   logic               Inf_b_SN,Inf_b_SP;
+   logic               NaN_a_SN,NaN_a_SP;
+   logic               NaN_b_SN,NaN_b_SP;
+   logic               SNaN_SN,SNaN_SP;
+
+   assign Zero_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_zero_S&&Mant_a_prenorm_zero_S):Zero_a_SP; // each flag: new classification on an accepted start, otherwise hold
+   assign Zero_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_zero_S&&Mant_b_prenorm_zero_S):Zero_b_SP;
+   assign Inf_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&Mant_a_prenorm_zero_S):Inf_a_SP;
+   assign Inf_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&Mant_b_prenorm_zero_S):Inf_b_SP;
+   assign NaN_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&(~Mant_a_prenorm_zero_S)):NaN_a_SP;
+   assign NaN_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&(~Mant_b_prenorm_zero_S)):NaN_b_SP;
+   assign SNaN_SN = (Start_S&&Ready_SI) ? ((Mant_a_prenorm_SNaN_S&&NaN_a_SN) | (Mant_b_prenorm_SNaN_S&&NaN_b_SN)) : SNaN_SP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Zero_a_SP <='0;
+            Zero_b_SP <='0;
+            Inf_a_SP <='0;
+            Inf_b_SP <='0;
+            NaN_a_SP <='0;
+            NaN_b_SP <='0;
+            SNaN_SP <= '0;
+          end
+        else
+         begin
+           Inf_a_SP <=Inf_a_SN;
+           Inf_b_SP <=Inf_b_SN;
+           Zero_a_SP <=Zero_a_SN;
+           Zero_b_SP <=Zero_b_SN;
+           NaN_a_SP <=NaN_a_SN;
+           NaN_b_SP <=NaN_b_SN;
+           SNaN_SP <= SNaN_SN;
+         end
+      end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Low power control: Special_case_SBO is 1 only on an accepted start with no special operand (Div: zero/Inf/NaN on a or b; otherwise: zero/Inf/NaN/negative a)
+   /////////////////////////////////////////////////////////////////////////////
+
+   assign Special_case_SBO=(~{(Div_start_SI)?(Zero_a_SN | Zero_b_SN |  Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN): (Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D) })&&(Start_S&&Ready_SI);
+
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            Special_case_dly_SBO <= '0;
+          end
+       else if((Start_S&&Ready_SI)) // accepted start: capture the combinational flag
+         begin
+            Special_case_dly_SBO <= Special_case_SBO;
+         end
+       else if(Special_case_dly_SBO) // no accepted start: this branch and the final else together keep the current value
+         begin
+         Special_case_dly_SBO <= 1'b1;
+         end
+      else
+         begin
+            Special_case_dly_SBO <= '0;
+         end
+    end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Delay sign for normalization and round                                  //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic                   Sign_z_DN;
+   logic                   Sign_z_DP;
+
+   always_comb
+     begin
+       if(Div_start_SI&&Ready_SI) // Div_start_SI takes priority if both start inputs are high
+           Sign_z_DN = Sign_a_D ^ Sign_b_D;
+       else if(Sqrt_start_SI&&Ready_SI)
+           Sign_z_DN = Sign_a_D;
+       else
+           Sign_z_DN = Sign_z_DP; // hold
+    end
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            Sign_z_DP <= '0;
+          end
+       else
+         begin
+            Sign_z_DP <= Sign_z_DN;
+         end
+    end
+
+   logic [C_RM-1:0]                  RM_DN;
+   logic [C_RM-1:0]                  RM_DP;
+
+   always_comb // capture the rounding mode on an accepted start, otherwise hold
+     begin
+       if(Start_S&&Ready_SI)
+           RM_DN = RM_SI;
+       else
+           RM_DN = RM_DP;
+    end
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            RM_DP <= '0;
+          end
+       else
+         begin
+            RM_DP <= RM_DN;
+         end
+    end
+   assign RM_dly_SO = RM_DP;
+
+   logic [5:0]                  Mant_leadingOne_a, Mant_leadingOne_b; // lzc cnt_o, used as left-shift amount and exponent correction
+   logic                        Mant_zero_S_a,Mant_zero_S_b; // lzc empty_o; not read anywhere else in this module
+
+  lzc #(
+    .WIDTH ( C_MANT_FP64+1 ),
+    .MODE  ( 1             ) // lzc is defined outside this file; presumably MODE=1 selects leading-zero counting - TODO confirm
+  ) LOD_Ua (
+    .in_i    ( Mant_a_D          ),
+    .cnt_o   ( Mant_leadingOne_a ),
+    .empty_o ( Mant_zero_S_a     )
+  );
+
+   logic [C_MANT_FP64:0]            Mant_a_norm_DN,Mant_a_norm_DP;
+
+   assign  Mant_a_norm_DN = ((Start_S&&Ready_SI))?(Mant_a_D<<(Mant_leadingOne_a)):Mant_a_norm_DP; // shift left by the lzc count on an accepted start, otherwise hold
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Mant_a_norm_DP <= '0;
+          end
+        else
+          begin
+            Mant_a_norm_DP<=Mant_a_norm_DN;
+          end
+     end
+
+   logic [C_EXP_FP64:0]            Exp_a_norm_DN,Exp_a_norm_DP;
+   assign  Exp_a_norm_DN = ((Start_S&&Ready_SI))?(Exp_a_D-Mant_leadingOne_a+(|Mant_leadingOne_a)):Exp_a_norm_DP;  // exponent minus the shift amount, plus 1 whenever the shift amount is non-zero (covers denormal inputs)
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Exp_a_norm_DP <= '0;
+          end
+        else
+          begin
+            Exp_a_norm_DP<=Exp_a_norm_DN;
+          end
+     end
+
+  lzc #(
+    .WIDTH ( C_MANT_FP64+1 ),
+    .MODE  ( 1             ) // same configuration as LOD_Ua
+  ) LOD_Ub (
+    .in_i    ( Mant_b_D          ),
+    .cnt_o   ( Mant_leadingOne_b ),
+    .empty_o ( Mant_zero_S_b     )
+  );
+
+
+   logic [C_MANT_FP64:0]            Mant_b_norm_DN,Mant_b_norm_DP;
+
+   assign  Mant_b_norm_DN = ((Start_S&&Ready_SI))?(Mant_b_D<<(Mant_leadingOne_b)):Mant_b_norm_DP; // shift left by the lzc count on an accepted start, otherwise hold
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Mant_b_norm_DP <= '0;
+          end
+        else
+          begin
+            Mant_b_norm_DP<=Mant_b_norm_DN;
+          end
+     end
+
+   logic [C_EXP_FP64:0]            Exp_b_norm_DN,Exp_b_norm_DP;
+   assign  Exp_b_norm_DN = ((Start_S&&Ready_SI))?(Exp_b_D-Mant_leadingOne_b+(|Mant_leadingOne_b)):Exp_b_norm_DP; // exponent minus the shift amount, plus 1 whenever the shift amount is non-zero (covers denormal inputs)
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Exp_b_norm_DP <= '0;
+          end
+        else
+          begin
+            Exp_b_norm_DP<=Exp_b_norm_DN;
+          end
+     end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Output assignments                                                      //
+   /////////////////////////////////////////////////////////////////////////////
+
+   assign Start_SO=Start_S;
+   assign Exp_a_DO_norm=Exp_a_norm_DP;
+   assign Exp_b_DO_norm=Exp_b_norm_DP;
+   assign Mant_a_DO_norm=Mant_a_norm_DP;
+   assign Mant_b_DO_norm=Mant_b_norm_DP;
+   assign Sign_z_DO=Sign_z_DP;
+   assign Inf_a_SO=Inf_a_SP;
+   assign Inf_b_SO=Inf_b_SP;
+   assign Zero_a_SO=Zero_a_SP;
+   assign Zero_b_SO=Zero_b_SP;
+   assign NaN_a_SO=NaN_a_SP;
+   assign NaN_b_SO=NaN_b_SP;
+   assign SNaN_SO=SNaN_SP;
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh b/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh
new file mode 100644
index 0000000..14bb194
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh
@@ -0,0 +1,541 @@
+// Copyright (c) 2014-2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+
+// Macros to assign AXI Interfaces and Structs
+
+`ifndef AXI_ASSIGN_SVH_
+`define AXI_ASSIGN_SVH_
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Internal implementation for assigning one AXI struct or interface to another struct or interface.
+// The separator between channel prefix and field name on each side is given by the `__lhs_sep` and
+// `__rhs_sep` arguments (`_` or `.` in the wrappers below).  `__opt_as` is `assign` for standalone
+// continuous assignments, or empty for plain assignments inside processes.
+`define __AXI_TO_AW(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep)   \
+  __opt_as __lhs``__lhs_sep``id     = __rhs``__rhs_sep``id;         \
+  __opt_as __lhs``__lhs_sep``addr   = __rhs``__rhs_sep``addr;       \
+  __opt_as __lhs``__lhs_sep``len    = __rhs``__rhs_sep``len;        \
+  __opt_as __lhs``__lhs_sep``size   = __rhs``__rhs_sep``size;       \
+  __opt_as __lhs``__lhs_sep``burst  = __rhs``__rhs_sep``burst;      \
+  __opt_as __lhs``__lhs_sep``lock   = __rhs``__rhs_sep``lock;       \
+  __opt_as __lhs``__lhs_sep``cache  = __rhs``__rhs_sep``cache;      \
+  __opt_as __lhs``__lhs_sep``prot   = __rhs``__rhs_sep``prot;       \
+  __opt_as __lhs``__lhs_sep``qos    = __rhs``__rhs_sep``qos;        \
+  __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region;     \
+  __opt_as __lhs``__lhs_sep``atop   = __rhs``__rhs_sep``atop;       \
+  __opt_as __lhs``__lhs_sep``user   = __rhs``__rhs_sep``user;
+`define __AXI_TO_W(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep)    \
+  __opt_as __lhs``__lhs_sep``data   = __rhs``__rhs_sep``data;       \
+  __opt_as __lhs``__lhs_sep``strb   = __rhs``__rhs_sep``strb;       \
+  __opt_as __lhs``__lhs_sep``last   = __rhs``__rhs_sep``last;       \
+  __opt_as __lhs``__lhs_sep``user   = __rhs``__rhs_sep``user;
+`define __AXI_TO_B(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep)    \
+  __opt_as __lhs``__lhs_sep``id     = __rhs``__rhs_sep``id;         \
+  __opt_as __lhs``__lhs_sep``resp   = __rhs``__rhs_sep``resp;       \
+  __opt_as __lhs``__lhs_sep``user   = __rhs``__rhs_sep``user;
+`define __AXI_TO_AR(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep)   \
+  __opt_as __lhs``__lhs_sep``id     = __rhs``__rhs_sep``id;         \
+  __opt_as __lhs``__lhs_sep``addr   = __rhs``__rhs_sep``addr;       \
+  __opt_as __lhs``__lhs_sep``len    = __rhs``__rhs_sep``len;        \
+  __opt_as __lhs``__lhs_sep``size   = __rhs``__rhs_sep``size;       \
+  __opt_as __lhs``__lhs_sep``burst  = __rhs``__rhs_sep``burst;      \
+  __opt_as __lhs``__lhs_sep``lock   = __rhs``__rhs_sep``lock;       \
+  __opt_as __lhs``__lhs_sep``cache  = __rhs``__rhs_sep``cache;      \
+  __opt_as __lhs``__lhs_sep``prot   = __rhs``__rhs_sep``prot;       \
+  __opt_as __lhs``__lhs_sep``qos    = __rhs``__rhs_sep``qos;        \
+  __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region;     \
+  __opt_as __lhs``__lhs_sep``user   = __rhs``__rhs_sep``user;
+`define __AXI_TO_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep)    \
+  __opt_as __lhs``__lhs_sep``id     = __rhs``__rhs_sep``id;         \
+  __opt_as __lhs``__lhs_sep``data   = __rhs``__rhs_sep``data;       \
+  __opt_as __lhs``__lhs_sep``resp   = __rhs``__rhs_sep``resp;       \
+  __opt_as __lhs``__lhs_sep``last   = __rhs``__rhs_sep``last;       \
+  __opt_as __lhs``__lhs_sep``user   = __rhs``__rhs_sep``user;
+`define __AXI_TO_REQ(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep)  \
+  `__AXI_TO_AW(__opt_as, __lhs.aw, __lhs_sep, __rhs.aw, __rhs_sep)  \
+  __opt_as __lhs.aw_valid = __rhs.aw_valid;                         \
+  `__AXI_TO_W(__opt_as, __lhs.w, __lhs_sep, __rhs.w, __rhs_sep)     \
+  __opt_as __lhs.w_valid = __rhs.w_valid;                           \
+  __opt_as __lhs.b_ready = __rhs.b_ready;                           \
+  `__AXI_TO_AR(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep)  \
+  __opt_as __lhs.ar_valid = __rhs.ar_valid;                         \
+  __opt_as __lhs.r_ready = __rhs.r_ready;
+`define __AXI_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \
+  __opt_as __lhs.aw_ready = __rhs.aw_ready;                         \
+  __opt_as __lhs.ar_ready = __rhs.ar_ready;                         \
+  __opt_as __lhs.w_ready = __rhs.w_ready;                           \
+  __opt_as __lhs.b_valid = __rhs.b_valid;                           \
+  `__AXI_TO_B(__opt_as, __lhs.b, __lhs_sep, __rhs.b, __rhs_sep)     \
+  __opt_as __lhs.r_valid = __rhs.r_valid;                           \
+  `__AXI_TO_R(__opt_as, __lhs.r, __lhs_sep, __rhs.r, __rhs_sep)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning one AXI4+ATOP interface to another, as if you would do `assign slv = mst;`
+//
+// The channel assignments `AXI_ASSIGN_XX(dst, src)` assign all payload and the valid signal of the
+// `XX` channel from the `src` to the `dst` interface and they assign the ready signal from the
+// `dst` to the `src` interface.
+// The interface assignment `AXI_ASSIGN(dst, src)` assigns all channels including handshakes as if
+// `src` was the master of `dst`.
+//
+// Usage Example:
+// `AXI_ASSIGN(slv, mst)
+// `AXI_ASSIGN_AW(dst, src)
+// `AXI_ASSIGN_R(dst, src)
+`define AXI_ASSIGN_AW(dst, src)               \
+  `__AXI_TO_AW(assign, dst.aw, _, src.aw, _)  \
+  assign dst.aw_valid = src.aw_valid;         \
+  assign src.aw_ready = dst.aw_ready;
+`define AXI_ASSIGN_W(dst, src)                \
+  `__AXI_TO_W(assign, dst.w, _, src.w, _)     \
+  assign dst.w_valid  = src.w_valid;          \
+  assign src.w_ready  = dst.w_ready;
+`define AXI_ASSIGN_B(dst, src)                \
+  `__AXI_TO_B(assign, dst.b, _, src.b, _)     \
+  assign dst.b_valid  = src.b_valid;          \
+  assign src.b_ready  = dst.b_ready;
+`define AXI_ASSIGN_AR(dst, src)               \
+  `__AXI_TO_AR(assign, dst.ar, _, src.ar, _)  \
+  assign dst.ar_valid = src.ar_valid;         \
+  assign src.ar_ready = dst.ar_ready;
+`define AXI_ASSIGN_R(dst, src)                \
+  `__AXI_TO_R(assign, dst.r, _, src.r, _)     \
+  assign dst.r_valid  = src.r_valid;          \
+  assign src.r_ready  = dst.r_ready;
+`define AXI_ASSIGN(slv, mst)  \
+  `AXI_ASSIGN_AW(slv, mst)    \
+  `AXI_ASSIGN_W(slv, mst)     \
+  `AXI_ASSIGN_B(mst, slv)     \
+  `AXI_ASSIGN_AR(slv, mst)    \
+  `AXI_ASSIGN_R(mst, slv)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning an AXI4+ATOP interface to a monitor modport, as if you would do `assign mon = axi_if;`
+//
+// The channel assignment `AXI_ASSIGN_MONITOR(mon_dv, axi_if)` assigns all signals (payload, valid,
+// and ready of every channel) from `axi_if` to the `mon_dv` interface.
+//
+// Usage Example:
+// `AXI_ASSIGN_MONITOR(mon_dv, axi_if)
+`define AXI_ASSIGN_MONITOR(mon_dv, axi_if)          \
+  `__AXI_TO_AW(assign, mon_dv.aw, _, axi_if.aw, _)  \
+  assign mon_dv.aw_valid  = axi_if.aw_valid;        \
+  assign mon_dv.aw_ready  = axi_if.aw_ready;        \
+  `__AXI_TO_W(assign, mon_dv.w, _, axi_if.w, _)     \
+  assign mon_dv.w_valid   = axi_if.w_valid;         \
+  assign mon_dv.w_ready   = axi_if.w_ready;         \
+  `__AXI_TO_B(assign, mon_dv.b, _, axi_if.b, _)     \
+  assign mon_dv.b_valid   = axi_if.b_valid;         \
+  assign mon_dv.b_ready   = axi_if.b_ready;         \
+  `__AXI_TO_AR(assign, mon_dv.ar, _, axi_if.ar, _)  \
+  assign mon_dv.ar_valid  = axi_if.ar_valid;        \
+  assign mon_dv.ar_ready  = axi_if.ar_ready;        \
+  `__AXI_TO_R(assign, mon_dv.r, _, axi_if.r, _)     \
+  assign mon_dv.r_valid   = axi_if.r_valid;         \
+  assign mon_dv.r_ready   = axi_if.r_ready;
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Setting an interface from channel or request/response structs inside a process.
+//
+// The channel macros `AXI_SET_FROM_XX(axi_if, xx_struct)` set the payload signals of the `axi_if`
+// interface from the signals in `xx_struct`.  They do not set the handshake signals.
+// The request macro `AXI_SET_FROM_REQ(axi_if, req_struct)` sets all request channels (AW, W, AR)
+// and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the `axi_if`
+// interface from the signals in `req_struct`.
+// The response macro `AXI_SET_FROM_RESP(axi_if, resp_struct)` sets both response channels (B and R)
+// and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the `axi_if`
+// interface from the signals in `resp_struct`.
+//
+// Usage Example (procedural context; these macros expand to plain `=` assignments):
+// always_comb begin
+//   `AXI_SET_FROM_REQ(my_if, my_req_struct)
+// end
+`define AXI_SET_FROM_AW(axi_if, aw_struct)      `__AXI_TO_AW(, axi_if.aw, _, aw_struct, .)
+`define AXI_SET_FROM_W(axi_if, w_struct)        `__AXI_TO_W(, axi_if.w, _, w_struct, .)
+`define AXI_SET_FROM_B(axi_if, b_struct)        `__AXI_TO_B(, axi_if.b, _, b_struct, .)
+`define AXI_SET_FROM_AR(axi_if, ar_struct)      `__AXI_TO_AR(, axi_if.ar, _, ar_struct, .)
+`define AXI_SET_FROM_R(axi_if, r_struct)        `__AXI_TO_R(, axi_if.r, _, r_struct, .)
+`define AXI_SET_FROM_REQ(axi_if, req_struct)    `__AXI_TO_REQ(, axi_if, _, req_struct, .)
+`define AXI_SET_FROM_RESP(axi_if, resp_struct)  `__AXI_TO_RESP(, axi_if, _, resp_struct, .)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning an interface from channel or request/response structs outside a process.
+//
+// The channel macros `AXI_ASSIGN_FROM_XX(axi_if, xx_struct)` assign the payload signals of the
+// `axi_if` interface from the signals in `xx_struct`.  They do not assign the handshake signals.
+// The request macro `AXI_ASSIGN_FROM_REQ(axi_if, req_struct)` assigns all request channels (AW, W,
+// AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the
+// `axi_if` interface from the signals in `req_struct`.
+// The response macro `AXI_ASSIGN_FROM_RESP(axi_if, resp_struct)` assigns both response channels (B
+// and R) and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the
+// `axi_if` interface from the signals in `resp_struct`.
+//
+// Usage Example (these macros expand to continuous `assign` statements):
+// `AXI_ASSIGN_FROM_REQ(my_if, my_req_struct)
+`define AXI_ASSIGN_FROM_AW(axi_if, aw_struct)     `__AXI_TO_AW(assign, axi_if.aw, _, aw_struct, .)
+`define AXI_ASSIGN_FROM_W(axi_if, w_struct)       `__AXI_TO_W(assign, axi_if.w, _, w_struct, .)
+`define AXI_ASSIGN_FROM_B(axi_if, b_struct)       `__AXI_TO_B(assign, axi_if.b, _, b_struct, .)
+`define AXI_ASSIGN_FROM_AR(axi_if, ar_struct)     `__AXI_TO_AR(assign, axi_if.ar, _, ar_struct, .)
+`define AXI_ASSIGN_FROM_R(axi_if, r_struct)       `__AXI_TO_R(assign, axi_if.r, _, r_struct, .)
+`define AXI_ASSIGN_FROM_REQ(axi_if, req_struct)   `__AXI_TO_REQ(assign, axi_if, _, req_struct, .)
+`define AXI_ASSIGN_FROM_RESP(axi_if, resp_struct) `__AXI_TO_RESP(assign, axi_if, _, resp_struct, .)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Setting channel or request/response structs from an interface inside a process.
+//
+// The channel macros `AXI_SET_TO_XX(xx_struct, axi_if)` set the signals of `xx_struct` to the
+// payload signals of that channel in the `axi_if` interface.  They do not set the handshake
+// signals.
+// The request macro `AXI_SET_TO_REQ(req_struct, axi_if)` sets all signals of `req_struct` (i.e.,
+// request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR valid and
+// B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_SET_TO_RESP(resp_struct, axi_if)` sets all signals of `resp_struct`
+// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and
+// AW, W, and AR ready)) to the signals in the `axi_if` interface.
+//
+// Usage Example:
+// always_comb begin
+//   `AXI_SET_TO_REQ(my_req_struct, my_if)
+// end
+`define AXI_SET_TO_AW(aw_struct, axi_if)     `__AXI_TO_AW(, aw_struct, ., axi_if.aw, _)
+`define AXI_SET_TO_W(w_struct, axi_if)       `__AXI_TO_W(, w_struct, ., axi_if.w, _)
+`define AXI_SET_TO_B(b_struct, axi_if)       `__AXI_TO_B(, b_struct, ., axi_if.b, _)
+`define AXI_SET_TO_AR(ar_struct, axi_if)     `__AXI_TO_AR(, ar_struct, ., axi_if.ar, _)
+`define AXI_SET_TO_R(r_struct, axi_if)       `__AXI_TO_R(, r_struct, ., axi_if.r, _)
+`define AXI_SET_TO_REQ(req_struct, axi_if)   `__AXI_TO_REQ(, req_struct, ., axi_if, _)
+`define AXI_SET_TO_RESP(resp_struct, axi_if) `__AXI_TO_RESP(, resp_struct, ., axi_if, _)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning channel or request/response structs from an interface outside a process.
+//
+// The channel macros `AXI_ASSIGN_TO_XX(xx_struct, axi_if)` assign the signals of `xx_struct` to the
+// payload signals of that channel in the `axi_if` interface.  They do not assign the handshake
+// signals.
+// The request macro `AXI_ASSIGN_TO_REQ(req_struct, axi_if)` assigns all signals of `req_struct`
+// (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR
+// valid and B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_ASSIGN_TO_RESP(resp_struct, axi_if)` assigns all signals of `resp_struct`
+// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and
+// AW, W, and AR ready)) to the signals in the `axi_if` interface.
+//
+// Usage Example:
+// `AXI_ASSIGN_TO_REQ(my_req_struct, my_if)
+`define AXI_ASSIGN_TO_AW(aw_struct, axi_if)     `__AXI_TO_AW(assign, aw_struct, ., axi_if.aw, _)
+`define AXI_ASSIGN_TO_W(w_struct, axi_if)       `__AXI_TO_W(assign, w_struct, ., axi_if.w, _)
+`define AXI_ASSIGN_TO_B(b_struct, axi_if)       `__AXI_TO_B(assign, b_struct, ., axi_if.b, _)
+`define AXI_ASSIGN_TO_AR(ar_struct, axi_if)     `__AXI_TO_AR(assign, ar_struct, ., axi_if.ar, _)
+`define AXI_ASSIGN_TO_R(r_struct, axi_if)       `__AXI_TO_R(assign, r_struct, ., axi_if.r, _)
+`define AXI_ASSIGN_TO_REQ(req_struct, axi_if)   `__AXI_TO_REQ(assign, req_struct, ., axi_if, _)
+`define AXI_ASSIGN_TO_RESP(resp_struct, axi_if) `__AXI_TO_RESP(assign, resp_struct, ., axi_if, _)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Setting channel or request/response structs from another struct inside a process.
+//
+// The channel macros `AXI_SET_XX_STRUCT(lhs, rhs)` set the fields of the `lhs` channel struct to
+// the fields of the `rhs` channel struct.  They do not set the handshake signals, which are not
+// part of channel structs.
+// The request macro `AXI_SET_REQ_STRUCT(lhs, rhs)` sets all fields of the `lhs` request struct to
+// the fields of the `rhs` request struct.  This includes all request channel (AW, W, AR) payload
+// and request-side handshake signals (AW, W, and AR valid and B and R ready).
+// The response macro `AXI_SET_RESP_STRUCT(lhs, rhs)` sets all fields of the `lhs` response struct
+// to the fields of the `rhs` response struct.  This includes all response channel (B and R) payload
+// and response-side handshake signals (B and R valid and AW, W, and AR ready).
+//
+// Usage Example:
+// always_comb begin
+//   `AXI_SET_REQ_STRUCT(my_req_struct, another_req_struct)
+// end
+`define AXI_SET_AW_STRUCT(lhs, rhs)     `__AXI_TO_AW(, lhs, ., rhs, .)
+`define AXI_SET_W_STRUCT(lhs, rhs)       `__AXI_TO_W(, lhs, ., rhs, .)
+`define AXI_SET_B_STRUCT(lhs, rhs)       `__AXI_TO_B(, lhs, ., rhs, .)
+`define AXI_SET_AR_STRUCT(lhs, rhs)     `__AXI_TO_AR(, lhs, ., rhs, .)
+`define AXI_SET_R_STRUCT(lhs, rhs)       `__AXI_TO_R(, lhs, ., rhs, .)
+`define AXI_SET_REQ_STRUCT(lhs, rhs)   `__AXI_TO_REQ(, lhs, ., rhs, .)
+`define AXI_SET_RESP_STRUCT(lhs, rhs) `__AXI_TO_RESP(, lhs, ., rhs, .)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning channel or request/response structs from another struct outside a process.
+//
+// The channel macros `AXI_ASSIGN_XX_STRUCT(lhs, rhs)` assign the fields of the `lhs` channel struct
+// to the fields of the `rhs` channel struct.  They do not assign the handshake signals, which are
+// not part of the channel structs.
+// The request macro `AXI_ASSIGN_REQ_STRUCT(lhs, rhs)` assigns all fields of the `lhs` request
+// struct to the fields of the `rhs` request struct.  This includes all request channel (AW, W, AR)
+// payload and request-side handshake signals (AW, W, and AR valid and B and R ready).
+// The response macro `AXI_ASSIGN_RESP_STRUCT(lhs, rhs)` assigns all fields of the `lhs` response
+// struct to the fields of the `rhs` response struct.  This includes all response channel (B and R)
+// payload and response-side handshake signals (B and R valid and AW, W, and AR ready).
+//
+// Usage Example:
+// `AXI_ASSIGN_REQ_STRUCT(my_req_struct, another_req_struct)
+`define AXI_ASSIGN_AW_STRUCT(lhs, rhs)     `__AXI_TO_AW(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_W_STRUCT(lhs, rhs)       `__AXI_TO_W(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_B_STRUCT(lhs, rhs)       `__AXI_TO_B(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_AR_STRUCT(lhs, rhs)     `__AXI_TO_AR(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_R_STRUCT(lhs, rhs)       `__AXI_TO_R(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_REQ_STRUCT(lhs, rhs)   `__AXI_TO_REQ(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_RESP_STRUCT(lhs, rhs) `__AXI_TO_RESP(assign, lhs, ., rhs, .)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Internal implementation for assigning one Lite struct or interface to another struct or
+// interface.  The path to the signals on each side is defined by the `__*_sep` arguments.  The
+// `__opt_as` argument allows using this standalone (with `__opt_as = assign`) or in assignments
+// inside processes (with `__opt_as` void).
+`define __AXI_LITE_TO_AX(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep)  \
+  __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr;             \
+  __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot;
+`define __AXI_LITE_TO_W(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \
+  __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data;           \
+  __opt_as __lhs``__lhs_sep``strb = __rhs``__rhs_sep``strb;
+`define __AXI_LITE_TO_B(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \
+  __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp;
+`define __AXI_LITE_TO_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \
+  __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data;           \
+  __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp;
+`define __AXI_LITE_TO_REQ(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \
+  `__AXI_LITE_TO_AX(__opt_as, __lhs.aw, __lhs_sep, __rhs.aw, __rhs_sep) \
+  __opt_as __lhs.aw_valid = __rhs.aw_valid;                             \
+  `__AXI_LITE_TO_W(__opt_as, __lhs.w, __lhs_sep, __rhs.w, __rhs_sep)    \
+  __opt_as __lhs.w_valid = __rhs.w_valid;                               \
+  __opt_as __lhs.b_ready = __rhs.b_ready;                               \
+  `__AXI_LITE_TO_AX(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \
+  __opt_as __lhs.ar_valid = __rhs.ar_valid;                             \
+  __opt_as __lhs.r_ready = __rhs.r_ready;
+`define __AXI_LITE_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep)  \
+  __opt_as __lhs.aw_ready = __rhs.aw_ready;                               \
+  __opt_as __lhs.ar_ready = __rhs.ar_ready;                               \
+  __opt_as __lhs.w_ready = __rhs.w_ready;                                 \
+  __opt_as __lhs.b_valid = __rhs.b_valid;                                 \
+  `__AXI_LITE_TO_B(__opt_as, __lhs.b, __lhs_sep, __rhs.b, __rhs_sep)      \
+  __opt_as __lhs.r_valid = __rhs.r_valid;                                 \
+  `__AXI_LITE_TO_R(__opt_as, __lhs.r, __lhs_sep, __rhs.r, __rhs_sep)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning one AXI-Lite interface to another, as if you would do `assign slv = mst;`
+//
+// The channel assignments `AXI_LITE_ASSIGN_XX(dst, src)` assign all payload and the valid signal of
+// the `XX` channel from the `src` to the `dst` interface and they assign the ready signal from the
+// `dst` to the `src` interface.
+// The interface assignment `AXI_LITE_ASSIGN(slv, mst)` assigns all channels including handshakes as
+// if `mst` was the master of `slv`.
+//
+// Usage Example:
+// `AXI_LITE_ASSIGN(slv, mst)
+// `AXI_LITE_ASSIGN_AW(dst, src)
+// `AXI_LITE_ASSIGN_R(dst, src)
+`define AXI_LITE_ASSIGN_AW(dst, src)              \
+  `__AXI_LITE_TO_AX(assign, dst.aw, _, src.aw, _) \
+  assign dst.aw_valid = src.aw_valid;             \
+  assign src.aw_ready = dst.aw_ready;
+`define AXI_LITE_ASSIGN_W(dst, src)             \
+  `__AXI_LITE_TO_W(assign, dst.w, _, src.w, _)  \
+  assign dst.w_valid  = src.w_valid;            \
+  assign src.w_ready  = dst.w_ready;
+`define AXI_LITE_ASSIGN_B(dst, src)             \
+  `__AXI_LITE_TO_B(assign, dst.b, _, src.b, _)  \
+  assign dst.b_valid  = src.b_valid;            \
+  assign src.b_ready  = dst.b_ready;
+`define AXI_LITE_ASSIGN_AR(dst, src)              \
+  `__AXI_LITE_TO_AX(assign, dst.ar, _, src.ar, _) \
+  assign dst.ar_valid = src.ar_valid;             \
+  assign src.ar_ready = dst.ar_ready;
+`define AXI_LITE_ASSIGN_R(dst, src)             \
+  `__AXI_LITE_TO_R(assign, dst.r, _, src.r, _)  \
+  assign dst.r_valid  = src.r_valid;            \
+  assign src.r_ready  = dst.r_ready;
+`define AXI_LITE_ASSIGN(slv, mst) \
+  `AXI_LITE_ASSIGN_AW(slv, mst)   \
+  `AXI_LITE_ASSIGN_W(slv, mst)    \
+  `AXI_LITE_ASSIGN_B(mst, slv)    \
+  `AXI_LITE_ASSIGN_AR(slv, mst)   \
+  `AXI_LITE_ASSIGN_R(mst, slv)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Setting a Lite interface from channel or request/response structs inside a process.
+//
+// The channel macros `AXI_LITE_SET_FROM_XX(axi_if, xx_struct)` set the payload signals of the
+// `axi_if` interface from the signals in `xx_struct`.  They do not set the handshake signals.
+// The request macro `AXI_LITE_SET_FROM_REQ(axi_if, req_struct)` sets all request channels (AW, W,
+// AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the
+// `axi_if` interface from the signals in `req_struct`.
+// The response macro `AXI_LITE_SET_FROM_RESP(axi_if, resp_struct)` sets both response channels (B
+// and R) and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the
+// `axi_if` interface from the signals in `resp_struct`.
+//
+// Usage Example:
+// always_comb begin
+//   `AXI_LITE_SET_FROM_REQ(my_if, my_req_struct)
+// end
+`define AXI_LITE_SET_FROM_AW(axi_if, aw_struct)      `__AXI_LITE_TO_AX(, axi_if.aw, _, aw_struct, .)
+`define AXI_LITE_SET_FROM_W(axi_if, w_struct)        `__AXI_LITE_TO_W(, axi_if.w, _, w_struct, .)
+`define AXI_LITE_SET_FROM_B(axi_if, b_struct)        `__AXI_LITE_TO_B(, axi_if.b, _, b_struct, .)
+`define AXI_LITE_SET_FROM_AR(axi_if, ar_struct)      `__AXI_LITE_TO_AX(, axi_if.ar, _, ar_struct, .)
+`define AXI_LITE_SET_FROM_R(axi_if, r_struct)        `__AXI_LITE_TO_R(, axi_if.r, _, r_struct, .)
+`define AXI_LITE_SET_FROM_REQ(axi_if, req_struct)    `__AXI_LITE_TO_REQ(, axi_if, _, req_struct, .)
+`define AXI_LITE_SET_FROM_RESP(axi_if, resp_struct)  `__AXI_LITE_TO_RESP(, axi_if, _, resp_struct, .)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning a Lite interface from channel or request/response structs outside a process.
+//
+// The channel macros `AXI_LITE_ASSIGN_FROM_XX(axi_if, xx_struct)` assign the payload signals of the
+// `axi_if` interface from the signals in `xx_struct`.  They do not assign the handshake signals.
+// The request macro `AXI_LITE_ASSIGN_FROM_REQ(axi_if, req_struct)` assigns all request channels
+// (AW, W, AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the
+// `axi_if` interface from the signals in `req_struct`.
+// The response macro `AXI_LITE_ASSIGN_FROM_RESP(axi_if, resp_struct)` assigns both response
+// channels (B and R) and the response-side handshake signals (B and R valid and AW, W, and AR
+// ready) of the `axi_if` interface from the signals in `resp_struct`.
+//
+// Usage Example:
+// `AXI_LITE_ASSIGN_FROM_REQ(my_if, my_req_struct)
+`define AXI_LITE_ASSIGN_FROM_AW(axi_if, aw_struct)     `__AXI_LITE_TO_AX(assign, axi_if.aw, _, aw_struct, .)
+`define AXI_LITE_ASSIGN_FROM_W(axi_if, w_struct)       `__AXI_LITE_TO_W(assign, axi_if.w, _, w_struct, .)
+`define AXI_LITE_ASSIGN_FROM_B(axi_if, b_struct)       `__AXI_LITE_TO_B(assign, axi_if.b, _, b_struct, .)
+`define AXI_LITE_ASSIGN_FROM_AR(axi_if, ar_struct)     `__AXI_LITE_TO_AX(assign, axi_if.ar, _, ar_struct, .)
+`define AXI_LITE_ASSIGN_FROM_R(axi_if, r_struct)       `__AXI_LITE_TO_R(assign, axi_if.r, _, r_struct, .)
+`define AXI_LITE_ASSIGN_FROM_REQ(axi_if, req_struct)   `__AXI_LITE_TO_REQ(assign, axi_if, _, req_struct, .)
+`define AXI_LITE_ASSIGN_FROM_RESP(axi_if, resp_struct) `__AXI_LITE_TO_RESP(assign, axi_if, _, resp_struct, .)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Setting channel or request/response structs from an interface inside a process.
+//
+// The channel macros `AXI_LITE_SET_TO_XX(xx_struct, axi_if)` set the signals of `xx_struct` to the
+// payload signals of that channel in the `axi_if` interface.  They do not set the handshake
+// signals.
+// The request macro `AXI_LITE_SET_TO_REQ(req_struct, axi_if)` sets all signals of `req_struct`
+// (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR
+// valid and B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_LITE_SET_TO_RESP(resp_struct, axi_if)` sets all signals of `resp_struct`
+// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and
+// AW, W, and AR ready)) to the signals in the `axi_if` interface.
+//
+// Usage Example:
+// always_comb begin
+//   `AXI_LITE_SET_TO_REQ(my_req_struct, my_if)
+// end
+`define AXI_LITE_SET_TO_AW(aw_struct, axi_if)     `__AXI_LITE_TO_AX(, aw_struct, ., axi_if.aw, _)
+`define AXI_LITE_SET_TO_W(w_struct, axi_if)       `__AXI_LITE_TO_W(, w_struct, ., axi_if.w, _)
+`define AXI_LITE_SET_TO_B(b_struct, axi_if)       `__AXI_LITE_TO_B(, b_struct, ., axi_if.b, _)
+`define AXI_LITE_SET_TO_AR(ar_struct, axi_if)     `__AXI_LITE_TO_AX(, ar_struct, ., axi_if.ar, _)
+`define AXI_LITE_SET_TO_R(r_struct, axi_if)       `__AXI_LITE_TO_R(, r_struct, ., axi_if.r, _)
+`define AXI_LITE_SET_TO_REQ(req_struct, axi_if)   `__AXI_LITE_TO_REQ(, req_struct, ., axi_if, _)
+`define AXI_LITE_SET_TO_RESP(resp_struct, axi_if) `__AXI_LITE_TO_RESP(, resp_struct, ., axi_if, _)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning channel or request/response structs from an interface outside a process.
+//
+// The channel macros `AXI_LITE_ASSIGN_TO_XX(xx_struct, axi_if)` assign the signals of `xx_struct`
+// to the payload signals of that channel in the `axi_if` interface.  They do not assign the
+// handshake signals.
+// The request macro `AXI_LITE_ASSIGN_TO_REQ(req_struct, axi_if)` assigns all signals of
+// `req_struct` (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW,
+// W, and AR valid and B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_LITE_ASSIGN_TO_RESP(resp_struct, axi_if)` assigns all signals of
+// `resp_struct` (i.e., response channel (B and R) payload and response-side handshake signals (B
+// and R valid and AW, W, and AR ready)) to the signals in the `axi_if` interface.
+//
+// Usage Example:
+// `AXI_LITE_ASSIGN_TO_REQ(my_req_struct, my_if)
+`define AXI_LITE_ASSIGN_TO_AW(aw_struct, axi_if)     `__AXI_LITE_TO_AX(assign, aw_struct, ., axi_if.aw, _)
+`define AXI_LITE_ASSIGN_TO_W(w_struct, axi_if)       `__AXI_LITE_TO_W(assign, w_struct, ., axi_if.w, _)
+`define AXI_LITE_ASSIGN_TO_B(b_struct, axi_if)       `__AXI_LITE_TO_B(assign, b_struct, ., axi_if.b, _)
+`define AXI_LITE_ASSIGN_TO_AR(ar_struct, axi_if)     `__AXI_LITE_TO_AX(assign, ar_struct, ., axi_if.ar, _)
+`define AXI_LITE_ASSIGN_TO_R(r_struct, axi_if)       `__AXI_LITE_TO_R(assign, r_struct, ., axi_if.r, _)
+`define AXI_LITE_ASSIGN_TO_REQ(req_struct, axi_if)   `__AXI_LITE_TO_REQ(assign, req_struct, ., axi_if, _)
+`define AXI_LITE_ASSIGN_TO_RESP(resp_struct, axi_if) `__AXI_LITE_TO_RESP(assign, resp_struct, ., axi_if, _)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Setting channel or request/response structs from another struct inside a process.
+//
+// The channel macros `AXI_LITE_SET_XX_STRUCT(lhs, rhs)` set the fields of the `lhs` channel struct
+// to the fields of the `rhs` channel struct.  They do not set the handshake signals, which are not
+// part of channel structs.
+// The request macro `AXI_LITE_SET_REQ_STRUCT(lhs, rhs)` sets all fields of the `lhs` request struct
+// to the fields of the `rhs` request struct.  This includes all request channel (AW, W, AR) payload
+// and request-side handshake signals (AW, W, and AR valid and B and R ready).
+// The response macro `AXI_LITE_SET_RESP_STRUCT(lhs, rhs)` sets all fields of the `lhs` response
+// struct to the fields of the `rhs` response struct.  This includes all response channel (B and R)
+// payload and response-side handshake signals (B and R valid and AW, W, and AR ready).
+//
+// Usage Example:
+// always_comb begin
+//   `AXI_LITE_SET_REQ_STRUCT(my_req_struct, another_req_struct)
+// end
+`define AXI_LITE_SET_AW_STRUCT(lhs, rhs)     `__AXI_LITE_TO_AX(, lhs, ., rhs, .)
+`define AXI_LITE_SET_W_STRUCT(lhs, rhs)       `__AXI_LITE_TO_W(, lhs, ., rhs, .)
+`define AXI_LITE_SET_B_STRUCT(lhs, rhs)       `__AXI_LITE_TO_B(, lhs, ., rhs, .)
+`define AXI_LITE_SET_AR_STRUCT(lhs, rhs)     `__AXI_LITE_TO_AX(, lhs, ., rhs, .)
+`define AXI_LITE_SET_R_STRUCT(lhs, rhs)       `__AXI_LITE_TO_R(, lhs, ., rhs, .)
+`define AXI_LITE_SET_REQ_STRUCT(lhs, rhs)   `__AXI_LITE_TO_REQ(, lhs, ., rhs, .)
+`define AXI_LITE_SET_RESP_STRUCT(lhs, rhs) `__AXI_LITE_TO_RESP(, lhs, ., rhs, .)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Assigning channel or request/response structs from another struct outside a process.
+//
+// The channel macros `AXI_LITE_ASSIGN_XX_STRUCT(lhs, rhs)` assign the fields of the `lhs` channel
+// struct to the fields of the `rhs` channel struct.  They do not assign the handshake signals,
+// which are not part of the channel structs.
+// The request macro `AXI_LITE_ASSIGN_REQ_STRUCT(lhs, rhs)` assigns all fields of the `lhs` request
+// struct to the fields of the `rhs` request struct.  This includes all request channel (AW, W, AR)
+// payload and request-side handshake signals (AW, W, and AR valid and B and R ready).
+// The response macro `AXI_LITE_ASSIGN_RESP_STRUCT(lhs, rhs)` assigns all fields of the `lhs`
+// response struct to the fields of the `rhs` response struct.  This includes all response channel
+// (B and R) payload and response-side handshake signals (B and R valid and AW, W, and AR ready).
+//
+// Usage Example:
+// `AXI_LITE_ASSIGN_REQ_STRUCT(my_req_struct, another_req_struct)
+`define AXI_LITE_ASSIGN_AW_STRUCT(lhs, rhs)     `__AXI_LITE_TO_AX(assign, lhs, ., rhs, .)
+`define AXI_LITE_ASSIGN_W_STRUCT(lhs, rhs)       `__AXI_LITE_TO_W(assign, lhs, ., rhs, .)
+`define AXI_LITE_ASSIGN_B_STRUCT(lhs, rhs)       `__AXI_LITE_TO_B(assign, lhs, ., rhs, .)
+`define AXI_LITE_ASSIGN_AR_STRUCT(lhs, rhs)     `__AXI_LITE_TO_AX(assign, lhs, ., rhs, .)
+`define AXI_LITE_ASSIGN_R_STRUCT(lhs, rhs)       `__AXI_LITE_TO_R(assign, lhs, ., rhs, .)
+`define AXI_LITE_ASSIGN_REQ_STRUCT(lhs, rhs)   `__AXI_LITE_TO_REQ(assign, lhs, ., rhs, .)
+`define AXI_LITE_ASSIGN_RESP_STRUCT(lhs, rhs) `__AXI_LITE_TO_RESP(assign, lhs, ., rhs, .)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+`endif
diff --git a/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh b/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh
new file mode 100644
index 0000000..a2a860e
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh
@@ -0,0 +1,211 @@
+// Copyright (c) 2019 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+// - Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+
+// Macros to define AXI and AXI-Lite Channel and Request/Response Structs
+
+`ifndef AXI_TYPEDEF_SVH_
+`define AXI_TYPEDEF_SVH_
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// AXI4+ATOP Channel and Request/Response Structs
+//
+// Usage Example:
+// `AXI_TYPEDEF_AW_CHAN_T(axi_aw_t, axi_addr_t, axi_id_t, axi_user_t)
+// `AXI_TYPEDEF_W_CHAN_T(axi_w_t, axi_data_t, axi_strb_t, axi_user_t)
+// `AXI_TYPEDEF_B_CHAN_T(axi_b_t, axi_id_t, axi_user_t)
+// `AXI_TYPEDEF_AR_CHAN_T(axi_ar_t, axi_addr_t, axi_id_t, axi_user_t)
+// `AXI_TYPEDEF_R_CHAN_T(axi_r_t, axi_data_t, axi_id_t, axi_user_t)
+// `AXI_TYPEDEF_REQ_T(axi_req_t, axi_aw_t, axi_w_t, axi_ar_t)
+// `AXI_TYPEDEF_RESP_T(axi_resp_t, axi_b_t, axi_r_t)
+`define AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)  \
+  typedef struct packed {                                       \
+    id_t              id;                                       \
+    addr_t            addr;                                     \
+    axi_pkg::len_t    len;                                      \
+    axi_pkg::size_t   size;                                     \
+    axi_pkg::burst_t  burst;                                    \
+    logic             lock;                                     \
+    axi_pkg::cache_t  cache;                                    \
+    axi_pkg::prot_t   prot;                                     \
+    axi_pkg::qos_t    qos;                                      \
+    axi_pkg::region_t region;                                   \
+    axi_pkg::atop_t   atop;                                     \
+    user_t            user;                                     \
+  } aw_chan_t;
+`define AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)  \
+  typedef struct packed {                                       \
+    data_t data;                                                \
+    strb_t strb;                                                \
+    logic  last;                                                \
+    user_t user;                                                \
+  } w_chan_t;
+`define AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t)  \
+  typedef struct packed {                             \
+    id_t            id;                               \
+    axi_pkg::resp_t resp;                             \
+    user_t          user;                             \
+  } b_chan_t;
+`define AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t)  \
+  typedef struct packed {                                       \
+    id_t              id;                                       \
+    addr_t            addr;                                     \
+    axi_pkg::len_t    len;                                      \
+    axi_pkg::size_t   size;                                     \
+    axi_pkg::burst_t  burst;                                    \
+    logic             lock;                                     \
+    axi_pkg::cache_t  cache;                                    \
+    axi_pkg::prot_t   prot;                                     \
+    axi_pkg::qos_t    qos;                                      \
+    axi_pkg::region_t region;                                   \
+    user_t            user;                                     \
+  } ar_chan_t;
+`define AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t)  \
+  typedef struct packed {                                     \
+    id_t            id;                                       \
+    data_t          data;                                     \
+    axi_pkg::resp_t resp;                                     \
+    logic           last;                                     \
+    user_t          user;                                     \
+  } r_chan_t;
+`define AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)  \
+  typedef struct packed {                                         \
+    aw_chan_t aw;                                                 \
+    logic     aw_valid;                                           \
+    w_chan_t  w;                                                  \
+    logic     w_valid;                                            \
+    logic     b_ready;                                            \
+    ar_chan_t ar;                                                 \
+    logic     ar_valid;                                           \
+    logic     r_ready;                                            \
+  } req_t;
+`define AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)  \
+  typedef struct packed {                               \
+    logic     aw_ready;                                 \
+    logic     ar_ready;                                 \
+    logic     w_ready;                                  \
+    logic     b_valid;                                  \
+    b_chan_t  b;                                        \
+    logic     r_valid;                                  \
+    r_chan_t  r;                                        \
+  } resp_t;
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// All AXI4+ATOP Channels and Request/Response Structs in One Macro
+//
+// This can be used whenever the user is not interested in "precise" control of the naming of the
+// individual channels.
+//
+// Usage Example:
+// `AXI_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t)
+//
+// This defines `axi_req_t` and `axi_resp_t` request/response structs as well as `axi_aw_chan_t`,
+// `axi_w_chan_t`, `axi_b_chan_t`, `axi_ar_chan_t`, and `axi_r_chan_t` channel structs.
+`define AXI_TYPEDEF_ALL(__name, __addr_t, __id_t, __data_t, __strb_t, __user_t)                 \
+  `AXI_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t, __id_t, __user_t)                        \
+  `AXI_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t, __user_t)                        \
+  `AXI_TYPEDEF_B_CHAN_T(__name``_b_chan_t, __id_t, __user_t)                                    \
+  `AXI_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t, __id_t, __user_t)                        \
+  `AXI_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t, __id_t, __user_t)                          \
+  `AXI_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \
+  `AXI_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// AXI4-Lite Channel and Request/Response Structs
+//
+// Usage Example:
+// `AXI_LITE_TYPEDEF_AW_CHAN_T(axi_lite_aw_t, axi_lite_addr_t)
+// `AXI_LITE_TYPEDEF_W_CHAN_T(axi_lite_w_t, axi_lite_data_t, axi_lite_strb_t)
+// `AXI_LITE_TYPEDEF_B_CHAN_T(axi_lite_b_t)
+// `AXI_LITE_TYPEDEF_AR_CHAN_T(axi_lite_ar_t, axi_lite_addr_t)
+// `AXI_LITE_TYPEDEF_R_CHAN_T(axi_lite_r_t, axi_lite_data_t)
+// `AXI_LITE_TYPEDEF_REQ_T(axi_lite_req_t, axi_lite_aw_t, axi_lite_w_t, axi_lite_ar_t)
+// `AXI_LITE_TYPEDEF_RESP_T(axi_lite_resp_t, axi_lite_b_t, axi_lite_r_t)
+`define AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_lite_t, addr_t)  \
+  typedef struct packed {                                   \
+    addr_t          addr;                                   \
+    axi_pkg::prot_t prot;                                   \
+  } aw_chan_lite_t;
+`define AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_lite_t, data_t, strb_t)  \
+  typedef struct packed {                                         \
+    data_t   data;                                                \
+    strb_t   strb;                                                \
+  } w_chan_lite_t;
+`define AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_lite_t)  \
+  typedef struct packed {                         \
+    axi_pkg::resp_t resp;                         \
+  } b_chan_lite_t;
+`define AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_lite_t, addr_t)  \
+  typedef struct packed {                                   \
+    addr_t          addr;                                   \
+    axi_pkg::prot_t prot;                                   \
+  } ar_chan_lite_t;
+`define AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_lite_t, data_t)  \
+  typedef struct packed {                                 \
+    data_t          data;                                 \
+    axi_pkg::resp_t resp;                                 \
+  } r_chan_lite_t;
+`define AXI_LITE_TYPEDEF_REQ_T(req_lite_t, aw_chan_lite_t, w_chan_lite_t, ar_chan_lite_t)  \
+  typedef struct packed {                                                                  \
+    aw_chan_lite_t aw;                                                                     \
+    logic          aw_valid;                                                               \
+    w_chan_lite_t  w;                                                                      \
+    logic          w_valid;                                                                \
+    logic          b_ready;                                                                \
+    ar_chan_lite_t ar;                                                                     \
+    logic          ar_valid;                                                               \
+    logic          r_ready;                                                                \
+  } req_lite_t;
+`define AXI_LITE_TYPEDEF_RESP_T(resp_lite_t, b_chan_lite_t, r_chan_lite_t)  \
+  typedef struct packed {                                                   \
+    logic          aw_ready;                                                \
+    logic          w_ready;                                                 \
+    b_chan_lite_t  b;                                                       \
+    logic          b_valid;                                                 \
+    logic          ar_ready;                                                \
+    r_chan_lite_t  r;                                                       \
+    logic          r_valid;                                                 \
+  } resp_lite_t;
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// All AXI4-Lite Channels and Request/Response Structs in One Macro
+//
+// This can be used whenever the user is not interested in "precise" control of the naming of the
+// individual channels.
+//
+// Usage Example:
+// `AXI_LITE_TYPEDEF_ALL(axi_lite, addr_t, data_t, strb_t)
+//
+// This defines `axi_lite_req_t` and `axi_lite_resp_t` request/response structs as well as
+// `axi_lite_aw_chan_t`, `axi_lite_w_chan_t`, `axi_lite_b_chan_t`, `axi_lite_ar_chan_t`, and
+// `axi_lite_r_chan_t` channel structs.
+`define AXI_LITE_TYPEDEF_ALL(__name, __addr_t, __data_t, __strb_t)                                    \
+  `AXI_LITE_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t)                                           \
+  `AXI_LITE_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t)                                   \
+  `AXI_LITE_TYPEDEF_B_CHAN_T(__name``_b_chan_t)                                                       \
+  `AXI_LITE_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t)                                           \
+  `AXI_LITE_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t)                                             \
+  `AXI_LITE_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t)  \
+  `AXI_LITE_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+`endif
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv
new file mode 100644
index 0000000..4a2ecff
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv
@@ -0,0 +1,444 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+
+/// Filter atomic operations (ATOPs) in a protocol-compliant manner.
+///
+/// This module filters atomic operations (ATOPs), i.e., write transactions that have a non-zero
+/// `aw_atop` value, from its `slv` to its `mst` port. This module guarantees that:
+///
+/// 1) `aw_atop` is always zero on the `mst` port;
+///
+/// 2) write transactions with non-zero `aw_atop` on the `slv` port are handled in conformance with
+///    the AXI standard by replying to such write transactions with the proper B and R responses.
+///    The response code on atomic operations that reach this module is always SLVERR
+///    (implementation-specific, not defined in the AXI standard).
+///
+/// ## Intended usage
+/// This module is intended to be placed between masters that may issue ATOPs and slaves that do not
+/// support ATOPs. That way, this module ensures that the AXI protocol remains in a defined state on
+/// systems with mixed ATOP capabilities.
+///
+/// ## Specification reminder
+/// The AXI standard specifies that there may be no ordering requirements between different atomic
+/// bursts (i.e., a burst started by an AW with ATOP other than 0) and none between atomic bursts
+/// and non-atomic bursts [E2.1.4]. That is, **an atomic burst may never have the same ID as any
+/// other write or read burst that is in-flight at the same time**.
+module axi_atop_filter #(
+  /// AXI ID width
+  parameter int unsigned AxiIdWidth = 0,
+  /// Maximum number of in-flight AXI write transactions
+  parameter int unsigned AxiMaxWriteTxns = 0,
+  /// AXI request type
+  parameter type req_t  = logic,
+  /// AXI response type
+  parameter type resp_t = logic
+) (
+  /// Rising-edge clock of both ports
+  input  logic  clk_i,
+  /// Asynchronous reset, active low
+  input  logic  rst_ni,
+  /// Slave port request
+  input  req_t  slv_req_i,
+  /// Slave port response
+  output resp_t slv_resp_o,
+  /// Master port request
+  output req_t  mst_req_o,
+  /// Master port response
+  input  resp_t mst_resp_i
+);
+
+  // Minimum counter width is 2 to detect underflows.
+  localparam int unsigned COUNTER_WIDTH = (AxiMaxWriteTxns == 1) ? 2 : $clog2(AxiMaxWriteTxns+1);
+  typedef struct packed {
+    logic                     underflow;
+    logic [COUNTER_WIDTH-1:0] cnt;
+  } cnt_t;
+  cnt_t   w_cnt_d, w_cnt_q; // AWs minus completed W bursts handshaked on the master port
+
+  typedef enum logic [2:0] {
+    W_FEEDTHROUGH, BLOCK_AW, ABSORB_W, HOLD_B, INJECT_B, WAIT_R
+  } w_state_e;
+  w_state_e   w_state_d, w_state_q; // state of the FSM managing the AW, W, and B channels
+
+  typedef enum logic [1:0] { R_FEEDTHROUGH, INJECT_R, R_HOLD } r_state_e;
+  r_state_e   r_state_d, r_state_q; // state of the FSM managing the R channel
+
+  typedef logic [AxiIdWidth-1:0] id_t;
+  id_t  id_d, id_q; // ID of the absorbed ATOP, reused for the injected B and R responses
+
+  typedef logic [7:0] len_t;
+  len_t   r_beats_d,  r_beats_q; // beats still to inject on R; zero marks the last beat
+
+  typedef struct packed {
+    len_t len;
+  } r_resp_cmd_t;
+  r_resp_cmd_t  r_resp_cmd_push, r_resp_cmd_pop; // input / output of the R response command register
+
+  logic aw_without_complete_w_downstream,
+        complete_w_without_aw_downstream,
+        r_resp_cmd_push_valid,  r_resp_cmd_push_ready,
+        r_resp_cmd_pop_valid,   r_resp_cmd_pop_ready;
+
+  // An AW without a complete W burst is in-flight downstream if the W counter is > 0 and not
+  // underflowed.
+  assign aw_without_complete_w_downstream = !w_cnt_q.underflow && (w_cnt_q.cnt > 0);
+  // A complete W burst without AW is in-flight downstream if the W counter is -1.
+  assign complete_w_without_aw_downstream = w_cnt_q.underflow && &(w_cnt_q.cnt);
+
+  // Manage AW, W, and B channels.
+  always_comb begin
+    // Defaults:
+    // Disable AW and W handshakes.
+    mst_req_o.aw_valid  = 1'b0;
+    slv_resp_o.aw_ready = 1'b0;
+    mst_req_o.w_valid   = 1'b0;
+    slv_resp_o.w_ready  = 1'b0;
+    // Feed write responses through.
+    mst_req_o.b_ready   = slv_req_i.b_ready;
+    slv_resp_o.b_valid  = mst_resp_i.b_valid;
+    slv_resp_o.b        = mst_resp_i.b;
+    // Keep ID stored for B and R response.
+    id_d = id_q;
+    // Do not push R response commands.
+    r_resp_cmd_push_valid = 1'b0;
+    // Keep the current state.
+    w_state_d = w_state_q;
+
+    unique case (w_state_q)
+      W_FEEDTHROUGH: begin
+        // Feed AW channel through if the maximum number of outstanding bursts is not reached.
+        if (complete_w_without_aw_downstream || (w_cnt_q.cnt < AxiMaxWriteTxns)) begin
+          mst_req_o.aw_valid  = slv_req_i.aw_valid;
+          slv_resp_o.aw_ready = mst_resp_i.aw_ready;
+        end
+        // Feed W channel through if ..
+        if (aw_without_complete_w_downstream // .. downstream is missing W bursts ..
+            // .. or a new non-ATOP AW is being applied and there is not already a complete W burst
+            // downstream (to prevent underflows of w_cnt).
+            || ((slv_req_i.aw_valid && slv_req_i.aw.atop[5:4] == axi_pkg::ATOP_NONE)
+                && !complete_w_without_aw_downstream)
+        ) begin
+          mst_req_o.w_valid  = slv_req_i.w_valid;
+          slv_resp_o.w_ready = mst_resp_i.w_ready;
+        end
+        // Filter out AWs that are atomic operations.
+        if (slv_req_i.aw_valid && slv_req_i.aw.atop[5:4] != axi_pkg::ATOP_NONE) begin
+          mst_req_o.aw_valid  = 1'b0; // Do not let AW pass to master port.
+          slv_resp_o.aw_ready = 1'b1; // Absorb AW on slave port.
+          id_d = slv_req_i.aw.id; // Store ID for B response.
+          // All atomic operations except atomic stores require a response on the R channel.
+          if (slv_req_i.aw.atop[5:4] != axi_pkg::ATOP_ATOMICSTORE) begin
+            // Push R response command.  We do not have to wait for the ready of the register
+            // because we know it is ready: we are its only master and will wait for the register to
+            // be emptied before going back to the `W_FEEDTHROUGH` state.
+            r_resp_cmd_push_valid = 1'b1;
+          end
+          // If downstream is missing W beats, block the AW channel and let the W bursts complete.
+          if (aw_without_complete_w_downstream) begin
+            w_state_d = BLOCK_AW;
+          // If downstream is not missing W beats, absorb the W beats for this atomic AW.
+          end else begin
+            mst_req_o.w_valid  = 1'b0; // Do not let W beats pass to master port.
+            slv_resp_o.w_ready = 1'b1; // Absorb W beats on slave port.
+            if (slv_req_i.w_valid && slv_req_i.w.last) begin
+              // If the W beat is valid and the last, proceed by injecting the B response.
+              // However, if there is a non-handshaked B on our response port, we must let that
+              // complete first.
+              if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin
+                w_state_d = HOLD_B;
+              end else begin
+                w_state_d = INJECT_B;
+              end
+            end else begin
+              // Otherwise continue with absorbing W beats.
+              w_state_d = ABSORB_W;
+            end
+          end
+        end
+      end
+
+      BLOCK_AW: begin
+        // Feed W channel through to let outstanding bursts complete.
+        if (aw_without_complete_w_downstream) begin
+          mst_req_o.w_valid  = slv_req_i.w_valid;
+          slv_resp_o.w_ready = mst_resp_i.w_ready;
+        end else begin
+          // If there are no more outstanding W bursts, start absorbing the next W burst.
+          slv_resp_o.w_ready = 1'b1;
+          if (slv_req_i.w_valid && slv_req_i.w.last) begin
+            // If the W beat is valid and the last, proceed by injecting the B response.
+            if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin
+              w_state_d = HOLD_B;
+            end else begin
+              w_state_d = INJECT_B;
+            end
+          end else begin
+            // Otherwise continue with absorbing W beats.
+            w_state_d = ABSORB_W;
+          end
+        end
+      end
+
+      ABSORB_W: begin
+        // Absorb all W beats of the current burst.
+        slv_resp_o.w_ready = 1'b1;
+        if (slv_req_i.w_valid && slv_req_i.w.last) begin
+          if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin
+            w_state_d = HOLD_B;
+          end else begin
+            w_state_d = INJECT_B;
+          end
+        end
+      end
+
+      HOLD_B: begin
+        // Let the pending forwarded B complete its handshake, then inject our own B response.
+        if (slv_resp_o.b_valid && slv_req_i.b_ready) begin
+          w_state_d = INJECT_B;
+        end
+      end
+
+      INJECT_B: begin
+        // Pause forwarding of B response.
+        mst_req_o.b_ready = 1'b0;
+        // Inject error response instead.  Since the B channel has an ID and the atomic burst we are
+        // replying to is guaranteed to be the only burst with this ID in flight, we do not have to
+        // observe any ordering and can immediately inject on the B channel.
+        slv_resp_o.b = '0;
+        slv_resp_o.b.id = id_q;
+        slv_resp_o.b.resp = axi_pkg::RESP_SLVERR;
+        slv_resp_o.b_valid = 1'b1;
+        if (slv_req_i.b_ready) begin
+          // If not all beats of the R response have been injected, wait for them. Otherwise, return
+          // to `W_FEEDTHROUGH`.
+          if (r_resp_cmd_pop_valid && !r_resp_cmd_pop_ready) begin
+            w_state_d = WAIT_R;
+          end else begin
+            w_state_d = W_FEEDTHROUGH;
+          end
+        end
+      end
+
+      WAIT_R: begin
+        // Wait with returning to `W_FEEDTHROUGH` until all beats of the R response have been
+        // injected.
+        if (!r_resp_cmd_pop_valid) begin
+          w_state_d = W_FEEDTHROUGH;
+        end
+      end
+
+      default: w_state_d = W_FEEDTHROUGH;
+    endcase
+  end
+  // Connect signals on AW and W channel that are not managed by the control FSM from slave port to
+  // master port.
+  // Feed-through of the AW and W vectors, make sure that downstream aw.atop is always zero
+  always_comb begin
+    // overwrite the atop signal
+    mst_req_o.aw      = slv_req_i.aw;
+    mst_req_o.aw.atop = '0;
+  end
+  assign mst_req_o.w = slv_req_i.w;
+
+  // Manage R channel.
+  always_comb begin
+    // Defaults:
+    // Feed read responses through.
+    slv_resp_o.r       = mst_resp_i.r;
+    slv_resp_o.r_valid = mst_resp_i.r_valid;
+    mst_req_o.r_ready  = slv_req_i.r_ready;
+    // Do not pop R response command.
+    r_resp_cmd_pop_ready = 1'b0;
+    // Keep the current value of the beats counter.
+    r_beats_d = r_beats_q;
+    // Keep the current state.
+    r_state_d = r_state_q;
+
+    unique case (r_state_q)
+      R_FEEDTHROUGH: begin
+        if (mst_resp_i.r_valid && !slv_req_i.r_ready) begin
+          r_state_d = R_HOLD;
+        end else if (r_resp_cmd_pop_valid) begin
+          // Upon a command to inject an R response, immediately proceed with doing so because there
+          // are no ordering requirements with other bursts that may be ongoing on the R channel at
+          // this moment.
+          r_beats_d = r_resp_cmd_pop.len;
+          r_state_d = INJECT_R;
+        end
+      end
+
+      INJECT_R: begin
+        mst_req_o.r_ready  = 1'b0;
+        slv_resp_o.r       = '0;
+        slv_resp_o.r.id    = id_q;
+        slv_resp_o.r.resp  = axi_pkg::RESP_SLVERR;
+        slv_resp_o.r.last  = (r_beats_q == '0);
+        slv_resp_o.r_valid = 1'b1;
+        if (slv_req_i.r_ready) begin
+          if (slv_resp_o.r.last) begin
+            r_resp_cmd_pop_ready = 1'b1;
+            r_state_d = R_FEEDTHROUGH;
+          end else begin
+            r_beats_d -= 1;
+          end
+        end
+      end
+
+      R_HOLD: begin // a forwarded R beat was stalled; stay here until it is handshaked
+        if (mst_resp_i.r_valid && slv_req_i.r_ready) begin
+          r_state_d = R_FEEDTHROUGH;
+        end
+      end
+
+      default: r_state_d = R_FEEDTHROUGH;
+    endcase
+  end
+  // Feed all signals on AR through.
+  assign mst_req_o.ar        = slv_req_i.ar;
+  assign mst_req_o.ar_valid  = slv_req_i.ar_valid;
+  assign slv_resp_o.ar_ready = mst_resp_i.ar_ready;
+
+  // Keep track of outstanding downstream write bursts and responses.
+  always_comb begin
+    w_cnt_d = w_cnt_q;
+    if (mst_req_o.aw_valid && mst_resp_i.aw_ready) begin
+      w_cnt_d.cnt += 1;
+    end
+    if (mst_req_o.w_valid && mst_resp_i.w_ready && mst_req_o.w.last) begin
+      w_cnt_d.cnt -= 1;
+    end
+    if (w_cnt_q.underflow && (w_cnt_d.cnt == '0)) begin // counter returns from -1 to 0
+      w_cnt_d.underflow = 1'b0;
+    end else if (w_cnt_q.cnt == '0 && &(w_cnt_d.cnt)) begin // counter wraps from 0 to all ones
+      w_cnt_d.underflow = 1'b1;
+    end
+  end
+
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (!rst_ni) begin
+      id_q <= '0;
+      r_beats_q <= '0;
+      r_state_q <= R_FEEDTHROUGH;
+      w_cnt_q <= '{default: '0};
+      w_state_q <= W_FEEDTHROUGH;
+    end else begin
+      id_q <= id_d;
+      r_beats_q <= r_beats_d;
+      r_state_q <= r_state_d;
+      w_cnt_q <= w_cnt_d;
+      w_state_q <= w_state_d;
+    end
+  end
+
+  stream_register #(
+    .T(r_resp_cmd_t)
+  ) r_resp_cmd (
+    .clk_i      (clk_i),
+    .rst_ni     (rst_ni),
+    .clr_i      (1'b0),
+    .testmode_i (1'b0),
+    .valid_i    (r_resp_cmd_push_valid),
+    .ready_o    (r_resp_cmd_push_ready),
+    .data_i     (r_resp_cmd_push),
+    .valid_o    (r_resp_cmd_pop_valid),
+    .ready_i    (r_resp_cmd_pop_ready),
+    .data_o     (r_resp_cmd_pop)
+  );
+  assign r_resp_cmd_push.len = slv_req_i.aw.len; // injected R burst takes its length from the AW
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: p_assertions
+    assert (AxiIdWidth >= 1) else $fatal(1, "AXI ID width must be at least 1!");
+    assert (AxiMaxWriteTxns >= 1)
+      else $fatal(1, "Maximum number of outstanding write transactions must be at least 1!");
+  end
+`endif
+// pragma translate_on
+endmodule
+
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+
+/// Interface variant of [`axi_atop_filter`](module.axi_atop_filter).
+module axi_atop_filter_intf #(
+  /// AXI ID width
+  parameter int unsigned AXI_ID_WIDTH   = 0,
+  /// AXI address width
+  parameter int unsigned AXI_ADDR_WIDTH = 0,
+  /// AXI data width
+  parameter int unsigned AXI_DATA_WIDTH = 0,
+  /// AXI user signal width
+  parameter int unsigned AXI_USER_WIDTH = 0,
+  /// Maximum number of in-flight AXI write transactions
+  parameter int unsigned AXI_MAX_WRITE_TXNS = 0
+) (
+  /// Rising-edge clock of both ports
+  input  logic    clk_i,
+  /// Asynchronous reset, active low
+  input  logic    rst_ni,
+  /// Slave interface port
+  AXI_BUS.Slave   slv,
+  /// Master interface port
+  AXI_BUS.Master  mst
+);
+
+  typedef logic [AXI_ID_WIDTH-1:0]     id_t;
+  typedef logic [AXI_ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [AXI_DATA_WIDTH-1:0]   data_t;
+  typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t;
+  typedef logic [AXI_USER_WIDTH-1:0]   user_t;
+
+  `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)
+
+  req_t  slv_req,  mst_req;  // request structs bridging the slv / mst interface ports
+  resp_t slv_resp, mst_resp; // response structs bridging the slv / mst interface ports
+
+  `AXI_ASSIGN_TO_REQ(slv_req, slv)
+  `AXI_ASSIGN_FROM_RESP(slv, slv_resp)
+
+  `AXI_ASSIGN_FROM_REQ(mst, mst_req)
+  `AXI_ASSIGN_TO_RESP(mst_resp, mst)
+
+  axi_atop_filter #(
+    .AxiIdWidth      ( AXI_ID_WIDTH       ),
+  // Maximum number of AXI write bursts outstanding at the same time
+    .AxiMaxWriteTxns ( AXI_MAX_WRITE_TXNS ),
+  // AXI request & response struct types declared by the typedef macros in this module
+    .req_t           ( req_t              ),
+    .resp_t          ( resp_t             )
+  ) i_axi_atop_filter (
+    .clk_i,
+    .rst_ni,
+    .slv_req_i  ( slv_req  ),
+    .slv_resp_o ( slv_resp ),
+    .mst_req_o  ( mst_req  ),
+    .mst_resp_i ( mst_resp )
+  );
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: p_assertions
+    assert (AXI_ADDR_WIDTH >= 1) else $fatal(1, "AXI ADDR width must be at least 1!");
+    assert (AXI_DATA_WIDTH >= 1) else $fatal(1, "AXI DATA width must be at least 1!");
+    assert (AXI_USER_WIDTH >= 1) else $fatal(1, "AXI USER width must be at least 1!");
+  end
+`endif
+// pragma translate_on
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv
new file mode 100644
index 0000000..6c31321
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv
@@ -0,0 +1,265 @@
+// Copyright (c) 2014-2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+/// An AXI4 cut.
+///
+/// Breaks all combinatorial paths between its input and output.
+module axi_cut #(
+  // bypass enable
+  parameter bit  Bypass    = 1'b0,
+  // AXI channel structs
+  parameter type aw_chan_t = logic,
+  parameter type  w_chan_t = logic,
+  parameter type  b_chan_t = logic,
+  parameter type ar_chan_t = logic,
+  parameter type  r_chan_t = logic,
+  // AXI request & response structs
+  parameter type     req_t = logic,
+  parameter type    resp_t = logic
+) (
+  input logic   clk_i,
+  input logic   rst_ni,
+  // slave port
+  input  req_t  slv_req_i,
+  output resp_t slv_resp_o,
+  // master port
+  output req_t  mst_req_o,
+  input  resp_t mst_resp_i
+);
+
+  // One spill register per channel: AW, W, AR go slave-to-master; B, R go master-to-slave.
+  spill_register #(
+    .T       ( aw_chan_t ),
+    .Bypass  ( Bypass    )
+  ) i_reg_aw (
+    .clk_i   ( clk_i               ),
+    .rst_ni  ( rst_ni              ),
+    .valid_i ( slv_req_i.aw_valid  ),
+    .ready_o ( slv_resp_o.aw_ready ),
+    .data_i  ( slv_req_i.aw        ),
+    .valid_o ( mst_req_o.aw_valid  ),
+    .ready_i ( mst_resp_i.aw_ready ),
+    .data_o  ( mst_req_o.aw        )
+  );
+
+  spill_register #(
+    .T       ( w_chan_t ),
+    .Bypass  ( Bypass   )
+  ) i_reg_w  (
+    .clk_i   ( clk_i              ),
+    .rst_ni  ( rst_ni             ),
+    .valid_i ( slv_req_i.w_valid  ),
+    .ready_o ( slv_resp_o.w_ready ),
+    .data_i  ( slv_req_i.w        ),
+    .valid_o ( mst_req_o.w_valid  ),
+    .ready_i ( mst_resp_i.w_ready ),
+    .data_o  ( mst_req_o.w        )
+  );
+
+  spill_register #(
+    .T       ( b_chan_t ),
+    .Bypass  ( Bypass   )
+  ) i_reg_b  (
+    .clk_i   ( clk_i              ),
+    .rst_ni  ( rst_ni             ),
+    .valid_i ( mst_resp_i.b_valid ),
+    .ready_o ( mst_req_o.b_ready  ),
+    .data_i  ( mst_resp_i.b       ),
+    .valid_o ( slv_resp_o.b_valid ),
+    .ready_i ( slv_req_i.b_ready  ),
+    .data_o  ( slv_resp_o.b       )
+  );
+
+  spill_register #(
+    .T       ( ar_chan_t ),
+    .Bypass  ( Bypass    )
+  ) i_reg_ar (
+    .clk_i   ( clk_i               ),
+    .rst_ni  ( rst_ni              ),
+    .valid_i ( slv_req_i.ar_valid  ),
+    .ready_o ( slv_resp_o.ar_ready ),
+    .data_i  ( slv_req_i.ar        ),
+    .valid_o ( mst_req_o.ar_valid  ),
+    .ready_i ( mst_resp_i.ar_ready ),
+    .data_o  ( mst_req_o.ar        )
+  );
+
+  spill_register #(
+    .T       ( r_chan_t ),
+    .Bypass  ( Bypass   )
+  ) i_reg_r  (
+    .clk_i   ( clk_i              ),
+    .rst_ni  ( rst_ni             ),
+    .valid_i ( mst_resp_i.r_valid ),
+    .ready_o ( mst_req_o.r_ready  ),
+    .data_i  ( mst_resp_i.r       ),
+    .valid_o ( slv_resp_o.r_valid ),
+    .ready_i ( slv_req_i.r_ready  ),
+    .data_o  ( slv_resp_o.r       )
+  );
+endmodule
+
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+
+// interface wrapper
+module axi_cut_intf #(
+  // Bypass enable
+  parameter bit          BYPASS     = 1'b0,
+  // The address width.
+  parameter int unsigned ADDR_WIDTH = 0,
+  // The data width.
+  parameter int unsigned DATA_WIDTH = 0,
+  // The ID width.
+  parameter int unsigned ID_WIDTH   = 0,
+  // The user data width.
+  parameter int unsigned USER_WIDTH = 0
+) (
+  input logic     clk_i  ,
+  input logic     rst_ni ,
+  AXI_BUS.Slave   in     ,
+  AXI_BUS.Master  out
+);
+
+  typedef logic [ID_WIDTH-1:0]     id_t;
+  typedef logic [ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [DATA_WIDTH-1:0]   data_t;
+  typedef logic [DATA_WIDTH/8-1:0] strb_t;
+  typedef logic [USER_WIDTH-1:0]   user_t;
+
+  `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)
+
+  req_t  slv_req,  mst_req;  // request structs bridging the in / out interface ports
+  resp_t slv_resp, mst_resp; // response structs bridging the in / out interface ports
+
+  `AXI_ASSIGN_TO_REQ(slv_req, in)
+  `AXI_ASSIGN_FROM_RESP(in, slv_resp)
+
+  `AXI_ASSIGN_FROM_REQ(out, mst_req)
+  `AXI_ASSIGN_TO_RESP(mst_resp, out)
+
+  axi_cut #(
+    .Bypass    (    BYPASS ),
+    .aw_chan_t ( aw_chan_t ),
+    .w_chan_t  (  w_chan_t ),
+    .b_chan_t  (  b_chan_t ),
+    .ar_chan_t ( ar_chan_t ),
+    .r_chan_t  (  r_chan_t ),
+    .req_t     (     req_t ),
+    .resp_t    (    resp_t )
+  ) i_axi_cut (
+    .clk_i,
+    .rst_ni,
+    .slv_req_i  ( slv_req  ),
+    .slv_resp_o ( slv_resp ),
+    .mst_req_o  ( mst_req  ),
+    .mst_resp_i ( mst_resp )
+  );
+
+  // Check that all widths are non-zero and equal to the widths of both attached interfaces.
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial begin
+    assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter");
+    assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter");
+    assert (ID_WIDTH   > 0) else $fatal(1, "Wrong id   width parameter");
+    assert (USER_WIDTH > 0) else $fatal(1, "Wrong user width parameter");
+    assert (in.AXI_ADDR_WIDTH  == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (in.AXI_DATA_WIDTH  == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (in.AXI_ID_WIDTH    == ID_WIDTH)   else $fatal(1, "Wrong interface definition");
+    assert (in.AXI_USER_WIDTH  == USER_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_ID_WIDTH   == ID_WIDTH)   else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition");
+  end
+  `endif
+  // pragma translate_on
+endmodule
+
+module axi_lite_cut_intf #(
+  // bypass enable
+  parameter bit          BYPASS     = 1'b0,
+  /// The address width.
+  parameter int unsigned ADDR_WIDTH = 0,
+  /// The data width.
+  parameter int unsigned DATA_WIDTH = 0
+) (
+  input logic     clk_i  ,
+  input logic     rst_ni ,
+  AXI_LITE.Slave  in     ,
+  AXI_LITE.Master out
+);
+
+  typedef logic [ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [DATA_WIDTH-1:0]   data_t;
+  typedef logic [DATA_WIDTH/8-1:0] strb_t;
+
+  `AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t)
+  `AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t)
+  `AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_t)
+  `AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t)
+  `AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_t, data_t)
+  `AXI_LITE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
+  `AXI_LITE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)
+
+  req_t   slv_req,  mst_req;  // request structs bridging the in / out interface ports
+  resp_t  slv_resp, mst_resp; // response structs bridging the in / out interface ports
+
+  `AXI_LITE_ASSIGN_TO_REQ(slv_req, in)
+  `AXI_LITE_ASSIGN_FROM_RESP(in, slv_resp)
+
+  `AXI_LITE_ASSIGN_FROM_REQ(out, mst_req)
+  `AXI_LITE_ASSIGN_TO_RESP(mst_resp, out)
+
+  axi_cut #( // the struct-based axi_cut is instantiated with the AXI4-Lite channel structs
+    .Bypass    (    BYPASS ),
+    .aw_chan_t ( aw_chan_t ),
+    .w_chan_t  (  w_chan_t ),
+    .b_chan_t  (  b_chan_t ),
+    .ar_chan_t ( ar_chan_t ),
+    .r_chan_t  (  r_chan_t ),
+    .req_t     (     req_t ),
+    .resp_t    (    resp_t )
+  ) i_axi_cut (
+    .clk_i,
+    .rst_ni,
+    .slv_req_i  ( slv_req  ),
+    .slv_resp_o ( slv_resp ),
+    .mst_req_o  ( mst_req  ),
+    .mst_resp_i ( mst_resp )
+  );
+
+  // Check that both widths are non-zero and equal to the widths of both attached interfaces.
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial begin
+    assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter");
+    assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter");
+    assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH)  else $fatal(1, "Wrong interface definition");
+    assert (in.AXI_DATA_WIDTH == DATA_WIDTH)  else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
+  end
+  `endif
+  // pragma translate_on
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv
new file mode 100644
index 0000000..cab18eb
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv
@@ -0,0 +1,198 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+/// Synthesizable module that (randomly) delays AXI channels.
+module axi_delayer #(
+  // AXI channel types
+  parameter type aw_chan_t = logic,
+  parameter type  w_chan_t = logic,
+  parameter type  b_chan_t = logic,
+  parameter type ar_chan_t = logic,
+  parameter type  r_chan_t = logic,
+  // AXI request & response types
+  parameter type     req_t = logic,
+  parameter type    resp_t = logic,
+  // delay parameters
+  parameter bit          StallRandomInput  = 0,
+  parameter bit          StallRandomOutput = 0,
+  parameter int unsigned FixedDelayInput   = 1,
+  parameter int unsigned FixedDelayOutput  = 1
+) (
+  input  logic  clk_i,      // Clock
+  input  logic  rst_ni,     // Asynchronous reset active low
+  // slave port
+  input  req_t  slv_req_i,
+  output resp_t slv_resp_o,
+  // master port
+  output req_t  mst_req_o,
+  input  resp_t mst_resp_i
+);
+  // AW: slave-to-master channel, configured by StallRandomInput / FixedDelayInput
+  stream_delay #(
+    .StallRandom ( StallRandomInput ),
+    .FixedDelay  ( FixedDelayInput  ),
+    .payload_t   ( aw_chan_t        )
+  ) i_stream_delay_aw (
+    .clk_i,
+    .rst_ni,
+    .payload_i ( slv_req_i.aw        ),
+    .ready_o   ( slv_resp_o.aw_ready ),
+    .valid_i   ( slv_req_i.aw_valid  ),
+    .payload_o ( mst_req_o.aw        ),
+    .ready_i   ( mst_resp_i.aw_ready ),
+    .valid_o   ( mst_req_o.aw_valid  )
+  );
+
+  // AR: slave-to-master channel, configured by StallRandomInput / FixedDelayInput
+  stream_delay #(
+    .StallRandom ( StallRandomInput ),
+    .FixedDelay  ( FixedDelayInput  ),
+    .payload_t   ( ar_chan_t        )
+  ) i_stream_delay_ar (
+    .clk_i,
+    .rst_ni,
+    .payload_i ( slv_req_i.ar        ),
+    .ready_o   ( slv_resp_o.ar_ready ),
+    .valid_i   ( slv_req_i.ar_valid  ),
+    .payload_o ( mst_req_o.ar        ),
+    .ready_i   ( mst_resp_i.ar_ready ),
+    .valid_o   ( mst_req_o.ar_valid  )
+  );
+
+  // W: slave-to-master channel, configured by StallRandomInput / FixedDelayInput
+  stream_delay #(
+    .StallRandom ( StallRandomInput ),
+    .FixedDelay  ( FixedDelayInput  ),
+    .payload_t   ( w_chan_t         )
+  ) i_stream_delay_w (
+    .clk_i,
+    .rst_ni,
+    .payload_i ( slv_req_i.w        ),
+    .ready_o   ( slv_resp_o.w_ready ),
+    .valid_i   ( slv_req_i.w_valid  ),
+    .payload_o ( mst_req_o.w        ),
+    .ready_i   ( mst_resp_i.w_ready ),
+    .valid_o   ( mst_req_o.w_valid  )
+  );
+
+  // B: master-to-slave channel, configured by StallRandomOutput / FixedDelayOutput
+  stream_delay #(
+    .StallRandom ( StallRandomOutput ),
+    .FixedDelay  ( FixedDelayOutput  ),
+    .payload_t   ( b_chan_t          )
+  ) i_stream_delay_b (
+    .clk_i,
+    .rst_ni,
+    .payload_i ( mst_resp_i.b       ),
+    .ready_o   ( mst_req_o.b_ready  ),
+    .valid_i   ( mst_resp_i.b_valid ),
+    .payload_o ( slv_resp_o.b       ),
+    .ready_i   ( slv_req_i.b_ready  ),
+    .valid_o   ( slv_resp_o.b_valid )
+  );
+
+  // R: master-to-slave channel, configured by StallRandomOutput / FixedDelayOutput
+   stream_delay #(
+    .StallRandom ( StallRandomOutput ),
+    .FixedDelay  ( FixedDelayOutput  ),
+    .payload_t   ( r_chan_t          )
+  ) i_stream_delay_r (
+    .clk_i,
+    .rst_ni,
+    .payload_i ( mst_resp_i.r       ),
+    .ready_o   ( mst_req_o.r_ready  ),
+    .valid_i   ( mst_resp_i.r_valid ),
+    .payload_o ( slv_resp_o.r       ),
+    .ready_i   ( slv_req_i.r_ready  ),
+    .valid_o   ( slv_resp_o.r_valid )
+  );
+endmodule
+
+`include "axi/typedef.svh"
+`include "axi/assign.svh"
+
+// interface wrapper
+module axi_delayer_intf #(
+  // Synopsys DC requires a default value for parameters.
+  parameter int unsigned AXI_ID_WIDTH        = 0,
+  parameter int unsigned AXI_ADDR_WIDTH      = 0,
+  parameter int unsigned AXI_DATA_WIDTH      = 0,
+  parameter int unsigned AXI_USER_WIDTH      = 0,
+  parameter bit          STALL_RANDOM_INPUT  = 0,
+  parameter bit          STALL_RANDOM_OUTPUT = 0,
+  parameter int unsigned FIXED_DELAY_INPUT   = 1,
+  parameter int unsigned FIXED_DELAY_OUTPUT  = 1
+) (
+  input  logic    clk_i,
+  input  logic    rst_ni,
+  AXI_BUS.Slave   slv,
+  AXI_BUS.Master  mst
+);
+
+  typedef logic [AXI_ID_WIDTH-1:0]     id_t;
+  typedef logic [AXI_ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [AXI_DATA_WIDTH-1:0]   data_t;
+  typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t;
+  typedef logic [AXI_USER_WIDTH-1:0]   user_t;
+
+  `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)
+
+  req_t  slv_req,  mst_req;  // request structs bridging the slv / mst interface ports
+  resp_t slv_resp, mst_resp; // response structs bridging the slv / mst interface ports
+
+  `AXI_ASSIGN_TO_REQ(slv_req, slv)
+  `AXI_ASSIGN_FROM_RESP(slv, slv_resp)
+
+  `AXI_ASSIGN_FROM_REQ(mst, mst_req)
+  `AXI_ASSIGN_TO_RESP(mst_resp, mst)
+
+  axi_delayer #(
+    .aw_chan_t         (           aw_chan_t ),
+    .w_chan_t          (            w_chan_t ),
+    .b_chan_t          (            b_chan_t ),
+    .ar_chan_t         (           ar_chan_t ),
+    .r_chan_t          (            r_chan_t ),
+    .req_t             (               req_t ),
+    .resp_t            (              resp_t ),
+    .StallRandomInput  ( STALL_RANDOM_INPUT  ),
+    .StallRandomOutput ( STALL_RANDOM_OUTPUT ),
+    .FixedDelayInput   ( FIXED_DELAY_INPUT   ),
+    .FixedDelayOutput  ( FIXED_DELAY_OUTPUT  )
+  ) i_axi_delayer (
+    .clk_i,   // Clock
+    .rst_ni,  // Asynchronous reset active low
+    .slv_req_i  ( slv_req  ),
+    .slv_resp_o ( slv_resp ),
+    .mst_req_o  ( mst_req  ),
+    .mst_resp_i ( mst_resp )
+  );
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: p_assertions
+    assert (AXI_ID_WIDTH >= 1) else $fatal(1, "AXI ID width must be at least 1!");
+    assert (AXI_ADDR_WIDTH >= 1) else $fatal(1, "AXI ADDR width must be at least 1!");
+    assert (AXI_DATA_WIDTH >= 1) else $fatal(1, "AXI DATA width must be at least 1!");
+    assert (AXI_USER_WIDTH >= 1) else $fatal(1, "AXI USER width must be at least 1!");
+  end
+`endif
+// pragma translate_on
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv
new file mode 100644
index 0000000..99a18c8
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv
@@ -0,0 +1,786 @@
+// Copyright (c) 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+// axi_demux: Demultiplex an AXI bus from one slave port to multiple master ports.
+// See `doc/axi_demux.md` for the documentation, including the definition of parameters and ports.
+module axi_demux #(
+  parameter int unsigned AxiIdWidth     = 32'd0,
+  parameter type         aw_chan_t      = logic,
+  parameter type         w_chan_t       = logic,
+  parameter type         b_chan_t       = logic,
+  parameter type         ar_chan_t      = logic,
+  parameter type         r_chan_t       = logic,
+  parameter type         req_t          = logic,
+  parameter type         resp_t         = logic,
+  parameter int unsigned NoMstPorts     = 32'd0,
+  parameter int unsigned MaxTrans       = 32'd8,
+  parameter int unsigned AxiLookBits    = 32'd3,
+  parameter bit          UniqueIds      = 1'b0,
+  parameter bit          FallThrough    = 1'b0,
+  parameter bit          SpillAw        = 1'b1,
+  parameter bit          SpillW         = 1'b0,
+  parameter bit          SpillB         = 1'b0,
+  parameter bit          SpillAr        = 1'b1,
+  parameter bit          SpillR         = 1'b0,
+  // Dependent parameters, DO NOT OVERRIDE!
+  parameter int unsigned SelectWidth    = (NoMstPorts > 32'd1) ? $clog2(NoMstPorts) : 32'd1,
+  parameter type         select_t       = logic [SelectWidth-1:0]
+) (
+  input  logic                     clk_i,
+  input  logic                     rst_ni,
+  input  logic                     test_i,
+  // Slave Port
+  input  req_t                     slv_req_i,
+  input  select_t                  slv_aw_select_i,
+  input  select_t                  slv_ar_select_i,
+  output resp_t                    slv_resp_o,
+  // Master Ports
+  output req_t    [NoMstPorts-1:0] mst_reqs_o,
+  input  resp_t   [NoMstPorts-1:0] mst_resps_i
+);
+
+  localparam int unsigned IdCounterWidth = MaxTrans > 1 ? $clog2(MaxTrans) : 1;
+
+  //--------------------------------------
+  // Typedefs for the FIFOs / Queues
+  //--------------------------------------
+  typedef struct packed {
+    aw_chan_t aw_chan;
+    select_t  aw_select;
+  } aw_chan_select_t;
+  typedef struct packed {
+    ar_chan_t ar_chan;
+    select_t  ar_select;
+  } ar_chan_select_t;
+
+  // pass through if only one master port
+  if (NoMstPorts == 32'h1) begin : gen_no_demux
+    assign mst_reqs_o[0] = slv_req_i;
+    assign slv_resp_o    = mst_resps_i;
+  // other non degenerate cases
+  end else begin : gen_demux
+
+    //--------------------------------------
+    //--------------------------------------
+    // Signal Declarations
+    //--------------------------------------
+    //--------------------------------------
+
+    //--------------------------------------
+    // Write Transaction
+    //--------------------------------------
+    // comes from spill register at input
+    aw_chan_select_t          slv_aw_chan_select;
+    logic                     slv_aw_valid,       slv_aw_ready;
+
+    // AW ID counter
+    select_t                  lookup_aw_select;
+    logic                     aw_select_occupied, aw_id_cnt_full;
+    logic                     aw_push;
+    // Upon an ATOP load, inject IDs from the AW into the AR channel
+    logic                     atop_inject;
+
+    // W FIFO: stores the decision to which master W beats should go
+    logic                     w_fifo_pop;
+    logic                     w_fifo_full,        w_fifo_empty;
+    select_t                  w_select;
+
+    // Register which locks the AW valid signal
+    logic                     lock_aw_valid_d,    lock_aw_valid_q, load_aw_lock;
+    logic                     aw_valid,           aw_ready;
+
+    // W channel from spill reg
+    w_chan_t                  slv_w_chan;
+    logic                     slv_w_valid,        slv_w_ready;
+
+    // B channels input into the arbitration
+    b_chan_t [NoMstPorts-1:0] mst_b_chans;
+    logic    [NoMstPorts-1:0] mst_b_valids,       mst_b_readies;
+
+    // B channel to spill register
+    b_chan_t                  slv_b_chan;
+    logic                     slv_b_valid,        slv_b_ready;
+
+    //--------------------------------------
+    // Read Transaction
+    //--------------------------------------
+    // comes from spill register at input
+    ar_chan_select_t          slv_ar_chan_select;
+    logic                     slv_ar_valid,       slv_ar_ready;
+
+    // AR ID counter
+    select_t                  lookup_ar_select;
+    logic                     ar_select_occupied, ar_id_cnt_full;
+    logic                     ar_push;
+
+    // Register which locks the AR valid signal
+    logic                     lock_ar_valid_d,    lock_ar_valid_q, load_ar_lock;
+    logic                     ar_valid,           ar_ready;
+
+    // R channels input into the arbitration
+    r_chan_t [NoMstPorts-1:0] mst_r_chans;
+    logic    [NoMstPorts-1:0] mst_r_valids, mst_r_readies;
+
+    // R channel to spill register
+    r_chan_t                  slv_r_chan;
+    logic                     slv_r_valid,        slv_r_ready;
+
+    //--------------------------------------
+    //--------------------------------------
+    // Channel Control
+    //--------------------------------------
+    //--------------------------------------
+
+    //--------------------------------------
+    // AW Channel
+    //--------------------------------------
+    // spill register at the channel input
+    `ifdef TARGET_VSIM
+    // Workaround for bug in Questa 2020.2 and 2021.1: Flatten the struct into a logic vector before
+    // instantiating `spill_register`.
+    typedef logic [$bits(aw_chan_select_t)-1:0] aw_chan_select_flat_t;
+    `else
+    typedef aw_chan_select_t aw_chan_select_flat_t;
+    `endif
+    aw_chan_select_flat_t slv_aw_chan_select_in_flat,
+                          slv_aw_chan_select_out_flat;
+    assign slv_aw_chan_select_in_flat = {slv_req_i.aw, slv_aw_select_i};
+    spill_register #(
+      .T       ( aw_chan_select_flat_t        ),
+      .Bypass  ( ~SpillAw                     ) // because module param indicates if we want a spill reg
+    ) i_aw_spill_reg (
+      .clk_i   ( clk_i                        ),
+      .rst_ni  ( rst_ni                       ),
+      .valid_i ( slv_req_i.aw_valid           ),
+      .ready_o ( slv_resp_o.aw_ready          ),
+      .data_i  ( slv_aw_chan_select_in_flat   ),
+      .valid_o ( slv_aw_valid                 ),
+      .ready_i ( slv_aw_ready                 ),
+      .data_o  ( slv_aw_chan_select_out_flat  )
+    );
+    assign slv_aw_chan_select = slv_aw_chan_select_out_flat;
+
+    // AW handshake control: offers the AW beat from `i_aw_spill_reg` to the selected master port.
+    always_comb begin
+      // AXI handshakes: by default nothing is transferred this cycle
+      slv_aw_ready = 1'b0;
+      aw_valid     = 1'b0;
+      // `lock_aw_valid` keeps `aw_valid` asserted once it has been raised, because AXI does not
+      // allow deasserting a valid before the corresponding ready. The lock is needed since this
+      // process may inject an AXI ID into the counter of the AR channel on an ATOP: while it
+      // waits for `aw_ready`, the AR counter could become full in the meantime, which would
+      // otherwise make the start condition below false and drop `aw_valid`.
+      lock_aw_valid_d = lock_aw_valid_q;
+      load_aw_lock    = 1'b0;
+      // push into the AW ID counter and the W FIFO
+      aw_push      = 1'b0;
+      // ATOP injection into the AR ID counter
+      atop_inject  = 1'b0;
+      // a decision was taken in an earlier cycle: the valid is locked, wait for the handshake
+      if (lock_aw_valid_q) begin
+        aw_valid = 1'b1;
+        // handshake happens now: consume the AW and clear the lock (no push, it was done already)
+        if (aw_ready) begin
+          slv_aw_ready    = 1'b1;
+          lock_aw_valid_d = 1'b0;
+          load_aw_lock    = 1'b1;
+          atop_inject     = slv_aw_chan_select.aw_chan.atop[5]; // inject into AR counter if set
+        end
+      end else begin
+        // A new AW may only start if `i_aw_id_counter` and the W FIFO have space, and the AR ID
+        // counter is not full either (an ATOP may have to be injected into it).
+        if (!aw_id_cnt_full && !w_fifo_full && !ar_id_cnt_full) begin
+          // valid AW present: go ahead if its ID is unused or already maps to the same master
+          if (slv_aw_valid && (!aw_select_occupied ||
+             (slv_aw_chan_select.aw_select == lookup_aw_select))) begin
+            // raise the valid towards the selected master port
+            aw_valid     = 1'b1;
+            // push the decision right away (exactly once), do not wait for the AW handshake
+            aw_push      = 1'b1;
+            // AW handshake completes in the same cycle
+            if (aw_ready) begin
+              slv_aw_ready = 1'b1;
+              atop_inject  = slv_aw_chan_select.aw_chan.atop[5];
+            // no AW handshake this cycle: set the lock so the valid stays up
+            end else begin
+              lock_aw_valid_d = 1'b1;
+              load_aw_lock    = 1'b1;
+            end
+          end
+        end
+      end
+    end
+
+    // lock the valid signal, as the selection gets pushed into the W FIFO on first assertion,
+    // prevent further pushing
+    `FFLARN(lock_aw_valid_q, lock_aw_valid_d, load_aw_lock, '0, clk_i, rst_ni)
+
+    if (UniqueIds) begin : gen_unique_ids_aw
+      // If the `UniqueIds` parameter is set, each write transaction has an ID that is unique among
+      // all in-flight write transactions, or all write transactions with a given ID target the same
+      // master port as all write transactions with the same ID, or both.  The signals that the ID
+      // counters would otherwise drive can then be tied off or derived from existing signals,
+      // so the ID counters are omitted in this branch.
+      assign lookup_aw_select = slv_aw_chan_select.aw_select; // lookup equals the request itself
+      assign aw_select_occupied = 1'b0; // no ID is ever reported as occupied
+      assign aw_id_cnt_full = 1'b0; // no counter, hence never full
+    end else begin : gen_aw_id_counter
+      axi_demux_id_counters #(
+        .AxiIdBits         ( AxiLookBits    ),
+        .CounterWidth      ( IdCounterWidth ),
+        .mst_port_select_t ( select_t       )
+      ) i_aw_id_counter (
+        .clk_i                        ( clk_i                                         ),
+        .rst_ni                       ( rst_ni                                        ),
+        .lookup_axi_id_i              ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ),
+        .lookup_mst_select_o          ( lookup_aw_select                              ),
+        .lookup_mst_select_occupied_o ( aw_select_occupied                            ),
+        .full_o                       ( aw_id_cnt_full                                ),
+        .inject_axi_id_i              ( '0                                            ), // unused
+        .inject_i                     ( 1'b0                                          ), // unused
+        .push_axi_id_i                ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ),
+        .push_mst_select_i            ( slv_aw_chan_select.aw_select                  ),
+        .push_i                       ( aw_push                                       ),
+        .pop_axi_id_i                 ( slv_b_chan.id[0+:AxiLookBits]                 ),
+        .pop_i                        ( slv_b_valid & slv_b_ready                     )
+      );
+      // the counter is popped when a B beat is handed from the B arbiter to `i_b_spill_reg`
+    end
+
+    // FIFO to save W selection
+    fifo_v3 #(
+      .FALL_THROUGH ( FallThrough ),
+      .DEPTH        ( MaxTrans    ),
+      .dtype        ( select_t    )
+    ) i_w_fifo (
+      .clk_i     ( clk_i                        ),
+      .rst_ni    ( rst_ni                       ),
+      .flush_i   ( 1'b0                         ),
+      .testmode_i( test_i                       ),
+      .full_o    ( w_fifo_full                  ),
+      .empty_o   ( w_fifo_empty                 ),
+      .usage_o   (                              ),
+      .data_i    ( slv_aw_chan_select.aw_select ),
+      .push_i    ( aw_push                      ), // controlled from proc_aw_chan
+      .data_o    ( w_select                     ), // where the w beat should go
+      .pop_i     ( w_fifo_pop                   )  // controlled from proc_w_chan
+    );
+
+    //--------------------------------------
+    //  W Channel
+    //--------------------------------------
+    spill_register #(
+      .T       ( w_chan_t ),
+      .Bypass  ( ~SpillW  )
+    ) i_w_spill_reg(
+      .clk_i   ( clk_i              ),
+      .rst_ni  ( rst_ni             ),
+      .valid_i ( slv_req_i.w_valid  ),
+      .ready_o ( slv_resp_o.w_ready ),
+      .data_i  ( slv_req_i.w        ),
+      .valid_o ( slv_w_valid        ),
+      .ready_i ( slv_w_ready        ),
+      .data_o  ( slv_w_chan         )
+    );
+
+    //--------------------------------------
+    //  B Channel
+    //--------------------------------------
+    // optional spill register
+    spill_register #(
+      .T       ( b_chan_t ),
+      .Bypass  ( ~SpillB  )
+    ) i_b_spill_reg (
+      .clk_i   ( clk_i              ),
+      .rst_ni  ( rst_ni             ),
+      .valid_i ( slv_b_valid        ),
+      .ready_o ( slv_b_ready        ),
+      .data_i  ( slv_b_chan         ),
+      .valid_o ( slv_resp_o.b_valid ),
+      .ready_i ( slv_req_i.b_ready  ),
+      .data_o  ( slv_resp_o.b       )
+    );
+
+    // Arbitration of the different B responses
+    rr_arb_tree #(
+      .NumIn    ( NoMstPorts ),
+      .DataType ( b_chan_t   ),
+      .AxiVldRdy( 1'b1       ),
+      .LockIn   ( 1'b1       )
+    ) i_b_mux (
+      .clk_i  ( clk_i         ),
+      .rst_ni ( rst_ni        ),
+      .flush_i( 1'b0          ),
+      .rr_i   ( '0            ),
+      .req_i  ( mst_b_valids  ),
+      .gnt_o  ( mst_b_readies ),
+      .data_i ( mst_b_chans   ),
+      .gnt_i  ( slv_b_ready   ),
+      .req_o  ( slv_b_valid   ),
+      .data_o ( slv_b_chan    ),
+      .idx_o  (               )
+    );
+
+    //--------------------------------------
+    //  AR Channel
+    //--------------------------------------
+    `ifdef TARGET_VSIM
+    // Workaround for bug in Questa 2020.2 and 2021.1: Flatten the struct into a logic vector before
+    // instantiating `spill_register`.
+    typedef logic [$bits(ar_chan_select_t)-1:0] ar_chan_select_flat_t;
+    `else
+    typedef ar_chan_select_t ar_chan_select_flat_t;
+    `endif
+    ar_chan_select_flat_t slv_ar_chan_select_in_flat,
+                          slv_ar_chan_select_out_flat;
+    assign slv_ar_chan_select_in_flat = {slv_req_i.ar, slv_ar_select_i};
+    spill_register #(
+      .T       ( ar_chan_select_flat_t        ),
+      .Bypass  ( ~SpillAr                     )
+    ) i_ar_spill_reg (
+      .clk_i   ( clk_i                        ),
+      .rst_ni  ( rst_ni                       ),
+      .valid_i ( slv_req_i.ar_valid           ),
+      .ready_o ( slv_resp_o.ar_ready          ),
+      .data_i  ( slv_ar_chan_select_in_flat   ),
+      .valid_o ( slv_ar_valid                 ),
+      .ready_i ( slv_ar_ready                 ),
+      .data_o  ( slv_ar_chan_select_out_flat  )
+    );
+    assign slv_ar_chan_select = slv_ar_chan_select_out_flat;
+
+    // AR handshake control: offers the AR beat from `i_ar_spill_reg` to the selected master port.
+    always_comb begin
+      // AXI handshakes: by default nothing is transferred this cycle
+      slv_ar_ready    = 1'b0;
+      ar_valid        = 1'b0;
+      // `lock_ar_valid` keeps `ar_valid` asserted, as AXI does not allow deasserting `ar_valid`
+      // before the corresponding `ar_ready`. An injection for the R response of an `atop` from
+      // the AW channel can change the occupied flag (or fullness) of `i_ar_id_counter` while this
+      // process is waiting, even if it was previously empty. This FF prevents the deassertion.
+      lock_ar_valid_d = lock_ar_valid_q;
+      load_ar_lock    = 1'b0;
+      // push into the AR ID counter
+      ar_push         = 1'b0;
+      // A decision was taken in a previous cycle: the valid is locked,
+      // wait for the AR handshake.
+      if (lock_ar_valid_q) begin
+        ar_valid = 1'b1;
+        // handshake happens now: consume the AR, count it, and clear the lock
+        if (ar_ready) begin
+          slv_ar_ready    = 1'b1;
+          ar_push         = 1'b1;
+          lock_ar_valid_d = 1'b0;
+          load_ar_lock    = 1'b1;
+        end
+      end else begin
+        // A new AR may only start if `i_ar_id_counter` has space.
+        if (!ar_id_cnt_full) begin
+          // Valid AR present: go ahead if its ID is unused or already maps to the same master.
+          if (slv_ar_valid && (!ar_select_occupied ||
+             (slv_ar_chan_select.ar_select == lookup_ar_select))) begin
+            // raise the valid towards the selected master port
+            ar_valid     = 1'b1;
+            // AR handshake completes in the same cycle: count it (unlike AW, push on handshake)
+            if (ar_ready) begin
+              slv_ar_ready = 1'b1;
+              ar_push      = 1'b1;
+            // no handshake this cycle: set the lock so the valid stays up
+            end else begin
+              lock_ar_valid_d = 1'b1;
+              load_ar_lock    = 1'b1;
+            end
+          end
+        end
+      end
+    end
+
+    // this ff is needed so that ar does not get de-asserted if an atop gets injected
+    `FFLARN(lock_ar_valid_q, lock_ar_valid_d, load_ar_lock, '0, clk_i, rst_ni)
+
+    if (UniqueIds) begin : gen_unique_ids_ar
+      // If the `UniqueIds` parameter is set, each read transaction has an ID that is unique among
+      // all in-flight read transactions, or all read transactions with a given ID target the same
+      // master port as all read transactions with the same ID, or both.  The signals that the ID
+      // counters would otherwise drive can then be tied off or derived from existing signals,
+      // so the ID counters are omitted in this branch.
+      assign lookup_ar_select = slv_ar_chan_select.ar_select; // lookup equals the request itself
+      assign ar_select_occupied = 1'b0; // no ID is ever reported as occupied
+      assign ar_id_cnt_full = 1'b0; // no counter, hence never full
+    end else begin : gen_ar_id_counter
+      axi_demux_id_counters #(
+        .AxiIdBits         ( AxiLookBits    ),
+        .CounterWidth      ( IdCounterWidth ),
+        .mst_port_select_t ( select_t       )
+      ) i_ar_id_counter (
+        .clk_i                        ( clk_i                                         ),
+        .rst_ni                       ( rst_ni                                        ),
+        .lookup_axi_id_i              ( slv_ar_chan_select.ar_chan.id[0+:AxiLookBits] ),
+        .lookup_mst_select_o          ( lookup_ar_select                              ),
+        .lookup_mst_select_occupied_o ( ar_select_occupied                            ),
+        .full_o                       ( ar_id_cnt_full                                ),
+        .inject_axi_id_i              ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ), // AW ID
+        .inject_i                     ( atop_inject                                   ), // from AW
+        .push_axi_id_i                ( slv_ar_chan_select.ar_chan.id[0+:AxiLookBits] ),
+        .push_mst_select_i            ( slv_ar_chan_select.ar_select                  ),
+        .push_i                       ( ar_push                                       ),
+        .pop_axi_id_i                 ( slv_r_chan.id[0+:AxiLookBits]                 ),
+        .pop_i                        ( slv_r_valid & slv_r_ready & slv_r_chan.last   )
+      );
+    end
+
+    //--------------------------------------
+    //  R Channel
+    //--------------------------------------
+    // optional spill register
+    spill_register #(
+      .T       ( r_chan_t ),
+      .Bypass  ( ~SpillR  )
+    ) i_r_spill_reg (
+      .clk_i   ( clk_i              ),
+      .rst_ni  ( rst_ni             ),
+      .valid_i ( slv_r_valid        ),
+      .ready_o ( slv_r_ready        ),
+      .data_i  ( slv_r_chan         ),
+      .valid_o ( slv_resp_o.r_valid ),
+      .ready_i ( slv_req_i.r_ready  ),
+      .data_o  ( slv_resp_o.r       )
+    );
+
+    // Arbitration of the different r responses
+    rr_arb_tree #(
+      .NumIn    ( NoMstPorts ),
+      .DataType ( r_chan_t   ),
+      .AxiVldRdy( 1'b1       ),
+      .LockIn   ( 1'b1       )
+    ) i_r_mux (
+      .clk_i  ( clk_i         ),
+      .rst_ni ( rst_ni        ),
+      .flush_i( 1'b0          ),
+      .rr_i   ( '0            ),
+      .req_i  ( mst_r_valids  ),
+      .gnt_o  ( mst_r_readies ),
+      .data_i ( mst_r_chans   ),
+      .gnt_i  ( slv_r_ready   ),
+      .req_o  ( slv_r_valid   ),
+      .data_o ( slv_r_chan    ),
+      .idx_o  (               )
+    );
+
+   assign ar_ready = ar_valid & mst_resps_i[slv_ar_chan_select.ar_select].ar_ready;
+   assign aw_ready = aw_valid & mst_resps_i[slv_aw_chan_select.aw_select].aw_ready;
+
+    // Process that defines the individual demuxes and the assignments for the arbitration;
+    // all of it lives here as mst_reqs_o has to be driven from the same always_comb block!
+    always_comb begin
+      // default assignments (request fields not set below stay zero)
+      mst_reqs_o  = '0;
+      slv_w_ready = 1'b0;
+      w_fifo_pop  = 1'b0;
+
+      for (int unsigned i = 0; i < NoMstPorts; i++) begin
+        // AW channel: payload is broadcast, valid only goes to the selected master port
+        mst_reqs_o[i].aw       = slv_aw_chan_select.aw_chan;
+        mst_reqs_o[i].aw_valid = 1'b0;
+        if (aw_valid && (slv_aw_chan_select.aw_select == i)) begin
+          mst_reqs_o[i].aw_valid = 1'b1;
+        end
+
+        //  W channel: routed to the port at the W FIFO output; entry is popped on the last beat
+        mst_reqs_o[i].w       = slv_w_chan;
+        mst_reqs_o[i].w_valid = 1'b0;
+        if (!w_fifo_empty && (w_select == i)) begin
+          mst_reqs_o[i].w_valid = slv_w_valid;
+          slv_w_ready           = mst_resps_i[i].w_ready;
+          w_fifo_pop            = slv_w_valid & mst_resps_i[i].w_ready & slv_w_chan.last;
+        end
+
+        //  B channel: ready comes from the B response arbiter (`i_b_mux`)
+        mst_reqs_o[i].b_ready = mst_b_readies[i];
+
+        // AR channel: payload is broadcast, valid only goes to the selected master port
+        mst_reqs_o[i].ar       = slv_ar_chan_select.ar_chan;
+        mst_reqs_o[i].ar_valid = 1'b0;
+        if (ar_valid && (slv_ar_chan_select.ar_select == i)) begin
+          mst_reqs_o[i].ar_valid = 1'b1;
+        end
+
+        //  R channel: ready comes from the R response arbiter (`i_r_mux`)
+        mst_reqs_o[i].r_ready = mst_r_readies[i];
+      end
+    end
+    // unpack the response B and R channels for the arbitration
+    for (genvar i = 0; i < NoMstPorts; i++) begin : gen_b_channels
+      assign mst_b_chans[i]        = mst_resps_i[i].b;
+      assign mst_b_valids[i]       = mst_resps_i[i].b_valid;
+      assign mst_r_chans[i]        = mst_resps_i[i].r;
+      assign mst_r_valids[i]       = mst_resps_i[i].r_valid;
+    end
+
+
+// Validate parameters and check handshake invariants (simulation only, skipped by Verilator/XSIM).
+// pragma translate_off
+`ifndef VERILATOR
+`ifndef XSIM
+    initial begin: validate_params
+      no_mst_ports: assume (NoMstPorts > 0) else
+        $fatal(1, "The Number of slaves (NoMstPorts) has to be at least 1");
+      AXI_ID_BITS:  assume (AxiIdWidth >= AxiLookBits) else
+        $fatal(1, "AxiLookBits has to be equal or smaller than AxiIdWidth.");
+    end
+    default disable iff (!rst_ni);
+    aw_select: assume property( @(posedge clk_i) (slv_req_i.aw_valid |->
+                                                 (slv_aw_select_i < NoMstPorts))) else
+      $fatal(1, "slv_aw_select_i is %d: AW has selected a slave that is not defined.\
+                 NoMstPorts: %d", slv_aw_select_i, NoMstPorts);
+    ar_select: assume property( @(posedge clk_i) (slv_req_i.ar_valid |->
+                                                 (slv_ar_select_i < NoMstPorts))) else
+      $fatal(1, "slv_ar_select_i is %d: AR has selected a slave that is not defined.\
+                 NoMstPorts: %d", slv_ar_select_i, NoMstPorts);
+    aw_valid_stable: assert property( @(posedge clk_i) (aw_valid && !aw_ready) |=> aw_valid) else
+      $fatal(1, "aw_valid was deasserted, when aw_ready = 0 in last cycle.");
+    ar_valid_stable: assert property( @(posedge clk_i)
+                               (ar_valid && !ar_ready) |=> ar_valid) else
+      $fatal(1, "ar_valid was deasserted, when ar_ready = 0 in last cycle.");
+    aw_stable: assert property( @(posedge clk_i) (aw_valid && !aw_ready)
+                               |=> $stable(slv_aw_chan_select)) else
+      $fatal(1, "slv_aw_chan_select unstable with valid set.");
+    ar_stable: assert property( @(posedge clk_i) (ar_valid && !ar_ready)
+                               |=> $stable(slv_ar_chan_select)) else
+      $fatal(1, "slv_ar_chan_select unstable with valid set.");
+    internal_ar_select: assert property( @(posedge clk_i)
+        (ar_valid |-> slv_ar_chan_select.ar_select < NoMstPorts))
+      else $fatal(1, "slv_ar_chan_select.ar_select illegal while ar_valid.");
+    internal_aw_select: assert property( @(posedge clk_i)
+        (aw_valid |-> slv_aw_chan_select.aw_select < NoMstPorts))
+      else $fatal(1, "slv_aw_chan_select.aw_select illegal while aw_valid.");
+`endif
+`endif
+// pragma translate_on
+  end
+endmodule
+
+module axi_demux_id_counters #( // per-ID counters plus the master port stored for each ID
+  // the lower bits of the AXI ID that should be considered, results in 2**AxiIdBits counters
+  parameter int unsigned AxiIdBits         = 2,
+  parameter int unsigned CounterWidth      = 4,     // bit width of every counter
+  parameter type         mst_port_select_t = logic  // type of the stored master port selection
+) (
+  input                        clk_i,   // Clock
+  input                        rst_ni,  // Asynchronous reset active low
+  // lookup: stored selection and occupied flag of the counter addressed by `lookup_axi_id_i`
+  input  logic [AxiIdBits-1:0] lookup_axi_id_i,
+  output mst_port_select_t     lookup_mst_select_o,
+  output logic                 lookup_mst_select_occupied_o,
+  // push: +1 on the addressed counter and store `push_mst_select_i` for that ID
+  output logic                 full_o,  // set if any of the counters reports full
+  input  logic [AxiIdBits-1:0] push_axi_id_i,
+  input  mst_port_select_t     push_mst_select_i,
+  input  logic                 push_i,
+  // inject ATOPs in AR channel: +1 on the addressed counter, stored selection is not changed
+  input  logic [AxiIdBits-1:0] inject_axi_id_i,
+  input  logic                 inject_i,
+  // pop: -1 on the addressed counter
+  input  logic [AxiIdBits-1:0] pop_axi_id_i,
+  input  logic                 pop_i
+);
+  localparam int unsigned NoCounters = 2**AxiIdBits; // one counter per value of the ID bits
+  typedef logic [CounterWidth-1:0] cnt_t;
+
+  // one selection register per counter, loaded with `push_mst_select_i` when push_en[i] is set
+  mst_port_select_t [NoCounters-1:0] mst_select_q;
+
+  // per-counter enables (one-hot decoded from the IDs below) and status flags
+  logic [NoCounters-1:0] push_en, inject_en, pop_en, occupied, cnt_full;
+
+  //-----------------------------------
+  // Lookup
+  //-----------------------------------
+  assign lookup_mst_select_o          = mst_select_q[lookup_axi_id_i];
+  assign lookup_mst_select_occupied_o = occupied[lookup_axi_id_i];
+  //-----------------------------------
+  // Push and Pop
+  //-----------------------------------
+  assign push_en   = (push_i)   ? (1 << push_axi_id_i)   : '0;
+  assign inject_en = (inject_i) ? (1 << inject_axi_id_i) : '0;
+  assign pop_en    = (pop_i)    ? (1 << pop_axi_id_i)    : '0;
+  assign full_o    = |cnt_full;
+  // counters: one instance per ID value
+  for (genvar i = 0; i < NoCounters; i++) begin : gen_counters
+    logic cnt_en, cnt_down, overflow;
+    cnt_t cnt_delta, in_flight;
+    always_comb begin
+      unique case ({push_en[i], inject_en[i], pop_en[i]}) // net change of counter i this cycle
+        3'b001  : begin // pop_i = -1
+          cnt_en    = 1'b1;
+          cnt_down  = 1'b1;
+          cnt_delta = cnt_t'(1);
+        end
+        3'b010  : begin // inject_i = +1
+          cnt_en    = 1'b1;
+          cnt_down  = 1'b0;
+          cnt_delta = cnt_t'(1);
+        end
+     // 3'b011, inject_i & pop_i = 0 --> use default
+        3'b100  : begin // push_i = +1
+          cnt_en    = 1'b1;
+          cnt_down  = 1'b0;
+          cnt_delta = cnt_t'(1);
+        end
+     // 3'b101, push_i & pop_i = 0 --> use default
+        3'b110  : begin // push_i & inject_i = +2
+          cnt_en    = 1'b1;
+          cnt_down  = 1'b0;
+          cnt_delta = cnt_t'(2);
+        end
+        3'b111  : begin // push_i & inject_i & pop_i = +1
+          cnt_en    = 1'b1;
+          cnt_down  = 1'b0;
+          cnt_delta = cnt_t'(1);
+        end
+        default : begin // do nothing to the counters (also covers 3'b000, 3'b011 and 3'b101)
+          cnt_en    = 1'b0;
+          cnt_down  = 1'b0;
+          cnt_delta = cnt_t'(0);
+        end
+      endcase
+    end
+
+    delta_counter #( // defined outside this file; port semantics presumed from the names
+      .WIDTH           ( CounterWidth ),
+      .STICKY_OVERFLOW ( 1'b0         )
+    ) i_in_flight_cnt (
+      .clk_i      ( clk_i     ),
+      .rst_ni     ( rst_ni    ),
+      .clear_i    ( 1'b0      ),
+      .en_i       ( cnt_en    ),
+      .load_i     ( 1'b0      ),
+      .down_i     ( cnt_down  ),
+      .delta_i    ( cnt_delta ),
+      .d_i        ( '0        ),
+      .q_o        ( in_flight ),
+      .overflow_o ( overflow  )
+    );
+    assign occupied[i] = |in_flight;              // counter value is non-zero
+    assign cnt_full[i] = overflow | (&in_flight); // overflow flagged or counter value all ones
+
+    // holds the selection signal for this id, updated on every push to this id
+    `FFLARN(mst_select_q[i], push_mst_select_i, push_en[i], '0, clk_i, rst_ni)
+
+// pragma translate_off
+`ifndef VERILATOR
+`ifndef XSIM
+    // Runtime check (not a parameter check): the cycle after a pop, `overflow` must be clear.
+    cnt_underflow: assert property(
+      @(posedge clk_i) disable iff (~rst_ni) (pop_en[i] |=> !overflow)) else
+        $fatal(1, "axi_demux_id_counters > Counter: %0d underflowed.\
+                   The reason is probably a faulty AXI response.", i);
+`endif
+`endif
+// pragma translate_on
+  end
+endmodule
+
+// interface wrapper
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+module axi_demux_intf #( // wraps `axi_demux` with `AXI_BUS` interface ports
+  parameter int unsigned AXI_ID_WIDTH     = 32'd0, // Synopsys DC requires default value for params
+  parameter int unsigned AXI_ADDR_WIDTH   = 32'd0,
+  parameter int unsigned AXI_DATA_WIDTH   = 32'd0,
+  parameter int unsigned AXI_USER_WIDTH   = 32'd0,
+  parameter int unsigned NO_MST_PORTS     = 32'd3,
+  parameter int unsigned MAX_TRANS        = 32'd8,
+  parameter int unsigned AXI_LOOK_BITS    = 32'd3,
+  parameter bit          UNIQUE_IDS       = 1'b0,
+  parameter bit          FALL_THROUGH     = 1'b0,
+  parameter bit          SPILL_AW         = 1'b1,
+  parameter bit          SPILL_W          = 1'b0,
+  parameter bit          SPILL_B          = 1'b0,
+  parameter bit          SPILL_AR         = 1'b1,
+  parameter bit          SPILL_R          = 1'b0,
+  // Dependent parameters, DO NOT OVERRIDE! (same derivation as SelectWidth/select_t in axi_demux)
+  parameter int unsigned SELECT_WIDTH   = (NO_MST_PORTS > 32'd1) ? $clog2(NO_MST_PORTS) : 32'd1,
+  parameter type         select_t       = logic [SELECT_WIDTH-1:0] // MST port select type
+) (
+  input  logic    clk_i,                 // Clock
+  input  logic    rst_ni,                // Asynchronous reset active low
+  input  logic    test_i,                // Testmode enable
+  input  select_t slv_aw_select_i,       // has to be stable, when aw_valid
+  input  select_t slv_ar_select_i,       // has to be stable, when ar_valid
+  AXI_BUS.Slave   slv,                   // slave port
+  AXI_BUS.Master  mst [NO_MST_PORTS-1:0] // master ports
+);
+
+  typedef logic [AXI_ID_WIDTH-1:0]       id_t;
+  typedef logic [AXI_ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [AXI_DATA_WIDTH-1:0]   data_t;
+  typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; // one strobe bit per data byte
+  typedef logic [AXI_USER_WIDTH-1:0]   user_t;
+  `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)
+
+  req_t                     slv_req;  // struct view of the `slv` interface requests
+  resp_t                    slv_resp; // struct responses driven back onto `slv`
+  req_t  [NO_MST_PORTS-1:0] mst_req;  // struct requests driven onto `mst[i]`
+  resp_t [NO_MST_PORTS-1:0] mst_resp; // struct view of the `mst[i]` responses
+
+  `AXI_ASSIGN_TO_REQ(slv_req, slv)
+  `AXI_ASSIGN_FROM_RESP(slv, slv_resp)
+
+  for (genvar i = 0; i < NO_MST_PORTS; i++) begin : gen_assign_mst_ports
+    `AXI_ASSIGN_FROM_REQ(mst[i], mst_req[i])
+    `AXI_ASSIGN_TO_RESP(mst_resp[i], mst[i])
+  end
+
+  axi_demux #( // SelectWidth/select_t are left at their derived defaults
+    .AxiIdWidth     ( AXI_ID_WIDTH  ), // ID Width
+    .aw_chan_t      ( aw_chan_t     ), // AW Channel Type
+    .w_chan_t       (  w_chan_t     ), //  W Channel Type
+    .b_chan_t       (  b_chan_t     ), //  B Channel Type
+    .ar_chan_t      ( ar_chan_t     ), // AR Channel Type
+    .r_chan_t       (  r_chan_t     ), //  R Channel Type
+    .req_t          (     req_t     ), // request struct type
+    .resp_t         (    resp_t     ), // response struct type
+    .NoMstPorts     ( NO_MST_PORTS  ),
+    .MaxTrans       ( MAX_TRANS     ),
+    .AxiLookBits    ( AXI_LOOK_BITS ),
+    .UniqueIds      ( UNIQUE_IDS    ),
+    .FallThrough    ( FALL_THROUGH  ),
+    .SpillAw        ( SPILL_AW      ),
+    .SpillW         ( SPILL_W       ),
+    .SpillB         ( SPILL_B       ),
+    .SpillAr        ( SPILL_AR      ),
+    .SpillR         ( SPILL_R       )
+  ) i_axi_demux (
+    .clk_i,   // Clock
+    .rst_ni,  // Asynchronous reset active low
+    .test_i,  // Testmode enable
+    // slave port
+    .slv_req_i       ( slv_req         ),
+    .slv_aw_select_i ( slv_aw_select_i ),
+    .slv_ar_select_i ( slv_ar_select_i ),
+    .slv_resp_o      ( slv_resp        ),
+    // master port
+    .mst_reqs_o      ( mst_req         ),
+    .mst_resps_i     ( mst_resp        )
+  );
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv
new file mode 100644
index 0000000..f3c807d
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv
@@ -0,0 +1,261 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+// - Matheus Cavalcante <matheusd@iis.ee.ethz.ch>
+
+// AXI Error Slave: This module always responds with an AXI error for transactions that are sent to
+// it.  This module optionally supports ATOPs if the `ATOPs` parameter is set.
+
+module axi_err_slv #(
+  parameter int unsigned          AxiIdWidth  = 0,                    // AXI ID width; sets the width of the IDs stored in the FIFOs
+  parameter type                  req_t       = logic,                // AXI 4 request struct, with atop field
+  parameter type                  resp_t      = logic,                // AXI 4 response struct
+  parameter axi_pkg::resp_t       Resp        = axi_pkg::RESP_DECERR, // Response code driven on B and R; must be RESP_DECERR or RESP_SLVERR (asserted in simulation).
+  parameter int unsigned          RespWidth   = 32'd64,               // Data response width, gets zero extended or truncated to r.data.
+  parameter logic [RespWidth-1:0] RespData    = 64'hCA11AB1EBADCAB1E, // Hex value driven on r.data for every read beat
+  parameter bit                   ATOPs       = 1'b1,                 // Activate support for ATOPs.  Set to 1 if this slave could ever get an atomic AXI transaction.
+  parameter int unsigned          MaxTrans    = 1                     // Depth of the AW-ID and AR FIFOs; AW/AR are stalled while the respective FIFO is full
+) (
+  input  logic  clk_i,   // Clock
+  input  logic  rst_ni,  // Asynchronous reset active low
+  input  logic  test_i,  // Testmode enable
+  // slave port
+  input  req_t  slv_req_i,
+  output resp_t slv_resp_o
+);
+  typedef logic [AxiIdWidth-1:0] id_t;
+  typedef struct packed {  // entry of the read FIFO: ID and burst length of an accepted AR
+    id_t           id;
+    axi_pkg::len_t len;
+  } r_data_t;
+
+  req_t   err_req;   // request seen by the error logic (after the optional ATOP filter)
+  resp_t  err_resp;  // response produced by the error logic
+
+  if (ATOPs) begin // route the slave port through axi_atop_filter before the error logic
+    axi_atop_filter #(
+      .AxiIdWidth       ( AxiIdWidth  ),
+      .AxiMaxWriteTxns  ( MaxTrans    ),
+      .req_t            ( req_t       ),
+      .resp_t           ( resp_t      )
+    ) i_atop_filter (
+      .clk_i,
+      .rst_ni,
+      .slv_req_i  ( slv_req_i   ),
+      .slv_resp_o ( slv_resp_o  ),
+      .mst_req_o  ( err_req     ),
+      .mst_resp_i ( err_resp    )
+    );
+  end else begin // no filter: the slave port is wired straight to the error logic
+    assign err_req    = slv_req_i;
+    assign slv_resp_o = err_resp;
+  end
+
+  // w fifo (holds the IDs of accepted AWs whose W burst has not finished yet)
+  logic    w_fifo_full, w_fifo_empty;
+  logic    w_fifo_push, w_fifo_pop;
+  id_t     w_fifo_data;
+  // b fifo (holds the IDs of completed write bursts awaiting their B response)
+  logic    b_fifo_full, b_fifo_empty;
+  logic    b_fifo_push, b_fifo_pop;
+  id_t     b_fifo_data;
+  // r fifo (holds ID and length of accepted ARs)
+  r_data_t r_fifo_inp;
+  logic    r_fifo_full, r_fifo_empty;
+  logic    r_fifo_push, r_fifo_pop;
+  r_data_t r_fifo_data;
+  // r counter (beat counter of the read burst currently being answered)
+  logic    r_cnt_clear, r_cnt_en, r_cnt_load;
+  axi_pkg::len_t r_current_beat;
+  // r status (r_busy_q is set while a read burst is being answered)
+  logic    r_busy_d, r_busy_q, r_busy_load;
+
+  //--------------------------------------
+  // Write Transactions
+  //--------------------------------------
+  // push the AW ID when there is room in the fifo; aw_ready mirrors the same condition
+  assign w_fifo_push        = err_req.aw_valid & ~w_fifo_full;
+  assign err_resp.aw_ready  = ~w_fifo_full;
+
+  fifo_v3 #(
+    .FALL_THROUGH ( 1'b1      ),
+    .DEPTH        ( MaxTrans  ),
+    .dtype        ( id_t      )
+  ) i_w_fifo (
+    .clk_i      ( clk_i             ),
+    .rst_ni     ( rst_ni            ),
+    .flush_i    ( 1'b0              ),
+    .testmode_i ( test_i            ),
+    .full_o     ( w_fifo_full       ),
+    .empty_o    ( w_fifo_empty      ),
+    .usage_o    (                   ),
+    .data_i     ( err_req.aw.id     ),
+    .push_i     ( w_fifo_push       ),
+    .data_o     ( w_fifo_data       ),
+    .pop_i      ( w_fifo_pop        )
+  );
+
+  always_comb begin : proc_w_channel
+    err_resp.w_ready  = 1'b0;
+    w_fifo_pop        = 1'b0;
+    b_fifo_push       = 1'b0;
+    if (!w_fifo_empty && !b_fifo_full) begin
+      // accept W beats; of the W payload only w.last is looked at
+      err_resp.w_ready = 1'b1;
+      // on the last W beat of a burst: retire the AW ID and queue it for the B response
+      if (err_req.w_valid && err_req.w.last) begin
+        w_fifo_pop    = 1'b1;
+        b_fifo_push   = 1'b1;
+      end
+    end
+  end
+
+  fifo_v3 #(
+    .FALL_THROUGH ( 1'b0         ),
+    .DEPTH        ( unsigned'(2) ), // two places so that W beats can still be accepted while a B response is pending
+    .dtype        ( id_t         )
+  ) i_b_fifo (
+    .clk_i      ( clk_i        ),
+    .rst_ni     ( rst_ni       ),
+    .flush_i    ( 1'b0         ),
+    .testmode_i ( test_i       ),
+    .full_o     ( b_fifo_full  ),
+    .empty_o    ( b_fifo_empty ),
+    .usage_o    (              ),
+    .data_i     ( w_fifo_data  ),
+    .push_i     ( b_fifo_push  ),
+    .data_o     ( b_fifo_data  ),
+    .pop_i      ( b_fifo_pop   )
+  );
+
+  always_comb begin : proc_b_channel
+    b_fifo_pop        = 1'b0;
+    err_resp.b        = '0;
+    err_resp.b.id     = b_fifo_data;
+    err_resp.b.resp   = Resp;
+    err_resp.b_valid  = 1'b0;
+    if (!b_fifo_empty) begin
+      err_resp.b_valid = 1'b1;
+      // pop the entry in the cycle in which the B response is accepted (b_ready)
+      b_fifo_pop = err_req.b_ready;
+    end
+  end
+
+  //--------------------------------------
+  // Read Transactions
+  //--------------------------------------
+  // push the AR when there is room in the fifo; ar_ready mirrors the same condition
+  assign r_fifo_push        = err_req.ar_valid & ~r_fifo_full;
+  assign err_resp.ar_ready  = ~r_fifo_full;
+
+  // fifo data assignment: only ID and burst length of the AR are kept
+  assign r_fifo_inp.id  = err_req.ar.id;
+  assign r_fifo_inp.len = err_req.ar.len;
+
+  fifo_v3 #(
+    .FALL_THROUGH ( 1'b0      ),
+    .DEPTH        ( MaxTrans  ),
+    .dtype        ( r_data_t  )
+  ) i_r_fifo (
+    .clk_i     ( clk_i        ),
+    .rst_ni    ( rst_ni       ),
+    .flush_i   ( 1'b0         ),
+    .testmode_i( test_i       ),
+    .full_o    ( r_fifo_full  ),
+    .empty_o   ( r_fifo_empty ),
+    .usage_o   (              ),
+    .data_i    ( r_fifo_inp   ),
+    .push_i    ( r_fifo_push  ),
+    .data_o    ( r_fifo_data  ),
+    .pop_i     ( r_fifo_pop   )
+  );
+
+  always_comb begin : proc_r_channel
+    // default assignments
+    r_busy_d    = r_busy_q;
+    r_busy_load = 1'b0;
+    // r fifo signals
+    r_fifo_pop  = 1'b0;
+    // r counter signals
+    r_cnt_clear = 1'b0;
+    r_cnt_en    = 1'b0;
+    r_cnt_load  = 1'b0;
+    // r_channel: every beat carries the ID at the FIFO head, RespData and Resp
+    err_resp.r        = '0;
+    err_resp.r.id     = r_fifo_data.id;
+    err_resp.r.data   = RespData;
+    err_resp.r.resp   = Resp;
+    err_resp.r_valid  = 1'b0;
+    // control
+    if (r_busy_q) begin // a read burst is being answered: drive R beats
+      err_resp.r_valid = 1'b1;
+      err_resp.r.last = (r_current_beat == '0); // last beat when the beat counter reads zero
+      // a beat is transferred when the master is ready
+      if (err_req.r_ready) begin
+        r_cnt_en = 1'b1;
+        if (r_current_beat == '0) begin // final beat transferred: go idle and drop the FIFO entry
+          r_busy_d    = 1'b0;
+          r_busy_load = 1'b1;
+          r_cnt_clear = 1'b1;
+          r_fifo_pop  = 1'b1;
+        end
+      end
+    end else begin
+      // when not busy and fifo not empty: become busy and load the counter (d_i is r_fifo_data.len)
+      if (!r_fifo_empty) begin
+        r_busy_d    = 1'b1;
+        r_busy_load = 1'b1;
+        r_cnt_load  = 1'b1;
+      end
+    end
+  end
+
+  always_ff @(posedge clk_i, negedge rst_ni) begin // busy flag; only updated when r_busy_load is set
+    if (!rst_ni) begin
+      r_busy_q <= '0;
+    end else if (r_busy_load) begin
+      r_busy_q <= r_busy_d;
+    end
+  end
+
+  counter #(
+    .WIDTH     ($bits(axi_pkg::len_t))
+  ) i_r_counter (
+    .clk_i     ( clk_i           ),
+    .rst_ni    ( rst_ni          ),
+    .clear_i   ( r_cnt_clear     ),
+    .en_i      ( r_cnt_en        ),
+    .load_i    ( r_cnt_load      ),
+    .down_i    ( 1'b1            ), // NOTE(review): presumably selects down-counting; confirm against the counter module
+    .d_i       ( r_fifo_data.len ),
+    .q_o       ( r_current_beat  ),
+    .overflow_o(                 )
+  );
+
+  // pragma translate_off
+  `ifndef VERILATOR
+  `ifndef XSIM
+  initial begin
+    assert (Resp == axi_pkg::RESP_DECERR || Resp == axi_pkg::RESP_SLVERR) else
+      $fatal(1, "This module may only generate RESP_DECERR or RESP_SLVERR responses!");
+  end
+  default disable iff (!rst_ni);
+  if (!ATOPs) begin : gen_assert_atops_unsupported
+    assume property( @(posedge clk_i) (slv_req_i.aw_valid |-> slv_req_i.aw.atop == '0)) else
+     $fatal(1, "Got ATOP but not configured to support ATOPs!");
+  end
+  `endif
+  `endif
+  // pragma translate_on
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv
new file mode 100644
index 0000000..e9359b9
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv
@@ -0,0 +1,161 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+// AXI ID Prepend: This module prepends `pre_id_i` as the most significant bits of the AW/AR IDs and strips those bits again from the B/R IDs.
+// Constraints enforced through assertions: relation between the ID widths of the slave and master ports.
+
+module axi_id_prepend #(
+  parameter int unsigned NoBus             = 1,     // Can take multiple axi busses
+  parameter int unsigned AxiIdWidthSlvPort = 4,     // AXI ID Width of the Slave Ports
+  parameter int unsigned AxiIdWidthMstPort = 6,     // AXI ID Width of the Master Ports
+  parameter type         slv_aw_chan_t     = logic, // AW Channel Type for slv port
+  parameter type         slv_w_chan_t      = logic, //  W Channel Type for slv port
+  parameter type         slv_b_chan_t      = logic, //  B Channel Type for slv port
+  parameter type         slv_ar_chan_t     = logic, // AR Channel Type for slv port
+  parameter type         slv_r_chan_t      = logic, //  R Channel Type for slv port
+  parameter type         mst_aw_chan_t     = logic, // AW Channel Type for mst port
+  parameter type         mst_w_chan_t      = logic, //  W Channel Type for mst port
+  parameter type         mst_b_chan_t      = logic, //  B Channel Type for mst port
+  parameter type         mst_ar_chan_t     = logic, // AR Channel Type for mst port
+  parameter type         mst_r_chan_t      = logic, //  R Channel Type for mst port
+  // DEPENDENT PARAMETER DO NOT OVERWRITE! Number of ID bits added on the master side.
+  parameter int unsigned PreIdWidth        = AxiIdWidthMstPort - AxiIdWidthSlvPort
+) (
+  input  logic [PreIdWidth-1:0] pre_id_i, // ID to be prepended (the same value is used for all NoBus busses)
+  // slave port (input), connect master modules here
+  // AW channel
+  input  slv_aw_chan_t [NoBus-1:0] slv_aw_chans_i,
+  input  logic         [NoBus-1:0] slv_aw_valids_i,
+  output logic         [NoBus-1:0] slv_aw_readies_o,
+  //  W channel
+  input  slv_w_chan_t  [NoBus-1:0] slv_w_chans_i,
+  input  logic         [NoBus-1:0] slv_w_valids_i,
+  output logic         [NoBus-1:0] slv_w_readies_o,
+  //  B channel
+  output slv_b_chan_t  [NoBus-1:0] slv_b_chans_o,
+  output logic         [NoBus-1:0] slv_b_valids_o,
+  input  logic         [NoBus-1:0] slv_b_readies_i,
+  // AR channel
+  input  slv_ar_chan_t [NoBus-1:0] slv_ar_chans_i,
+  input  logic         [NoBus-1:0] slv_ar_valids_i,
+  output logic         [NoBus-1:0] slv_ar_readies_o,
+  //  R channel
+  output slv_r_chan_t  [NoBus-1:0] slv_r_chans_o,
+  output logic         [NoBus-1:0] slv_r_valids_o,
+  input  logic         [NoBus-1:0] slv_r_readies_i,
+  // master ports (output), connect slave modules here
+  // AW channel
+  output mst_aw_chan_t [NoBus-1:0] mst_aw_chans_o,
+  output logic         [NoBus-1:0] mst_aw_valids_o,
+  input  logic         [NoBus-1:0] mst_aw_readies_i,
+  //  W channel
+  output mst_w_chan_t  [NoBus-1:0] mst_w_chans_o,
+  output logic         [NoBus-1:0] mst_w_valids_o,
+  input  logic         [NoBus-1:0] mst_w_readies_i,
+  //  B channel
+  input  mst_b_chan_t  [NoBus-1:0] mst_b_chans_i,
+  input  logic         [NoBus-1:0] mst_b_valids_i,
+  output logic         [NoBus-1:0] mst_b_readies_o,
+  // AR channel
+  output mst_ar_chan_t [NoBus-1:0] mst_ar_chans_o,
+  output logic         [NoBus-1:0] mst_ar_valids_o,
+  input  logic         [NoBus-1:0] mst_ar_readies_i,
+  //  R channel
+  input  mst_r_chan_t  [NoBus-1:0] mst_r_chans_i,
+  input  logic         [NoBus-1:0] mst_r_valids_i,
+  output logic         [NoBus-1:0] mst_r_readies_o
+);
+
+  // prepend the ID on AW/AR, strip it again on B/R
+  for (genvar i = 0; i < NoBus; i++) begin : gen_id_prepend
+    if (PreIdWidth == 0) begin : gen_no_prepend // equal ID widths: AW/AR pass through unchanged
+      assign mst_aw_chans_o[i] = slv_aw_chans_i[i];
+      assign mst_ar_chans_o[i] = slv_ar_chans_i[i];
+    end else begin : gen_prepend // copy the channel, then overwrite the ID with {pre_id_i, slave ID}
+      always_comb begin
+        mst_aw_chans_o[i] = slv_aw_chans_i[i];
+        mst_ar_chans_o[i] = slv_ar_chans_i[i];
+        mst_aw_chans_o[i].id = {pre_id_i, slv_aw_chans_i[i].id[AxiIdWidthSlvPort-1:0]};
+        mst_ar_chans_o[i].id = {pre_id_i, slv_ar_chans_i[i].id[AxiIdWidthSlvPort-1:0]};
+      end
+    end
+    // The ID is in the highest bits of the struct, so an assignment from a channel with a wide ID
+    // to a channel with a shorter ID correctly cuts the prepended ID.
+    assign slv_b_chans_o[i] = mst_b_chans_i[i];
+    assign slv_r_chans_o[i] = mst_r_chans_i[i];
+  end
+
+  // pass the handshake signals and the W channel straight through
+  assign mst_w_chans_o    = slv_w_chans_i;
+  assign mst_aw_valids_o  = slv_aw_valids_i;
+  assign slv_aw_readies_o = mst_aw_readies_i;
+  assign mst_w_valids_o   = slv_w_valids_i;
+  assign slv_w_readies_o  = mst_w_readies_i;
+  assign slv_b_valids_o   = mst_b_valids_i;
+  assign mst_b_readies_o  = slv_b_readies_i;
+  assign mst_ar_valids_o  = slv_ar_valids_i;
+  assign slv_ar_readies_o = mst_ar_readies_i;
+  assign slv_r_valids_o   = mst_r_valids_i;
+  assign mst_r_readies_o  = slv_r_readies_i;
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin : p_assert
+    assert(NoBus > 0)
+      else $fatal(1, "Input must be at least one element wide.");
+    assert(PreIdWidth == ($bits(mst_aw_chans_o[0].id) - $bits(slv_aw_chans_i[0].id)))
+      else $fatal(1, "Prepend ID Width must be: $bits(mst_aw_chans_o.id)-$bits(slv_aw_chans_i.id)");
+    assert ($bits(mst_aw_chans_o[0].id) >= $bits(slv_aw_chans_i[0].id)) // equal widths are legal: see gen_no_prepend
+      else $fatal(1, "The master AXI port must have an ID at least as wide as the slave port.");
+  end
+
+  aw_id   : assert final( // the final assertions below only check bus index 0
+      mst_aw_chans_o[0].id[$bits(slv_aw_chans_i[0].id)-1:0] === slv_aw_chans_i[0].id)
+        else $fatal (1, "Something with the AW channel ID prepending went wrong.");
+  aw_addr : assert final(mst_aw_chans_o[0].addr === slv_aw_chans_i[0].addr)
+      else $fatal (1, "Something with the AW channel ID prepending went wrong.");
+  aw_len  : assert final(mst_aw_chans_o[0].len === slv_aw_chans_i[0].len)
+      else $fatal (1, "Something with the AW channel ID prepending went wrong.");
+  aw_size : assert final(mst_aw_chans_o[0].size === slv_aw_chans_i[0].size)
+      else $fatal (1, "Something with the AW channel ID prepending went wrong.");
+  aw_qos  : assert final(mst_aw_chans_o[0].qos === slv_aw_chans_i[0].qos)
+      else $fatal (1, "Something with the AW channel ID prepending went wrong.");
+
+  b_id    : assert final(
+      mst_b_chans_i[0].id[$bits(slv_b_chans_o[0].id)-1:0] === slv_b_chans_o[0].id)
+        else $fatal (1, "Something with the B channel ID stripping went wrong.");
+  b_resp  : assert final(mst_b_chans_i[0].resp === slv_b_chans_o[0].resp)
+      else $fatal (1, "Something with the B channel ID stripping went wrong.");
+
+  ar_id   : assert final(
+      mst_ar_chans_o[0].id[$bits(slv_ar_chans_i[0].id)-1:0] === slv_ar_chans_i[0].id)
+        else $fatal (1, "Something with the AR channel ID prepending went wrong.");
+  ar_addr : assert final(mst_ar_chans_o[0].addr === slv_ar_chans_i[0].addr)
+      else $fatal (1, "Something with the AR channel ID prepending went wrong.");
+  ar_len  : assert final(mst_ar_chans_o[0].len === slv_ar_chans_i[0].len)
+      else $fatal (1, "Something with the AR channel ID prepending went wrong.");
+  ar_size : assert final(mst_ar_chans_o[0].size === slv_ar_chans_i[0].size)
+      else $fatal (1, "Something with the AR channel ID prepending went wrong.");
+  ar_qos  : assert final(mst_ar_chans_o[0].qos === slv_ar_chans_i[0].qos)
+      else $fatal (1, "Something with the AR channel ID prepending went wrong.");
+
+  r_id    : assert final(mst_r_chans_i[0].id[$bits(slv_r_chans_o[0].id)-1:0] === slv_r_chans_o[0].id)
+      else $fatal (1, "Something with the R channel ID stripping went wrong.");
+  r_data  : assert final(mst_r_chans_i[0].data === slv_r_chans_o[0].data)
+      else $fatal (1, "Something with the R channel ID stripping went wrong.");
+  r_resp  : assert final(mst_r_chans_i[0].resp === slv_r_chans_o[0].resp)
+      else $fatal (1, "Something with the R channel ID stripping went wrong.");
+`endif
+// pragma translate_on
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv
new file mode 100644
index 0000000..f15648e
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv
@@ -0,0 +1,37 @@
+// Copyright (c) 2014-2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+`include "axi/assign.svh"
+
+/// A connector that joins two AXI interfaces.
+module axi_join_intf (
+  AXI_BUS.Slave  in,  // slave-modport side of the connector
+  AXI_BUS.Master out  // master-modport side of the connector
+);
+
+  `AXI_ASSIGN(out, in) // NOTE(review): presumably wires every channel of `in` to `out`; see axi/assign.svh
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin // elaboration-time width checks; no explicit failure action is given
+    assert(in.AXI_ADDR_WIDTH == out.AXI_ADDR_WIDTH);
+    assert(in.AXI_DATA_WIDTH == out.AXI_DATA_WIDTH);
+    assert(in.AXI_ID_WIDTH   <= out.AXI_ID_WIDTH  ); // `out` may have a wider ID than `in`
+    assert(in.AXI_USER_WIDTH == out.AXI_USER_WIDTH);
+  end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv
new file mode 100644
index 0000000..8e5dc2f
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv
@@ -0,0 +1,237 @@
+// Copyright (c) 2014-2019 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+// - Stefan Mach <smach@iis.ee.ethz.ch>
+
+// Multiple AXI4 cuts.
+//
+// These can be used to relax timing pressure on very long AXI busses.
+module axi_multicut #(
+  parameter int unsigned NoCuts = 32'd1, // Number of axi_cut stages chained in series; 0 connects the ports directly.
+  // AXI channel structs
+  parameter type aw_chan_t = logic,
+  parameter type  w_chan_t = logic,
+  parameter type  b_chan_t = logic,
+  parameter type ar_chan_t = logic,
+  parameter type  r_chan_t = logic,
+  // AXI request & response structs
+  parameter type     req_t = logic,
+  parameter type    resp_t = logic
+) (
+  input  logic  clk_i,   // Clock
+  input  logic  rst_ni,  // Asynchronous reset active low
+  // slave port
+  input  req_t  slv_req_i,
+  output resp_t slv_resp_o,
+  // master port
+  output req_t  mst_req_o,
+  input  resp_t mst_resp_i
+);
+
+  if (NoCuts == '0) begin : gen_no_cut
+    // degenerate case, connect input to output
+    assign mst_req_o  = slv_req_i;
+    assign slv_resp_o = mst_resp_i;
+  end else begin : gen_axi_cut
+    // request/response at every cut boundary: index 0 is the slave side, index NoCuts the master side
+    req_t  [NoCuts:0] cut_req;
+    resp_t [NoCuts:0] cut_resp;
+
+    // connect slave to the lowest index
+    assign cut_req[0] = slv_req_i;
+    assign slv_resp_o = cut_resp[0];
+
+    // AXI cuts: stage i sits between boundary i and boundary i+1
+    for (genvar i = 0; i < NoCuts; i++) begin : gen_axi_cuts
+      axi_cut #(
+        .Bypass    (      1'b0 ),
+        .aw_chan_t ( aw_chan_t ),
+        .w_chan_t  (  w_chan_t ),
+        .b_chan_t  (  b_chan_t ),
+        .ar_chan_t ( ar_chan_t ),
+        .r_chan_t  (  r_chan_t ),
+        .req_t     (     req_t ),
+        .resp_t    (    resp_t )
+      ) i_cut (
+        .clk_i,
+        .rst_ni,
+        .slv_req_i  ( cut_req[i]    ),
+        .slv_resp_o ( cut_resp[i]   ),
+        .mst_req_o  ( cut_req[i+1]  ),
+        .mst_resp_i ( cut_resp[i+1] )
+      );
+    end
+
+    // connect master to the highest index
+    assign mst_req_o        = cut_req[NoCuts];
+    assign cut_resp[NoCuts] = mst_resp_i;
+  end
+
+  // Check the invariants
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial begin
+    assert(NoCuts >= 0); // NoCuts is declared int unsigned, so this check can never fail
+  end
+  `endif
+  // pragma translate_on
+endmodule
+
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+
+// interface wrapper
+module axi_multicut_intf #(
+  parameter int unsigned ADDR_WIDTH = 0, // The address width (must be > 0 and match both interfaces).
+  parameter int unsigned DATA_WIDTH = 0, // The data width (must be > 0 and match both interfaces).
+  parameter int unsigned ID_WIDTH   = 0, // The ID width (must be > 0 and match both interfaces).
+  parameter int unsigned USER_WIDTH = 0, // The user data width (must be > 0 and match both interfaces).
+  parameter int unsigned NUM_CUTS   = 0  // The number of cuts, forwarded to axi_multicut as NoCuts.
+) (
+  input logic    clk_i,
+  input logic    rst_ni,
+  AXI_BUS.Slave  in,
+  AXI_BUS.Master out
+);
+
+  typedef logic [ID_WIDTH-1:0]     id_t;
+  typedef logic [ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [DATA_WIDTH-1:0]   data_t;
+  typedef logic [DATA_WIDTH/8-1:0] strb_t; // one strobe bit per data byte
+  typedef logic [USER_WIDTH-1:0]   user_t;
+
+  `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)
+
+  req_t  slv_req,  mst_req;   // struct view of the `in` / `out` request signals
+  resp_t slv_resp, mst_resp;  // struct view of the `in` / `out` response signals
+
+  `AXI_ASSIGN_TO_REQ(slv_req, in)
+  `AXI_ASSIGN_FROM_RESP(in, slv_resp)
+
+  `AXI_ASSIGN_FROM_REQ(out, mst_req)
+  `AXI_ASSIGN_TO_RESP(mst_resp, out)
+
+  axi_multicut #(
+    .NoCuts    (  NUM_CUTS ),
+    .aw_chan_t ( aw_chan_t ),
+    .w_chan_t  (  w_chan_t ),
+    .b_chan_t  (  b_chan_t ),
+    .ar_chan_t ( ar_chan_t ),
+    .r_chan_t  (  r_chan_t ),
+    .req_t     (     req_t ),
+    .resp_t    (    resp_t )
+  ) i_axi_multicut (
+    .clk_i,
+    .rst_ni,
+    .slv_req_i  ( slv_req  ),
+    .slv_resp_o ( slv_resp ),
+    .mst_req_o  ( mst_req  ),
+    .mst_resp_i ( mst_resp )
+  );
+
+  // Check the invariants: non-zero widths, and both interfaces parameterized like this module.
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial begin
+    assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter");
+    assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter");
+    assert (ID_WIDTH   > 0) else $fatal(1, "Wrong id   width parameter");
+    assert (USER_WIDTH > 0) else $fatal(1, "Wrong user width parameter");
+    assert (in.AXI_ADDR_WIDTH  == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (in.AXI_DATA_WIDTH  == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (in.AXI_ID_WIDTH    == ID_WIDTH)   else $fatal(1, "Wrong interface definition");
+    assert (in.AXI_USER_WIDTH  == USER_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_ID_WIDTH   == ID_WIDTH)   else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition");
+  end
+  `endif
+  // pragma translate_on
+endmodule
+
+module axi_lite_multicut_intf #(
+  // The address width (must be > 0 and match both interfaces).
+  parameter int unsigned ADDR_WIDTH = 0,
+  // The data width (must be > 0 and match both interfaces).
+  parameter int unsigned DATA_WIDTH = 0,
+  // The number of cuts, forwarded to axi_multicut as NoCuts.
+  parameter int unsigned NUM_CUTS   = 0
+) (
+  input logic     clk_i  ,
+  input logic     rst_ni ,
+  AXI_LITE.Slave  in     ,
+  AXI_LITE.Master out
+);
+
+  typedef logic [ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [DATA_WIDTH-1:0]   data_t;
+  typedef logic [DATA_WIDTH/8-1:0] strb_t; // one strobe bit per data byte
+
+  `AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t)
+  `AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t)
+  `AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_t)
+  `AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t)
+  `AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_t, data_t)
+  `AXI_LITE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
+  `AXI_LITE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)
+
+  req_t  slv_req,  mst_req;   // struct view of the `in` / `out` request signals
+  resp_t slv_resp, mst_resp;  // struct view of the `in` / `out` response signals
+
+  `AXI_LITE_ASSIGN_TO_REQ(slv_req, in)
+  `AXI_LITE_ASSIGN_FROM_RESP(in, slv_resp)
+
+  `AXI_LITE_ASSIGN_FROM_REQ(out, mst_req)
+  `AXI_LITE_ASSIGN_TO_RESP(mst_resp, out)
+
+  axi_multicut #( // the AXI4 multicut is reused with the AXI-Lite channel structs
+    .NoCuts    (  NUM_CUTS ),
+    .aw_chan_t ( aw_chan_t ),
+    .w_chan_t  (  w_chan_t ),
+    .b_chan_t  (  b_chan_t ),
+    .ar_chan_t ( ar_chan_t ),
+    .r_chan_t  (  r_chan_t ),
+    .req_t     (     req_t ),
+    .resp_t    (    resp_t )
+  ) i_axi_multicut (
+    .clk_i,
+    .rst_ni,
+    .slv_req_i  ( slv_req  ),
+    .slv_resp_o ( slv_resp ),
+    .mst_req_o  ( mst_req  ),
+    .mst_resp_i ( mst_resp )
+  );
+
+  // Check the invariants: non-zero widths, and both interfaces parameterized like this module.
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial begin
+    assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter");
+    assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter");
+    assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
+    assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
+  end
+  `endif
+  // pragma translate_on
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv
new file mode 100644
index 0000000..59ee3ec
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv
@@ -0,0 +1,522 @@
+// Copyright (c) 2019 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+// AXI Multiplexer: This module multiplexes the AXI4 slave ports down to one master port.
+// The AXI IDs from the slave ports get extended with the respective slave port index.
+// The extension width is `$clog2(NoSlvPorts)` bits. This means the AXI ID for the
+// master port has to be `$clog2(NoSlvPorts)` bits wider than the ID for the
+// slave ports.
+// Responses are switched based on these bits. For example, with 4 slave ports
+// a response with ID `6'b100110` will be forwarded to slave port 2 (`2'b10`).
+
+// register macros
+`include "common_cells/registers.svh"
+
+module axi_mux #(
+  // AXI parameter and channel types
+  parameter int unsigned SlvAxiIDWidth = 32'd0, // AXI ID width, slave ports
+  parameter type         slv_aw_chan_t = logic, // AW Channel Type, slave ports
+  parameter type         mst_aw_chan_t = logic, // AW Channel Type, master port
+  parameter type         w_chan_t      = logic, //  W Channel Type, all ports
+  parameter type         slv_b_chan_t  = logic, //  B Channel Type, slave ports
+  parameter type         mst_b_chan_t  = logic, //  B Channel Type, master port
+  parameter type         slv_ar_chan_t = logic, // AR Channel Type, slave ports
+  parameter type         mst_ar_chan_t = logic, // AR Channel Type, master port
+  parameter type         slv_r_chan_t  = logic, //  R Channel Type, slave ports
+  parameter type         mst_r_chan_t  = logic, //  R Channel Type, master port
+  parameter type         slv_req_t     = logic, // Slave port request type
+  parameter type         slv_resp_t    = logic, // Slave port response type
+  parameter type         mst_req_t     = logic, // Master ports request type
+  parameter type         mst_resp_t    = logic, // Master ports response type
+  parameter int unsigned NoSlvPorts    = 32'd0, // Number of slave ports
+  // Maximum number of outstanding write transactions, sets the depth of the W select FIFO
+  parameter int unsigned MaxWTrans     = 32'd8,
+  // Fall-through mode of the W select FIFO; spill registers are configured separately below
+  parameter bit          FallThrough   = 1'b0,
+  // add spill register on write master ports, adds a cycle latency on write channels
+  parameter bit          SpillAw       = 1'b1,
+  parameter bit          SpillW        = 1'b0,
+  parameter bit          SpillB        = 1'b0,
+  // add spill register on read master ports, adds a cycle latency on read channels
+  parameter bit          SpillAr       = 1'b1,
+  parameter bit          SpillR        = 1'b0
+) (
+  input  logic                       clk_i,    // Clock
+  input  logic                       rst_ni,   // Asynchronous reset active low
+  input  logic                       test_i,   // Test Mode enable
+  // slave ports (AXI inputs), connect master modules here
+  input  slv_req_t  [NoSlvPorts-1:0] slv_reqs_i,
+  output slv_resp_t [NoSlvPorts-1:0] slv_resps_o,
+  // master port (AXI outputs), connect slave modules here
+  output mst_req_t                   mst_req_o,
+  input  mst_resp_t                  mst_resp_i
+);
+
+  localparam int unsigned MstIdxBits    = $clog2(NoSlvPorts); // width of the slave port index
+  localparam int unsigned MstAxiIDWidth = SlvAxiIDWidth + MstIdxBits; // ID width, master port
+
+  // pass through if only one slave port
+  if (NoSlvPorts == 32'h1) begin : gen_no_mux
+    assign mst_req_o      = slv_reqs_i[0];
+    assign slv_resps_o[0] = mst_resp_i;
+  // other non degenerate cases
+  end else begin : gen_mux
+
+    typedef logic [MstIdxBits-1:0] switch_id_t; // slave port index
+
+    // AXI channels between the ID prepend unit and the rest of the multiplexer
+    mst_aw_chan_t [NoSlvPorts-1:0] slv_aw_chans;
+    logic         [NoSlvPorts-1:0] slv_aw_valids, slv_aw_readies;
+    w_chan_t      [NoSlvPorts-1:0] slv_w_chans;
+    logic         [NoSlvPorts-1:0] slv_w_valids,  slv_w_readies;
+    mst_b_chan_t  [NoSlvPorts-1:0] slv_b_chans;
+    logic         [NoSlvPorts-1:0] slv_b_valids,  slv_b_readies;
+    mst_ar_chan_t [NoSlvPorts-1:0] slv_ar_chans;
+    logic         [NoSlvPorts-1:0] slv_ar_valids, slv_ar_readies;
+    mst_r_chan_t  [NoSlvPorts-1:0] slv_r_chans;
+    logic         [NoSlvPorts-1:0] slv_r_valids,  slv_r_readies;
+
+    // These signals are all ID prepended
+    // AW channel
+    mst_aw_chan_t   mst_aw_chan;
+    logic           mst_aw_valid, mst_aw_ready;
+
+    // AW master handshake internal, so that we are able to stall, if w_fifo is full
+    logic           aw_valid,     aw_ready;
+
+    // FF to lock the AW valid signal: a new arbitration decision is pushed into the W FIFO
+    // exactly once; if the master port then stalls, the lock keeps AW valid asserted and
+    // prevents pushing the same decision again. This FF removes the AW to W dependency.
+    logic           lock_aw_valid_d, lock_aw_valid_q;
+    logic           load_aw_lock;
+
+    // signals for the FIFO that holds the last switching decision of the AW channel
+    logic           w_fifo_full,  w_fifo_empty;
+    logic           w_fifo_push,  w_fifo_pop;
+    switch_id_t     w_fifo_data;
+
+    // W channel spill reg
+    w_chan_t        mst_w_chan;
+    logic           mst_w_valid,  mst_w_ready;
+
+    // slave port index encoded in the upper bits of the B ID
+    switch_id_t     switch_b_id;
+
+    // B channel spill reg
+    mst_b_chan_t    mst_b_chan;
+    logic           mst_b_valid;
+
+    // AR channel for when spill is enabled
+    mst_ar_chan_t   mst_ar_chan;
+    logic           ar_valid,     ar_ready;
+
+    // slave port index encoded in the upper bits of the R ID
+    switch_id_t     switch_r_id;
+
+    // R channel spill reg
+    mst_r_chan_t    mst_r_chan;
+    logic           mst_r_valid;
+
+    //--------------------------------------
+    // ID prepend for all slave ports
+    //--------------------------------------
+    for (genvar i = 0; i < NoSlvPorts; i++) begin : gen_id_prepend
+      axi_id_prepend #(
+        .NoBus            ( 32'd1               ), // one AXI bus per slave port
+        .AxiIdWidthSlvPort( SlvAxiIDWidth       ),
+        .AxiIdWidthMstPort( MstAxiIDWidth       ),
+        .slv_aw_chan_t    ( slv_aw_chan_t       ),
+        .slv_w_chan_t     ( w_chan_t            ),
+        .slv_b_chan_t     ( slv_b_chan_t        ),
+        .slv_ar_chan_t    ( slv_ar_chan_t       ),
+        .slv_r_chan_t     ( slv_r_chan_t        ),
+        .mst_aw_chan_t    ( mst_aw_chan_t       ),
+        .mst_w_chan_t     ( w_chan_t            ),
+        .mst_b_chan_t     ( mst_b_chan_t        ),
+        .mst_ar_chan_t    ( mst_ar_chan_t       ),
+        .mst_r_chan_t     ( mst_r_chan_t        )
+      ) i_id_prepend (
+        .pre_id_i         ( switch_id_t'(i)         ),
+        .slv_aw_chans_i   ( slv_reqs_i[i].aw        ),
+        .slv_aw_valids_i  ( slv_reqs_i[i].aw_valid  ),
+        .slv_aw_readies_o ( slv_resps_o[i].aw_ready ),
+        .slv_w_chans_i    ( slv_reqs_i[i].w         ),
+        .slv_w_valids_i   ( slv_reqs_i[i].w_valid   ),
+        .slv_w_readies_o  ( slv_resps_o[i].w_ready  ),
+        .slv_b_chans_o    ( slv_resps_o[i].b        ),
+        .slv_b_valids_o   ( slv_resps_o[i].b_valid  ),
+        .slv_b_readies_i  ( slv_reqs_i[i].b_ready   ),
+        .slv_ar_chans_i   ( slv_reqs_i[i].ar        ),
+        .slv_ar_valids_i  ( slv_reqs_i[i].ar_valid  ),
+        .slv_ar_readies_o ( slv_resps_o[i].ar_ready ),
+        .slv_r_chans_o    ( slv_resps_o[i].r        ),
+        .slv_r_valids_o   ( slv_resps_o[i].r_valid  ),
+        .slv_r_readies_i  ( slv_reqs_i[i].r_ready   ),
+        .mst_aw_chans_o   ( slv_aw_chans[i]         ),
+        .mst_aw_valids_o  ( slv_aw_valids[i]        ),
+        .mst_aw_readies_i ( slv_aw_readies[i]       ),
+        .mst_w_chans_o    ( slv_w_chans[i]          ),
+        .mst_w_valids_o   ( slv_w_valids[i]         ),
+        .mst_w_readies_i  ( slv_w_readies[i]        ),
+        .mst_b_chans_i    ( slv_b_chans[i]          ),
+        .mst_b_valids_i   ( slv_b_valids[i]         ),
+        .mst_b_readies_o  ( slv_b_readies[i]        ),
+        .mst_ar_chans_o   ( slv_ar_chans[i]         ),
+        .mst_ar_valids_o  ( slv_ar_valids[i]        ),
+        .mst_ar_readies_i ( slv_ar_readies[i]       ),
+        .mst_r_chans_i    ( slv_r_chans[i]          ),
+        .mst_r_valids_i   ( slv_r_valids[i]         ),
+        .mst_r_readies_o  ( slv_r_readies[i]        )
+      );
+    end
+
+    //--------------------------------------
+    // AW Channel
+    //--------------------------------------
+    rr_arb_tree #(
+      .NumIn    ( NoSlvPorts    ),
+      .DataType ( mst_aw_chan_t ),
+      .AxiVldRdy( 1'b1          ),
+      .LockIn   ( 1'b1          )
+    ) i_aw_arbiter (
+      .clk_i  ( clk_i           ),
+      .rst_ni ( rst_ni          ),
+      .flush_i( 1'b0            ),
+      .rr_i   ( '0              ),
+      .req_i  ( slv_aw_valids   ),
+      .gnt_o  ( slv_aw_readies  ),
+      .data_i ( slv_aw_chans    ),
+      .gnt_i  ( aw_ready        ),
+      .req_o  ( aw_valid        ),
+      .data_o ( mst_aw_chan     ),
+      .idx_o  (                 )
+    );
+
+    // control of the AW channel
+    always_comb begin
+      // default assignments
+      lock_aw_valid_d = lock_aw_valid_q;
+      load_aw_lock    = 1'b0;
+      w_fifo_push     = 1'b0;
+      mst_aw_valid    = 1'b0;
+      aw_ready        = 1'b0;
+      // had a downstream stall, be valid and send the AW along
+      if (lock_aw_valid_q) begin
+        mst_aw_valid = 1'b1;
+        // handshake on the master side: grant the arbiter and release the lock
+        if (mst_aw_ready) begin
+          aw_ready        = 1'b1;
+          lock_aw_valid_d = 1'b0;
+          load_aw_lock    = 1'b1;
+        end
+      end else begin
+        if (!w_fifo_full && aw_valid) begin // a new AW needs room in the W FIFO
+          mst_aw_valid = 1'b1;
+          w_fifo_push = 1'b1;
+          if (mst_aw_ready) begin
+            aw_ready = 1'b1;
+          end else begin
+            // go to lock if transaction not in this cycle
+            lock_aw_valid_d = 1'b1;
+            load_aw_lock    = 1'b1;
+          end
+        end
+      end
+    end
+
+    `FFLARN(lock_aw_valid_q, lock_aw_valid_d, load_aw_lock, '0, clk_i, rst_ni)
+
+    fifo_v3 #(
+      .FALL_THROUGH ( FallThrough ),
+      .DEPTH        ( MaxWTrans   ),
+      .dtype        ( switch_id_t )
+    ) i_w_fifo (
+      .clk_i     ( clk_i                                     ),
+      .rst_ni    ( rst_ni                                    ),
+      .flush_i   ( 1'b0                                      ),
+      .testmode_i( test_i                                    ),
+      .full_o    ( w_fifo_full                               ),
+      .empty_o   ( w_fifo_empty                              ),
+      .usage_o   (                                           ),
+      .data_i    ( mst_aw_chan.id[SlvAxiIDWidth+:MstIdxBits] ),
+      .push_i    ( w_fifo_push                               ),
+      .data_o    ( w_fifo_data                               ),
+      .pop_i     ( w_fifo_pop                                )
+    );
+
+    spill_register #(
+      .T       ( mst_aw_chan_t ),
+      .Bypass  ( ~SpillAw      ) // bypass unless SpillAw requests a spill reg
+    ) i_aw_spill_reg (
+      .clk_i   ( clk_i               ),
+      .rst_ni  ( rst_ni              ),
+      .valid_i ( mst_aw_valid        ),
+      .ready_o ( mst_aw_ready        ),
+      .data_i  ( mst_aw_chan         ),
+      .valid_o ( mst_req_o.aw_valid  ),
+      .ready_i ( mst_resp_i.aw_ready ),
+      .data_o  ( mst_req_o.aw        )
+    );
+
+    //--------------------------------------
+    // W Channel
+    //--------------------------------------
+    // multiplexer
+    assign mst_w_chan = slv_w_chans[w_fifo_data];
+    always_comb begin
+      // default assignments
+      mst_w_valid   = 1'b0;
+      slv_w_readies = '0;
+      w_fifo_pop    = 1'b0;
+      // control
+      if (!w_fifo_empty) begin
+        // connect the handshake
+        mst_w_valid                = slv_w_valids[w_fifo_data];
+        slv_w_readies[w_fifo_data] = mst_w_ready;
+        // pop FIFO when the last W beat of the burst is transferred
+        w_fifo_pop = slv_w_valids[w_fifo_data] & mst_w_ready & mst_w_chan.last;
+      end
+    end
+
+    spill_register #(
+      .T       ( w_chan_t ),
+      .Bypass  ( ~SpillW  )
+    ) i_w_spill_reg (
+      .clk_i   ( clk_i              ),
+      .rst_ni  ( rst_ni             ),
+      .valid_i ( mst_w_valid        ),
+      .ready_o ( mst_w_ready        ),
+      .data_i  ( mst_w_chan         ),
+      .valid_o ( mst_req_o.w_valid  ),
+      .ready_i ( mst_resp_i.w_ready ),
+      .data_o  ( mst_req_o.w        )
+    );
+
+    //--------------------------------------
+    // B Channel
+    //--------------------------------------
+    // replicate B channels
+    assign slv_b_chans  = {NoSlvPorts{mst_b_chan}};
+    // control B channel handshake
+    assign switch_b_id  = mst_b_chan.id[SlvAxiIDWidth+:MstIdxBits];
+    assign slv_b_valids = (mst_b_valid) ? (1 << switch_b_id) : '0;
+
+    spill_register #(
+      .T       ( mst_b_chan_t ),
+      .Bypass  ( ~SpillB      )
+    ) i_b_spill_reg (
+      .clk_i   ( clk_i                      ),
+      .rst_ni  ( rst_ni                     ),
+      .valid_i ( mst_resp_i.b_valid         ),
+      .ready_o ( mst_req_o.b_ready          ),
+      .data_i  ( mst_resp_i.b               ),
+      .valid_o ( mst_b_valid                ),
+      .ready_i ( slv_b_readies[switch_b_id] ),
+      .data_o  ( mst_b_chan                 )
+    );
+
+    //--------------------------------------
+    // AR Channel
+    //--------------------------------------
+    rr_arb_tree #(
+      .NumIn    ( NoSlvPorts    ),
+      .DataType ( mst_ar_chan_t ),
+      .AxiVldRdy( 1'b1          ),
+      .LockIn   ( 1'b1          )
+    ) i_ar_arbiter (
+      .clk_i  ( clk_i           ),
+      .rst_ni ( rst_ni          ),
+      .flush_i( 1'b0            ),
+      .rr_i   ( '0              ),
+      .req_i  ( slv_ar_valids   ),
+      .gnt_o  ( slv_ar_readies  ),
+      .data_i ( slv_ar_chans    ),
+      .gnt_i  ( ar_ready        ),
+      .req_o  ( ar_valid        ),
+      .data_o ( mst_ar_chan     ),
+      .idx_o  (                 )
+    );
+
+    spill_register #(
+      .T       ( mst_ar_chan_t ),
+      .Bypass  ( ~SpillAr      )
+    ) i_ar_spill_reg (
+      .clk_i   ( clk_i               ),
+      .rst_ni  ( rst_ni              ),
+      .valid_i ( ar_valid            ),
+      .ready_o ( ar_ready            ),
+      .data_i  ( mst_ar_chan         ),
+      .valid_o ( mst_req_o.ar_valid  ),
+      .ready_i ( mst_resp_i.ar_ready ),
+      .data_o  ( mst_req_o.ar        )
+    );
+
+    //--------------------------------------
+    // R Channel
+    //--------------------------------------
+    // replicate R channels
+    assign slv_r_chans  = {NoSlvPorts{mst_r_chan}};
+    // R channel handshake control
+    assign switch_r_id  = mst_r_chan.id[SlvAxiIDWidth+:MstIdxBits];
+    assign slv_r_valids = (mst_r_valid) ? (1 << switch_r_id) : '0;
+
+    spill_register #(
+      .T       ( mst_r_chan_t ),
+      .Bypass  ( ~SpillR      )
+    ) i_r_spill_reg (
+      .clk_i   ( clk_i                      ),
+      .rst_ni  ( rst_ni                     ),
+      .valid_i ( mst_resp_i.r_valid         ),
+      .ready_o ( mst_req_o.r_ready          ),
+      .data_i  ( mst_resp_i.r               ),
+      .valid_o ( mst_r_valid                ),
+      .ready_i ( slv_r_readies[switch_r_id] ),
+      .data_o  ( mst_r_chan                 )
+    );
+  end
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    assert (SlvAxiIDWidth > 0) else $fatal(1, "AXI ID width of slave ports must be non-zero!");
+    assert (NoSlvPorts > 0) else $fatal(1, "Number of slave ports must be non-zero!");
+    assert (MaxWTrans > 0)
+      else $fatal(1, "Maximum number of outstanding writes must be non-zero!");
+    assert (MstAxiIDWidth >= SlvAxiIDWidth + $clog2(NoSlvPorts))
+      else $fatal(1, "AXI ID width of master ports must be wide enough to identify slave ports!");
+    // Assert ID widths (one slave is sufficient since they all have the same type).
+    assert ($unsigned($bits(slv_reqs_i[0].aw.id)) == SlvAxiIDWidth)
+      else $fatal(1, "ID width of AW channel of slave ports does not match parameter!");
+    assert ($unsigned($bits(slv_reqs_i[0].ar.id)) == SlvAxiIDWidth)
+      else $fatal(1, "ID width of AR channel of slave ports does not match parameter!");
+    assert ($unsigned($bits(slv_resps_o[0].b.id)) == SlvAxiIDWidth)
+      else $fatal(1, "ID width of B channel of slave ports does not match parameter!");
+    assert ($unsigned($bits(slv_resps_o[0].r.id)) == SlvAxiIDWidth)
+      else $fatal(1, "ID width of R channel of slave ports does not match parameter!");
+    assert ($unsigned($bits(mst_req_o.aw.id)) == MstAxiIDWidth)
+      else $fatal(1, "ID width of AW channel of master port is wrong!");
+    assert ($unsigned($bits(mst_req_o.ar.id)) == MstAxiIDWidth)
+      else $fatal(1, "ID width of AR channel of master port is wrong!");
+    assert ($unsigned($bits(mst_resp_i.b.id)) == MstAxiIDWidth)
+      else $fatal(1, "ID width of B channel of master port is wrong!");
+    assert ($unsigned($bits(mst_resp_i.r.id)) == MstAxiIDWidth)
+      else $fatal(1, "ID width of R channel of master port is wrong!");
+  end
+`endif
+// pragma translate_on
+endmodule
+
+// interface wrap
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+module axi_mux_intf #(
+  parameter int unsigned SLV_AXI_ID_WIDTH = 32'd0, // Synopsys DC requires default value for params
+  parameter int unsigned MST_AXI_ID_WIDTH = 32'd0, // SLV_AXI_ID_WIDTH + $clog2(NO_SLV_PORTS)
+  parameter int unsigned AXI_ADDR_WIDTH   = 32'd0,
+  parameter int unsigned AXI_DATA_WIDTH   = 32'd0,
+  parameter int unsigned AXI_USER_WIDTH   = 32'd0,
+  parameter int unsigned NO_SLV_PORTS     = 32'd0, // Number of slave ports
+  // Maximum number of outstanding write transactions (`MaxWTrans` of `axi_mux`)
+  parameter int unsigned MAX_W_TRANS      = 32'd8,
+  // forwarded to `FallThrough` of `axi_mux`
+  parameter bit          FALL_THROUGH     = 1'b0,
+  // add spill register on write master ports, adds a cycle latency on write channels
+  parameter bit          SPILL_AW         = 1'b1,
+  parameter bit          SPILL_W          = 1'b0,
+  parameter bit          SPILL_B          = 1'b0,
+  // add spill register on read master ports, adds a cycle latency on read channels
+  parameter bit          SPILL_AR         = 1'b1,
+  parameter bit          SPILL_R          = 1'b0
+) (
+  input  logic   clk_i,                  // Clock
+  input  logic   rst_ni,                 // Asynchronous reset active low
+  input  logic   test_i,                 // Testmode enable
+  AXI_BUS.Slave  slv [NO_SLV_PORTS-1:0], // slave ports
+  AXI_BUS.Master mst                     // master port
+);
+
+  typedef logic [SLV_AXI_ID_WIDTH-1:0] slv_id_t;
+  typedef logic [MST_AXI_ID_WIDTH-1:0] mst_id_t;
+  typedef logic [AXI_ADDR_WIDTH -1:0]  addr_t;
+  typedef logic [AXI_DATA_WIDTH-1:0]   data_t;
+  typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t;
+  typedef logic [AXI_USER_WIDTH-1:0]   user_t;
+  // channel typedefs: slave and master side differ only in the ID type
+  `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t)
+  `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t)
+
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+
+  `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t)
+
+  `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, slv_id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t)
+
+  `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t)
+
+  `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t)
+
+  `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t)
+
+  slv_req_t  [NO_SLV_PORTS-1:0] slv_reqs;
+  slv_resp_t [NO_SLV_PORTS-1:0] slv_resps;
+  mst_req_t                     mst_req;
+  mst_resp_t                    mst_resp;
+
+  for (genvar i = 0; i < NO_SLV_PORTS; i++) begin : gen_assign_slv_ports
+    `AXI_ASSIGN_TO_REQ(slv_reqs[i], slv[i])
+    `AXI_ASSIGN_FROM_RESP(slv[i], slv_resps[i])
+  end
+
+  `AXI_ASSIGN_FROM_REQ(mst, mst_req)
+  `AXI_ASSIGN_TO_RESP(mst_resp, mst)
+
+  axi_mux #(
+    .SlvAxiIDWidth ( SLV_AXI_ID_WIDTH ),
+    .slv_aw_chan_t ( slv_aw_chan_t    ), // AW Channel Type, slave ports
+    .mst_aw_chan_t ( mst_aw_chan_t    ), // AW Channel Type, master port
+    .w_chan_t      ( w_chan_t         ), //  W Channel Type, all ports
+    .slv_b_chan_t  ( slv_b_chan_t     ), //  B Channel Type, slave ports
+    .mst_b_chan_t  ( mst_b_chan_t     ), //  B Channel Type, master port
+    .slv_ar_chan_t ( slv_ar_chan_t    ), // AR Channel Type, slave ports
+    .mst_ar_chan_t ( mst_ar_chan_t    ), // AR Channel Type, master port
+    .slv_r_chan_t  ( slv_r_chan_t     ), //  R Channel Type, slave ports
+    .mst_r_chan_t  ( mst_r_chan_t     ), //  R Channel Type, master port
+    .slv_req_t     ( slv_req_t        ),
+    .slv_resp_t    ( slv_resp_t       ),
+    .mst_req_t     ( mst_req_t        ),
+    .mst_resp_t    ( mst_resp_t       ),
+    .NoSlvPorts    ( NO_SLV_PORTS     ), // Number of slave ports
+    .MaxWTrans     ( MAX_W_TRANS      ),
+    .FallThrough   ( FALL_THROUGH     ),
+    .SpillAw       ( SPILL_AW         ),
+    .SpillW        ( SPILL_W          ),
+    .SpillB        ( SPILL_B          ),
+    .SpillAr       ( SPILL_AR         ),
+    .SpillR        ( SPILL_R          )
+  ) i_axi_mux (
+    .clk_i       ( clk_i     ), // Clock
+    .rst_ni      ( rst_ni    ), // Asynchronous reset active low
+    .test_i      ( test_i    ), // Test Mode enable
+    .slv_reqs_i  ( slv_reqs  ),
+    .slv_resps_o ( slv_resps ),
+    .mst_req_o   ( mst_req   ),
+    .mst_resp_i  ( mst_resp  )
+  );
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv
new file mode 100644
index 0000000..92ede55
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv
@@ -0,0 +1,423 @@
+// Copyright (c) 2014-2020 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+// - Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+// - Matheus Cavalcante <matheusd@iis.ee.ethz.ch>
+
+//! AXI Package
+/// Contains all necessary type definitions, constants, and generally useful functions.
+package axi_pkg;
+  /// AXI Transaction Burst Type.
+  typedef logic [1:0] burst_t;
+  /// AXI Transaction Response Type.
+  typedef logic [1:0] resp_t;
+  /// AXI Transaction Cacheability Type.
+  typedef logic [3:0] cache_t;
+  /// AXI Transaction Protection Type.
+  typedef logic [2:0] prot_t;
+  /// AXI Transaction Quality of Service Type.
+  typedef logic [3:0] qos_t;
+  /// AXI Transaction Region Type.
+  typedef logic [3:0] region_t;
+  /// AXI Transaction Length Type.
+  typedef logic [7:0] len_t;
+  /// AXI Transaction Size Type.
+  typedef logic [2:0] size_t;
+  /// AXI5 Atomic Operation Type.
+  typedef logic [5:0] atop_t; // atomic operations
+  /// AXI5 Non-Secure Address Identifier.
+  typedef logic [3:0] nsaid_t;
+
+  /// In a fixed burst:
+  /// - The address is the same for every transfer in the burst.
+  /// - The byte lanes that are valid are constant for all beats in the burst.  However, within
+  ///   those byte lanes, the actual bytes that have `wstrb` asserted can differ for each beat in
+  ///   the burst.
+  /// This burst type is used for repeated accesses to the same location such as when loading or
+  /// emptying a FIFO.
+  localparam BURST_FIXED = 2'b00;
+  /// In an incrementing burst, the address for each transfer in the burst is an increment of the
+  /// address for the previous transfer.  The increment value depends on the size of the transfer.
+  /// For example, the address for each transfer in a burst with a size of 4 bytes is the previous
+  /// address plus four.
+  /// This burst type is used for accesses to normal sequential memory.
+  localparam BURST_INCR  = 2'b01;
+  /// A wrapping burst is similar to an incrementing burst, except that the address wraps around to
+  /// a lower address if an upper address limit is reached.
+  /// The following restrictions apply to wrapping bursts:
+  /// - The start address must be aligned to the size of each transfer.
+  /// - The length of the burst must be 2, 4, 8, or 16 transfers.
+  localparam BURST_WRAP  = 2'b10;
+
+  /// Normal access success.  Indicates that a normal access has been successful. Can also indicate
+  /// that an exclusive access has failed.
+  localparam RESP_OKAY   = 2'b00;
+  /// Exclusive access okay.  Indicates that either the read or write portion of an exclusive access
+  /// has been successful.
+  localparam RESP_EXOKAY = 2'b01;
+  /// Slave error.  Used when the access has reached the slave successfully, but the slave wishes to
+  /// return an error condition to the originating master.
+  localparam RESP_SLVERR = 2'b10;
+  /// Decode error.  Generated, typically by an interconnect component, to indicate that there is no
+  /// slave at the transaction address.
+  localparam RESP_DECERR = 2'b11;
+
+  /// When this bit is asserted, the interconnect, or any component, can delay the transaction
+  /// reaching its final destination for any number of cycles.
+  localparam CACHE_BUFFERABLE = 4'b0001;
+  /// When HIGH, Modifiable indicates that the characteristics of the transaction can be modified.
+  /// When Modifiable is LOW, the transaction is Non-modifiable.
+  localparam CACHE_MODIFIABLE = 4'b0010;
+  /// When this bit is asserted, read allocation of the transaction is recommended but is not
+  /// mandatory.
+  localparam CACHE_RD_ALLOC   = 4'b0100;
+  /// When this bit is asserted, write allocation of the transaction is recommended but is not
+  /// mandatory.
+  localparam CACHE_WR_ALLOC   = 4'b1000;
+
+  /// Maximum number of bytes per beat, as encoded by `size`: `2**size` (see Table A3-2).
+  function automatic shortint unsigned num_bytes(size_t size);
+    num_bytes = 1 << size;
+  endfunction
+
+  /// An overly long address type.
+  /// It lets us define functions that work generically for shorter addresses.  We rely on the
+  /// synthesizer to optimize the unused bits away.
+  typedef logic [127:0] largest_addr_t;
+
+  /// Address `addr` rounded down to a multiple of `2**size` bytes (see A3-51).
+  function automatic largest_addr_t aligned_addr(largest_addr_t addr, size_t size);
+    aligned_addr = (addr >> size) << size; // shift the low `size` bits out, then zero-fill them
+  endfunction
+
+  /// Wrap boundary of a `BURST_WRAP` transfer (see A3-51).
+  /// This is the lowest address accessed within a wrapping burst.
+  /// This address is aligned to the size and length of the burst.
+  /// The length of a `BURST_WRAP` has to be 2, 4, 8, or 16 transfers.
+  function automatic largest_addr_t wrap_boundary (largest_addr_t addr, size_t size, len_t len);
+    largest_addr_t wrap_addr;
+
+    // pragma translate_off
+    `ifndef VERILATOR
+      assume (len == len_t'(4'b1) || len == len_t'(4'b11) || len == len_t'(4'b111) ||
+          len == len_t'(4'b1111)) else
+        $error("AXI BURST_WRAP with not allowed len of: %0h", len);
+    `endif
+    // pragma translate_on
+
+    // In A3-51 the wrap boundary is defined as:
+    // `Wrap_Boundary = (INT(Start_Address / (Number_Bytes × Burst_Length))) ×
+    //    (Number_Bytes × Burst_Length)`
+    // Whereas the aligned address is defined as:
+    // `Aligned_Address = (INT(Start_Address / Number_Bytes)) × Number_Bytes`
+    // This leads to the wrap boundary using the same calculation as the aligned address, difference
+    // being the additional dependency on the burst length. The addition in the case statement
+    // is equal to the multiplication with `Burst_Length` as a shift (used by `aligned_addr`) is
+    // equivalent with multiplication and division by a power of two, which conveniently are the
+    // only allowed values for `len` of a `BURST_WRAP`.
+    unique case (len)
+      4'b1    : wrap_addr = (addr >> (unsigned'(size) + 1)) << (unsigned'(size) + 1); // multiply `Number_Bytes` by `2`
+      4'b11   : wrap_addr = (addr >> (unsigned'(size) + 2)) << (unsigned'(size) + 2); // multiply `Number_Bytes` by `4`
+      4'b111  : wrap_addr = (addr >> (unsigned'(size) + 3)) << (unsigned'(size) + 3); // multiply `Number_Bytes` by `8`
+      4'b1111 : wrap_addr = (addr >> (unsigned'(size) + 4)) << (unsigned'(size) + 4); // multiply `Number_Bytes` by `16`
+      default : wrap_addr = '0;
+    endcase
+    return wrap_addr;
+  endfunction
+
+  /// Address of beat `i_beat` (counted from 0) within a burst (see A3-51).
+  function automatic largest_addr_t
+  beat_addr(largest_addr_t addr, size_t size, len_t len, burst_t burst, shortint unsigned i_beat);
+    largest_addr_t ret_addr = addr;
+    largest_addr_t wrp_bond = '0;
+    if (burst == BURST_WRAP) begin
+      // do not trigger the function if there is no wrapping burst, to prevent assumptions firing
+      wrp_bond = wrap_boundary(addr, size, len);
+    end
+    if (i_beat != 0 && burst != BURST_FIXED) begin
+      // From A3-51:
+      // For an INCR burst, and for a WRAP burst for which the address has not wrapped, this
+      // equation determines the address of any transfer after the first transfer in a burst:
+      // `Address_N = Aligned_Address + (N – 1) × Number_Bytes` (N counts from 1 to len!)
+      ret_addr = aligned_addr(addr, size) + i_beat * num_bytes(size);
+      // From A3-51:
+      // For a WRAP burst, if Address_N = Wrap_Boundary + (Number_Bytes × Burst_Length), then:
+      // * Use this equation for the current transfer:
+      //     `Address_N = Wrap_Boundary`
+      // * Use this equation for any subsequent transfers:
+      //     `Address_N = Start_Address + ((N – 1) × Number_Bytes) – (Number_Bytes × Burst_Length)`
+      // This means that the address calculation of a `BURST_WRAP` fundamentally works the same
+      // as for a `BURST_INCR`, the difference is when the calculated address increments
+      // over the wrap threshold, the address wraps around by subtracting the accessed address
+      // space from the normal `BURST_INCR` address. The lower wrap boundary is equivalent to
+      // the wrap trigger condition minus the container size (`num_bytes(size) * (len + 1)`).
+      if (burst == BURST_WRAP && ret_addr >= wrp_bond + (num_bytes(size) * (len + 1))) begin
+        ret_addr = ret_addr - (num_bytes(size) * (len + 1));
+      end
+    end
+    return ret_addr;
+  endfunction
+
+  /// Lowest byte lane of beat `i_beat`: the beat address modulo `strobe_width` (see A3-51).
+  function automatic shortint unsigned
+  beat_lower_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst,
+      shortint unsigned strobe_width, shortint unsigned i_beat);
+    largest_addr_t beat_start = beat_addr(addr, size, len, burst, i_beat);
+    return beat_start % strobe_width;
+  endfunction
+
+  /// Index of highest byte in beat `i_beat` (see A3-51).
+  function automatic shortint unsigned
+  beat_upper_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst,
+      shortint unsigned strobe_width, shortint unsigned i_beat);
+    if (i_beat == 0) begin  // first beat: last byte of the size-aligned word, relative to the bus-aligned base
+      return aligned_addr(addr, size) + (num_bytes(size) - 1) - (addr / strobe_width) * strobe_width;
+    end else begin  // later beats: lowest byte of the beat plus the transfer size minus one
+      return beat_lower_byte(addr, size, len, burst, strobe_width, i_beat) + num_bytes(size) - 1;
+    end
+  endfunction
+
+  /// Is the bufferable bit set?  Returns the reduction-OR of `cache` masked by `CACHE_BUFFERABLE`.
+  function automatic logic bufferable(cache_t cache);
+    return |(cache & CACHE_BUFFERABLE);  // 1 if any masked bit is set
+  endfunction
+
+  /// Is the modifiable bit set?  Returns the reduction-OR of `cache` masked by `CACHE_MODIFIABLE`.
+  function automatic logic modifiable(cache_t cache);
+    return |(cache & CACHE_MODIFIABLE);  // 1 if any masked bit is set
+  endfunction
+
+  /// Memory Type.  Argument type of `get_arcache` and `get_awcache`, which map it to a 4-bit field.
+  typedef enum logic [3:0] {
+    DEVICE_NONBUFFERABLE,               // implicit value 0; the following entries count upwards
+    DEVICE_BUFFERABLE,
+    NORMAL_NONCACHEABLE_NONBUFFERABLE,
+    NORMAL_NONCACHEABLE_BUFFERABLE,
+    WTHRU_NOALLOCATE,
+    WTHRU_RALLOCATE,
+    WTHRU_WALLOCATE,
+    WTHRU_RWALLOCATE,
+    WBACK_NOALLOCATE,
+    WBACK_RALLOCATE,
+    WBACK_WALLOCATE,
+    WBACK_RWALLOCATE                    // implicit value 11; 12..15 of the 4-bit base are unnamed
+  } mem_type_t;
+
+  /// Create an `AR_CACHE` field from a `mem_type_t` type (no `default`: only named types match).
+  function automatic logic [3:0] get_arcache(mem_type_t mtype);
+    unique case (mtype)
+      DEVICE_NONBUFFERABLE              : return 4'b0000;
+      DEVICE_BUFFERABLE                 : return 4'b0001;
+      NORMAL_NONCACHEABLE_NONBUFFERABLE : return 4'b0010;
+      NORMAL_NONCACHEABLE_BUFFERABLE    : return 4'b0011;
+      WTHRU_NOALLOCATE                  : return 4'b1010;
+      WTHRU_RALLOCATE                   : return 4'b1110;
+      WTHRU_WALLOCATE                   : return 4'b1010;  // same encoding as WTHRU_NOALLOCATE
+      WTHRU_RWALLOCATE                  : return 4'b1110;  // same encoding as WTHRU_RALLOCATE
+      WBACK_NOALLOCATE                  : return 4'b1011;
+      WBACK_RALLOCATE                   : return 4'b1111;
+      WBACK_WALLOCATE                   : return 4'b1011;  // same encoding as WBACK_NOALLOCATE
+      WBACK_RWALLOCATE                  : return 4'b1111;  // same encoding as WBACK_RALLOCATE
+    endcase // mtype
+  endfunction
+
+  /// Create an `AW_CACHE` field from a `mem_type_t` type (no `default`: only named types match).
+  function automatic logic [3:0] get_awcache(mem_type_t mtype);
+    unique case (mtype)
+      DEVICE_NONBUFFERABLE              : return 4'b0000;
+      DEVICE_BUFFERABLE                 : return 4'b0001;
+      NORMAL_NONCACHEABLE_NONBUFFERABLE : return 4'b0010;
+      NORMAL_NONCACHEABLE_BUFFERABLE    : return 4'b0011;
+      WTHRU_NOALLOCATE                  : return 4'b0110;
+      WTHRU_RALLOCATE                   : return 4'b0110;  // same encoding as WTHRU_NOALLOCATE
+      WTHRU_WALLOCATE                   : return 4'b1110;
+      WTHRU_RWALLOCATE                  : return 4'b1110;  // same encoding as WTHRU_WALLOCATE
+      WBACK_NOALLOCATE                  : return 4'b0111;
+      WBACK_RALLOCATE                   : return 4'b0111;  // same encoding as WBACK_NOALLOCATE
+      WBACK_WALLOCATE                   : return 4'b1111;
+      WBACK_RWALLOCATE                  : return 4'b1111;  // same encoding as WBACK_WALLOCATE
+    endcase // mtype
+  endfunction
+
+  /// RESP precedence: DECERR > SLVERR > OKAY > EXOKAY.  This is not defined in the AXI standard but
+  /// depends on the implementation.  We consistently use the precedence above.  Rationale:
+  /// - EXOKAY means an exclusive access was successful, whereas OKAY means it was not.  Thus, if
+  ///   OKAY and EXOKAY are to be merged, OKAY precedes because the exclusive access was not fully
+  ///   successful.
+  /// - Both DECERR and SLVERR mean (part of) a transaction was unsuccessful, whereas OKAY means an
+  ///   entire transaction was successful.  Thus both DECERR and SLVERR precede OKAY.
+  /// - DECERR means (part of) a transaction could not be routed to a slave component, whereas
+  ///   SLVERR means the transaction reached a slave component but led to an error condition there.
+  ///   Thus DECERR precedes SLVERR because DECERR happens earlier in the handling of a transaction.
+  function automatic resp_t resp_precedence(resp_t resp_a, resp_t resp_b);  // returns the preceding one
+    unique case (resp_a)
+      RESP_OKAY: begin
+        // Any response except EXOKAY precedes OKAY.
+        if (resp_b == RESP_EXOKAY) begin
+          return resp_a;
+        end else begin
+          return resp_b;
+        end
+      end
+      RESP_EXOKAY: begin
+        // Any response precedes EXOKAY.
+        return resp_b;
+      end
+      RESP_SLVERR: begin
+        // Only DECERR precedes SLVERR.
+        if (resp_b == RESP_DECERR) begin
+          return resp_b;
+        end else begin
+          return resp_a;
+        end
+      end
+      RESP_DECERR: begin
+        // No response precedes DECERR.
+        return resp_a;
+      end
+    endcase
+  endfunction
+
+  // ATOP[5:0]: complete six-bit encodings
+  /// - Sends a single data value with an address.
+  /// - The target swaps the value at the addressed location with the data value that is supplied in
+  ///   the transaction.
+  /// - The original data value at the addressed location is returned.
+  /// - Outbound data size is 1, 2, 4, or 8 bytes.
+  /// - Inbound data size is the same as the outbound data size.
+  localparam ATOP_ATOMICSWAP  = 6'b110000;
+  /// - Sends two data values, the compare value and the swap value, to the addressed location.
+  ///   The compare and swap values are of equal size.
+  /// - The data value at the addressed location is checked against the compare value:
+  ///   - If the values match, the swap value is written to the addressed location.
+  ///   - If the values do not match, the swap value is not written to the addressed location.
+  /// - The original data value at the addressed location is returned.
+  /// - Outbound data size is 2, 4, 8, 16, or 32 bytes.
+  /// - Inbound data size is half of the outbound data size because the outbound data contains both
+  ///   compare and swap values, whereas the inbound data has only the original data value.
+  localparam ATOP_ATOMICCMP   = 6'b110001;
+  // ATOP[5:4]: two-bit operation class
+  /// Perform no atomic operation.
+  localparam ATOP_NONE        = 2'b00;
+  /// - Sends a single data value with an address and the atomic operation to be performed.
+  /// - The target performs the operation using the sent data and value at the addressed location as
+  ///   operands.
+  /// - The result is stored in the address location.
+  /// - A single response is given without data.
+  /// - Outbound data size is 1, 2, 4, or 8 bytes.
+  localparam ATOP_ATOMICSTORE = 2'b01;
+  /// - Sends a single data value with an address and the atomic operation to be performed.
+  /// - The original data value at the addressed location is returned.
+  /// - The target performs the operation using the sent data and value at the addressed location as
+  ///   operands.
+  /// - The result is stored in the address location.
+  /// - Outbound data size is 1, 2, 4, or 8 bytes.
+  /// - Inbound data size is the same as the outbound data size.
+  localparam ATOP_ATOMICLOAD  = 2'b10;
+  // ATOP[3]: endianness bit
+  /// For AtomicStore and AtomicLoad transactions `AWATOP[3]` indicates the endianness that is
+  /// required for the atomic operation.  The value of `AWATOP[3]` applies to arithmetic operations
+  /// only and is ignored for bitwise logical operations.
+  /// When deasserted, this bit indicates that the operation is little-endian.
+  localparam ATOP_LITTLE_END  = 1'b0;
+  /// When asserted, this bit indicates that the operation is big-endian.
+  localparam ATOP_BIG_END     = 1'b1;
+  // ATOP[2:0]: three-bit operation selector
+  /// The value in memory is added to the sent data and the result stored in memory.
+  localparam ATOP_ADD   = 3'b000;
+  /// Every set bit in the sent data clears the corresponding bit of the data in memory.
+  localparam ATOP_CLR   = 3'b001;
+  /// Bitwise exclusive OR of the sent data and value in memory.
+  localparam ATOP_EOR   = 3'b010;
+  /// Every set bit in the sent data sets the corresponding bit of the data in memory.
+  localparam ATOP_SET   = 3'b011;
+  /// The value stored in memory is the maximum of the existing value and sent data. This operation
+  /// assumes signed data.
+  localparam ATOP_SMAX  = 3'b100;
+  /// The value stored in memory is the minimum of the existing value and sent data. This operation
+  /// assumes signed data.
+  localparam ATOP_SMIN  = 3'b101;
+  /// The value stored in memory is the maximum of the existing value and sent data. This operation
+  /// assumes unsigned data.
+  localparam ATOP_UMAX  = 3'b110;
+  /// The value stored in memory is the minimum of the existing value and sent data. This operation
+  /// assumes unsigned data.
+  localparam ATOP_UMIN  = 3'b111;
+  // ATOP[5] == 1'b1 indicates that an atomic transaction has a read response.
+  // Usage, e.g.: if (req_i.aw.atop[axi_pkg::ATOP_R_RESP]) begin
+  localparam ATOP_R_RESP = 32'd5;
+
+
+  // `xbar_latency_e` and `xbar_cfg_t` are documented in `doc/axi_xbar.md`.
+  /// Slice on Demux AW channel (bit 9 of the 10-bit mask).
+  localparam logic [9:0] DemuxAw = (1 << 9);
+  /// Slice on Demux W channel (bit 8).
+  localparam logic [9:0] DemuxW  = (1 << 8);
+  /// Slice on Demux B channel (bit 7).
+  localparam logic [9:0] DemuxB  = (1 << 7);
+  /// Slice on Demux AR channel (bit 6).
+  localparam logic [9:0] DemuxAr = (1 << 6);
+  /// Slice on Demux R channel (bit 5).
+  localparam logic [9:0] DemuxR  = (1 << 5);
+  /// Slice on Mux AW channel (bit 4).
+  localparam logic [9:0] MuxAw   = (1 << 4);
+  /// Slice on Mux W channel (bit 3).
+  localparam logic [9:0] MuxW    = (1 << 3);
+  /// Slice on Mux B channel (bit 2).
+  localparam logic [9:0] MuxB    = (1 << 2);
+  /// Slice on Mux AR channel (bit 1).
+  localparam logic [9:0] MuxAr   = (1 << 1);
+  /// Slice on Mux R channel (bit 0).
+  localparam logic [9:0] MuxR    = (1 << 0);
+  /// Latency configuration for `axi_xbar`: each value is an OR of the one-hot slice bits above.
+  typedef enum logic [9:0] {
+    NO_LATENCY    = 10'b000_00_000_00,  // no slice bit set
+    CUT_SLV_AX    = DemuxAw | DemuxAr,  // both Demux address channels
+    CUT_MST_AX    = MuxAw | MuxAr,  // both Mux address channels
+    CUT_ALL_AX    = DemuxAw | DemuxAr | MuxAw | MuxAr,  // all four address channels
+    CUT_SLV_PORTS = DemuxAw | DemuxW | DemuxB | DemuxAr | DemuxR,  // all five Demux channels
+    CUT_MST_PORTS = MuxAw | MuxW | MuxB | MuxAr | MuxR,  // all five Mux channels
+    CUT_ALL_PORTS = 10'b111_11_111_11  // every slice bit set
+  } xbar_latency_e;
+
+  /// Configuration for `axi_xbar`; the meaning of each field is documented in `doc/axi_xbar.md`.
+  typedef struct packed {
+    int unsigned   NoSlvPorts;
+    int unsigned   NoMstPorts;
+    int unsigned   MaxMstTrans;
+    int unsigned   MaxSlvTrans;
+    bit            FallThrough;
+    xbar_latency_e LatencyMode;  // one of the slice-mask presets of `xbar_latency_e`
+    int unsigned   AxiIdWidthSlvPorts;
+    int unsigned   AxiIdUsedSlvPorts;
+    bit            UniqueIds;
+    int unsigned   AxiAddrWidth;
+    int unsigned   AxiDataWidth;
+    int unsigned   NoAddrRules;
+  } xbar_cfg_t;
+
+  /// Commonly used rule types for `axi_xbar` (64-bit addresses).
+  typedef struct packed {
+    int unsigned idx;         // NOTE(review): presumably the port index this rule selects; confirm
+    logic [63:0] start_addr;
+    logic [63:0] end_addr;    // NOTE(review): inclusive vs. exclusive bound not visible here; confirm
+  } xbar_rule_64_t;
+
+  /// Commonly used rule types for `axi_xbar` (32-bit addresses).
+  typedef struct packed {
+    int unsigned idx;         // NOTE(review): presumably the port index this rule selects; confirm
+    logic [31:0] start_addr;
+    logic [31:0] end_addr;    // NOTE(review): inclusive vs. exclusive bound not visible here; confirm
+  } xbar_rule_32_t;
+endpackage
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv
new file mode 100644
index 0000000..c75887a
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv
@@ -0,0 +1,323 @@
+// Copyright (c) 2014-2020 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+// - Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+// - Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+/// An AXI4+ATOP to AXI4-Lite converter with atomic transaction and burst support.
+module axi_to_axi_lite #(
+  parameter int unsigned AxiAddrWidth    = 32'd0,  // must be > 0 (checked below)
+  parameter int unsigned AxiDataWidth    = 32'd0,  // must be > 0 (checked below)
+  parameter int unsigned AxiIdWidth      = 32'd0,  // must be > 0 (checked below)
+  parameter int unsigned AxiUserWidth    = 32'd0,  // forwarded to the burst splitter only
+  parameter int unsigned AxiMaxWriteTxns = 32'd0,  // forwarded to all three sub-modules
+  parameter int unsigned AxiMaxReadTxns  = 32'd0,  // forwarded to burst splitter and ID reflect
+  parameter bit          FallThrough     = 1'b1,  // FIFOs in Fall through mode in ID reflect
+  parameter type         full_req_t      = logic,  // type of `slv_req_i`
+  parameter type         full_resp_t     = logic,  // type of `slv_resp_o`
+  parameter type         lite_req_t      = logic,  // type of `mst_req_o`
+  parameter type         lite_resp_t     = logic   // type of `mst_resp_i`
+) (
+  input  logic       clk_i,    // Clock
+  input  logic       rst_ni,   // Asynchronous reset active low
+  input  logic       test_i,   // Testmode enable
+  // slave port full AXI4+ATOP
+  input  full_req_t  slv_req_i,
+  output full_resp_t slv_resp_o,
+  // master port AXI4-Lite
+  output lite_req_t  mst_req_o,
+  input  lite_resp_t mst_resp_i
+);
+  // full buses between the stages: atop filter -> filtered_* -> burst splitter -> splitted_*
+  full_req_t  filtered_req,  splitted_req;
+  full_resp_t filtered_resp, splitted_resp;
+
+  // Stage 1: atomics adapter so that atomics can be resolved
+  axi_atop_filter #(
+    .AxiIdWidth      ( AxiIdWidth      ),
+    .AxiMaxWriteTxns ( AxiMaxWriteTxns ),
+    .req_t           ( full_req_t      ),
+    .resp_t          ( full_resp_t     )
+  ) i_axi_atop_filter(
+    .clk_i      ( clk_i         ),
+    .rst_ni     ( rst_ni        ),
+    .slv_req_i  ( slv_req_i     ),
+    .slv_resp_o ( slv_resp_o    ),
+    .mst_req_o  ( filtered_req  ),
+    .mst_resp_i ( filtered_resp )
+  );
+
+  // Stage 2: burst splitter so that the ID reflect module has no burst accessing it
+  axi_burst_splitter #(
+    .MaxReadTxns  ( AxiMaxReadTxns  ),
+    .MaxWriteTxns ( AxiMaxWriteTxns ),
+    .AddrWidth    ( AxiAddrWidth    ),
+    .DataWidth    ( AxiDataWidth    ),
+    .IdWidth      ( AxiIdWidth      ),
+    .UserWidth    ( AxiUserWidth    ),
+    .req_t        ( full_req_t      ),
+    .resp_t       ( full_resp_t     )
+  ) i_axi_burst_splitter (
+    .clk_i      ( clk_i         ),
+    .rst_ni     ( rst_ni        ),
+    .slv_req_i  ( filtered_req  ),
+    .slv_resp_o ( filtered_resp ),
+    .mst_req_o  ( splitted_req  ),
+    .mst_resp_i ( splitted_resp )
+  );
+
+  // Stage 3: ID reflect module handles the conversion from full AXI to AXI-Lite in the wiring
+  axi_to_axi_lite_id_reflect #(
+    .AxiIdWidth      ( AxiIdWidth      ),
+    .AxiMaxWriteTxns ( AxiMaxWriteTxns ),
+    .AxiMaxReadTxns  ( AxiMaxReadTxns  ),
+    .FallThrough     ( FallThrough     ),
+    .full_req_t      ( full_req_t      ),
+    .full_resp_t     ( full_resp_t     ),
+    .lite_req_t      ( lite_req_t      ),
+    .lite_resp_t     ( lite_resp_t     )
+  ) i_axi_to_axi_lite_id_reflect (
+    .clk_i      ( clk_i         ),
+    .rst_ni     ( rst_ni        ),
+    .test_i     ( test_i        ),
+    .slv_req_i  ( splitted_req  ),
+    .slv_resp_o ( splitted_resp ),
+    .mst_req_o  ( mst_req_o     ),
+    .mst_resp_i ( mst_resp_i    )
+  );
+
+  // Parameter checks (excluded for synthesis and Verilator): the three widths must be non-zero
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial begin
+    assume (AxiIdWidth   > 0) else $fatal(1, "AXI ID width has to be > 0");
+    assume (AxiAddrWidth > 0) else $fatal(1, "AXI address width has to be > 0");
+    assume (AxiDataWidth > 0) else $fatal(1, "AXI data width has to be > 0");
+  end
+  `endif
+  // pragma translate_on
+endmodule
+
+// Description: This module does the translation of the full AXI4+ATOP to AXI4-Lite signals.
+//              It reflects the ID of the incoming transaction and crops all signals not used
+//              in AXI4-Lite. It requires that incoming AXI4+ATOP transactions have an
+//              `axi_pkg::len_t` of `'0` and an `axi_pkg::atop_t` of `'0` (see assumptions below).
+
+module axi_to_axi_lite_id_reflect #(
+  parameter int unsigned AxiIdWidth      = 32'd0,  // width of the reflected ID (`id_t`)
+  parameter int unsigned AxiMaxWriteTxns = 32'd0,  // depth of the write ID FIFO
+  parameter int unsigned AxiMaxReadTxns  = 32'd0,  // depth of the read ID FIFO
+  parameter bit          FallThrough     = 1'b1,  // FIFOs in fall through mode
+  parameter type         full_req_t      = logic,  // type of `slv_req_i`
+  parameter type         full_resp_t     = logic,  // type of `slv_resp_o`
+  parameter type         lite_req_t      = logic,  // type of `mst_req_o`
+  parameter type         lite_resp_t     = logic   // type of `mst_resp_i`
+) (
+  input  logic       clk_i,    // Clock
+  input  logic       rst_ni,   // Asynchronous reset active low
+  input  logic       test_i,   // Testmode enable
+  // slave port full AXI
+  input  full_req_t  slv_req_i,
+  output full_resp_t slv_resp_o,
+  // master port AXI LITE
+  output lite_req_t  mst_req_o,
+  input  lite_resp_t mst_resp_i
+);
+  typedef logic [AxiIdWidth-1:0] id_t;
+
+  // FIFO status and control signals (aw_* for the write ID FIFO, ar_* for the read ID FIFO)
+  logic aw_full, aw_empty, aw_push, aw_pop, ar_full, ar_empty, ar_push, ar_pop;
+  id_t  aw_reflect_id, ar_reflect_id;
+
+  assign slv_resp_o = '{
+    aw_ready: mst_resp_i.aw_ready & ~aw_full,  // stall AW while the write ID FIFO is full
+    w_ready:  mst_resp_i.w_ready,
+    b: '{
+      id:       aw_reflect_id,  // ID read back from the write ID FIFO
+      resp:     mst_resp_i.b.resp,
+      default:  '0
+    },
+    b_valid:  mst_resp_i.b_valid  & ~aw_empty,  // only while a write ID is stored
+    ar_ready: mst_resp_i.ar_ready & ~ar_full,  // stall AR while the read ID FIFO is full
+    r: '{
+      id:       ar_reflect_id,  // ID read back from the read ID FIFO
+      data:     mst_resp_i.r.data,
+      resp:     mst_resp_i.r.resp,
+      last:     1'b1,  // constant: every read response is flagged as the last beat
+      default:  '0
+    },
+    r_valid: mst_resp_i.r_valid & ~ar_empty,  // only while a read ID is stored
+    default: '0
+  };
+
+  // Write ID reflection: push the AW ID on the AW handshake, pop it on the B handshake
+  assign aw_push = mst_req_o.aw_valid & slv_resp_o.aw_ready;
+  assign aw_pop  = slv_resp_o.b_valid & mst_req_o.b_ready;
+  fifo_v3 #(
+    .FALL_THROUGH ( FallThrough     ),
+    .DEPTH        ( AxiMaxWriteTxns ),
+    .dtype        ( id_t            )
+  ) i_aw_id_fifo (
+    .clk_i     ( clk_i           ),
+    .rst_ni    ( rst_ni          ),
+    .flush_i   ( 1'b0            ),
+    .testmode_i( test_i          ),
+    .full_o    ( aw_full         ),
+    .empty_o   ( aw_empty        ),
+    .usage_o   ( /*not used*/    ),
+    .data_i    ( slv_req_i.aw.id ),
+    .push_i    ( aw_push         ),
+    .data_o    ( aw_reflect_id   ),
+    .pop_i     ( aw_pop          )
+  );
+
+  // Read ID reflection: push the AR ID on the AR handshake, pop it on the R handshake
+  assign ar_push = mst_req_o.ar_valid & slv_resp_o.ar_ready;
+  assign ar_pop  = slv_resp_o.r_valid & mst_req_o.r_ready;
+  fifo_v3 #(
+    .FALL_THROUGH ( FallThrough    ),
+    .DEPTH        ( AxiMaxReadTxns ),
+    .dtype        ( id_t           )
+  ) i_ar_id_fifo (
+    .clk_i     ( clk_i           ),
+    .rst_ni    ( rst_ni          ),
+    .flush_i   ( 1'b0            ),
+    .testmode_i( test_i          ),
+    .full_o    ( ar_full         ),
+    .empty_o   ( ar_empty        ),
+    .usage_o   ( /*not used*/    ),
+    .data_i    ( slv_req_i.ar.id ),
+    .push_i    ( ar_push         ),
+    .data_o    ( ar_reflect_id   ),
+    .pop_i     ( ar_pop          )
+  );
+
+  assign mst_req_o = '{
+    aw: '{
+      addr: slv_req_i.aw.addr,
+      prot: slv_req_i.aw.prot
+    },
+    aw_valid: slv_req_i.aw_valid & ~aw_full,  // hold back AW while the write ID FIFO is full
+    w: '{
+      data: slv_req_i.w.data,
+      strb: slv_req_i.w.strb
+    },
+    w_valid:  slv_req_i.w_valid,
+    b_ready:  slv_req_i.b_ready & ~aw_empty,  // accept B only while a write ID is stored
+    ar: '{
+      addr: slv_req_i.ar.addr,
+      prot: slv_req_i.ar.prot
+    },
+    ar_valid: slv_req_i.ar_valid & ~ar_full,  // hold back AR while the read ID FIFO is full
+    r_ready:  slv_req_i.r_ready  & ~ar_empty,  // accept R only while a read ID is stored
+    default:  '0
+  };
+
+  // Input assumptions (excluded for synthesis and Verilator): no atomics, single-beat transfers
+  // pragma translate_off
+  `ifndef VERILATOR
+  aw_atop: assume property( @(posedge clk_i) disable iff (~rst_ni)
+                        slv_req_i.aw_valid |-> (slv_req_i.aw.atop == '0)) else
+    $fatal(1, "Module does not support atomics. Value observed: %0b", slv_req_i.aw.atop);
+  aw_axi_len: assume property( @(posedge clk_i) disable iff (~rst_ni)
+                        slv_req_i.aw_valid |-> (slv_req_i.aw.len == '0)) else
+    $fatal(1, "AW request length has to be zero. Value observed: %0b", slv_req_i.aw.len);
+  w_axi_last: assume property( @(posedge clk_i) disable iff (~rst_ni)
+                        slv_req_i.w_valid |-> (slv_req_i.w.last == 1'b1)) else
+    $fatal(1, "W last signal has to be one. Value observed: %0b", slv_req_i.w.last);
+  ar_axi_len: assume property( @(posedge clk_i) disable iff (~rst_ni)
+                        slv_req_i.ar_valid |-> (slv_req_i.ar.len == '0)) else
+    $fatal(1, "AR request length has to be zero. Value observed: %0b", slv_req_i.ar.len);
+  `endif
+  // pragma translate_on
+endmodule
+
+// interface wrapper
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+module axi_to_axi_lite_intf #(
+  /// AXI bus parameters (used below to size the address, data, ID, strobe and user types)
+  parameter int unsigned AXI_ADDR_WIDTH     = 32'd0,
+  parameter int unsigned AXI_DATA_WIDTH     = 32'd0,
+  parameter int unsigned AXI_ID_WIDTH       = 32'd0,
+  parameter int unsigned AXI_USER_WIDTH     = 32'd0,
+  /// Maximum number of outstanding writes.
+  parameter int unsigned AXI_MAX_WRITE_TXNS = 32'd1,
+  /// Maximum number of outstanding reads.
+  parameter int unsigned AXI_MAX_READ_TXNS  = 32'd1,
+  parameter bit          FALL_THROUGH       = 1'b1
+) (
+  input logic     clk_i,
+  input logic     rst_ni,
+  input logic     testmode_i,
+  AXI_BUS.Slave   slv,
+  AXI_LITE.Master mst
+);
+  typedef logic [AXI_ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [AXI_DATA_WIDTH-1:0]   data_t;
+  typedef logic [AXI_ID_WIDTH-1:0]       id_t;
+  typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t;  // one strobe bit per data byte
+  typedef logic [AXI_USER_WIDTH-1:0]   user_t;
+  // full channels typedefs (macros come from `axi/typedef.svh`)
+  `AXI_TYPEDEF_AW_CHAN_T(full_aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(full_w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(full_b_chan_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(full_ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(full_r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(full_req_t, full_aw_chan_t, full_w_chan_t, full_ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(full_resp_t, full_b_chan_t, full_r_chan_t)
+  // LITE channels typedef (no ID or user types are passed to these macros)
+  `AXI_LITE_TYPEDEF_AW_CHAN_T(lite_aw_chan_t, addr_t)
+  `AXI_LITE_TYPEDEF_W_CHAN_T(lite_w_chan_t, data_t, strb_t)
+  `AXI_LITE_TYPEDEF_B_CHAN_T(lite_b_chan_t)
+  `AXI_LITE_TYPEDEF_AR_CHAN_T(lite_ar_chan_t, addr_t)
+  `AXI_LITE_TYPEDEF_R_CHAN_T (lite_r_chan_t, data_t)
+  `AXI_LITE_TYPEDEF_REQ_T(lite_req_t, lite_aw_chan_t, lite_w_chan_t, lite_ar_chan_t)
+  `AXI_LITE_TYPEDEF_RESP_T(lite_resp_t, lite_b_chan_t, lite_r_chan_t)
+
+  full_req_t  full_req;   // connected to `slv_req_i` of the converter
+  full_resp_t full_resp;  // connected to `slv_resp_o` of the converter
+  lite_req_t  lite_req;   // connected to `mst_req_o` of the converter
+  lite_resp_t lite_resp;  // connected to `mst_resp_i` of the converter
+
+  `AXI_ASSIGN_TO_REQ(full_req, slv)      // NOTE(review): presumably slv interface -> full_req; see assign.svh
+  `AXI_ASSIGN_FROM_RESP(slv, full_resp)  // NOTE(review): presumably full_resp -> slv interface
+
+  `AXI_LITE_ASSIGN_FROM_REQ(mst, lite_req)  // NOTE(review): presumably lite_req -> mst interface
+  `AXI_LITE_ASSIGN_TO_RESP(lite_resp, mst)  // NOTE(review): presumably mst interface -> lite_resp
+
+  axi_to_axi_lite #(
+    .AxiAddrWidth    ( AXI_ADDR_WIDTH     ),
+    .AxiDataWidth    ( AXI_DATA_WIDTH     ),
+    .AxiIdWidth      ( AXI_ID_WIDTH       ),
+    .AxiUserWidth    ( AXI_USER_WIDTH     ),
+    .AxiMaxWriteTxns ( AXI_MAX_WRITE_TXNS ),
+    .AxiMaxReadTxns  ( AXI_MAX_READ_TXNS  ),
+    .FallThrough     ( FALL_THROUGH       ),  // FIFOs in Fall through mode in ID reflect
+    .full_req_t      ( full_req_t         ),
+    .full_resp_t     ( full_resp_t        ),
+    .lite_req_t      ( lite_req_t         ),
+    .lite_resp_t     ( lite_resp_t        )
+  ) i_axi_to_axi_lite (
+    .clk_i      ( clk_i      ),
+    .rst_ni     ( rst_ni     ),
+    .test_i     ( testmode_i ),
+    // slave port full AXI4+ATOP
+    .slv_req_i  ( full_req   ),
+    .slv_resp_o ( full_resp  ),
+    // master port AXI4-Lite
+    .mst_req_o  ( lite_req   ),
+    .mst_resp_i ( lite_resp  )
+  );
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv
new file mode 100644
index 0000000..d66cd97
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv
@@ -0,0 +1,324 @@
+// Copyright (c) 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+// - Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+// axi_xbar: Fully-connected AXI4+ATOP crossbar with an arbitrary number of slave and master ports.
+// See `doc/axi_xbar.md` for the documentation, including the definition of parameters and ports.
+module axi_xbar #(
+  parameter axi_pkg::xbar_cfg_t Cfg = '0,     // crossbar configuration (port counts, widths, ...)
+  parameter bit  ATOPs              = 1'b1,   // forwarded to the decode-error slave (axi_err_slv)
+  parameter type slv_aw_chan_t      = logic,  // channel/request/response struct types; they are
+  parameter type mst_aw_chan_t      = logic,  // forwarded unchanged to axi_demux (slave-side
+  parameter type w_chan_t           = logic,  // types) and axi_mux (slave- and master-side types)
+  parameter type slv_b_chan_t       = logic,
+  parameter type mst_b_chan_t       = logic,
+  parameter type slv_ar_chan_t      = logic,
+  parameter type mst_ar_chan_t      = logic,
+  parameter type slv_r_chan_t       = logic,
+  parameter type mst_r_chan_t       = logic,
+  parameter type slv_req_t          = logic,
+  parameter type slv_resp_t         = logic,
+  parameter type mst_req_t          = logic,
+  parameter type mst_resp_t         = logic,
+  parameter type rule_t             = axi_pkg::xbar_rule_64_t  // rule type handed to addr_decode
+) (
+  input  logic                                                       clk_i,
+  input  logic                                                       rst_ni,
+  input  logic                                                       test_i,
+  input  slv_req_t  [Cfg.NoSlvPorts-1:0]                             slv_ports_req_i,
+  output slv_resp_t [Cfg.NoSlvPorts-1:0]                             slv_ports_resp_o,
+  output mst_req_t  [Cfg.NoMstPorts-1:0]                             mst_ports_req_o,
+  input  mst_resp_t [Cfg.NoMstPorts-1:0]                             mst_ports_resp_i,
+  input  rule_t     [Cfg.NoAddrRules-1:0]                            addr_map_i,
+  input  logic      [Cfg.NoSlvPorts-1:0]                             en_default_mst_port_i,
+  input  logic      [Cfg.NoSlvPorts-1:0][$clog2(Cfg.NoMstPorts)-1:0] default_mst_port_i
+);
+  // Structure: per slave port two addr_decode + axi_demux + axi_err_slv, then one axi_mux per master port.
+  typedef logic [Cfg.AxiAddrWidth-1:0]           addr_t;
+  // to account for the decoding error slave
+  typedef logic [$clog2(Cfg.NoMstPorts + 1)-1:0] mst_port_idx_t;
+
+  // signals from the axi_demuxes, one index more for decode error
+  slv_req_t  [Cfg.NoSlvPorts-1:0][Cfg.NoMstPorts:0]  slv_reqs;
+  slv_resp_t [Cfg.NoSlvPorts-1:0][Cfg.NoMstPorts:0]  slv_resps;
+
+  // workaround for issue #133 (problem with vsim 10.6c)
+  localparam int unsigned cfg_NoMstPorts = Cfg.NoMstPorts;
+
+  // signals into the axi_muxes, are of type slave as the multiplexer extends the ID
+  slv_req_t  [Cfg.NoMstPorts-1:0][Cfg.NoSlvPorts-1:0] mst_reqs;
+  slv_resp_t [Cfg.NoMstPorts-1:0][Cfg.NoSlvPorts-1:0] mst_resps;
+
+  for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_slv_port_demux
+    logic [$clog2(Cfg.NoMstPorts)-1:0] dec_aw,        dec_ar;
+    mst_port_idx_t                     slv_aw_select, slv_ar_select;
+    logic                              dec_aw_valid,  dec_aw_error;
+    logic                              dec_ar_valid,  dec_ar_error;
+
+    addr_decode #(
+      .NoIndices  ( Cfg.NoMstPorts  ),
+      .NoRules    ( Cfg.NoAddrRules ),
+      .addr_t     ( addr_t          ),
+      .rule_t     ( rule_t          )
+    ) i_axi_aw_decode (
+      .addr_i           ( slv_ports_req_i[i].aw.addr ),
+      .addr_map_i       ( addr_map_i                 ),
+      .idx_o            ( dec_aw                     ),
+      .dec_valid_o      ( dec_aw_valid               ),
+      .dec_error_o      ( dec_aw_error               ),
+      .en_default_idx_i ( en_default_mst_port_i[i]   ),
+      .default_idx_i    ( default_mst_port_i[i]      )
+    );
+
+    addr_decode #(
+      .NoIndices  ( Cfg.NoMstPorts  ),
+      .addr_t     ( addr_t          ),
+      .NoRules    ( Cfg.NoAddrRules ),
+      .rule_t     ( rule_t          )
+    ) i_axi_ar_decode (
+      .addr_i           ( slv_ports_req_i[i].ar.addr ),
+      .addr_map_i       ( addr_map_i                 ),
+      .idx_o            ( dec_ar                     ),
+      .dec_valid_o      ( dec_ar_valid               ),
+      .dec_error_o      ( dec_ar_error               ),
+      .en_default_idx_i ( en_default_mst_port_i[i]   ),
+      .default_idx_i    ( default_mst_port_i[i]      )
+    );
+    // a decode error selects the extra demux output (index Cfg.NoMstPorts) that feeds the error slave
+    assign slv_aw_select = (dec_aw_error) ?
+        mst_port_idx_t'(Cfg.NoMstPorts) : mst_port_idx_t'(dec_aw);
+    assign slv_ar_select = (dec_ar_error) ?
+        mst_port_idx_t'(Cfg.NoMstPorts) : mst_port_idx_t'(dec_ar);
+
+    // make sure that the default slave does not get changed, if there is an unserved Ax
+    // pragma translate_off
+    `ifndef VERILATOR
+    `ifndef XSIM
+    default disable iff (~rst_ni);
+    default_aw_mst_port_en: assert property(
+      @(posedge clk_i) (slv_ports_req_i[i].aw_valid && !slv_ports_resp_o[i].aw_ready)
+          |=> $stable(en_default_mst_port_i[i]))
+        else $fatal (1, $sformatf("It is not allowed to change the default mst port\
+                                   enable, when there is an unserved Aw beat. Slave Port: %0d", i));
+    default_aw_mst_port: assert property(
+      @(posedge clk_i) (slv_ports_req_i[i].aw_valid && !slv_ports_resp_o[i].aw_ready)
+          |=> $stable(default_mst_port_i[i]))
+        else $fatal (1, $sformatf("It is not allowed to change the default mst port\
+                                   when there is an unserved Aw beat. Slave Port: %0d", i));
+    default_ar_mst_port_en: assert property(
+      @(posedge clk_i) (slv_ports_req_i[i].ar_valid && !slv_ports_resp_o[i].ar_ready)
+          |=> $stable(en_default_mst_port_i[i]))
+        else $fatal (1, $sformatf("It is not allowed to change the default mst port\
+                                   enable, when there is an unserved Ar beat. Slave Port: %0d", i));
+    default_ar_mst_port: assert property(
+      @(posedge clk_i) (slv_ports_req_i[i].ar_valid && !slv_ports_resp_o[i].ar_ready)
+          |=> $stable(default_mst_port_i[i]))
+        else $fatal (1, $sformatf("It is not allowed to change the default mst port\
+                                   when there is an unserved Ar beat. Slave Port: %0d", i));
+    `endif
+    `endif
+    // pragma translate_on
+    axi_demux #(
+      .AxiIdWidth     ( Cfg.AxiIdWidthSlvPorts ),  // ID Width
+      .aw_chan_t      ( slv_aw_chan_t          ),  // AW Channel Type
+      .w_chan_t       ( w_chan_t               ),  //  W Channel Type
+      .b_chan_t       ( slv_b_chan_t           ),  //  B Channel Type
+      .ar_chan_t      ( slv_ar_chan_t          ),  // AR Channel Type
+      .r_chan_t       ( slv_r_chan_t           ),  //  R Channel Type
+      .req_t          ( slv_req_t              ),
+      .resp_t         ( slv_resp_t             ),
+      .NoMstPorts     ( Cfg.NoMstPorts + 1     ),  // one extra output for the error slave
+      .MaxTrans       ( Cfg.MaxMstTrans        ),
+      .AxiLookBits    ( Cfg.AxiIdUsedSlvPorts  ),
+      .UniqueIds      ( Cfg.UniqueIds          ),
+      .FallThrough    ( Cfg.FallThrough        ),
+      .SpillAw        ( Cfg.LatencyMode[9]     ),
+      .SpillW         ( Cfg.LatencyMode[8]     ),
+      .SpillB         ( Cfg.LatencyMode[7]     ),
+      .SpillAr        ( Cfg.LatencyMode[6]     ),
+      .SpillR         ( Cfg.LatencyMode[5]     )
+    ) i_axi_demux (
+      .clk_i,   // Clock
+      .rst_ni,  // Asynchronous reset active low
+      .test_i,  // Testmode enable
+      .slv_req_i       ( slv_ports_req_i[i]  ),
+      .slv_aw_select_i ( slv_aw_select       ),
+      .slv_ar_select_i ( slv_ar_select       ),
+      .slv_resp_o      ( slv_ports_resp_o[i] ),
+      .mst_reqs_o      ( slv_reqs[i]         ),
+      .mst_resps_i     ( slv_resps[i]        )
+    );
+
+    axi_err_slv #(
+      .AxiIdWidth  ( Cfg.AxiIdWidthSlvPorts ),
+      .req_t       ( slv_req_t              ),
+      .resp_t      ( slv_resp_t             ),
+      .Resp        ( axi_pkg::RESP_DECERR   ),
+      .ATOPs       ( ATOPs                  ),
+      .MaxTrans    ( 4                      )   // Transactions terminate at this slave, so minimize
+                                                // resource consumption by accepting only a few
+                                                // transactions at a time.
+    ) i_axi_err_slv (
+      .clk_i,   // Clock
+      .rst_ni,  // Asynchronous reset active low
+      .test_i,  // Testmode enable
+      // slave port
+      .slv_req_i  ( slv_reqs[i][Cfg.NoMstPorts]   ),
+      .slv_resp_o ( slv_resps[i][cfg_NoMstPorts]  )  // localparam copy: see issue #133 workaround
+    );
+  end
+
+  // cross all channels
+  for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_xbar_slv_cross
+    for (genvar j = 0; j < Cfg.NoMstPorts; j++) begin : gen_xbar_mst_cross
+      assign mst_reqs[j][i]  = slv_reqs[i][j];
+      assign slv_resps[i][j] = mst_resps[j][i];
+    end
+  end
+
+  for (genvar i = 0; i < Cfg.NoMstPorts; i++) begin : gen_mst_port_mux
+    axi_mux #(
+      .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports
+      .slv_aw_chan_t ( slv_aw_chan_t          ), // AW Channel Type, slave ports
+      .mst_aw_chan_t ( mst_aw_chan_t          ), // AW Channel Type, master port
+      .w_chan_t      ( w_chan_t               ), //  W Channel Type, all ports
+      .slv_b_chan_t  ( slv_b_chan_t           ), //  B Channel Type, slave ports
+      .mst_b_chan_t  ( mst_b_chan_t           ), //  B Channel Type, master port
+      .slv_ar_chan_t ( slv_ar_chan_t          ), // AR Channel Type, slave ports
+      .mst_ar_chan_t ( mst_ar_chan_t          ), // AR Channel Type, master port
+      .slv_r_chan_t  ( slv_r_chan_t           ), //  R Channel Type, slave ports
+      .mst_r_chan_t  ( mst_r_chan_t           ), //  R Channel Type, master port
+      .slv_req_t     ( slv_req_t              ),
+      .slv_resp_t    ( slv_resp_t             ),
+      .mst_req_t     ( mst_req_t              ),
+      .mst_resp_t    ( mst_resp_t             ),
+      .NoSlvPorts    ( Cfg.NoSlvPorts         ), // Number of Masters for the module
+      .MaxWTrans     ( Cfg.MaxSlvTrans        ),
+      .FallThrough   ( Cfg.FallThrough        ),
+      .SpillAw       ( Cfg.LatencyMode[4]     ),
+      .SpillW        ( Cfg.LatencyMode[3]     ),
+      .SpillB        ( Cfg.LatencyMode[2]     ),
+      .SpillAr       ( Cfg.LatencyMode[1]     ),
+      .SpillR        ( Cfg.LatencyMode[0]     )
+    ) i_axi_mux (
+      .clk_i,   // Clock
+      .rst_ni,  // Asynchronous reset active low
+      .test_i,  // Test Mode enable
+      .slv_reqs_i  ( mst_reqs[i]         ),
+      .slv_resps_o ( mst_resps[i]        ),
+      .mst_req_o   ( mst_ports_req_o[i]  ),
+      .mst_resp_i  ( mst_ports_resp_i[i] )
+    );
+  end
+
+  // pragma translate_off
+  `ifndef VERILATOR
+  `ifndef XSIM
+  initial begin : check_params
+    id_slv_req_ports: assert ($bits(slv_ports_req_i[0].aw.id ) == Cfg.AxiIdWidthSlvPorts) else
+      $fatal(1, $sformatf("Slv_req and aw_chan id width not equal."));
+    id_slv_resp_ports: assert ($bits(slv_ports_resp_o[0].r.id) == Cfg.AxiIdWidthSlvPorts) else
+      $fatal(1, $sformatf("Slv_resp and r_chan id width not equal."));
+  end
+  `endif
+  `endif
+  // pragma translate_on
+endmodule
+
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+
+module axi_xbar_intf #(
+  parameter int unsigned AXI_USER_WIDTH =  0,
+  parameter axi_pkg::xbar_cfg_t Cfg     = '0,
+  parameter type rule_t                 = axi_pkg::xbar_rule_64_t
+) (
+  input  logic                                                    clk_i,
+  input  logic                                                    rst_ni,
+  input  logic                                                    test_i,
+  AXI_BUS.Slave                                                   slv_ports [Cfg.NoSlvPorts-1:0],
+  AXI_BUS.Master                                                  mst_ports [Cfg.NoMstPorts-1:0],
+  input  rule_t [Cfg.NoAddrRules-1:0]                             addr_map_i,
+  input  logic  [Cfg.NoSlvPorts-1:0]                              en_default_mst_port_i,
+  input  logic  [Cfg.NoSlvPorts-1:0][$clog2(Cfg.NoMstPorts)-1:0]  default_mst_port_i
+);
+  // Master-port ID width: the slave-port ID width plus the bits needed to number the slave ports.
+  localparam int unsigned AxiIdWidthMstPorts = $clog2(Cfg.NoSlvPorts) + Cfg.AxiIdWidthSlvPorts;
+  // Scalar field types used by the channel structs below.
+  typedef logic [Cfg.AxiAddrWidth       -1:0] addr_t;
+  typedef logic [Cfg.AxiDataWidth       -1:0] data_t;
+  typedef logic [Cfg.AxiDataWidth/8     -1:0] strb_t;
+  typedef logic [AXI_USER_WIDTH         -1:0] user_t;
+  typedef logic [Cfg.AxiIdWidthSlvPorts -1:0] id_slv_t;
+  typedef logic [AxiIdWidthMstPorts     -1:0] id_mst_t;
+  // Channel, request and response structs: shared W channel, then slave side, then master side.
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, id_slv_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, id_slv_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, id_slv_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, id_slv_t, user_t)
+  `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t)
+  `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, id_mst_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, id_mst_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, id_mst_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, id_mst_t, user_t)
+  `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t)
+  // Struct-typed counterparts of the interface ports; these connect to the axi_xbar instance.
+  slv_req_t   [Cfg.NoSlvPorts-1:0]  slv_reqs;
+  slv_resp_t  [Cfg.NoSlvPorts-1:0]  slv_resps;
+  mst_req_t   [Cfg.NoMstPorts-1:0]  mst_reqs;
+  mst_resp_t  [Cfg.NoMstPorts-1:0]  mst_resps;
+  // Tie every slave interface to its request/response struct pair.
+  for (genvar s = 0; s < Cfg.NoSlvPorts; s++) begin : gen_assign_slv
+    `AXI_ASSIGN_TO_REQ(slv_reqs[s], slv_ports[s])
+    `AXI_ASSIGN_FROM_RESP(slv_ports[s], slv_resps[s])
+  end
+  // Tie every master interface to its request/response struct pair.
+  for (genvar m = 0; m < Cfg.NoMstPorts; m++) begin : gen_assign_mst
+    `AXI_ASSIGN_FROM_REQ(mst_ports[m], mst_reqs[m])
+    `AXI_ASSIGN_TO_RESP(mst_resps[m], mst_ports[m])
+  end
+  // The struct-based crossbar; ATOPs is not overridden here, so axi_xbar's default applies.
+  axi_xbar #(
+    .Cfg            ( Cfg           ),
+    .rule_t         ( rule_t        ),
+    .w_chan_t       ( w_chan_t      ),
+    .slv_aw_chan_t  ( slv_aw_chan_t ),
+    .slv_b_chan_t   ( slv_b_chan_t  ),
+    .slv_ar_chan_t  ( slv_ar_chan_t ),
+    .slv_r_chan_t   ( slv_r_chan_t  ),
+    .slv_req_t      ( slv_req_t     ),
+    .slv_resp_t     ( slv_resp_t    ),
+    .mst_aw_chan_t  ( mst_aw_chan_t ),
+    .mst_b_chan_t   ( mst_b_chan_t  ),
+    .mst_ar_chan_t  ( mst_ar_chan_t ),
+    .mst_r_chan_t   ( mst_r_chan_t  ),
+    .mst_req_t      ( mst_req_t     ),
+    .mst_resp_t     ( mst_resp_t    )
+  ) i_xbar (
+    .clk_i                 ( clk_i                 ),
+    .rst_ni                ( rst_ni                ),
+    .test_i                ( test_i                ),
+    .addr_map_i            ( addr_map_i            ),
+    .en_default_mst_port_i ( en_default_mst_port_i ),
+    .default_mst_port_i    ( default_mst_port_i    ),
+    .slv_ports_req_i       ( slv_reqs              ),
+    .slv_ports_resp_o      ( slv_resps             ),
+    .mst_ports_req_o       ( mst_reqs              ),
+    .mst_ports_resp_i      ( mst_resps             )
+  );
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh b/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh
new file mode 100644
index 0000000..b64f31a
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh
@@ -0,0 +1,221 @@
+// Copyright 2018, 2021 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+// Description: Common register defines for RTL designs
+
+`ifndef COMMON_CELLS_REGISTERS_SVH_
+`define COMMON_CELLS_REGISTERS_SVH_
+
+// Abridged Summary of available FF macros:
+// `FF:      asynchronous active-low reset
+// `FFAR:    asynchronous active-high reset
+// `FFARN:   [deprecated] asynchronous active-low reset
+// `FFSR:    synchronous active-high reset
+// `FFSRN:   synchronous active-low reset
+// `FFNR:    without reset
+// `FFL:     load-enable and asynchronous active-low reset
+// `FFLAR:   load-enable and asynchronous active-high reset
+// `FFLARN:  [deprecated] load-enable and asynchronous active-low reset
+// `FFLARNC: load-enable and asynchronous active-low reset and synchronous active-high clear
+// `FFLSR:   load-enable and synchronous active-high reset
+// `FFLSRN:  load-enable and synchronous active-low reset
+// `FFLNR:   load-enable without reset
+
+`ifdef VERILATOR  // NO_SYNOPSYS_FF drops the synopsys pragma comment from the sync-reset macros below
+`define NO_SYNOPSYS_FF 1
+`endif
+// Default clock and reset names used by the FF and FFL macros when those arguments are omitted.
+`define REG_DFLT_CLK clk_i
+`define REG_DFLT_RST rst_ni
+
+// Flip-Flop with asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// (__clk: clock input, optional; defaults to REG_DFLT_CLK)
+// (__arst_n: asynchronous reset, active-low, optional; defaults to REG_DFLT_RST; parenthesized on use)
+`define FF(__q, __d, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                           \
+    if (!(__arst_n)) begin                                                           \
+      __q <= (__reset_value);                                                        \
+    end else begin                                                                   \
+      __q <= (__d);                                                                  \
+    end                                                                              \
+  end
+
+// Flip-Flop with asynchronous active-high reset
+// __q: Q output of FF (target of the non-blocking assignments)
+// __d: D input of FF, taken over on a rising clock edge while __arst is not asserted
+// __reset_value: value assigned upon reset
+// __clk: clock input (rising edge)
+// __arst: asynchronous reset, active-high (its rising edge is in the sensitivity list)
+`define FFAR(__q, __d, __reset_value, __clk, __arst)     \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin \
+    if (__arst) begin                                    \
+      __q <= (__reset_value);                            \
+    end else begin                                       \
+      __q <= (__d);                                      \
+    end                                                  \
+  end
+
+// DEPRECATED - use `FF instead (this macro only forwards its five arguments to FF)
+// Flip-Flop with asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input (no default here, unlike FF)
+// __arst_n: asynchronous reset, active-low (no default here, unlike FF)
+`define FFARN(__q, __d, __reset_value, __clk, __arst_n) \
+  `FF(__q, __d, __reset_value, __clk, __arst_n)
+
+// Flip-Flop with synchronous active-high reset (no reset term in the sensitivity list)
+// __q: Q output of FF
+// __d: D input of FF, taken over on a rising clock edge while __reset_clk is low
+// __reset_value: value assigned upon reset
+// __clk: clock input (rising edge)
+// __reset_clk: reset input, active-high; also named in the synopsys pragma unless NO_SYNOPSYS_FF is set
+`define FFSR(__q, __d, __reset_value, __clk, __reset_clk) \
+  `ifndef NO_SYNOPSYS_FF                       \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/       \
+    `endif                        \
+  always_ff @(posedge (__clk)) begin                      \
+    __q <= (__reset_clk) ? (__reset_value) : (__d);       \
+  end
+
+// Flip-Flop with synchronous active-low reset (no reset term in the sensitivity list)
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_n_clk: reset input, active-low (parenthesized before negation, so expressions are safe)
+`define FFSRN(__q, __d, __reset_value, __clk, __reset_n_clk) \
+    `ifndef NO_SYNOPSYS_FF                                   \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/        \
+    `endif                                                   \
+  always_ff @(posedge (__clk)) begin                         \
+    __q <= (!(__reset_n_clk)) ? (__reset_value) : (__d);     \
+  end
+
+// Always-enable Flip-Flop without reset (the register has no defined value until first clocked)
+// __q: Q output of FF
+// __d: D input of FF, taken over on every rising clock edge
+// __clk: clock input (rising edge)
+`define FFNR(__q, __d, __clk)        \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (__d);                    \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset (implicit clock and reset)
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF (otherwise __q keeps its value)
+// __reset_value: value assigned upon reset
+// (__clk: clock input, optional; defaults to REG_DFLT_CLK)
+// (__arst_n: asynchronous reset, active-low, optional; defaults to REG_DFLT_RST; parenthesized on use)
+`define FFL(__q, __d, __load, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                                    \
+    if (!(__arst_n)) begin                                                                    \
+      __q <= (__reset_value);                                                                 \
+    end else begin                                                                            \
+      __q <= (__load) ? (__d) : (__q);                                                        \
+    end                                                                                       \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-high reset
+// __q: Q output of FF (re-assigned to itself when __load is low)
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input (rising edge)
+// __arst: asynchronous reset, active-high (its rising edge is in the sensitivity list)
+`define FFLAR(__q, __d, __load, __reset_value, __clk, __arst) \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin      \
+    if (__arst) begin                                         \
+      __q <= (__reset_value);                                 \
+    end else begin                                            \
+      __q <= (__load) ? (__d) : (__q);                        \
+    end                                                       \
+  end
+
+// DEPRECATED - use `FFL instead (this macro only forwards its six arguments to FFL)
+// Flip-Flop with load-enable and asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input (no default here, unlike FFL)
+// __arst_n: asynchronous reset, active-low (no default here, unlike FFL)
+`define FFLARN(__q, __d, __load, __reset_value, __clk, __arst_n) \
+  `FFL(__q, __d, __load, __reset_value, __clk, __arst_n)
+
+// Flip-Flop with load-enable and synchronous active-high reset (reset takes priority over load)
+// __q: Q output of FF (re-assigned to itself when neither reset nor load is active)
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input (rising edge)
+// __reset_clk: reset input, active-high; also named in the synopsys pragma unless NO_SYNOPSYS_FF is set
+`define FFLSR(__q, __d, __load, __reset_value, __clk, __reset_clk)       \
+    `ifndef NO_SYNOPSYS_FF                                               \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/                      \
+    `endif                                                               \
+  always_ff @(posedge (__clk)) begin                                     \
+    __q <= (__reset_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// Flip-Flop with load-enable and synchronous active-low reset (reset takes priority over load)
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_n_clk: reset input, active-low (parenthesized before negation, so expressions are safe)
+`define FFLSRN(__q, __d, __load, __reset_value, __clk, __reset_n_clk)         \
+    `ifndef NO_SYNOPSYS_FF                                                    \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/                         \
+    `endif                                                                    \
+  always_ff @(posedge (__clk)) begin                                          \
+    __q <= (!(__reset_n_clk)) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset and synchronous clear
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF (lower priority than __clear)
+// __clear: assign reset value into FF (synchronous, active-high)
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset, active-low (parenthesized before negation, so expressions are safe)
+`define FFLARNC(__q, __d, __load, __clear, __reset_value, __clk, __arst_n) \
+    `ifndef NO_SYNOPSYS_FF                                                 \
+  /``* synopsys sync_set_reset `"__clear`" *``/                            \
+    `endif                                                                 \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                 \
+    if (!(__arst_n)) begin                                                 \
+      __q <= (__reset_value);                                              \
+    end else begin                                                         \
+      __q <= (__clear) ? (__reset_value) : ((__load) ? (__d) : (__q));     \
+    end                                                                    \
+  end
+
+// Load-enable Flip-Flop without reset (the register has no defined value until first loaded)
+// __q: Q output of FF (re-assigned to itself when __load is low)
+// __d: D input of FF
+// __load: load d value into FF
+// __clk: clock input (rising edge)
+`define FFLNR(__q, __d, __load, __clk) \
+  always_ff @(posedge (__clk)) begin   \
+    __q <= (__load) ? (__d) : (__q);   \
+  end
+
+`endif
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv b/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv
new file mode 100644
index 0000000..90a43a0
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv
@@ -0,0 +1,161 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger <wroennin@ethz.ch>
+
+/// Address Decoder: Maps the input address combinatorially to an index.
+/// The address map `addr_map_i` is a packed array of rule_t structs.
+/// The ranges of any two rules may overlap. If so, the rule at the higher (more significant)
+/// position in `addr_map_i` prevails.
+///
+/// There can be an arbitrary number of address rules. There can be multiple
+/// ranges defined for the same index. The start address has to be less than the end address.
+///
+/// There is the possibility to add a default mapping:
+/// `en_default_idx_i`: Driving this port to `1'b1` maps all input addresses
+/// for which no rule in `addr_map_i` exists to the default index specified by
+/// `default_idx_i`. In this case, `dec_error_o` is always `1'b0`.
+///
+/// Assertions: The module checks every time there is a change in the address mapping
+/// if the resulting map is valid. It fatals if `start_addr` is higher than `end_addr`
+/// or if a mapping targets an index that is outside the number of allowed indices.
+/// It issues warnings if the address regions of any two mappings overlap.
+module addr_decode #(
+  /// Number of indices: every `idx` in a rule has to be < `NoIndices`.
+  parameter int unsigned NoIndices = 32'd0,
+  /// Total number of rules.
+  parameter int unsigned NoRules   = 32'd0,
+  /// Address type inside the rules and to decode.
+  parameter type         addr_t    = logic,
+  /// Rule packed struct type.
+  /// The address decoder expects three fields in `rule_t`:
+  ///
+  /// typedef struct packed {
+  ///   int unsigned idx;
+  ///   addr_t       start_addr;
+  ///   addr_t       end_addr;
+  /// } rule_t;
+  ///
+  ///  - `idx`:        index of the rule, has to be < `NoIndices`
+  ///  - `start_addr`: start address of the range the rule describes, value is included in range
+  ///  - `end_addr`:   end address of the range the rule describes, value is NOT included in range
+  parameter type         rule_t    = logic,
+  /// Dependent parameter, do **not** overwrite!
+  ///
+  /// Width of the `idx_o` output port.
+  parameter int unsigned IdxWidth  = cf_math_pkg::idx_width(NoIndices),
+  /// Dependent parameter, do **not** overwrite!
+  ///
+  /// Type of the `idx_o` output port.
+  parameter type         idx_t     = logic [IdxWidth-1:0]
+) (
+  /// Address to decode.
+  input  addr_t               addr_i,
+  /// Address map: rule with the highest array position wins on collision
+  input  rule_t [NoRules-1:0] addr_map_i,
+  /// Decoded index.
+  output idx_t                idx_o,
+  /// Decode is valid.
+  output logic                dec_valid_o,
+  /// Decode is not valid, no matching rule found.
+  output logic                dec_error_o,
+  /// Enable default port mapping.
+  ///
+  /// When not used, tie to `0`.
+  input  logic                en_default_idx_i,
+  /// Default port index.
+  ///
+  /// When `en_default_idx_i` is `1`, this will be the index when no rule matches.
+  ///
+  /// When not used, tie to `0`.
+  input  idx_t                default_idx_i
+);
+
+  logic [NoRules-1:0] matched_rules; // purely for address map debugging
+
+  always_comb begin
+    // default assignments: nothing matched; this is an error unless the default index is enabled
+    matched_rules = '0;
+    dec_valid_o   = 1'b0;
+    dec_error_o   = (en_default_idx_i) ? 1'b0 : 1'b1;
+    idx_o         = (en_default_idx_i) ? default_idx_i : '0;
+
+    // match the rules in ascending order, so a later (higher-position) match overrides earlier ones
+    for (int unsigned i = 0; i < NoRules; i++) begin
+      if ((addr_i >= addr_map_i[i].start_addr) && (addr_i < addr_map_i[i].end_addr)) begin
+        matched_rules[i] = 1'b1;
+        dec_valid_o      = 1'b1;
+        dec_error_o      = 1'b0;
+        idx_o            = idx_t'(addr_map_i[i].idx);
+      end
+    end
+  end
+
+  // Assumptions and assertions
+  `ifndef VERILATOR
+  `ifndef XSIM
+  // pragma translate_off
+  initial begin : proc_check_parameters
+    assume ($bits(addr_i) == $bits(addr_map_i[0].start_addr)) else
+      $warning($sformatf("Input address has %d bits and address map has %d bits.",
+        $bits(addr_i), $bits(addr_map_i[0].start_addr)));
+    assume (NoRules > 0) else
+      $fatal(1, $sformatf("At least one rule needed"));
+    assume (NoIndices > 0) else
+      $fatal(1, $sformatf("At least one index needed"));
+  end
+  // Warn (do not fail) when the decoded address matches more than one rule at the same time.
+  assert final ($onehot0(matched_rules)) else
+    $warning("More than one bit set in the one-hot signal, matched_rules");
+
+  // The following assumptions check the validity of the address map.
+  // They are re-evaluated whenever `addr_map_i` changes and contains no unknown bits.
+  // check_start and check_idx are evaluated once per rule; check_overlap is evaluated
+  // once for each distinct pair of rules (i, j) with j > i, and its condition is
+  // symmetric in the two rules of the pair.
+  // check_start:        Enforces a smaller start than end address.
+  // check_idx:          Enforces a valid index in the rule.
+  // check_overlap:      Warns if there are overlapping address regions.
+  always @(addr_map_i) #0 begin : proc_check_addr_map
+    if (!$isunknown(addr_map_i)) begin
+      for (int unsigned i = 0; i < NoRules; i++) begin
+        check_start : assume (addr_map_i[i].start_addr < addr_map_i[i].end_addr) else
+          $fatal(1, $sformatf("This rule has a higher start than end address!!!\n\
+              Violating rule %d.\n\
+              Rule> IDX: %h START: %h END: %h\n\
+              #####################################################",
+              i ,addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr));
+        // check that the index of the rule is below NoIndices
+        check_idx : assume (addr_map_i[i].idx < NoIndices) else
+            $fatal(1, $sformatf("This rule has a IDX that is not allowed!!!\n\
+            Violating rule %d.\n\
+            Rule> IDX: %h START: %h END: %h\n\
+            Rule> MAX_IDX: %h\n\
+            #####################################################",
+            i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr,
+            (NoIndices-1)));
+        for (int unsigned j = i + 1; j < NoRules; j++) begin
+          // overlap check: two half-open ranges overlap iff each one starts before the other ends
+          check_overlap : assume (!((addr_map_i[j].start_addr < addr_map_i[i].end_addr) &&
+                                    (addr_map_i[j].end_addr > addr_map_i[i].start_addr)))   else
+               $warning($sformatf("Overlapping address region found!!!\n\
+              Rule %d: IDX: %h START: %h END: %h\n\
+              Rule %d: IDX: %h START: %h END: %h\n\
+              #####################################################",
+              i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr,
+              j, addr_map_i[j].idx, addr_map_i[j].start_addr, addr_map_i[j].end_addr));
+        end
+      end
+    end
+  end
+  // pragma translate_on
+  `endif
+  `endif
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv b/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
new file mode 100644
index 0000000..8e770ab
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
@@ -0,0 +1,175 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A two-phase clock domain crossing.
+///
+/// CONSTRAINT: Requires max_delay of min_period(src_clk_i, dst_clk_i) through
+/// the paths async_req, async_ack, async_data.
+/* verilator lint_off DECLFILENAME */
+module cdc_2phase #(
+  parameter type T = logic  // payload type handed from the source to the destination domain
+)(
+  input  logic src_rst_ni,  // reset of the source half (forwarded to i_src.rst_ni)
+  input  logic src_clk_i,   // clock of the source half
+  input  T     src_data_i,  // payload offered by the sender
+  input  logic src_valid_i, // sender offers src_data_i
+  output logic src_ready_o, // source half takes a payload when this and src_valid_i are high
+
+  input  logic dst_rst_ni,  // reset of the destination half (forwarded to i_dst.rst_ni)
+  input  logic dst_clk_i,   // clock of the destination half
+  output T     dst_data_o,  // payload presented to the receiver
+  output logic dst_valid_o, // destination half holds a payload for the receiver
+  input  logic dst_ready_i  // receiver takes the payload when this and dst_valid_o are high
+);
+
+  // Asynchronous handshake signals: the only nets shared by the two halves.
+  (* dont_touch = "true" *) logic async_req;
+  (* dont_touch = "true" *) logic async_ack;
+  (* dont_touch = "true" *) T async_data;
+
+  // The sender in the source domain: drives async_req/async_data, observes async_ack.
+  cdc_2phase_src #(.T(T)) i_src (
+    .rst_ni       ( src_rst_ni  ),
+    .clk_i        ( src_clk_i   ),
+    .data_i       ( src_data_i  ),
+    .valid_i      ( src_valid_i ),
+    .ready_o      ( src_ready_o ),
+    .async_req_o  ( async_req   ),
+    .async_ack_i  ( async_ack   ),
+    .async_data_o ( async_data  )
+  );
+
+  // The receiver in the destination domain: observes async_req/async_data, drives async_ack.
+  cdc_2phase_dst #(.T(T)) i_dst (
+    .rst_ni       ( dst_rst_ni  ),
+    .clk_i        ( dst_clk_i   ),
+    .data_o       ( dst_data_o  ),
+    .valid_o      ( dst_valid_o ),
+    .ready_i      ( dst_ready_i ),
+    .async_req_i  ( async_req   ),
+    .async_ack_o  ( async_ack   ),
+    .async_data_i ( async_data  )
+  );
+
+endmodule
+
+
+/// Half of the two-phase clock domain crossing located in the source domain.
+module cdc_2phase_src #(
+  parameter type T = logic
+)(
+  input  logic rst_ni,
+  input  logic clk_i,
+  input  T     data_i,
+  input  logic valid_i,
+  output logic ready_o,
+  output logic async_req_o,
+  input  logic async_ack_i,
+  output T     async_data_o
+);
+
+  (* dont_touch = "true" *)
+  logic req_src_q, ack_src_q, ack_q;
+  (* dont_touch = "true" *)
+  T data_src_q;
+
+  // Outputs come straight from the registers; ready while request and acknowledge levels agree.
+  assign async_data_o = data_src_q;
+  assign async_req_o  = req_src_q;
+  assign ready_o      = ~(req_src_q ^ ack_q);
+
+  // Synchronization stages for the acknowledge: async_ack_i -> ack_src_q -> ack_q.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      ack_q     <= 1'b0;
+      ack_src_q <= 1'b0;
+    end else begin
+      ack_q     <= ack_src_q;
+      ack_src_q <= async_ack_i;
+    end
+  end
+
+  // Whenever an item is accepted, toggle the request level and capture the payload.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      data_src_q <= '0;
+      req_src_q  <= 1'b0;
+    end else if (valid_i && ready_o) begin
+      data_src_q <= data_i;
+      req_src_q  <= ~req_src_q;
+    end
+  end
+
+endmodule
+
+
+/// Half of the two-phase clock domain crossing located in the destination
+/// domain.
+module cdc_2phase_dst #(
+  parameter type T = logic
+)(
+  input  logic rst_ni,
+  input  logic clk_i,
+  output T     data_o,
+  output logic valid_o,
+  input  logic ready_i,
+  input  logic async_req_i,
+  output logic async_ack_o,
+  input  T     async_data_i
+);
+
+  (* dont_touch = "true" *)
+  (* async_reg = "true" *)
+  logic req_dst_q, req_q0, req_q1, ack_dst_q;
+  (* dont_touch = "true" *)
+  T data_dst_q;
+
+  // Outputs come straight from the registers; valid while request and acknowledge levels differ.
+  assign async_ack_o = ack_dst_q;
+  assign data_o      = data_dst_q;
+  assign valid_o     = ack_dst_q ^ req_q1;
+
+  // Synchronization stages for the request: async_req_i -> req_dst_q -> req_q0 -> req_q1.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      req_q1    <= 1'b0;
+      req_q0    <= 1'b0;
+      req_dst_q <= 1'b0;
+    end else begin
+      req_q1    <= req_q0;
+      req_q0    <= req_dst_q;
+      req_dst_q <= async_req_i;
+    end
+  end
+
+  // Capture the payload in the cycle in which the level change has reached
+  // req_q0 but not yet req_q1, provided no item is currently being presented.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      data_dst_q <= '0;
+    end else if (!valid_o && (req_q0 ^ req_q1)) begin
+      data_dst_q <= async_data_i;
+    end
+  end
+
+  // Toggle the acknowledge level whenever the presented item is taken.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      ack_dst_q <= 1'b0;
+    end else if (valid_o && ready_i) begin
+      ack_dst_q <= ~ack_dst_q;
+    end
+  end
+
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv b/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
new file mode 100644
index 0000000..9f35a44
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
@@ -0,0 +1,61 @@
+// Copyright 2016 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration
+///
+/// This package contains a collection of mathematical functions that are commonly used when defining
+/// the value of constants in HDL code.  These functions are implemented as Verilog constant
+/// functions.  Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a
+/// function whose value can be evaluated at compile time or during elaboration.  A constant function
+/// must be called with arguments that are constants.
+package cf_math_pkg;
+
+    /// Ceiled Division of Two Natural Numbers
+    ///
+    /// Yields `dividend / divisor` rounded towards plus infinity.  The result is obtained by
+    /// counting how often `divisor` can be subtracted from `dividend` before nothing positive
+    /// remains.
+    function automatic integer ceil_div (input longint dividend, input longint divisor);
+        automatic longint remainder;
+
+        // pragma translate_off
+        `ifndef VERILATOR
+        // Argument checks: both operands must be natural numbers and the divisor non-zero.
+        if (dividend < 0) $fatal(1, "Dividend %0d is not a natural number!", dividend);
+        if (divisor < 0)  $fatal(1, "Divisor %0d is not a natural number!", divisor);
+        if (divisor == 0) $fatal(1, "Division by zero!");
+        `endif
+        // pragma translate_on
+
+        // Repeated subtraction: every pass removes one `divisor` and bumps the result by one.
+        ceil_div  = 0;
+        remainder = dividend;
+        while (remainder > 0) begin
+            remainder -= divisor;
+            ceil_div   = ceil_div + 1;
+        end
+    endfunction
+
+    /// Index width required to be able to represent up to `num_idx` indices as a binary
+    /// encoded signal.
+    /// Ensures that the minimum width of an index signal is `1`, regardless of parametrization.
+    ///
+    /// Sample usage in type definition:
+    /// As parameter:
+    ///   `parameter type idx_t = logic[cf_math_pkg::idx_width(NumIdx)-1:0]`
+    /// As typedef:
+    ///   `typedef logic [cf_math_pkg::idx_width(NumIdx)-1:0] idx_t`
+    function automatic integer unsigned idx_width (input integer unsigned num_idx);
+        // Zero or one index would give a $clog2 of 0; fall back to a single bit so that
+        // the `[idx_width-1:0]` range in the usage examples above stays well formed.
+        idx_width = (num_idx <= 32'd1) ? 32'd1 : unsigned'($clog2(num_idx));
+    endfunction
+
+endpackage
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv
new file mode 100644
index 0000000..43392e4
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv
@@ -0,0 +1,43 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Generic up/down counter
+
+module counter #(
+    parameter int unsigned WIDTH = 4,
+    parameter bit STICKY_OVERFLOW = 1'b0
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i, // synchronous clear
+    input  logic             en_i,    // count while high
+    input  logic             load_i,  // take over d_i
+    input  logic             down_i,  // count down instead of up
+    input  logic [WIDTH-1:0] d_i,
+    output logic [WIDTH-1:0] q_o,
+    output logic             overflow_o
+);
+    delta_counter #(
+        .WIDTH           ( WIDTH           ),
+        .STICKY_OVERFLOW ( STICKY_OVERFLOW )
+    ) i_counter (
+        .clk_i      ( clk_i      ),
+        .rst_ni     ( rst_ni     ),
+        .clear_i    ( clear_i    ),
+        .en_i       ( en_i       ),
+        .load_i     ( load_i     ),
+        .down_i     ( down_i     ),
+        .delta_i    ( {{WIDTH-1{1'b0}}, 1'b1} ), // step size is tied to one
+        .d_i        ( d_i        ),
+        .q_o        ( q_o        ),
+        .overflow_o ( overflow_o )
+    );
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv
new file mode 100644
index 0000000..90b5cff
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv
@@ -0,0 +1,74 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Up/down counter with variable delta
+
+module delta_counter #(
+    parameter int unsigned WIDTH = 4,        // width of d_i, delta_i and q_o
+    parameter bit STICKY_OVERFLOW = 1'b0     // 1: gen_sticky_overflow, 0: gen_transient_overflow
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i, // synchronous clear, highest priority
+    input  logic             en_i,    // enable the counter (lowest priority)
+    input  logic             load_i,  // load d_i, wins over en_i
+    input  logic             down_i,  // subtract delta_i instead of adding it
+    input  logic [WIDTH-1:0] delta_i, // step applied per enabled cycle
+    input  logic [WIDTH-1:0] d_i,     // value taken over on load_i
+    output logic [WIDTH-1:0] q_o,     // lower WIDTH bits of the counter register
+    output logic             overflow_o
+);
+    logic [WIDTH:0] counter_q, counter_d; // one bit wider than q_o
+    if (STICKY_OVERFLOW) begin : gen_sticky_overflow
+        logic overflow_d, overflow_q;
+        always_ff @(posedge clk_i or negedge rst_ni) overflow_q <= ~rst_ni ? 1'b0 : overflow_d;
+        always_comb begin
+            overflow_d = overflow_q; // hold by default
+            if (clear_i || load_i) begin
+                overflow_d = 1'b0; // clear and load both drop the flag
+            end else if (!overflow_q && en_i) begin // once set, the flag is no longer re-evaluated
+                if (down_i) begin
+                    overflow_d = delta_i > counter_q[WIDTH-1:0]; // step exceeds the current value
+                end else begin
+                    overflow_d = counter_q[WIDTH-1:0] > ({WIDTH{1'b1}} - delta_i); // step exceeds the headroom
+                end
+            end
+        end
+        assign overflow_o = overflow_q;
+    end else begin : gen_transient_overflow
+        // overflow_o mirrors the extra MSB of the counter register
+        assign overflow_o = counter_q[WIDTH];
+    end
+    assign q_o = counter_q[WIDTH-1:0];
+
+    always_comb begin
+        counter_d = counter_q; // hold by default
+
+        if (clear_i) begin
+            counter_d = '0;
+        end else if (load_i) begin
+            counter_d = {1'b0, d_i}; // loading also clears the extra MSB
+        end else if (en_i) begin
+            if (down_i) begin
+                counter_d = counter_q - delta_i;
+            end else begin
+                counter_d = counter_q + delta_i;
+            end
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+           counter_q <= '0;
+        end else begin
+           counter_q <= counter_d;
+        end
+    end
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
new file mode 100644
index 0000000..31295e8
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+/* verilator lint_off DECLFILENAME */
+module fifo #(
+    parameter bit          FALL_THROUGH = 1'b0, // handed to fifo_v2: fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // width of the default dtype
+    parameter int unsigned DEPTH        = 8,    // handed to fifo_v2: number of entries
+    parameter int unsigned THRESHOLD    = 1,    // handed to fifo_v2 as ALM_FULL_TH
+    parameter type dtype                = logic [DATA_WIDTH-1:0]
+)(
+    input  logic  clk_i,            // clock
+    input  logic  rst_ni,           // asynchronous reset, active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // handed through to fifo_v2
+    // fill state
+    output logic  full_o,           // no room left
+    output logic  empty_o,          // nothing stored
+    output logic  threshold_o,      // alm_full_o of fifo_v2 for ALM_FULL_TH = THRESHOLD
+    // write side, usable while the queue is not full
+    input  dtype  data_i,           // element to push
+    input  logic  push_i,           // push data_i
+    // read side, usable while the queue is not empty
+    output dtype  data_o,           // element at the head
+    input  logic  pop_i             // pop the head
+);
+    fifo_v2 #(
+        .FALL_THROUGH (FALL_THROUGH),
+        .DATA_WIDTH   (DATA_WIDTH),
+        .DEPTH        (DEPTH),
+        .ALM_FULL_TH  (THRESHOLD),
+        .dtype        (dtype)
+    ) impl (
+        .clk_i,
+        .rst_ni,
+        .flush_i,
+        .testmode_i,
+        .full_o,
+        .empty_o,
+        .alm_full_o  (threshold_o),
+        .alm_empty_o (),
+        .data_i,
+        .push_i,
+        .data_o,
+        .pop_i
+    );
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
new file mode 100644
index 0000000..9c87ed9
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
@@ -0,0 +1,79 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+module fifo_v2 #(
+    parameter bit          FALL_THROUGH = 1'b0, // handed to fifo_v3: fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // width of the default dtype
+    parameter int unsigned DEPTH        = 8,    // handed to fifo_v3: number of entries
+    parameter int unsigned ALM_EMPTY_TH = 1,    // alm_empty_o is set while usage <= this value
+    parameter int unsigned ALM_FULL_TH  = 1,    // alm_full_o is set while usage >= this value
+    parameter type dtype                = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input  logic  clk_i,            // clock
+    input  logic  rst_ni,           // asynchronous reset, active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // handed through to fifo_v3
+    // fill state
+    output logic  full_o,           // no room left
+    output logic  empty_o,          // nothing stored
+    output logic  alm_full_o,       // usage has reached ALM_FULL_TH
+    output logic  alm_empty_o,      // usage is at or below ALM_EMPTY_TH
+    // write side, usable while the queue is not full
+    input  dtype  data_i,           // element to push
+    input  logic  push_i,           // push data_i
+    // read side, usable while the queue is not empty
+    output dtype  data_o,           // element at the head
+    input  logic  pop_i             // pop the head
+);
+
+    logic [ADDR_DEPTH-1:0] usage;
+
+    // almost-full / almost-empty flags, derived from the usage reported by fifo_v3
+    if (DEPTH != 0) begin
+        assign alm_full_o  = (usage >= ALM_FULL_TH[ADDR_DEPTH-1:0]);
+        assign alm_empty_o = (usage <= ALM_EMPTY_TH[ADDR_DEPTH-1:0]);
+    end else begin
+        assign alm_full_o  = 1'b0; // meaningless for a FIFO of depth 0
+        assign alm_empty_o = 1'b0; // meaningless for a FIFO of depth 0
+    end
+
+    fifo_v3 #(
+        .FALL_THROUGH (FALL_THROUGH),
+        .DATA_WIDTH   (DATA_WIDTH),
+        .DEPTH        (DEPTH),
+        .dtype        (dtype)
+    ) i_fifo_v3 (
+        .clk_i      (clk_i),
+        .rst_ni     (rst_ni),
+        .flush_i    (flush_i),
+        .testmode_i (testmode_i),
+        .full_o     (full_o),
+        .empty_o    (empty_o),
+        .usage_o    (usage),
+        .data_i     (data_i),
+        .push_i     (push_i),
+        .data_o     (data_o),
+        .pop_i      (pop_i)
+    );
+
+    // pragma translate_off
+    `ifndef VERILATOR
+        initial begin
+            assert (ALM_FULL_TH <= DEPTH) else $error("ALM_FULL_TH can't be larger than the DEPTH.");
+            assert (ALM_EMPTY_TH <= DEPTH) else $error("ALM_EMPTY_TH can't be larger than the DEPTH.");
+        end
+    `endif
+    // pragma translate_on
+
+endmodule // fifo_v2
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv b/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv
new file mode 100644
index 0000000..91dccb0
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv
@@ -0,0 +1,98 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 10.04.2019
+// Description: exponential backoff counter with randomization.
+//
+// For each failed trial (set_i pulsed), this unit exponentially increases the
+// (average) backoff time by masking an LFSR with a shifted mask in order to
+// create the backoff counter initial value.
+//
+// The shift register mask and the counter value are both reset to '0 in case of
+// a successful trial (clr_i).
+//
+
+module exp_backoff #(
+  /// Seed for 16bit LFSR; only the low 16 bits are used and they must not all be zero
+  parameter int unsigned Seed   = 'hffff,
+  /// 2**MaxExp-1 determines the maximum range from which random wait counts are drawn
+  parameter int unsigned MaxExp = 16
+) (
+  input  logic clk_i,
+  input  logic rst_ni,
+  /// Sets the backoff counter (pulse) -> use when trial did not succeed
+  input  logic set_i,
+  /// Clears the backoff counter (pulse) -> use when trial succeeded; wins over set_i
+  input  logic clr_i,
+  /// Indicates whether the backoff counter is equal to zero and a new trial can be launched
+  output logic is_zero_o
+);
+
+  // leave this constant
+  localparam int unsigned WIDTH = 16;
+
+  logic [WIDTH-1:0] lfsr_d, lfsr_q, cnt_d, cnt_q, mask_d, mask_q;
+  logic lfsr;
+
+  // generate random wait counts
+  // note: we use a flipped lfsr here to
+  // avoid strange correlation effects between
+  // the (left-shifted) mask and the lfsr
+  assign lfsr = lfsr_q[15-15] ^
+                lfsr_q[15-13] ^
+                lfsr_q[15-12] ^
+                lfsr_q[15-10];
+
+  assign lfsr_d = (set_i) ? {lfsr, lfsr_q[$high(lfsr_q):1]} :
+                            lfsr_q;
+
+  // widen the mask by one bit per set_i. NOTE(review): MaxExp == 1 yields mask_q[-1:0] - confirm
+  assign mask_d = (clr_i) ? '0                                :
+                  (set_i) ? {{(WIDTH-MaxExp){1'b0}},mask_q[MaxExp-2:0], 1'b1} :
+                            mask_q;
+
+  assign cnt_d =  (clr_i)      ? '0                :
+                  (set_i)      ? (mask_q & lfsr_q) :
+                  (!is_zero_o) ? cnt_q - 1'b1      : '0;
+
+  assign is_zero_o = (cnt_q=='0);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      lfsr_q <= WIDTH'(Seed);
+      mask_q <= '0;
+      cnt_q  <= '0;
+    end else begin
+      lfsr_q <= lfsr_d;
+      mask_q <= mask_d;
+      cnt_q  <= cnt_d;
+    end
+  end
+
+///////////////////////////////////////////////////////
+// assertions
+///////////////////////////////////////////////////////
+
+//pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    // assert wrong parameterizations
+    assert (MaxExp>0)
+      else $fatal(1,"MaxExp must be greater than 0");
+    assert (MaxExp<=16)
+      else $fatal(1,"MaxExp cannot be greater than 16");
+    assert (WIDTH'(Seed) != '0) // check what reset loads: a seed like 'h10000 truncates to zero
+      else $fatal(1,"Zero seed is not allowed for LFSR");
+  end
+`endif
+//pragma translate_on
+
+endmodule // exp_backoff
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv b/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv
new file mode 100644
index 0000000..11b77e0
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv
@@ -0,0 +1,191 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+module fifo_v3 #(
+    parameter bit          FALL_THROUGH = 1'b0, // 1: a push into an empty FIFO shows up on data_o in the same cycle
+    parameter int unsigned DATA_WIDTH   = 32,   // width of the default dtype; only used in the dtype default below
+    parameter int unsigned DEPTH        = 8,    // number of entries; 0 makes data_o a combinational copy of data_i
+    parameter type dtype                = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // synchronous flush: resets both pointers and the fill count
+    input  logic  testmode_i,       // not referenced inside this module
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  [ADDR_DEPTH-1:0] usage_o,  // low ADDR_DEPTH bits of the fill count
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+    // local parameter
+    // FIFO depth - handle the case of pass-through, synthesizer will do constant propagation
+    localparam int unsigned FifoDepth = (DEPTH > 0) ? DEPTH : 1;
+    // write enable for mem_q, active low: gen_asic_queue updates mem_q only while this is 0
+    logic gate_clock;
+    // pointer to the read and write section of the queue
+    logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q;
+    // fill count; one bit wider than the pointers so that it can be compared
+    // against FifoDepth[ADDR_DEPTH:0] for full_o
+    logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q;
+    // actual memory
+    dtype [FifoDepth - 1:0] mem_n, mem_q;
+
+    // fifo ram signals for fpga target
+    logic   fifo_ram_we;
+    logic   [ADDR_DEPTH-1:0] fifo_ram_read_address;
+    logic   [ADDR_DEPTH-1:0] fifo_ram_write_address;
+    logic   [$bits(dtype)-1:0] fifo_ram_wdata;
+    logic   [$bits(dtype)-1:0] fifo_ram_rdata;
+
+    assign usage_o = status_cnt_q[ADDR_DEPTH-1:0];
+
+    if (DEPTH == 0) begin : gen_pass_through
+        assign empty_o     = ~push_i;
+        assign full_o      = ~pop_i;
+    end else begin : gen_fifo
+        assign full_o       = (status_cnt_q == FifoDepth[ADDR_DEPTH:0]);
+        assign empty_o      = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i);
+    end
+    // (the status flags are generated by the block above)
+
+    // read and write queue logic
+    always_comb begin : read_write_comb
+        // default assignment
+        read_pointer_n  = read_pointer_q;
+        write_pointer_n = write_pointer_q;
+        status_cnt_n    = status_cnt_q;
+        if (ariane_pkg::FPGA_EN) begin
+             fifo_ram_we             = '0;
+             fifo_ram_read_address   = read_pointer_q;
+             fifo_ram_write_address  = '0;
+             fifo_ram_wdata          = '0;
+             data_o = (DEPTH == 0) ? data_i : fifo_ram_rdata;
+        end else begin
+            data_o          = (DEPTH == 0) ? data_i : mem_q[read_pointer_q];
+            mem_n           = mem_q;
+            gate_clock      = 1'b1;
+        end
+
+        // push a new element to the queue
+        if (push_i && ~full_o) begin
+            if (ariane_pkg::FPGA_EN) begin
+                fifo_ram_we = 1'b1;
+                fifo_ram_write_address = write_pointer_q;
+                fifo_ram_wdata = data_i;
+            end else begin
+                // push the data onto the queue
+                mem_n[write_pointer_q] = data_i;
+                // enable the mem_q update, we want to write something
+                gate_clock = 1'b0;
+            end
+            
+            // increment the write counter
+            if (write_pointer_q == FifoDepth[ADDR_DEPTH-1:0] - 1)
+                write_pointer_n = '0;
+            else
+                write_pointer_n = write_pointer_q + 1;
+            // increment the overall counter
+            status_cnt_n    = status_cnt_q + 1;
+        end
+
+        if (pop_i && ~empty_o) begin
+            // data_o is already driven by the default assignment; only the read pointer moves here
+            // (read_pointer_n still holds its default, read_pointer_q, when it is compared below)
+            if (read_pointer_n == FifoDepth[ADDR_DEPTH-1:0] - 1)
+                read_pointer_n = '0;
+            else
+                read_pointer_n = read_pointer_q + 1;
+            // ... and decrement the overall count
+            status_cnt_n   = status_cnt_q - 1;
+        end
+
+        // keep the count pointer stable if we push and pop at the same time
+        if (push_i && pop_i &&  ~full_o && ~empty_o)
+            status_cnt_n   = status_cnt_q;
+
+        // fall-through: forward a push into an empty FIFO; if it is popped at once, pointers and count stay put
+        if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin
+            data_o = data_i;
+            if (pop_i) begin
+                status_cnt_n = status_cnt_q;
+                read_pointer_n = read_pointer_q;
+                write_pointer_n = write_pointer_q;
+            end
+        end
+    end
+
+    // sequential process: pointers and fill count; flush_i overrides the computed next state
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            read_pointer_q  <= '0;
+            write_pointer_q <= '0;
+            status_cnt_q    <= '0;
+        end else begin
+            if (flush_i) begin
+                read_pointer_q  <= '0;
+                write_pointer_q <= '0;
+                status_cnt_q    <= '0;
+             end else begin
+                read_pointer_q  <= read_pointer_n;
+                write_pointer_q <= write_pointer_n;
+                status_cnt_q    <= status_cnt_n;
+            end
+        end
+    end
+
+    if (ariane_pkg::FPGA_EN) begin : gen_fpga_queue
+        AsyncDpRam #(
+            .ADDR_WIDTH (ADDR_DEPTH),
+            .DATA_DEPTH (DEPTH),
+            .DATA_WIDTH ($bits(dtype))
+        ) fifo_ram (
+            .Clk_CI      ( clk_i                   ),  // same clock as the pointer registers
+            .WrEn_SI     ( fifo_ram_we             ),
+            .RdAddr_DI   ( fifo_ram_read_address   ),
+            .WrAddr_DI   ( fifo_ram_write_address  ),
+            .WrData_DI   ( fifo_ram_wdata          ),
+            .RdData_DO   ( fifo_ram_rdata          )
+        );
+    end else begin : gen_asic_queue
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if(~rst_ni) begin
+                mem_q <= '0;
+            end else if (!gate_clock) begin
+                mem_q <= mem_n;
+            end
+        end
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert (DEPTH > 0)             else $error("DEPTH must be greater than 0."); // NOTE(review): also fires for the DEPTH == 0 pass-through handled above
+    end
+
+    full_write : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (full_o |-> ~push_i))
+        else $fatal (1, "Trying to push new data although the FIFO is full.");
+
+    empty_read : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i))
+        else $fatal (1, "Trying to pop data although the FIFO is empty.");
+`endif
+// pragma translate_on
+
+endmodule // fifo_v3
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv
new file mode 100644
index 0000000..aae2e2d
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv
@@ -0,0 +1,315 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 26.04.2019
+//
+// Description: This is a parametric LFSR with precomputed coefficients for
+// LFSR lengths from 4 to 64bit.
+
+// Additional block cipher layers can be instantiated to non-linearly transform
+// the pseudo-random LFSR sequence at the output, and hence break the shifting
+// patterns. The additional cipher layers can only be used for an LFSR width
+// of 64bit, since the block cipher has been designed for that block length.
+
+module lfsr #(
+  parameter int unsigned          LfsrWidth     = 64,   // [4,64], width of the LFSR state
+  parameter int unsigned          OutWidth      = 8,    // [1,LfsrWidth], width of out_o
+  parameter logic [LfsrWidth-1:0] RstVal        = '1,   // [1,2^LfsrWidth-1], state after reset
+  // 0: disabled, the present cipher uses 31, but just a few layers (1-3) are enough
+  // to break linear shifting patterns
+  parameter int unsigned          CipherLayers  = 0,
+  parameter bit                   CipherReg     = 1'b1  // additional output reg after cipher
+) (
+  input  logic                 clk_i,
+  input  logic                 rst_ni,  // asynchronous, active-low reset
+  input  logic                 en_i,    // advance the LFSR when high, hold the state when low
+  output logic [OutWidth-1:0]  out_o    // low OutWidth bits of the LFSR state or cipher result
+);
+
+// Galois LFSR feedback masks
+// Automatically generated with get_lfsr_masks.py
+// Masks are from https://users.ece.cmu.edu/~koopman/lfsr/
+localparam logic [63:0] Masks [4:64] = '{64'hC,  // Masks[w] is the mask used for LfsrWidth == w
+                                         64'h1E,
+                                         64'h39,
+                                         64'h7E,
+                                         64'hFA,
+                                         64'h1FD,
+                                         64'h3FC,
+                                         64'h64B,
+                                         64'hD8F,
+                                         64'h1296,
+                                         64'h2496,
+                                         64'h4357,
+                                         64'h8679,
+                                         64'h1030E,
+                                         64'h206CD,
+                                         64'h403FE,
+                                         64'h807B8,
+                                         64'h1004B2,
+                                         64'h2006A8,
+                                         64'h4004B2,
+                                         64'h800B87,
+                                         64'h10004F3,
+                                         64'h200072D,
+                                         64'h40006AE,
+                                         64'h80009E3,
+                                         64'h10000583,
+                                         64'h20000C92,
+                                         64'h400005B6,
+                                         64'h80000EA6,
+                                         64'h1000007A3,
+                                         64'h200000ABF,
+                                         64'h400000842,
+                                         64'h80000123E,
+                                         64'h100000074E,
+                                         64'h2000000AE9,
+                                         64'h400000086A,
+                                         64'h8000001213,
+                                         64'h1000000077E,
+                                         64'h2000000123B,
+                                         64'h40000000877,
+                                         64'h8000000108D,
+                                         64'h100000000AE9,
+                                         64'h200000000E9F,
+                                         64'h4000000008A6,
+                                         64'h80000000191E,
+                                         64'h100000000090E,
+                                         64'h2000000000FB3,
+                                         64'h4000000000D7D,
+                                         64'h80000000016A5,
+                                         64'h10000000000B4B,
+                                         64'h200000000010AF,
+                                         64'h40000000000DDE,
+                                         64'h8000000000181A,
+                                         64'h100000000000B65,
+                                         64'h20000000000102D,
+                                         64'h400000000000CD5,
+                                         64'h8000000000024C1,
+                                         64'h1000000000000EF6,
+                                         64'h2000000000001363,
+                                         64'h4000000000000FCD,
+                                         64'h80000000000019E2};
+
+// this S-box and permutation P has been taken from the Present Cipher,
+// a super lightweight block cipher. use the cipher layers to add additional
+// non-linearity to the LFSR output. note one layer does not fully correspond
+// to the present cipher round, since the key and rekeying function is not applied here.
+//
+// See also:
+// "PRESENT: An Ultra-Lightweight Block Cipher", A. Bogdanov et al., Ches 2007
+// http://www.lightweightcrypto.org/present/present_ches2007.pdf
+
+// this is the sbox from the present cipher (packed array: the right-most entry is Sbox4[0])
+localparam logic[15:0][3:0] Sbox4 = {4'h2, 4'h1, 4'h7, 4'h4,
+                                     4'h8, 4'hF, 4'hE, 4'h3,
+                                     4'hD, 4'hA, 4'h0, 4'h9,
+                                     4'hB, 4'h6, 4'h5, 4'hC };
+
+// these are the permutation indices of the present cipher (the right-most entry is Perm[0])
+localparam logic[63:0][5:0] Perm = {6'd63, 6'd47, 6'd31, 6'd15, 6'd62, 6'd46, 6'd30, 6'd14,
+                                    6'd61, 6'd45, 6'd29, 6'd13, 6'd60, 6'd44, 6'd28, 6'd12,
+                                    6'd59, 6'd43, 6'd27, 6'd11, 6'd58, 6'd42, 6'd26, 6'd10,
+                                    6'd57, 6'd41, 6'd25, 6'd09, 6'd56, 6'd40, 6'd24, 6'd08,
+                                    6'd55, 6'd39, 6'd23, 6'd07, 6'd54, 6'd38, 6'd22, 6'd06,
+                                    6'd53, 6'd37, 6'd21, 6'd05, 6'd52, 6'd36, 6'd20, 6'd04,
+                                    6'd51, 6'd35, 6'd19, 6'd03, 6'd50, 6'd34, 6'd18, 6'd02,
+                                    6'd49, 6'd33, 6'd17, 6'd01, 6'd48, 6'd32, 6'd16, 6'd00};
+
+
+function automatic logic [63:0] sbox4_layer(logic [63:0] in);  // Sbox4 on each of the 16 nibbles
+  logic [63:0] out;
+  // for (logic [4:0] j = '0; j<16; j++) out[j*4 +: 4] = Sbox4[in[j*4 +: 4]];
+  // the unrolled form below replaces the loop above; it simulates much faster than the loop
+  out[0*4  +: 4] = Sbox4[in[0*4  +: 4]];
+  out[1*4  +: 4] = Sbox4[in[1*4  +: 4]];
+  out[2*4  +: 4] = Sbox4[in[2*4  +: 4]];
+  out[3*4  +: 4] = Sbox4[in[3*4  +: 4]];
+
+  out[4*4  +: 4] = Sbox4[in[4*4  +: 4]];
+  out[5*4  +: 4] = Sbox4[in[5*4  +: 4]];
+  out[6*4  +: 4] = Sbox4[in[6*4  +: 4]];
+  out[7*4  +: 4] = Sbox4[in[7*4  +: 4]];
+
+  out[8*4  +: 4] = Sbox4[in[8*4  +: 4]];
+  out[9*4  +: 4] = Sbox4[in[9*4  +: 4]];
+  out[10*4 +: 4] = Sbox4[in[10*4 +: 4]];
+  out[11*4 +: 4] = Sbox4[in[11*4 +: 4]];
+
+  out[12*4 +: 4] = Sbox4[in[12*4 +: 4]];
+  out[13*4 +: 4] = Sbox4[in[13*4 +: 4]];
+  out[14*4 +: 4] = Sbox4[in[14*4 +: 4]];
+  out[15*4 +: 4] = Sbox4[in[15*4 +: 4]];
+  return out;
+endfunction : sbox4_layer
+
+function automatic logic [63:0] perm_layer(logic [63:0] in);  // moves input bit j to bit Perm[j]
+  logic [63:0] out;
+  // for (logic [7:0] j = '0; j<64; j++) out[Perm[j]] = in[j];
+  // the unrolled form below replaces the loop above; it simulates much faster than the loop
+  out[Perm[0]] = in[0];
+  out[Perm[1]] = in[1];
+  out[Perm[2]] = in[2];
+  out[Perm[3]] = in[3];
+  out[Perm[4]] = in[4];
+  out[Perm[5]] = in[5];
+  out[Perm[6]] = in[6];
+  out[Perm[7]] = in[7];
+  out[Perm[8]] = in[8];
+  out[Perm[9]] = in[9];
+
+  out[Perm[10]] = in[10];
+  out[Perm[11]] = in[11];
+  out[Perm[12]] = in[12];
+  out[Perm[13]] = in[13];
+  out[Perm[14]] = in[14];
+  out[Perm[15]] = in[15];
+  out[Perm[16]] = in[16];
+  out[Perm[17]] = in[17];
+  out[Perm[18]] = in[18];
+  out[Perm[19]] = in[19];
+
+  out[Perm[20]] = in[20];
+  out[Perm[21]] = in[21];
+  out[Perm[22]] = in[22];
+  out[Perm[23]] = in[23];
+  out[Perm[24]] = in[24];
+  out[Perm[25]] = in[25];
+  out[Perm[26]] = in[26];
+  out[Perm[27]] = in[27];
+  out[Perm[28]] = in[28];
+  out[Perm[29]] = in[29];
+
+  out[Perm[30]] = in[30];
+  out[Perm[31]] = in[31];
+  out[Perm[32]] = in[32];
+  out[Perm[33]] = in[33];
+  out[Perm[34]] = in[34];
+  out[Perm[35]] = in[35];
+  out[Perm[36]] = in[36];
+  out[Perm[37]] = in[37];
+  out[Perm[38]] = in[38];
+  out[Perm[39]] = in[39];
+
+  out[Perm[40]] = in[40];
+  out[Perm[41]] = in[41];
+  out[Perm[42]] = in[42];
+  out[Perm[43]] = in[43];
+  out[Perm[44]] = in[44];
+  out[Perm[45]] = in[45];
+  out[Perm[46]] = in[46];
+  out[Perm[47]] = in[47];
+  out[Perm[48]] = in[48];
+  out[Perm[49]] = in[49];
+
+  out[Perm[50]] = in[50];
+  out[Perm[51]] = in[51];
+  out[Perm[52]] = in[52];
+  out[Perm[53]] = in[53];
+  out[Perm[54]] = in[54];
+  out[Perm[55]] = in[55];
+  out[Perm[56]] = in[56];
+  out[Perm[57]] = in[57];
+  out[Perm[58]] = in[58];
+  out[Perm[59]] = in[59];
+
+  out[Perm[60]] = in[60];
+  out[Perm[61]] = in[61];
+  out[Perm[62]] = in[62];
+  out[Perm[63]] = in[63];
+  return out;
+endfunction : perm_layer
+
+////////////////////////////////////////////////////////////////////////
+// lfsr
+////////////////////////////////////////////////////////////////////////
+
+logic [LfsrWidth-1:0] lfsr_d, lfsr_q;
+assign lfsr_d =  // when enabled: shift right and XOR in the mask if the LSB was 1; else hold
+  (en_i) ? (lfsr_q>>1) ^ ({LfsrWidth{lfsr_q[0]}} & Masks[LfsrWidth][LfsrWidth-1:0]) : lfsr_q;
+
+always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+  //$display("%b %h", en_i, lfsr_d);
+  if (!rst_ni) begin
+    lfsr_q <= LfsrWidth'(RstVal);
+  end else begin
+    lfsr_q <= lfsr_d;
+  end
+end
+
+////////////////////////////////////////////////////////////////////////
+// block cipher layers
+////////////////////////////////////////////////////////////////////////
+
+if (CipherLayers > unsigned'(0)) begin : g_cipher_layers
+  logic [63:0] ciph_layer;
+  localparam int unsigned NumRepl = ((64+LfsrWidth)/LfsrWidth);  // copies of lfsr_q concatenated
+
+  always_comb begin : p_ciph_layer
+    automatic logic [63:0] tmp;
+    tmp = 64'({NumRepl{lfsr_q}});  // replicate the state, then size the result to 64 bit
+    for(int unsigned k = 0; k < CipherLayers; k++) begin
+      tmp = perm_layer(sbox4_layer(tmp));  // one layer = S-box followed by permutation
+    end
+    ciph_layer = tmp;
+  end
+
+  // additional output reg after cipher
+  if (CipherReg) begin : g_cipher_reg
+    logic [OutWidth-1:0] out_d, out_q;
+
+    assign out_d = (en_i) ? ciph_layer[OutWidth-1:0] : out_q;  // only updates while enabled
+    assign out_o = out_q[OutWidth-1:0];
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+      if (!rst_ni) begin
+        out_q <= '0;
+      end else begin
+        out_q <= out_d;
+      end
+    end
+  // no outreg
+  end else begin : g_no_out_reg
+    assign out_o  = ciph_layer[OutWidth-1:0];
+  end
+
+// no block cipher
+end else begin : g_no_cipher_layers
+  assign out_o    = lfsr_q[OutWidth-1:0];
+end
+
+////////////////////////////////////////////////////////////////////////
+// assertions
+////////////////////////////////////////////////////////////////////////
+
+// pragma translate_off
+initial begin
+  // these are the LUT limits
+  assert(OutWidth <= LfsrWidth) else
+    $fatal(1,"OutWidth must be smaller equal the LfsrWidth.");
+  assert(RstVal > unsigned'(0)) else
+    $fatal(1,"RstVal must be nonzero.");
+  assert((LfsrWidth >= $low(Masks)) && (LfsrWidth <= $high(Masks))) else
+    $fatal(1,"Unsupported LfsrWidth.");
+  assert(Masks[LfsrWidth][LfsrWidth-1]) else
+    $fatal(1, "LFSR mask is not correct. The MSB must be 1." );
+  assert((CipherLayers > 0) && (LfsrWidth == 64) || (CipherLayers == 0)) else
+    $fatal(1, "Use additional cipher layers only in conjunction with an LFSR width of 64 bit." );
+end
+
+`ifndef VERILATOR
+  all_zero: assert property (  // the next state must be nonzero whenever the LFSR is enabled
+    @(posedge clk_i) disable iff (!rst_ni) en_i |-> lfsr_d)
+      else $fatal(1,"Lfsr must not be all-zero.");
+`endif
+// pragma translate_on
+
+endmodule // lfsr
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
new file mode 100644
index 0000000..3fc93c7
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
@@ -0,0 +1,68 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, ETH Zurich
+// Date: 5.11.2018
+// Description: 16-bit LFSR
+
+// --------------
+// 16-bit LFSR
+// --------------
+//
+// Description: Shift register
+//
+module lfsr_16bit #(
+    parameter logic [15:0] SEED  = 16'b0,  // value loaded into the shift register on reset
+    parameter int unsigned WIDTH = 16      // width of the one-hot output, at most 16
+)(
+    input  logic                      clk_i,
+    input  logic                      rst_ni,          // asynchronous, active-low reset
+    input  logic                      en_i,            // shift by one position when high
+    output logic [WIDTH-1:0]          refill_way_oh,   // one-hot decode of refill_way_bin
+    output logic [$clog2(WIDTH)-1:0]  refill_way_bin   // low LogWidth bits of the register
+);
+
+    localparam int unsigned LogWidth = $clog2(WIDTH);
+
+    logic [15:0] shift_d, shift_q;
+
+
+    always_comb begin
+
+        automatic logic shift_in;
+        shift_in = !(shift_q[15] ^ shift_q[12] ^ shift_q[5] ^ shift_q[1]);  // inverted XOR of taps
+
+        shift_d = shift_q;
+
+        if (en_i)
+            shift_d = {shift_q[14:0], shift_in};
+
+        // output assignment
+        refill_way_oh = 'b0;
+        refill_way_oh[shift_q[LogWidth-1:0]] = 1'b1;
+        refill_way_bin = shift_q[LogWidth-1:0];  // explicit slice instead of implicit truncation
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+        if(~rst_ni) begin
+            shift_q <= SEED;
+        end else begin
+            shift_q <= shift_d;
+        end
+    end
+
+    //pragma translate_off
+    initial begin
+        assert (WIDTH <= 16)
+            else $fatal(1, "WIDTH needs to be less than or equal to 16 because of the 16-bit LFSR");
+    end
+    //pragma translate_on
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
new file mode 100644
index 0000000..60fdf19
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Igor Loi - University of Bologna
+// Author: Florian Zaruba, ETH Zurich
+// Date: 12.11.2017
+// Description: 8-bit LFSR
+
+/// 8 bit Linear Feedback Shift register
+module lfsr_8bit #(
+  parameter logic        [7:0] SEED  = 8'b0,  // value loaded into the shift register on reset
+  parameter int unsigned       WIDTH = 8      // width of the one-hot output, at most 8
+) (
+  input  logic                     clk_i,
+  input  logic                     rst_ni,          // asynchronous, active-low reset
+  input  logic                     en_i,            // shift by one position when high
+  output logic [        WIDTH-1:0] refill_way_oh,   // one-hot decode of refill_way_bin
+  output logic [$clog2(WIDTH)-1:0] refill_way_bin   // low LogWidth bits of the register
+);
+
+  localparam int unsigned LogWidth = $clog2(WIDTH);
+
+  logic [7:0] shift_d, shift_q;
+
+  always_comb begin
+
+    automatic logic shift_in;
+    shift_in = !(shift_q[7] ^ shift_q[3] ^ shift_q[2] ^ shift_q[1]);  // inverted XOR of the taps
+
+    shift_d = shift_q;
+
+    if (en_i) shift_d = {shift_q[6:0], shift_in};
+
+    // output assignment
+    refill_way_oh = 'b0;
+    refill_way_oh[shift_q[LogWidth - 1:0]] = 1'b1;
+    refill_way_bin = shift_q[LogWidth - 1:0];  // explicit slice instead of implicit truncation
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+    if (~rst_ni) begin
+      shift_q <= SEED;
+    end else begin
+      shift_q <= shift_d;
+    end
+  end
+
+  //pragma translate_off
+  initial begin
+    assert (WIDTH <= 8) else $fatal(1, "WIDTH needs to be less than or equal to 8 because of the 8-bit LFSR");
+  end
+  //pragma translate_on
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv
new file mode 100644
index 0000000..424eb2e
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv
@@ -0,0 +1,112 @@
+// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+
+/// A trailing zero counter / leading zero counter.
+/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
+/// Set MODE to 1 for leading zero counter  => cnt_o is the number of leading zeros  (from the MSB)
+/// If the input does not contain a one (all bits are zero), `empty_o` is asserted. Additionally
+/// `cnt_o` contains the maximum number of zeros - 1. For example:
+///   in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
+///   in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
+///   in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
+/// NOTE(review): a faster, Verilator-specific implementation was advertised here, but this
+/// version of the module contains none; the same generate-based tree is used by every tool.
+module lzc #(
+  /// The width of the input vector.
+  parameter int unsigned WIDTH = 2,
+  /// Mode selection: 0 -> trailing zero, 1 -> leading zero
+  parameter bit          MODE  = 1'b0,
+  /// Dependent parameter. Do **not** change!
+  ///
+  /// Width of the output signal with the zero count.
+  parameter int unsigned CNT_WIDTH = cf_math_pkg::idx_width(WIDTH)
+) (
+  /// Input vector to be counted.
+  input  logic [WIDTH-1:0]     in_i,
+  /// Count of the leading / trailing zeros.
+  output logic [CNT_WIDTH-1:0] cnt_o,
+  /// Counter is empty: Asserted if all bits in in_i are zero.
+  output logic                 empty_o
+);
+
+  if (WIDTH == 1) begin : gen_degenerate_lzc
+
+    assign cnt_o[0] = !in_i[0];  // single input bit: the count is 1 exactly when that bit is 0
+    assign empty_o = !in_i[0];
+
+  end else begin : gen_lzc
+
+    localparam int unsigned NumLevels = $clog2(WIDTH);  // number of levels of the binary tree
+
+    // pragma translate_off
+    initial begin
+      assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
+    end
+    // pragma translate_on
+
+    logic [WIDTH-1:0][NumLevels-1:0] index_lut;  // index_lut[j] holds the constant j
+    logic [2**NumLevels-1:0] sel_nodes;  // node flag: some in_tmp bit below this node is 1
+    logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes;  // node value: index picked below it
+
+    logic [WIDTH-1:0] in_tmp;
+
+    // reverse the vector when MODE is 1, so the tree below always searches from index 0
+    always_comb begin : flip_vector
+      for (int unsigned i = 0; i < WIDTH; i++) begin
+        in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
+      end
+    end
+
+    for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
+      assign index_lut[j] = (NumLevels)'(unsigned'(j));
+    end
+
+    // nodes are stored level by level: level `level` starts at 2**level - 1, the root is node 0
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : g_levels
+      if (unsigned'(level) == NumLevels - 1) begin : g_last_level
+        for (genvar k = 0; k < 2 ** level; k++) begin : g_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(k) * 2 < WIDTH - 1) begin : g_reduce
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2] | in_tmp[k * 2 + 1];
+            assign index_nodes[2 ** level - 1 + k] = (in_tmp[k * 2] == 1'b1)
+              ? index_lut[k * 2] :
+                index_lut[k * 2 + 1];
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(k) * 2 == WIDTH - 1) begin : g_base
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2];
+            assign index_nodes[2 ** level - 1 + k] = index_lut[k * 2];
+          end
+          // if index is out of range
+          if (unsigned'(k) * 2 > WIDTH - 1) begin : g_out_of_range
+            assign sel_nodes[2 ** level - 1 + k] = 1'b0;
+            assign index_nodes[2 ** level - 1 + k] = '0;
+          end
+        end
+      end else begin : g_not_last_level
+        for (genvar l = 0; l < 2 ** level; l++) begin : g_level  // children: lower index wins
+          assign sel_nodes[2 ** level - 1 + l] =
+              sel_nodes[2 ** (level + 1) - 1 + l * 2] | sel_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+          assign index_nodes[2 ** level - 1 + l] = (sel_nodes[2 ** (level + 1) - 1 + l * 2] == 1'b1)
+            ? index_nodes[2 ** (level + 1) - 1 + l * 2] :
+              index_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+        end
+      end
+    end
+
+    assign cnt_o = NumLevels > unsigned'(0) ? index_nodes[0] : {($clog2(WIDTH)) {1'b0}};
+    assign empty_o = NumLevels > unsigned'(0) ? ~sel_nodes[0] : ~(|in_i);
+
+  end : gen_lzc
+
+endmodule : lzc
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv b/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv
new file mode 100644
index 0000000..72b9b71
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv
@@ -0,0 +1,60 @@
+// Copyright (C) 2013-2018 ETH Zurich, University of Bologna
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Manuel Eggimann <meggimann@iis.ee.ethz.ch>
+
+// Description: This module calculates the hamming weight (number of ones) in
+// its input vector using a balanced binary adder tree. Recursive instantiation
+// is used to build the tree.  Any unsigned INPUT_WIDTH larger or equal 2 is
+// legal.  The module pads the signal internally to the next power of two.  The
+// output result width is ceil(log2(INPUT_WIDTH))+1.
+
+module popcount #(
+    parameter int unsigned INPUT_WIDTH = 256,  // number of bits in data_i
+    localparam int unsigned PopcountWidth = $clog2(INPUT_WIDTH)+1  // width of popcount_o
+) (
+    input logic [INPUT_WIDTH-1:0]     data_i,
+    output logic [PopcountWidth-1:0] popcount_o
+);
+
+   localparam int unsigned PaddedWidth = 1 << $clog2(INPUT_WIDTH);  // next power of two
+
+   logic [PaddedWidth-1:0]           padded_input;
+   logic [PopcountWidth-2:0]         left_child_result, right_child_result;  // upper / lower half
+
+   //Zero pad the input to next power of two
+   always_comb begin
+     padded_input = '0;
+     padded_input[INPUT_WIDTH-1:0] = data_i;
+   end
+
+   //Recursive instantiation to build binary adder tree
+   if (INPUT_WIDTH == 1) begin : single_node  // one bit: the count is the bit itself
+     assign left_child_result  = 1'b0;
+     assign right_child_result = padded_input[0];
+   end else if (INPUT_WIDTH == 2) begin : leaf_node  // two bits: add them directly
+     assign left_child_result  = padded_input[1];
+     assign right_child_result = padded_input[0];
+   end else begin : non_leaf_node  // wider: count each half of the padded input separately
+     popcount #(.INPUT_WIDTH(PaddedWidth / 2))
+         left_child(
+                    .data_i(padded_input[PaddedWidth-1:PaddedWidth/2]),
+                    .popcount_o(left_child_result));
+
+     popcount #(.INPUT_WIDTH(PaddedWidth / 2))
+         right_child(
+                     .data_i(padded_input[PaddedWidth/2-1:0]),
+                     .popcount_o(right_child_result));
+   end
+
+   //Output assignment: the total is the sum of the counts of both halves
+   assign popcount_o = left_child_result + right_child_result;
+
+endmodule : popcount
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
new file mode 100644
index 0000000..90301c8
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
@@ -0,0 +1,348 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+//         Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>, ETH Zurich
+// Date: 02.04.2019
+// Description: logarithmic arbitration tree with round robin arbitration scheme.
+
+/// The rr_arb_tree employs non-starving round robin-arbitration - i.e., the priorities
+/// rotate each cycle.
+///
+/// ## Fair vs. unfair Arbitration
+///
+/// This refers to fair throughput distribution when not all inputs have active requests.
+/// This module has an internal state `rr_q` which defines the highest priority input. (When
+/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
+/// choose the input with the same index as currently defined by the state if it has an active
+/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
+/// to distinguish between two methods of calculating the next state.
+/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
+///           state being calculated without the context of the active request. Leading to an
+///           unfair throughput distribution if not all inputs have active requests.
+/// * `1'b1`: The next state jumps to the next unserved request with higher index.
+///           This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
+///           `req_i` signal with all indices which will have a higher priority in the next state.
+///           The trailing zero count defines the input index with the next highest priority after
+///           the current one is served. When the upper is empty the lower `lzc` provides the
+///           wrapped index if there are outstanding requests with lower or same priority.
+/// The implication of throughput fairness on the module timing are:
+/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
+///   that in this module the input to register path scales with Log(Log(`NumIn`)).
+/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to output
+///   timing path of this module also scales with Log(`NumIn`).
+/// This implies that in this module the input to output path is always longer than the input to
+/// register path. As the output data usually also terminates in a register the parameter `FairArb`
+/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
+/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
+/// However these are small compared to the data multiplexers needed, as the width of the `req_i`
+/// signal is usually less than `DataWidth`.
+module rr_arb_tree #(
+  /// Number of inputs to be arbitrated.
+  parameter int unsigned NumIn      = 64,
+  /// Data width of the payload in bits. Not needed if `DataType` is overwritten.
+  parameter int unsigned DataWidth  = 32,
+  /// Data type of the payload, can be overwritten with a custom type. Only user of `DataWidth`.
+  parameter type         DataType   = logic [DataWidth-1:0],
+  /// The `ExtPrio` option allows to override the internal round robin counter via the
+  /// `rr_i` signal. This can be useful in case multiple arbiters need to have
+  /// rotating priorities that are operating in lock-step. If static priority arbitration
+  /// is needed, just connect `rr_i` to '0.
+  ///
+  /// Set to 1'b1 to enable.
+  parameter bit          ExtPrio    = 1'b0,
+  /// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
+  /// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
+  /// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
+  /// delay and area.
+  ///
+  /// Set to `1'b1` to treat req/gnt as vld/rdy.
+  parameter bit          AxiVldRdy  = 1'b0,
+  /// The `LockIn` option prevents the arbiter from changing the arbitration
+  /// decision when the arbiter is disabled. I.e., the index of the first request
+  /// that wins the arbitration will be locked in case the destination is not
+  /// able to grant the request in the same cycle.
+  ///
+  /// Set to `1'b1` to enable.
+  parameter bit          LockIn     = 1'b0,
+  /// When set, ensures that throughput gets distributed evenly between all inputs.
+  ///
+  /// Set to `1'b0` to disable.
+  parameter bit          FairArb    = 1'b1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Width of the arbitration priority signal and the arbitrated index.
+  parameter int unsigned IdxWidth   = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Type for defining the arbitration priority and arbitrated index signal.
+  parameter type         idx_t      = logic [IdxWidth-1:0]
+) (
+  /// Clock, positive edge triggered.
+  input  logic                clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                rst_ni,
+  /// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
+  input  logic                flush_i,
+  /// External round-robin priority. Only used if `ExtPrio` is `1'b1.`
+  input  idx_t                rr_i,
+  /// Input requests arbitration.
+  input  logic    [NumIn-1:0] req_i,
+  /* verilator lint_off UNOPTFLAT */
+  /// Input request is granted.
+  output logic    [NumIn-1:0] gnt_o,
+  /* verilator lint_on UNOPTFLAT */
+  /// Input data for arbitration.
+  input  DataType [NumIn-1:0] data_i,
+  /// Output request is valid.
+  output logic                req_o,
+  /// Output request is granted.
+  input  logic                gnt_i,
+  /// Output data.
+  output DataType             data_o,
+  /// Index from which input the data came from.
+  output idx_t                idx_o
+);
+
+  // pragma translate_off
+  `ifndef VERILATOR
+  `ifndef XSIM
+  // Default SVA reset
+  default disable iff (!rst_ni || flush_i);
+  `endif
+  `endif
+  // pragma translate_on
+
+  // just pass through in this corner case
+  if (NumIn == unsigned'(1)) begin : gen_pass_through
+    assign req_o    = req_i[0];
+    assign gnt_o[0] = gnt_i;
+    assign data_o   = data_i[0];
+    assign idx_o    = '0;
+  // non-degenerate cases
+  end else begin : gen_arbiter
+    localparam int unsigned NumLevels = unsigned'($clog2(NumIn));
+
+    /* verilator lint_off UNOPTFLAT */
+    idx_t    [2**NumLevels-2:0] index_nodes; // used to propagate the indices
+    DataType [2**NumLevels-2:0] data_nodes;  // used to propagate the data
+    logic    [2**NumLevels-2:0] gnt_nodes;   // used to propagate the grant to masters
+    logic    [2**NumLevels-2:0] req_nodes;   // used to propagate the requests to slave
+    /* lint_off */ // NOTE(review): UNOPTFLAT is never switched back on after this -- confirm intent
+    idx_t                       rr_q;
+    logic [NumIn-1:0]           req_d;
+
+    // the final arbitration decision can be taken from the root of the tree
+    assign req_o        = req_nodes[0];
+    assign data_o       = data_nodes[0];
+    assign idx_o        = index_nodes[0];
+
+    if (ExtPrio) begin : gen_ext_rr
+      assign rr_q       = rr_i;
+      assign req_d      = req_i;
+    end else begin : gen_int_rr
+      idx_t rr_d;
+
+      // lock arbiter decision in case we got at least one req and no acknowledge
+      if (LockIn) begin : gen_lock
+        logic  lock_d, lock_q;
+        logic [NumIn-1:0] req_q;
+
+        assign lock_d     = req_o & ~gnt_i;
+        assign req_d      = (lock_q) ? req_q : req_i;
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
+          if (!rst_ni) begin
+            lock_q <= '0;
+          end else begin
+            if (flush_i) begin
+              lock_q <= '0;
+            end else begin
+              lock_q <= lock_d;
+            end
+          end
+        end
+
+        // pragma translate_off
+        `ifndef VERILATOR
+          lock: assert property(
+            @(posedge clk_i) LockIn |-> req_o &&
+                             (!gnt_i && !flush_i) |=> idx_o == $past(idx_o)) else
+                $fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
+                            ready.");
+
+          logic [NumIn-1:0] req_tmp;
+          assign req_tmp = req_q & req_i;
+          lock_req: assume property(
+            @(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
+                $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
+                            enabled.");
+        `endif
+        // pragma translate_on
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
+          if (!rst_ni) begin
+            req_q  <= '0;
+          end else begin
+            if (flush_i) begin
+              req_q  <= '0;
+            end else begin
+              req_q  <= req_d;
+            end
+          end
+        end
+      end else begin : gen_no_lock
+        assign req_d = req_i;
+      end
+
+      if (FairArb) begin : gen_fair_arb
+        logic [NumIn-1:0] upper_mask,  lower_mask;
+        idx_t             upper_idx,   lower_idx,   next_idx;
+        logic             upper_empty, lower_empty;
+
+        for (genvar i = 0; i < NumIn; i++) begin : gen_mask
+          assign upper_mask[i] = (i >  rr_q) ? req_d[i] : 1'b0; // requests with index above rr_q
+          assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0; // requests at or below rr_q
+        end
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE  ( 1'b0  )
+        ) i_lzc_upper (
+          .in_i    ( upper_mask  ),
+          .cnt_o   ( upper_idx   ),
+          .empty_o ( upper_empty )
+        );
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE  ( 1'b0  )
+        ) i_lzc_lower (
+          .in_i    ( lower_mask  ),
+          .cnt_o   ( lower_idx   ),
+          .empty_o ( /*unused*/  )
+        );
+
+        assign next_idx = upper_empty      ? lower_idx : upper_idx; // fall back to the lower part
+        assign rr_d     = (gnt_i && req_o) ? next_idx  : rr_q; // advance only on a handshake
+
+      end else begin : gen_unfair_arb
+        assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
+      end
+
+      // this holds the highest priority
+      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
+        if (!rst_ni) begin
+          rr_q   <= '0;
+        end else begin
+          if (flush_i) begin
+            rr_q   <= '0;
+          end else begin
+            rr_q   <= rr_d;
+          end
+        end
+      end
+    end
+
+    assign gnt_nodes[0] = gnt_i; // the root node receives the output grant
+
+    // arbiter tree
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
+      for (genvar l = 0; l < 2**level; l++) begin : gen_level
+        // local select signal
+        logic sel;
+        // index calcs
+        localparam int unsigned Idx0 = 2**level-1+l;// current node
+        localparam int unsigned Idx1 = 2**(level+1)-1+l*2;// first child node, second is Idx1+1
+        //////////////////////////////////////////////////////////////
+        // uppermost level where data is fed in from the inputs
+        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
+            assign req_nodes[Idx0]   = req_d[l*2] | req_d[l*2+1];
+
+            // round robin: pick odd input if even is idle, or odd requests and the rr_q bit is set
+            assign sel =  ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
+
+            assign index_nodes[Idx0] = idx_t'(sel);
+            assign data_nodes[Idx0]  = (sel) ? data_i[l*2+1] : data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2])   & ~sel;
+            assign gnt_o[l*2+1]      = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
+            assign req_nodes[Idx0]   = req_d[l*2];
+            assign index_nodes[Idx0] = '0;// always zero in this case
+            assign data_nodes[Idx0]  = data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
+          end
+          // if index is out of range, fill up with zeros (will get pruned)
+          if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
+            assign req_nodes[Idx0]   = 1'b0;
+            assign index_nodes[Idx0] = idx_t'('0);
+            assign data_nodes[Idx0]  = DataType'('0);
+          end
+        //////////////////////////////////////////////////////////////
+        // general case for other levels within the tree
+        end else begin : gen_other_levels
+          assign req_nodes[Idx0]   = req_nodes[Idx1] | req_nodes[Idx1+1];
+
+          // round robin: pick child Idx1+1 if Idx1 is idle, or it requests and the rr_q bit is set
+          assign sel =  ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];
+
+          assign index_nodes[Idx0] = (sel) ?
+            idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
+            idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});
+
+          assign data_nodes[Idx0]  = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
+          assign gnt_nodes[Idx1]   = gnt_nodes[Idx0] & ~sel;
+          assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
+        end
+        //////////////////////////////////////////////////////////////
+      end
+    end
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    `ifndef XSIM
+    initial begin : p_assert
+      assert(NumIn)
+        else $fatal(1, "Input must be at least one element wide.");
+      assert(!(LockIn && ExtPrio))
+        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
+    end
+
+    hot_one : assert property(
+      @(posedge clk_i) $onehot0(gnt_o))
+        else $fatal (1, "Grant signal must be hot1 or zero.");
+
+    gnt0 : assert property(
+      @(posedge clk_i) |gnt_o |-> gnt_i)
+        else $fatal (1, "Grant out implies grant in.");
+
+    gnt1 : assert property(
+      @(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
+        else $fatal (1, "Req out and grant in implies grant out.");
+
+    gnt_idx : assert property(
+      @(posedge clk_i) req_o |->  gnt_i |-> gnt_o[idx_o])
+        else $fatal (1, "Idx_o / gnt_o do not match.");
+
+    req0 : assert property(
+      @(posedge clk_i) |req_i |-> req_o)
+        else $fatal (1, "Req in implies req out.");
+
+    req1 : assert property(
+      @(posedge clk_i) req_o |-> |req_i)
+        else $fatal (1, "Req out implies req in.");
+    `endif
+    `endif
+    // pragma translate_on
+  end
+
+endmodule : rr_arb_tree
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv
new file mode 100644
index 0000000..a7dccc6
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv
@@ -0,0 +1,30 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module rstgen (
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic test_mode_i,
+    output logic rst_no,
+    output logic init_no
+);
+
+    // Bypass-capable reset synchronizer; its test-mode reset is tied to the functional reset.
+    rstgen_bypass i_rstgen_bypass (
+        .clk_i,
+        .rst_ni,
+        .test_mode_i,
+        .rst_test_mode_ni ( rst_ni ),
+        .rst_no,
+        .init_no
+    );
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
new file mode 100644
index 0000000..c51ee83
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// Description: This module is a reset synchronizer with a dedicated reset bypass pin for testmode reset.
+// Pro Tip: The wise Dr. Schaffner recommends at least 4 registers!
+
+module rstgen_bypass #(
+    parameter int unsigned NumRegs = 4
+) (
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic rst_test_mode_ni,
+    input  logic test_mode_i,
+    output logic rst_no,
+    output logic init_no
+);
+
+    // internal reset: asynchronously clears the synchronizer chain
+    logic rst_n;
+    // synchronizer chain; its last stage drives both outputs in functional mode
+    logic [NumRegs-1:0] synch_regs_q;
+    // bypass mode: with test_mode_i set, rst_test_mode_ni drives rst_n and rst_no, init_no is high
+    always_comb begin
+        if (test_mode_i == 1'b0) begin
+            rst_n   = rst_ni;
+            rst_no  = synch_regs_q[NumRegs-1];
+            init_no = synch_regs_q[NumRegs-1];
+        end else begin
+            rst_n   = rst_test_mode_ni;
+            rst_no  = rst_test_mode_ni;
+            init_no = 1'b1;
+        end
+    end
+    // shift in ones: the last stage goes high NumRegs rising clock edges after rst_n is released
+    always_ff @(posedge clk_i or negedge rst_n) begin
+        if (~rst_n) begin
+            synch_regs_q <= '0;
+        end else begin
+            // the size cast drops the old last stage; unlike a [NumRegs-2:0] select it is
+            // also in range for NumRegs == 1
+            synch_regs_q <= NumRegs'({synch_regs_q, 1'b1});
+        end
+    end
+    // pragma translate_off
+    `ifndef VERILATOR
+    initial if (NumRegs < 1) $fatal(1, "At least one register is required.");
+    `endif
+    // pragma translate_on
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv b/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv
new file mode 100644
index 0000000..7193fbc
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv
@@ -0,0 +1,53 @@
+
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: <zarubaf@iis.ee.ethz.ch>
+//
+// Description: Simple shift register for arbitrary depth and types
+
+module shift_reg #(
+    parameter type dtype         = logic,
+    parameter int unsigned Depth = 1
+)(
+    input  logic clk_i,    // Clock
+    input  logic rst_ni,   // Asynchronous reset active low
+    input  dtype d_i,
+    output dtype d_o
+);
+
+    // Depth == 0: no storage at all, the output follows the input combinationally.
+    if (Depth == 0) begin : gen_pass_through
+        assign d_o = d_i;
+
+    // Depth == 1: a single register stage, cleared by the asynchronous reset.
+    end else if (Depth == 1) begin : gen_register
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (!rst_ni) d_o <= '0;
+            else         d_o <= d_i;
+        end
+
+    // Depth > 1: a chain of Depth stages; data enters at stage 0 and leaves
+    // from stage Depth-1.
+    end else if (Depth > 1) begin : gen_shift_reg
+        // All stages, packed into one vector (index 0 holds the newest entry).
+        dtype [Depth-1:0] stage_q;
+
+        // The last stage drives the output.
+        assign d_o = stage_q[Depth-1];
+
+        // On every clock edge all stages move up by one and d_i is shifted in.
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (!rst_ni) stage_q <= '0;
+            else         stage_q <= {stage_q[Depth-2:0], d_i};
+        end
+    end
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv
new file mode 100644
index 0000000..80ff37f
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv
@@ -0,0 +1,46 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+
+/// Wrapper around the flushable spill register to maintain back-ward
+/// compatibility.
+module spill_register #(
+  parameter type T      = logic,
+  parameter bit  Bypass = 1'b0     // make this spill register transparent
+) (
+  input  logic clk_i   ,
+  input  logic rst_ni  ,
+  input  logic valid_i ,
+  output logic ready_o ,
+  input  T     data_i  ,
+  output logic valid_o ,
+  input  logic ready_i ,
+  output T     data_o
+);
+
+  // Backward-compatible wrapper: the flushable spill register with flush_i tied low.
+  spill_register_flushable #(
+    .T      ( T      ),
+    .Bypass ( Bypass )
+  ) spill_register_flushable_i (
+    .clk_i   ( clk_i   ),
+    .rst_ni  ( rst_ni  ),
+    .valid_i ( valid_i ),
+    .flush_i ( 1'b0    ),
+    .ready_o ( ready_o ),
+    .data_i  ( data_i  ),
+    .valid_o ( valid_o ),
+    .ready_i ( ready_i ),
+    .data_o  ( data_o  )
+  );
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv
new file mode 100644
index 0000000..c03ad27
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv
@@ -0,0 +1,105 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+
+/// A register with handshakes that completely cuts any combinational paths
+/// between the input and output. This spill register can be flushed.
+module spill_register_flushable #(
+  parameter type T           = logic,
+  parameter bit  Bypass      = 1'b0   // make this spill register transparent
+) (
+  input  logic clk_i   ,
+  input  logic rst_ni  ,
+  input  logic valid_i ,
+  input  logic flush_i ,
+  output logic ready_o ,
+  input  T     data_i  ,
+  output logic valid_o ,
+  input  logic ready_i ,
+  output T     data_o
+);
+
+  // With Bypass set the module degenerates to plain wires; otherwise the
+  // two-slot register below is generated.
+  if (Bypass) begin : gen_bypass
+    assign valid_o = valid_i;
+    assign ready_o = ready_i;
+    assign data_o  = data_i;
+  end else begin : gen_spill_reg
+    // Two storage slots are chained between input and output: slot A is written
+    // from the input side, slot B takes over the content of slot A when the
+    // output side stalls. Each slot consists of a payload register and a flag
+    // telling whether that payload is currently valid.
+    T     a_data_q, b_data_q;
+    logic a_full_q, b_full_q;
+    logic a_fill,   a_drain;
+    logic b_fill,   b_drain;
+
+    // Input side: new data is accepted unless both slots are occupied.
+    // Note: flush_i and valid_i must not be high at the same time,
+    // otherwise an invalid handshake may occur
+    assign ready_o = !(a_full_q && b_full_q);
+
+    // Output side: valid while at least one slot is occupied. Slot B, when
+    // occupied, is presented ahead of slot A.
+    assign valid_o = a_full_q | b_full_q;
+    assign data_o  = b_full_q ? b_data_q : a_data_q;
+
+    // Slot A is written on an input handshake, which a flush suppresses. Its
+    // flag is released while A is occupied and B is empty, or on a flush.
+    assign a_fill  = valid_i && ready_o && (!flush_i);
+    assign a_drain = (a_full_q && !b_full_q) || flush_i;
+
+    // Slot B is written when slot A is released while the downstream circuit is
+    // not ready, never during a flush. Its flag is released when B is occupied
+    // and the downstream circuit is ready, or on a flush.
+    assign b_fill  = a_drain && (!ready_i) && (!flush_i);
+    assign b_drain = (b_full_q && ready_i) || flush_i;
+
+    // Slot A: payload and occupancy flag.
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a
+      if (!rst_ni) begin
+        a_data_q <= '0;
+        a_full_q <= 0;
+      end else begin
+        // the payload is only captured when the slot is written
+        if (a_fill)            a_data_q <= data_i;
+        // the flag follows a_fill, so a write wins over a simultaneous release
+        if (a_fill || a_drain) a_full_q <= a_fill;
+      end
+    end
+
+    // Slot B: payload and occupancy flag.
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b
+      if (!rst_ni) begin
+        b_data_q <= '0;
+        b_full_q <= 0;
+      end else begin
+        // the payload is always taken over from slot A
+        if (b_fill)            b_data_q <= a_data_q;
+        // the flag follows b_fill, so a write wins over a simultaneous release
+        if (b_fill || b_drain) b_full_q <= b_fill;
+      end
+    end
+
+    // Warn when a flush coincides with valid input data (this check is compiled
+    // out when VERILATOR is defined).
+    // pragma translate_off
+    `ifndef VERILATOR
+    flush_valid : assert property (
+      @(posedge clk_i) disable iff (~rst_ni) (flush_i |-> ~valid_i)) else
+      $warning("Trying to flush and feed the spill register simultaneously. You will lose data!");
+    `endif
+    // pragma translate_on
+  end
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv
new file mode 100644
index 0000000..c8ca2a8
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv
@@ -0,0 +1,49 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream.  Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred.  The
+// arbitration scheme is round-robin by default (`ARBITER` = "rr"), see `rr_arb_tree` for details.
+
+module stream_arbiter #(
+    parameter type      DATA_T = logic,   // Vivado requires a default value for type parameters.
+    parameter integer   N_INP = -1,       // Synopsys DC requires a default value for parameters.
+    parameter           ARBITER = "rr"    // "rr" or "prio"
+) (
+    input  logic              clk_i,
+    input  logic              rst_ni,
+
+    input  DATA_T [N_INP-1:0] inp_data_i,
+    input  logic  [N_INP-1:0] inp_valid_i,
+    output logic  [N_INP-1:0] inp_ready_o,
+
+    output DATA_T             oup_data_o,
+    output logic              oup_valid_o,
+    input  logic              oup_ready_i
+);
+
+  // Non-flushable variant: reuse the flushable arbiter with its flush input tied low.
+  stream_arbiter_flushable #(
+    .DATA_T  ( DATA_T  ),
+    .N_INP   ( N_INP   ),
+    .ARBITER ( ARBITER )
+  ) i_arb (
+    .clk_i,
+    .rst_ni,
+    .flush_i ( 1'b0 ),
+    .inp_data_i,
+    .inp_valid_i,
+    .inp_ready_o,
+    .oup_data_o,
+    .oup_valid_o,
+    .oup_ready_i
+  );
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
new file mode 100644
index 0000000..32946e6
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
@@ -0,0 +1,82 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream.  Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred.  The
+// arbitration scheme is fair round-robin tree, see `rr_arb_tree` for details.
+
+module stream_arbiter_flushable #(
+    parameter type      DATA_T = logic,   // Payload type of a beat; Vivado requires a default value.
+    parameter integer   N_INP = -1,       // Number of input streams; Synopsys DC requires a default.
+    parameter           ARBITER = "rr"    // Arbitration policy: "rr" or "prio"
+) (
+    input  logic              clk_i,
+    input  logic              rst_ni,
+    input  logic              flush_i,
+    // Input streams: one payload and one valid/ready pair per stream.
+    input  DATA_T [N_INP-1:0] inp_data_i,
+    input  logic  [N_INP-1:0] inp_valid_i,
+    output logic  [N_INP-1:0] inp_ready_o,
+    // Arbitrated output stream.
+    output DATA_T             oup_data_o,
+    output logic              oup_valid_o,
+    input  logic              oup_ready_i
+);
+  // Both policies map onto `rr_arb_tree`; they differ only in the `ExtPrio` setting.
+  if (ARBITER == "rr") begin : gen_rr_arb
+    rr_arb_tree #(
+      .NumIn     ( N_INP  ),
+      .DataType  ( DATA_T ),
+      .ExtPrio   ( 1'b0   ),
+      .AxiVldRdy ( 1'b1   ),
+      .LockIn    ( 1'b1   )
+    ) i_arbiter (
+      .clk_i   ( clk_i       ),
+      .rst_ni  ( rst_ni      ),
+      .flush_i ( flush_i     ),
+      .rr_i    ( '0          ),
+      .req_i   ( inp_valid_i ),
+      .data_i  ( inp_data_i  ),
+      .gnt_o   ( inp_ready_o ),
+      .req_o   ( oup_valid_o ),
+      .data_o  ( oup_data_o  ),
+      .gnt_i   ( oup_ready_i ),
+      .idx_o   (             )
+    );
+
+  end else if (ARBITER == "prio") begin : gen_prio_arb
+    rr_arb_tree #(
+      .NumIn     ( N_INP  ),
+      .DataType  ( DATA_T ),
+      .ExtPrio   ( 1'b1   ),
+      .AxiVldRdy ( 1'b1   ),
+      .LockIn    ( 1'b1   )
+    ) i_arbiter (
+      .clk_i   ( clk_i       ),
+      .rst_ni  ( rst_ni      ),
+      .flush_i ( flush_i     ),
+      .rr_i    ( '0          ),
+      .req_i   ( inp_valid_i ),
+      .data_i  ( inp_data_i  ),
+      .gnt_o   ( inp_ready_o ),
+      .req_o   ( oup_valid_o ),
+      .data_o  ( oup_data_o  ),
+      .gnt_i   ( oup_ready_i ),
+      .idx_o   (             )
+    );
+
+  end else begin : gen_arb_error
+    // pragma translate_off
+    $fatal(1, "Invalid value for parameter 'ARBITER'!");
+    // pragma translate_on
+  end
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv
new file mode 100644
index 0000000..5051b6c
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv
@@ -0,0 +1,132 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, zarubaf@iis.ee.ethz.ch
+// Description: Delay (or randomize) AXI-like handshaking
+
+module stream_delay #(
+    parameter bit   StallRandom = 0,       // 1: take the stall length from an LFSR for each beat
+    parameter int   FixedDelay  = 1,       // Stall length used when StallRandom is 0
+    parameter type  payload_t  = logic     // Payload type, passed through unmodified
+)(
+    input  logic     clk_i,
+    input  logic     rst_ni,
+    // Upstream (input) handshake
+    input  payload_t payload_i,
+    output logic     ready_o,
+    input  logic     valid_i,
+    // Downstream (output) handshake
+    output payload_t payload_o,
+    input  logic     ready_i,
+    output logic     valid_o
+);
+    // A zero fixed delay without random stalls degenerates to plain wires.
+    if (FixedDelay == 0 && !StallRandom) begin : gen_pass_through
+        assign ready_o = ready_i;
+        assign valid_o = valid_i;
+        assign payload_o = payload_i;
+    end else begin : gen_delay
+        // Width of the delay counter, of its load value and of its output.
+        localparam int unsigned CounterBits = 4;
+
+        typedef enum logic [1:0] {
+            Idle, Valid, Ready
+        } state_e;
+
+        state_e state_d, state_q;
+
+        logic                   load;       // drives counter load_i and, if present, LFSR en_i
+        logic [CounterBits-1:0] count_out;  // counter value; sized like the counter (was [3:0])
+        logic                   en;         // drives counter en_i
+
+        logic [CounterBits-1:0] counter_load;
+        // The payload is never registered; only the handshake signals are held back.
+        assign payload_o = payload_i;
+
+        always_comb begin
+            // Defaults: hold state, keep both handshakes de-asserted, counter idle.
+            state_d = state_q;
+            valid_o = 1'b0;
+            ready_o = 1'b0;
+            load    = 1'b0;
+            en      = 1'b0;
+
+            unique case (state_q)
+                Idle: begin
+                    if (valid_i) begin
+                        load = 1'b1;
+                        state_d = Valid;
+                        // Just one cycle delay
+                        if (FixedDelay == 1 || (StallRandom && counter_load == 1)) begin
+                            state_d = Ready;
+                        end
+                        // Random stall of zero: forward the handshake in this very cycle.
+                        if (StallRandom && counter_load == 0) begin
+                            valid_o = 1'b1;
+                            ready_o = ready_i;
+                            if (ready_i) state_d = Idle;
+                            else state_d = Ready;
+                        end
+                    end
+                end
+                Valid: begin
+                    // Keep the counter running until it reads zero.
+                    en = 1'b1;
+                    if (count_out == 0) begin
+                        state_d = Ready;
+                    end
+                end
+                // Delay elapsed: expose the handshake until downstream accepts.
+                Ready: begin
+                    valid_o = 1'b1;
+                    ready_o = ready_i;
+                    if (ready_i) state_d = Idle;
+                end
+                default : /* default */;
+            endcase
+
+        end
+        // Source of the counter load value: LFSR output or the fixed parameter.
+        if (StallRandom) begin : gen_random_stall
+            lfsr_16bit #(
+              .WIDTH ( 16 )
+            ) i_lfsr_16bit (
+                .clk_i          ( clk_i        ),
+                .rst_ni         ( rst_ni       ),
+                .en_i           ( load         ),
+                .refill_way_oh  (              ),
+                .refill_way_bin ( counter_load )
+            );
+        end else begin : gen_fixed_delay
+            // NOTE(review): FixedDelay is an int; values >= 2**CounterBits are silently truncated.
+            assign counter_load = FixedDelay;
+        end
+
+        counter #(
+            .WIDTH      ( CounterBits )
+        ) i_counter (
+            .clk_i      ( clk_i        ),
+            .rst_ni     ( rst_ni       ),
+            .clear_i    ( 1'b0         ),
+            .en_i       ( en           ),
+            .load_i     ( load         ),
+            .down_i     ( 1'b1         ),
+            .d_i        ( counter_load ),
+            .q_o        ( count_out    ),
+            .overflow_o (              )
+        );
+        // State register with asynchronous active-low reset to Idle.
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (~rst_ni) begin
+                state_q <= Idle;
+            end else begin
+                state_q <= state_d;
+            end
+        end
+    end
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv
new file mode 100644
index 0000000..69ad309
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv
@@ -0,0 +1,36 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Connects the input stream (valid-ready) handshake to one of `N_OUP` output stream handshakes.
+///
+/// This module has no data ports because stream data does not need to be demultiplexed: the data of
+/// the input stream can just be applied at all output streams.
+module stream_demux #(
+  /// Number of connected outputs.
+  parameter int unsigned N_OUP     = 32'd1,
+  /// Dependent parameters, DO NOT OVERRIDE! Select width: $clog2(N_OUP), but at least 1 bit.
+  parameter int unsigned LOG_N_OUP = (N_OUP > 32'd1) ? unsigned'($clog2(N_OUP)) : 1'b1
+) (
+  input  logic                 inp_valid_i,
+  output logic                 inp_ready_o,
+  // Index of the output stream the input handshake is routed to.
+  input  logic [LOG_N_OUP-1:0] oup_sel_i,
+  // Output-side handshakes, one valid/ready pair per output.
+  output logic [N_OUP-1:0]     oup_valid_o,
+  input  logic [N_OUP-1:0]     oup_ready_i
+);
+  // Only the selected output sees `inp_valid_i`; every other valid is driven low.
+  always_comb begin
+    oup_valid_o = '0;
+    oup_valid_o[oup_sel_i] = inp_valid_i;
+  end
+  assign inp_ready_o = oup_ready_i[oup_sel_i];  // ready is taken from the selected output
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv
new file mode 100644
index 0000000..34607d9
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv
@@ -0,0 +1,46 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Stream multiplexer: connects the output to one of `N_INP` data streams with valid-ready
+/// handshaking.
+
+module stream_mux #(
+  parameter type DATA_T = logic,  // Vivado requires a default value for type parameters.
+  parameter integer N_INP = 0,    // Synopsys DC requires a default value for value parameters.
+  /// Dependent parameters, DO NOT OVERRIDE!
+  parameter integer LOG_N_INP = $clog2(N_INP)  // NOTE(review): 0 for N_INP == 1, giving a [-1:0] select
+) (
+  input  DATA_T [N_INP-1:0]     inp_data_i,
+  input  logic  [N_INP-1:0]     inp_valid_i,
+  output logic  [N_INP-1:0]     inp_ready_o,
+  // Index of the input stream that is connected to the output.
+  input  logic  [LOG_N_INP-1:0] inp_sel_i,
+  // Output stream.
+  output DATA_T                 oup_data_o,
+  output logic                  oup_valid_o,
+  input  logic                  oup_ready_i
+);
+  // Only the selected input sees `oup_ready_i`; every other ready is driven low.
+  always_comb begin
+    inp_ready_o = '0;
+    inp_ready_o[inp_sel_i] = oup_ready_i;
+  end
+  assign oup_data_o   = inp_data_i[inp_sel_i];   // data of the selected input
+  assign oup_valid_o  = inp_valid_i[inp_sel_i];  // valid of the selected input
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: p_assertions
+    assert (N_INP >= 1) else $fatal (1, "The number of inputs must be at least 1!");
+  end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv
new file mode 100644
index 0000000..f529d6a
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Register with a simple stream-like ready/valid handshake.
+/// This register does not cut combinatorial paths on all control signals; if you need a complete
+/// cut, use the `spill_register`.
+module stream_register #(
+    parameter type T = logic  // Payload type; Vivado requires a default value for type parameters.
+) (
+    input  logic    clk_i,          // Clock
+    input  logic    rst_ni,         // Asynchronous active-low reset
+    input  logic    clr_i,          // Synchronous clear
+    input  logic    testmode_i,     // Test mode to bypass clock gating
+    // Upstream (producer) side
+    input  logic    valid_i,
+    output logic    ready_o,
+    input  T        data_i,
+    // Downstream (consumer) side
+    output logic    valid_o,
+    input  logic    ready_i,
+    output T        data_o
+);
+
+    logic fifo_full, fifo_empty;
+
+    // Handshake outputs mirror the occupancy flags of the FIFO below.
+    assign valid_o = ~fifo_empty;
+    assign ready_o = ~fifo_full;
+    // Depth-1, non-fall-through FIFO holds the registered beat.
+    fifo_v2 #(
+        .FALL_THROUGH ( 1'b0     ),
+        .DATA_WIDTH   ( $bits(T) ),
+        .DEPTH        ( 1        ),
+        .dtype        ( T        )
+    ) i_fifo (
+        .clk_i       ( clk_i             ),
+        .rst_ni      ( rst_ni            ),
+        .flush_i     ( clr_i             ),
+        .testmode_i  ( testmode_i        ),
+        .full_o      ( fifo_full         ),
+        .empty_o     ( fifo_empty        ),
+        .alm_full_o  (                   ),
+        .alm_empty_o (                   ),
+        .data_i      ( data_i            ),
+        .push_i      ( valid_i & ready_o ),
+        .data_o      ( data_o            ),
+        .pop_i       ( ready_i & valid_o )
+    );
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv b/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv
new file mode 100644
index 0000000..80e7356
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv
@@ -0,0 +1,21 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 29.10.2018
+// Description: Dummy circuit to mitigate Open Pin warnings
+
+/* verilator lint_off UNUSED */
+module unread (
+    input logic d_i  // signal to terminate; it is not read anywhere inside this module
+);
+// Intentionally empty body: the module only provides a sink for the connected signal.
+endmodule
+/* verilator lint_on UNUSED */
diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv
new file mode 100644
index 0000000..ac04b9b
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv
@@ -0,0 +1,62 @@
+// Copyright 2022 Thales Research and Technology
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses
+//
+// Inferable, Asynchronous Dual-Port RAM with one write port and one read port
+//
+//
+// This module is designed to work with both Xilinx and Microchip FPGA tools by following the respective
+// guidelines:
+// - Xilinx UG901 Vivado Design Suite User Guide: Synthesis
+// - Inferring Microchip PolarFire RAM Blocks
+//
+// Intel FPGA (Altera) doesn't seem to support asynchronous RAM
+//
+// Current Maintainers: Sébastien Jacq - sjthales on github.com
+
+ 
+module AsyncDpRam
+#(
+  parameter ADDR_WIDTH = 10,   // width of the read and write address ports
+  parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower
+  parameter DATA_WIDTH = 32    // width of one memory word
+)(
+  input  logic                    Clk_CI,
+
+  // Write port
+  input  logic                    WrEn_SI,
+  input  logic [ADDR_WIDTH-1:0]   WrAddr_DI,
+  input  logic [DATA_WIDTH-1:0]   WrData_DI,
+  
+  // Read port
+  input  logic [ADDR_WIDTH-1:0]   RdAddr_DI,
+  output logic [DATA_WIDTH-1:0]   RdData_DO
+);
+  // Storage array; the declaration initializer sets every word to zero.
+  logic [DATA_WIDTH-1:0] mem [DATA_DEPTH-1:0]= '{default:0};
+
+  // WRITE: synchronous, on the rising edge of Clk_CI while WrEn_SI is high
+  always_ff @(posedge Clk_CI)
+  begin
+    if (WrEn_SI) begin
+      mem[WrAddr_DI] <= WrData_DI;
+    end
+  end
+
+ // READ: asynchronous, a continuous assignment from the addressed word
+  assign RdData_DO = mem[RdAddr_DI];
+  
+  ////////////////////////////
+  // assertions
+  ////////////////////////////
+  // Checks that ADDR_WIDTH address bits can reach all DATA_DEPTH entries.
+  // pragma translate_off
+  assert property
+    (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH)))
+    else $error("depth out of bounds");
+  // pragma translate_on
+
+endmodule 
diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv
new file mode 100644
index 0000000..ee6fd1a
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv
@@ -0,0 +1,66 @@
+// Copyright 2023 Thales Research and Technology
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses
+//
+// Inferable, Asynchronous Three-Port RAM with one write port and two read ports
+//
+//
+// This module is designed to work with both Xilinx and Microchip FPGA tools by following the respective
+// guidelines:
+// - Xilinx UG901 Vivado Design Suite User Guide: Synthesis
+// - Inferring Microchip PolarFire RAM Blocks
+//
+// Intel FPGA (Altera) doesn't seem to support asynchronous RAM
+//
+// Current Maintainers: Sébastien Jacq - sjthales on github.com
+
+ 
+module AsyncThreePortRam
+#(
+  parameter ADDR_WIDTH = 10,   // width of the read and write address ports
+  parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower
+  parameter DATA_WIDTH = 32    // width of one memory word
+)(
+  input  logic                    Clk_CI,
+
+  // Write port
+  input  logic                    WrEn_SI,
+  input  logic [ADDR_WIDTH-1:0]   WrAddr_DI,
+  input  logic [DATA_WIDTH-1:0]   WrData_DI,
+  
+  // Read ports
+  input  logic [ADDR_WIDTH-1:0]   RdAddr_DI_0,
+  input  logic [ADDR_WIDTH-1:0]   RdAddr_DI_1,
+  
+  output logic [DATA_WIDTH-1:0]   RdData_DO_0,
+  output logic [DATA_WIDTH-1:0]   RdData_DO_1
+);
+  // Storage array shared by both read ports; the initializer sets every word to zero.
+  logic [DATA_WIDTH-1:0] mem [DATA_DEPTH-1:0]= '{default:0};
+
+  // WRITE: synchronous, on the rising edge of Clk_CI while WrEn_SI is high
+  always_ff @(posedge Clk_CI)
+  begin
+    if (WrEn_SI) begin
+      mem[WrAddr_DI] <= WrData_DI;
+    end
+  end
+
+ // READ: asynchronous, one continuous assignment per read port
+  assign RdData_DO_0 = mem[RdAddr_DI_0];
+  assign RdData_DO_1 = mem[RdAddr_DI_1];
+  
+  ////////////////////////////
+  // assertions
+  ////////////////////////////
+  // Checks that ADDR_WIDTH address bits can reach all DATA_DEPTH entries.
+  // pragma translate_off
+  assert property
+    (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH)))
+    else $error("depth out of bounds");
+  // pragma translate_on
+
+endmodule 
diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv
new file mode 100644
index 0000000..e3efb12
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv
@@ -0,0 +1,182 @@
+// Copyright 2014 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/**
+ * Inferable, Synchronous Dual-Port RAM
+ *
+ * This module is designed to work with both Xilinx and Altera tools by following the respective
+ * guidelines:
+ * - Xilinx UG901 Vivado Design Suite User Guide: Synthesis (p. 106)
+ * - Altera Quartus II Handbook Volume 1: Design and Synthesis (p. 768)
+ *
+ * Current Maintainers:
+ * - Michael Schaffner  <schaffer@iis.ee.ethz.ch>
+ */
+
+// this automatically switches the behavioral description
+// pragma translate_off
+`define SIMULATION
+// pragma translate_on
+
+module SyncDpRam
+#(
+  parameter ADDR_WIDTH = 10,   // width of the address ports
+  parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower
+  parameter DATA_WIDTH = 32,   // width of one memory word
+  parameter OUT_REGS   = 0,    // >0: add a register stage on the read data, 0: bypass it
+  parameter SIM_INIT   = 0     // for simulation only, will not be synthesized
+                               // 0: no init, 1: zero init, 2: random init
+                               // note: on verilator, 2 is not supported. define the VERILATOR macro to work around.
+)(
+  input  logic                  Clk_CI,
+  input  logic                  Rst_RBI,  // active low: checked against 1'b0 below
+  // port A
+  input  logic                  CSelA_SI,
+  input  logic                  WrEnA_SI,
+  input  logic [DATA_WIDTH-1:0] WrDataA_DI,
+  input  logic [ADDR_WIDTH-1:0] AddrA_DI,
+  output logic [DATA_WIDTH-1:0] RdDataA_DO,
+  // port B
+  input  logic                  CSelB_SI,
+  input  logic                  WrEnB_SI,
+  input  logic [DATA_WIDTH-1:0] WrDataB_DI,
+  input  logic [ADDR_WIDTH-1:0] AddrB_DI,
+  output logic [DATA_WIDTH-1:0] RdDataB_DO
+);
+
+  ////////////////////////////
+  // signals, localparams
+  ////////////////////////////
+
+  logic [DATA_WIDTH-1:0] RdDataA_DN;  // port A read data captured from the array
+  logic [DATA_WIDTH-1:0] RdDataA_DP;  // port A read data after the optional output stage
+  logic [DATA_WIDTH-1:0] RdDataB_DN;  // port B read data captured from the array
+  logic [DATA_WIDTH-1:0] RdDataB_DP;  // port B read data after the optional output stage
+  logic [DATA_WIDTH-1:0] Mem_DP [DATA_DEPTH-1:0];
+
+  ////////////////////////////
+  // behavioral simulation model (used when SIMULATION is defined)
+  ////////////////////////////
+
+  `ifdef SIMULATION
+    always_ff @(posedge Clk_CI)
+    begin
+      automatic logic [DATA_WIDTH-1:0] val;
+      // While reset is low and SIM_INIT>0, the whole array is (re)initialized on each clock edge.
+      if(Rst_RBI == 1'b0 && SIM_INIT>0) begin
+        for(int k=0; k<DATA_DEPTH;k++) begin
+          if(SIM_INIT==1) val = '0;
+          `ifndef VERILATOR
+          else if(SIM_INIT==2) void'(randomize(val));
+          `endif
+          Mem_DP[k] = val;  // NOTE(review): with VERILATOR defined and SIM_INIT==2, val is never assigned
+        end
+      end else begin
+        // Each port either writes or reads in a cycle; read data is not updated during a write.
+        if (CSelA_SI) begin
+          if (WrEnA_SI) begin
+            Mem_DP[AddrA_DI] <= WrDataA_DI;
+          end
+          else
+          begin
+            RdDataA_DN <= Mem_DP[AddrA_DI];
+          end
+        end
+
+        if (CSelB_SI) begin
+          if (WrEnB_SI) begin
+            Mem_DP[AddrB_DI] <= WrDataB_DI;
+          end
+          else
+          begin
+            RdDataB_DN <= Mem_DP[AddrB_DI];
+          end
+        end
+      end
+    end
+  `endif
+
+  ////////////////////////////
+  // XILINX/ALTERA implementation (used when SIMULATION is not defined)
+  ////////////////////////////
+
+  `ifndef SIMULATION
+    // One process per port; no reset and no initialization of the array.
+    always_ff @(posedge Clk_CI)
+    begin
+      if (CSelA_SI) begin
+        if (WrEnA_SI) begin
+          Mem_DP[AddrA_DI] <= WrDataA_DI;
+        end
+        else
+        begin
+          RdDataA_DN <= Mem_DP[AddrA_DI];
+        end
+      end
+    end
+
+    always_ff @(posedge Clk_CI)
+    begin
+      if (CSelB_SI) begin
+        if (WrEnB_SI) begin
+          Mem_DP[AddrB_DI] <= WrDataB_DI;
+        end
+        else
+        begin
+          RdDataB_DN <= Mem_DP[AddrB_DI];
+        end
+      end
+    end
+  `endif
+
+  ////////////////////////////
+  // optional output regs
+  ////////////////////////////
+
+  // output regs: extra register stage, cleared by the asynchronous active-low reset
+  generate
+    if (OUT_REGS>0) begin : g_outreg
+      always_ff @(posedge Clk_CI or negedge Rst_RBI) begin
+        if(Rst_RBI == 1'b0)
+        begin
+          RdDataA_DP  <= 0;
+          RdDataB_DP  <= 0;
+        end
+        else
+        begin
+          RdDataA_DP  <= RdDataA_DN;
+          RdDataB_DP  <= RdDataB_DN;
+        end
+      end
+    end
+  endgenerate // g_outreg
+
+  // output reg bypass: read data is forwarded without the extra stage
+  generate
+    if (OUT_REGS==0) begin : g_oureg_byp
+      assign RdDataA_DP  = RdDataA_DN;
+      assign RdDataB_DP  = RdDataB_DN;
+    end
+  endgenerate// g_oureg_byp
+
+  assign RdDataA_DO = RdDataA_DP;
+  assign RdDataB_DO = RdDataB_DP;
+
+  ////////////////////////////
+  // assertions
+  ////////////////////////////
+  // Check the address range, and flag both ports writing the same address in one cycle.
+  // pragma translate_off
+  assert property
+    (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH)))
+    else $error("depth out of bounds");
+  assert property
+    (@(posedge Clk_CI) (CSelA_SI & CSelB_SI & WrEnA_SI & WrEnB_SI) |-> (AddrA_DI != AddrB_DI))
+    else $error("A and B write to the same address");
+  // pragma translate_on
+
+endmodule // SyncDpRam
diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv
new file mode 100644
index 0000000..bc7ed5c
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv
@@ -0,0 +1,94 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+module cluster_clock_and2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_and2 cell.
+  tc_clk_and2 i_tc_clk_and2 (
+    .clk0_i ( clk0_i ),
+    .clk1_i ( clk1_i ),
+    .clk_o  ( clk_o  )
+  );
+
+endmodule
+
+module cluster_clock_buffer (
+  input  logic clk_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_buffer cell.
+  tc_clk_buffer i_tc_clk_buffer (
+    .clk_i ( clk_i ),
+    .clk_o ( clk_o )
+  );
+
+endmodule
+
+// Description: Behavioral model of an integrated clock-gating cell (ICG)
+module cluster_clock_gating (
+  input  logic clk_i,
+  input  logic en_i,
+  input  logic test_en_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_gating cell.
+  tc_clk_gating i_tc_clk_gating (
+    .clk_i     ( clk_i     ),
+    .en_i      ( en_i      ),
+    .test_en_i ( test_en_i ),
+    .clk_o     ( clk_o     )
+  );
+
+endmodule
+
+module cluster_clock_inverter (
+  input  logic clk_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_inverter cell.
+  tc_clk_inverter i_tc_clk_inverter (
+    .clk_i ( clk_i ),
+    .clk_o ( clk_o )
+  );
+
+endmodule
+
+module cluster_clock_mux2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  input  logic clk_sel_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_mux2 cell.
+  tc_clk_mux2 i_tc_clk_mux2 (
+    .clk0_i    ( clk0_i    ),
+    .clk1_i    ( clk1_i    ),
+    .clk_sel_i ( clk_sel_i ),
+    .clk_o     ( clk_o     )
+  );
+
+endmodule
+
+module cluster_clock_xor2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_xor2 cell.
+  tc_clk_xor2 i_tc_clk_xor2 (
+    .clk0_i ( clk0_i ),
+    .clk1_i ( clk1_i ),
+    .clk_o  ( clk_o  )
+  );
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv
new file mode 100644
index 0000000..53ad07f
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv
@@ -0,0 +1,107 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+module pulp_clock_and2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_and2 cell.
+  tc_clk_and2 i_tc_clk_and2 (
+    .clk0_i ( clk0_i ),
+    .clk1_i ( clk1_i ),
+    .clk_o  ( clk_o  )
+  );
+
+endmodule
+
+module pulp_clock_buffer (
+  input  logic clk_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_buffer cell.
+  tc_clk_buffer i_tc_clk_buffer (
+    .clk_i ( clk_i ),
+    .clk_o ( clk_o )
+  );
+
+endmodule
+
+// Description: Behavioral model of an integrated clock-gating cell (ICG)
+module pulp_clock_gating (
+  input  logic clk_i,
+  input  logic en_i,
+  input  logic test_en_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_gating cell.
+  tc_clk_gating i_tc_clk_gating (
+    .clk_i     ( clk_i     ),
+    .en_i      ( en_i      ),
+    .test_en_i ( test_en_i ),
+    .clk_o     ( clk_o     )
+  );
+
+endmodule
+
+module pulp_clock_inverter (
+  input  logic clk_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_inverter cell.
+  tc_clk_inverter i_tc_clk_inverter (
+    .clk_i ( clk_i ),
+    .clk_o ( clk_o )
+  );
+
+endmodule
+
+module pulp_clock_mux2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  input  logic clk_sel_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_mux2 cell.
+  tc_clk_mux2 i_tc_clk_mux2 (
+    .clk0_i    ( clk0_i    ),
+    .clk1_i    ( clk1_i    ),
+    .clk_sel_i ( clk_sel_i ),
+    .clk_o     ( clk_o     )
+  );
+
+endmodule
+
+module pulp_clock_xor2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  output logic clk_o
+);
+  // Deprecated alias: every port is forwarded to the generic tc_clk_xor2 cell.
+  tc_clk_xor2 i_tc_clk_xor2 (
+    .clk0_i ( clk0_i ),
+    .clk1_i ( clk1_i ),
+    .clk_o  ( clk_o  )
+  );
+
+endmodule
+
+`ifndef SYNTHESIS
+module pulp_clock_delay(
+  input  logic in_i,
+  output logic out_o
+);
+  // Non-synthesis model (guarded by `ifndef SYNTHESIS): out_o follows in_i after a 300 ps assign delay.
+  assign #(300ps) out_o = in_i;
+
+endmodule
+`endif
+
+
diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv
new file mode 100644
index 0000000..3ab329e
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv
@@ -0,0 +1,120 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+module tc_clk_and2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  output logic clk_o
+);
+  // Behavioral 2-input clock AND: clk_o is high only while both clock inputs are high.
+  assign clk_o = clk0_i & clk1_i;
+
+endmodule
+
+module tc_clk_buffer (
+  input  logic clk_i,
+  output logic clk_o
+);
+  // Behavioral clock buffer: clk_o is a direct, zero-delay copy of clk_i.
+  assign clk_o = clk_i;
+
+endmodule
+
+// Description: Behavioral model of an integrated clock-gating cell (ICG)
+module tc_clk_gating #(
+  /// This parameter is a hint for tool/technology specific mappings of this
+  /// tech_cell. It indicates whether this particular clk gate instance is
+  /// required for functional correctness or just instantiated for power
+  /// savings. If IS_FUNCTIONAL == 0, technology specific mappings might
+  /// replace this cell with a feedthrough connection without any gating.
+  parameter bit IS_FUNCTIONAL = 1'b1  // not referenced inside this behavioral model
+)(
+   input  logic clk_i,      // clock to be gated
+   input  logic en_i,       // gate enable
+   input  logic test_en_i,  // second enable, ORed with en_i
+   output logic clk_o       // gated clock
+);
+  // Enable as captured by the latch below.
+  logic clk_en;
+
+  always_latch begin
+    if (clk_i == 1'b0) clk_en <= en_i | test_en_i;  // transparent only while clk_i is low
+  end
+  // clk_en is held while clk_i is high, so enable changes in the high phase do not reach clk_o.
+  assign clk_o = clk_i & clk_en;
+
+endmodule
+
+module tc_clk_inverter (
+  input  logic clk_i,
+  output logic clk_o
+);
+  // Behavioral clock inverter: clk_o is the zero-delay logical complement of clk_i.
+  assign clk_o = ~clk_i;
+
+endmodule
+
+// Warning: Typical clock mux cells of a technology's std cell library ARE NOT
+// GLITCH FREE!! The only difference from a regular multiplexer cell is that they
+// feature balanced rise- and fall-times. In other words: SWITCHING FROM ONE
+// CLOCK TO THE OTHER CAN INTRODUCE GLITCHES. ALSO, GLITCHES ON THE SELECT LINE
+// DIRECTLY TRANSLATE TO GLITCHES ON THE OUTPUT CLOCK!! This cell is only
+// intended to be used for quasi-static switching between clocks when one of the
+// clocks is anyway inactive or if the downstream logic remains gated or in
+// reset state during the transition phase. If you need dynamic switching
+// between arbitrary input clocks without introducing glitches, have a look at
+// the clk_mux_glitch_free cell in the pulp-platform/common_cells repository.
+module tc_clk_mux2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  input  logic clk_sel_i,
+  output logic clk_o
+);
+  // Plain combinational 2:1 mux: clk_sel_i high selects clk1_i, low selects clk0_i.
+  assign clk_o = (clk_sel_i) ? clk1_i : clk0_i;
+
+endmodule
+
+module tc_clk_xor2 (
+  input  logic clk0_i,
+  input  logic clk1_i,
+  output logic clk_o
+);
+  // Behavioral 2-input clock XOR: clk_o is high while exactly one clock input is high.
+  assign clk_o = clk0_i ^ clk1_i;
+
+endmodule
+
+module tc_clk_or2 (
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+  // Behavioral 2-input clock OR: clk_o is high while at least one clock input is high.
+  assign clk_o = clk0_i | clk1_i;
+
+endmodule
+
+`ifndef SYNTHESIS
+module tc_clk_delay #(
+  parameter int unsigned Delay = 300ps  // NOTE(review): time literal stored as an integer; value presumably depends on the time unit in effect - confirm
+) (
+  input  logic in_i,
+  output logic out_o
+);
+  // Simulation-only delay element; when VERILATOR is defined the assign is compiled out and out_o has no driver.
+// pragma translate_off
+`ifndef VERILATOR
+  assign #(Delay) out_o = in_i;
+`endif
+// pragma translate_on
+
+endmodule
+`endif
diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv
new file mode 100644
index 0000000..b702a11
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv
@@ -0,0 +1,245 @@
+// Copyright (c) 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger <wroennin@ethz.ch>
+
+// Description: Functional module of a generic SRAM
+//
+// Parameters:
+// - NumWords:    Number of words in the macro. Address width can be calculated with:
+//                `AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1`
+//                The module issues a warning if there is a request on an address which is
+//                not in range.
+// - DataWidth:   Width of the ports `wdata_i` and `rdata_o`.
+// - ByteWidth:   Width of a byte. The width of the byte enable signal `be_i` is the
+//                ceiling division `ceil(DataWidth / ByteWidth)`.
+// - NumPorts:    Number of read and write ports. Each is a full port. Ports with a higher
+//                index read and write after the ones with lower indices.
+// - Latency:     Read latency, the read data is available this many cycles after a request.
+// - SimInit:     Macro simulation initialization. Values are:
+//                "zeros":  Each bit gets initialized with 1'b0.
+//                "ones":   Each bit gets initialized with 1'b1.
+//                "random": Each bit gets randomly initialized with 1'b0 or 1'b1.
+//                "none":   Each bit gets initialized with 1'bx. (default)
+// - PrintSimCfg: Prints at the beginning of the simulation a `Hello` message with
+//                the instantiated parameters and signal widths.
+// - ImplKey:     Key by which an instance can refer to a specific implementation (e.g. macro).
+//                May be used to look up additional parameters for implementation (e.g. generator,
+//                line width, muxing) in an external reference, such as a configuration file.
+//
+// Ports:
+// - `clk_i`:   Clock
+// - `rst_ni`:  Asynchronous reset, active low
+// - `req_i`:   Request, active high
+// - `we_i`:    Write request, active high
+// - `addr_i`:  Request address
+// - `wdata_i`: Write data, has to be valid on request
+// - `be_i`:    Byte enable, active high
+// - `rdata_o`: Read data, valid `Latency` cycles after a request with `we_i` low.
+//
+// Behaviour:
+// - Address collision:  When several ports make a write access to the same address,
+//                       the write operation will start at the port with the lowest port
+//                       index; each port will overwrite the changes made by the previous ports
+//                       according to how the respective `be_i` signal is set.
+// - Read data on write: This implementation will not produce a read data output on the signal
+//                       `rdata_o` when `req_i` and `we_i` are asserted. The output data is stable
+//                       on write requests.
+
+module tc_sram #(
+  parameter int unsigned NumWords     = 32'd1024, // Number of Words in data array
+  parameter int unsigned DataWidth    = 32'd128,  // Data signal width
+  parameter int unsigned ByteWidth    = 32'd8,    // Width of a data byte
+  parameter int unsigned NumPorts     = 32'd2,    // Number of read and write ports
+  parameter int unsigned Latency      = 32'd1,    // Latency when the read data is available
+  parameter              SimInit      = "none",   // Simulation initialization
+  parameter bit          PrintSimCfg  = 1'b0,     // Print configuration
+  parameter              ImplKey      = "none",   // Reference to specific implementation
+  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
+  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
+  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
+  parameter type         addr_t    = logic [AddrWidth-1:0],
+  parameter type         data_t    = logic [DataWidth-1:0],
+  parameter type         be_t      = logic [BeWidth-1:0]
+) (
+  input  logic                 clk_i,      // Clock
+  input  logic                 rst_ni,     // Asynchronous reset active low
+  // input ports
+  input  logic  [NumPorts-1:0] req_i,      // request
+  input  logic  [NumPorts-1:0] we_i,       // write enable
+  input  addr_t [NumPorts-1:0] addr_i,     // request address
+  input  data_t [NumPorts-1:0] wdata_i,    // write data
+  input  be_t   [NumPorts-1:0] be_i,       // write byte enable
+  // output ports
+  output data_t [NumPorts-1:0] rdata_o     // read data
+);
+
+  // memory array: NumWords entries, each one data_t wide
+  data_t sram [NumWords-1:0];
+  // per port: address of the last read request, re-used while no new read is requested
+  addr_t [NumPorts-1:0] r_addr_q;
+
+  // SRAM simulation initialization: init_val is filled once at time zero according to SimInit
+  data_t init_val[NumWords-1:0];
+  initial begin : proc_sram_init
+    for (int unsigned i = 0; i < NumWords; i++) begin
+      case (SimInit)
+        "zeros":  init_val[i] = {DataWidth{1'b0}};
+        "ones":   init_val[i] = {DataWidth{1'b1}};
+        "random": init_val[i] = {DataWidth{$urandom()}}; // NOTE(review): looks like one 32-bit draw, replicated and truncated - confirm
+        default:  init_val[i] = {DataWidth{1'bx}}; // "none" and any unrecognized SimInit value
+      endcase
+    end
+  end
+
+  // Read path: the word selected by the current (or last held) read address enters the
+  // pipeline combinationally at index Latency-1.
+  // On every clock edge each stage moves one index down; index 0 drives rdata_o, so the
+  // data appears Latency cycles after the request.
+
+  // read data output assignment
+  data_t [NumPorts-1:0][Latency-1:0] rdata_q,  rdata_d;
+  if (Latency == 32'd0) begin : gen_no_read_lat // combinational read, pipeline registers unused
+    for (genvar i = 0; i < NumPorts; i++) begin : gen_port
+      assign rdata_o[i] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]];
+    end
+  end else begin : gen_read_lat
+
+    always_comb begin
+      for (int unsigned i = 0; i < NumPorts; i++) begin
+        rdata_o[i] = rdata_q[i][0];
+        for (int unsigned j = 0; j < (Latency-1); j++) begin
+          rdata_d[i][j] = rdata_q[i][j+1];
+        end
+        rdata_d[i][Latency-1] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]];
+      end
+    end
+  end
+
+  // In case simulation initialization is disabled (SimInit == 'none'), don't assign to the sram
+  // content at all. This improves simulation performance in tools like verilator
+  if (SimInit == "none") begin
+    // write memory array without initialization: reset only clears the held read addresses
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        for (int i = 0; i < NumPorts; i++) begin
+          r_addr_q[i] <= {AddrWidth{1'b0}};
+        end
+      end else begin
+        // shift the read pipeline; it samples rdata_d, i.e. the sram content before this edge's writes
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (Latency != 0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= rdata_d[i][j];
+            end
+          end
+        end
+        // handle requests port by port; for writes to the same byte the highest port index wins
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (req_i[i]) begin
+            if (we_i[i]) begin
+              // write only the bytes whose byte-enable bit is set
+              for (int unsigned j = 0; j < BeWidth; j++) begin
+                if (be_i[i][j]) begin
+                  sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth];
+                end
+              end
+            end else begin
+              // otherwise update read address for subsequent non request cycles
+              r_addr_q[i] <= addr_i[i];
+            end
+          end // if req_i
+        end // for ports
+      end // if !rst_ni
+    end
+  end else begin
+    // write memory array; reset additionally loads init_val into the array and the read pipeline
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        sram <= init_val;
+        for (int i = 0; i < NumPorts; i++) begin
+          r_addr_q[i] <= {AddrWidth{1'b0}};
+          // initialize every read pipeline stage of each port with init_val word 0
+          if (Latency != 32'd0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= init_val[{AddrWidth{1'b0}}];
+            end
+          end
+        end
+      end else begin
+        // shift the read pipeline; it samples rdata_d, i.e. the sram content before this edge's writes
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (Latency != 0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= rdata_d[i][j];
+            end
+          end
+        end
+        // handle requests port by port; for writes to the same byte the highest port index wins
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (req_i[i]) begin
+            if (we_i[i]) begin
+              // write only the bytes whose byte-enable bit is set
+              for (int unsigned j = 0; j < BeWidth; j++) begin
+                if (be_i[i][j]) begin
+                  sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth];
+                end
+              end
+            end else begin
+              // otherwise update read address for subsequent non request cycles
+              r_addr_q[i] <= addr_i[i];
+            end
+          end // if req_i
+        end // for ports
+      end // if !rst_ni
+    end
+  end
+
+// Validate parameters.
+// pragma translate_off
+`ifndef VERILATOR
+`ifndef TARGET_SYNTHESIS
+  initial begin: p_assertions
+    assert ($bits(addr_i)  == NumPorts * AddrWidth) else $fatal(1, "AddrWidth problem on `addr_i`");
+    assert ($bits(wdata_i) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `wdata_i`");
+    assert ($bits(be_i)    == NumPorts * BeWidth)   else $fatal(1, "BeWidth   problem on `be_i`"   );
+    assert ($bits(rdata_o) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `rdata_o`");
+    assert (NumWords  >= 32'd1) else $fatal(1, "NumWords has to be > 0");
+    assert (DataWidth >= 32'd1) else $fatal(1, "DataWidth has to be > 0");
+    assert (ByteWidth >= 32'd1) else $fatal(1, "ByteWidth has to be > 0");
+    assert (NumPorts  >= 32'd1) else $fatal(1, "The number of ports must be at least 1!");
+  end
+  initial begin: p_sim_hello
+    if (PrintSimCfg) begin
+      $display("#################################################################################");
+      $display("tc_sram functional instantiated with the configuration:"                          );
+      $display("Instance: %m"                                                                     );
+      $display("Number of ports   (dec): %0d", NumPorts                                           );
+      $display("Number of words   (dec): %0d", NumWords                                           );
+      $display("Address width     (dec): %0d", AddrWidth                                          );
+      $display("Data width        (dec): %0d", DataWidth                                          );
+      $display("Byte width        (dec): %0d", ByteWidth                                          );
+      $display("Byte enable width (dec): %0d", BeWidth                                            );
+      $display("Latency Cycles    (dec): %0d", Latency                                            );
+      $display("Simulation init   (str): %0s", SimInit                                            );
+      $display("#################################################################################");
+    end
+  end
+  for (genvar i = 0; i < NumPorts; i++) begin : gen_assertions
+    assert property ( @(posedge clk_i) disable iff (!rst_ni)
+        (req_i[i] |-> (addr_i[i] < NumWords))) else
+      $warning("Request address %0h not mapped, port %0d, expect random write or read behavior!",
+          addr_i[i], i);
+  end
+
+`endif
+`endif
+// pragma translate_on
+endmodule