diff --git a/bin/verilator b/bin/verilator index 5ec9a5ef0..3ab656be9 100755 --- a/bin/verilator +++ b/bin/verilator @@ -383,6 +383,7 @@ detailed descriptions of these arguments. --help Show this help --hierarchical Enable hierarchical Verilation --hierarchical-params-file Internal option that specifies parameters file for hier blocks + --hierarchical-threads Number of threads for hierarchical scheduling -I Directory to search for includes --if-depth Tune IFDEPTH warning +incdir+ Directory to search for includes diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index 290f07c0e..477a74fde 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -777,6 +777,16 @@ Summary: for deparametrized modules with :option:`/*verilator&32;hier_block*/` metacomment. See :ref:`Hierarchical Verilation`. +.. option:: --hierarchical-threads + + Specifies the number of threads used for scheduling hierarchical blocks. + The main use case of this option is to make it possible to schedule + multi-threaded hierarchical blocks on multiple threads without increasing + the parallelism of the whole design. + + Defaults to the value of :vlopt:`--threads`. For optimal performance, it should not + exceed the CPU core count. + .. option:: -I See :vlopt:`-y`. diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index 5517e0db3..dce801f25 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -503,8 +503,11 @@ class EmitCModel final : public EmitCFunc { + "::hierName() const { return vlSymsp->name(); }\n"); putns(modp, "const char* " + topClassName() + "::modelName() const { return \"" + topClassName() + "\"; }\n"); + const int threads = v3Global.opt.hierChild() + ? 
v3Global.opt.threads() + : std::max(v3Global.opt.threads(), v3Global.opt.hierThreads()); putns(modp, "unsigned " + topClassName() + "::threads() const { return " - + cvtToStr(v3Global.opt.threads()) + "; }\n"); + + cvtToStr(threads) + "; }\n"); putns(modp, "void " + topClassName() + "::prepareClone() const { contextp()->prepareClone(); }\n"); putns(modp, "void " + topClassName() + "::atClone() const {\n"); diff --git a/src/V3ExecGraph.cpp b/src/V3ExecGraph.cpp index 8a011fcfd..e20ff26d4 100644 --- a/src/V3ExecGraph.cpp +++ b/src/V3ExecGraph.cpp @@ -341,13 +341,16 @@ class PackThreads final { // MEMBERS const uint32_t m_nThreads; // Number of threads + const uint32_t m_nHierThreads; // Number of threads used for hierarchical tasks const uint32_t m_sandbagNumerator; // Numerator padding for est runtime const uint32_t m_sandbagDenom; // Denominator padding for est runtime // CONSTRUCTORS explicit PackThreads(uint32_t nThreads = v3Global.opt.threads(), + uint32_t nHierThreads = v3Global.opt.hierThreads(), unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100) : m_nThreads{nThreads} + , m_nHierThreads{nHierThreads} , m_sandbagNumerator{sandbagNumerator} , m_sandbagDenom{sandbagDenom} {} ~PackThreads() = default; @@ -419,7 +422,7 @@ class PackThreads final { SchedulingMode mode = SchedulingMode::SCHEDULING; // Time each thread is occupied until - std::vector busyUntil(m_nThreads, 0); + std::vector busyUntil(std::max(m_nThreads, m_nHierThreads), 0); // MTasks ready to be assigned next. All their dependencies are already assigned. std::set readyMTasks; @@ -479,7 +482,7 @@ class PackThreads final { if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_DISCOVERED) { mode = SchedulingMode::WIDE_TASK_SCHEDULING; - const uint32_t size = m_nThreads / maxThreadWorkers; + const uint32_t size = m_nHierThreads / maxThreadWorkers; UASSERT(size, "Thread pool size should be bigger than 0"); // If no tasks were added to the normal thread schedule, clear it. 
if (schedule.mtaskState.empty()) result.clear(); @@ -584,8 +587,9 @@ public: new V3GraphEdge{&graph, t3, t5, 1}; new V3GraphEdge{&graph, t4, t6, 1}; - constexpr uint32_t threads = 6; - PackThreads packer{threads, + constexpr uint32_t threads = 2; + constexpr uint32_t hierThreads = 6; + PackThreads packer{threads, hierThreads, 3, // Sandbag numerator 10}; // Sandbag denom @@ -599,7 +603,7 @@ public: UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][0], t0); UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][1], t1); - UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), threads / 3); + UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), hierThreads / 3); UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t2); UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][1], t3); UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][0], t4); @@ -689,14 +693,15 @@ public: */ new V3GraphEdge{&graph, t0, t1, 1}; - constexpr uint32_t threads = 2; - PackThreads packer{threads, + constexpr uint32_t threads = 1; + constexpr uint32_t hierThreads = 2; + PackThreads packer{threads, hierThreads, 3, // Sandbag numerator 10}; // Sandbag denom const std::vector scheduled = packer.pack(graph); UASSERT_SELFTEST(size_t, scheduled.size(), 2); - UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), threads / 2); + UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), hierThreads / 2); UASSERT_SELFTEST(size_t, scheduled[0].threads[0].size(), 1); for (size_t i = 1; i < scheduled[0].threads.size(); ++i) UASSERT_SELFTEST(size_t, scheduled[0].threads[i].size(), 0); diff --git a/src/V3Options.cpp b/src/V3Options.cpp index be8814730..7cd9b5fe4 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1602,6 +1602,10 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, m_threads = 1; } }); + DECL_OPTION("-hierarchical-threads", CbVal, [this, fl](const char* valp) { + m_hierThreads = std::atoi(valp); + if (m_hierThreads < 0) 
fl->v3fatal("--hierarchical-threads must be >= 0: " << valp); + }); DECL_OPTION("-threads-coarsen", OnOff, &m_threadsCoarsen).undocumented(); // Debug DECL_OPTION("-threads-dpi", CbVal, [this, fl](const char* valp) { if (!std::strcmp(valp, "all")) { diff --git a/src/V3Options.h b/src/V3Options.h index 0fa4e94f2..6c7fb6acd 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -316,6 +316,7 @@ private: int m_expandLimit = 64; // main switch: --expand-limit int m_gateStmts = 100; // main switch: --gate-stmts int m_hierChild = 0; // main switch: --hierarchical-child + int m_hierThreads = 0; // main switch: --hierarchical-threads int m_ifDepth = 0; // main switch: --if-depth int m_inlineMult = 2000; // main switch: --inline-mult int m_instrCountDpi = 200; // main switch: --instr-count-dpi @@ -741,6 +742,7 @@ public: bool hierarchical() const { return m_hierarchical; } int hierChild() const VL_MT_SAFE { return m_hierChild; } + int hierThreads() const VL_MT_SAFE { return m_hierThreads == 0 ? m_threads : m_hierThreads; } bool hierTop() const VL_MT_SAFE { return !m_hierChild && !m_hierBlocks.empty(); } const V3HierBlockOptSet& hierBlocks() const { return m_hierBlocks; } // Directory to save .tree, .dot, .dat, .vpp for hierarchical block top diff --git a/test_regress/t/t_flag_hierarchical_threads_bad.out b/test_regress/t/t_flag_hierarchical_threads_bad.out new file mode 100644 index 000000000..858c9146f --- /dev/null +++ b/test_regress/t/t_flag_hierarchical_threads_bad.out @@ -0,0 +1,2 @@ +%Error: --hierarchical-threads must be >= 0: -2 + ... See the manual at https://verilator.org/verilator_doc.html?v=latest for more assistance. 
diff --git a/test_regress/t/t_flag_hierarchical_threads_bad.py b/test_regress/t/t_flag_hierarchical_threads_bad.py new file mode 100755 index 000000000..f6f2005d4 --- /dev/null +++ b/test_regress/t/t_flag_hierarchical_threads_bad.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2025 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.lint(fails=True, + verilator_flags2=['--hierarchical-threads -2'], + expect_filename=test.golden_filename) + +test.passes() diff --git a/test_regress/t/t_hier_block_chained.py b/test_regress/t/t_hier_block_chained.py new file mode 100755 index 000000000..4ed5f7ff7 --- /dev/null +++ b/test_regress/t/t_hier_block_chained.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2025 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt_all') +test.init_benchmarksim() +test.cycles = (int(test.benchmark) if test.benchmark else 100000) +test.sim_time = test.cycles * 10 + 1000 + +THREADS = 2 +HIER_BLOCK_THREADS = 2 +HIER_THREADS = 4 + +config_file = test.t_dir + "/" + test.name + ".vlt" + +test.compile( + benchmarksim=1, + v_flags2=[ + config_file, "+define+SIM_CYCLES=" + str(test.cycles), "--hierarchical", "--stats", + "-Wno-UNOPTFLAT", + (f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else ""), + (f"--hierarchical-threads {HIER_THREADS}" if test.vltmt and HIER_THREADS > 1 else "") + ], + threads=(THREADS if test.vltmt else 1), + context_threads=(max(HIER_THREADS, THREADS) if test.vltmt else 1)) + +if test.vltmt: + test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", + r'Optimizations, Thread schedule count\s+(\d+)', 1) + test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", + r'Optimizations, Thread schedule total tasks\s+(\d+)', 2) + +test.execute() + +test.passes() diff --git a/test_regress/t/t_hier_block_chained.v b/test_regress/t/t_hier_block_chained.v new file mode 100644 index 000000000..57ccbba99 --- /dev/null +++ b/test_regress/t/t_hier_block_chained.v @@ -0,0 +1,212 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2025 by Antmicro. 
+// SPDX-License-Identifier: CC0-1.0 + +// Based on tests emitted by t_gate_tree.py + +module t (clk); + input clk; + + logic reset; + + reg [255:0] v2_0; + reg [255:0] v1_0; + reg [255:0] v1_1; + reg [255:0] v1_2; + reg [255:0] v1_3; + reg [255:0] v1_4; + reg [255:0] v1_5; + reg [255:0] v1_6; + reg [255:0] v1_7; + reg [255:0] dummy; + + Calculate calc0(.clk(clk), .reset(reset), .v1_0(v1_0), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy)); + Calculate calc1(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(v1_1), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy)); + Calculate calc2(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(v1_2), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy)); + Calculate calc3(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(v1_3), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy)); + Calculate calc4(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(v1_4), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy)); + Calculate calc5(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(v1_5), .v1_6(dummy), .v1_7(dummy)); + Calculate calc6(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(v1_6), .v1_7(dummy)); + Calculate calc7(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(v1_7)); + always @ (posedge clk) v2_0 <= v1_0 + v1_1 + v1_2 + v1_3 + v1_4 + v1_5 + v1_6 + v1_7; + Check chk(.clk(clk), .reset(reset), .v2_0(v2_0)); +endmodule + +module Check(input clk, output logic reset, input reg [255:0] v2_0); + integer cyc=0; + always @ (posedge clk) begin + cyc <= cyc + 1; +`ifdef TEST_VERBOSE + $write("[%0t] cyc=%0d rst=%0x result=%0x\n", $time, cyc, reset, v2_0); // print only signals declared in Check; v0_0/v1_0 are not visible here +`endif + if 
(cyc==0) begin + reset <= 1; + end + else if (cyc==10) begin + reset <= 0; + end +`ifndef SIM_CYCLES + `define SIM_CYCLES 99 +`endif + else if (cyc==`SIM_CYCLES) begin + if (v2_0 != 256'd2017) $stop; + $write("VARS=64 WIDTH=256 WORKINGSET=2KB\n"); + $write("*-* All Finished *-*\n"); + $finish; + end + end + +endmodule + +module Calculate(input clk, + input reset, + output reg [255:0] v1_0, + output reg [255:0] v1_1, + output reg [255:0] v1_2, + output reg [255:0] v1_3, + output reg [255:0] v1_4, + output reg [255:0] v1_5, + output reg [255:0] v1_6, + output reg [255:0] v1_7 + ); + reg [255:0] v0_0; + reg [255:0] v0_1; + reg [255:0] v0_2; + reg [255:0] v0_3; + reg [255:0] v0_4; + reg [255:0] v0_5; + reg [255:0] v0_6; + reg [255:0] v0_7; + reg [255:0] v0_8; + reg [255:0] v0_9; + reg [255:0] v0_10; + reg [255:0] v0_11; + reg [255:0] v0_12; + reg [255:0] v0_13; + reg [255:0] v0_14; + reg [255:0] v0_15; + reg [255:0] v0_16; + reg [255:0] v0_17; + reg [255:0] v0_18; + reg [255:0] v0_19; + reg [255:0] v0_20; + reg [255:0] v0_21; + reg [255:0] v0_22; + reg [255:0] v0_23; + reg [255:0] v0_24; + reg [255:0] v0_25; + reg [255:0] v0_26; + reg [255:0] v0_27; + reg [255:0] v0_28; + reg [255:0] v0_29; + reg [255:0] v0_30; + reg [255:0] v0_31; + reg [255:0] v0_32; + reg [255:0] v0_33; + reg [255:0] v0_34; + reg [255:0] v0_35; + reg [255:0] v0_36; + reg [255:0] v0_37; + reg [255:0] v0_38; + reg [255:0] v0_39; + reg [255:0] v0_40; + reg [255:0] v0_41; + reg [255:0] v0_42; + reg [255:0] v0_43; + reg [255:0] v0_44; + reg [255:0] v0_45; + reg [255:0] v0_46; + reg [255:0] v0_47; + reg [255:0] v0_48; + reg [255:0] v0_49; + reg [255:0] v0_50; + reg [255:0] v0_51; + reg [255:0] v0_52; + reg [255:0] v0_53; + reg [255:0] v0_54; + reg [255:0] v0_55; + reg [255:0] v0_56; + reg [255:0] v0_57; + reg [255:0] v0_58; + reg [255:0] v0_59; + reg [255:0] v0_60; + reg [255:0] v0_61; + reg [255:0] v0_62; + reg [255:0] v0_63; + + always @ (posedge clk) v0_0 <= reset ? 
256'd1 : v0_1; + always @ (posedge clk) v0_1 <= reset ? 256'd1 : v0_2; + always @ (posedge clk) v0_2 <= reset ? 256'd2 : v0_3; + always @ (posedge clk) v0_3 <= reset ? 256'd3 : v0_4; + always @ (posedge clk) v0_4 <= reset ? 256'd4 : v0_5; + always @ (posedge clk) v0_5 <= reset ? 256'd5 : v0_6; + always @ (posedge clk) v0_6 <= reset ? 256'd6 : v0_7; + always @ (posedge clk) v0_7 <= reset ? 256'd7 : v0_0; + always @ (posedge clk) v0_8 <= reset ? 256'd8 : v0_9; + always @ (posedge clk) v0_9 <= reset ? 256'd9 : v0_10; + always @ (posedge clk) v0_10 <= reset ? 256'd10 : v0_11; + always @ (posedge clk) v0_11 <= reset ? 256'd11 : v0_12; + always @ (posedge clk) v0_12 <= reset ? 256'd12 : v0_13; + always @ (posedge clk) v0_13 <= reset ? 256'd13 : v0_14; + always @ (posedge clk) v0_14 <= reset ? 256'd14 : v0_15; + always @ (posedge clk) v0_15 <= reset ? 256'd15 : v0_8; + always @ (posedge clk) v0_16 <= reset ? 256'd16 : v0_17; + always @ (posedge clk) v0_17 <= reset ? 256'd17 : v0_18; + always @ (posedge clk) v0_18 <= reset ? 256'd18 : v0_19; + always @ (posedge clk) v0_19 <= reset ? 256'd19 : v0_20; + always @ (posedge clk) v0_20 <= reset ? 256'd20 : v0_21; + always @ (posedge clk) v0_21 <= reset ? 256'd21 : v0_22; + always @ (posedge clk) v0_22 <= reset ? 256'd22 : v0_23; + always @ (posedge clk) v0_23 <= reset ? 256'd23 : v0_16; + always @ (posedge clk) v0_24 <= reset ? 256'd24 : v0_25; + always @ (posedge clk) v0_25 <= reset ? 256'd25 : v0_26; + always @ (posedge clk) v0_26 <= reset ? 256'd26 : v0_27; + always @ (posedge clk) v0_27 <= reset ? 256'd27 : v0_28; + always @ (posedge clk) v0_28 <= reset ? 256'd28 : v0_29; + always @ (posedge clk) v0_29 <= reset ? 256'd29 : v0_30; + always @ (posedge clk) v0_30 <= reset ? 256'd30 : v0_31; + always @ (posedge clk) v0_31 <= reset ? 256'd31 : v0_24; + always @ (posedge clk) v0_32 <= reset ? 256'd32 : v0_33; + always @ (posedge clk) v0_33 <= reset ? 256'd33 : v0_34; + always @ (posedge clk) v0_34 <= reset ? 
256'd34 : v0_35; + always @ (posedge clk) v0_35 <= reset ? 256'd35 : v0_36; + always @ (posedge clk) v0_36 <= reset ? 256'd36 : v0_37; + always @ (posedge clk) v0_37 <= reset ? 256'd37 : v0_38; + always @ (posedge clk) v0_38 <= reset ? 256'd38 : v0_39; + always @ (posedge clk) v0_39 <= reset ? 256'd39 : v0_32; + always @ (posedge clk) v0_40 <= reset ? 256'd40 : v0_41; + always @ (posedge clk) v0_41 <= reset ? 256'd41 : v0_42; + always @ (posedge clk) v0_42 <= reset ? 256'd42 : v0_43; + always @ (posedge clk) v0_43 <= reset ? 256'd43 : v0_44; + always @ (posedge clk) v0_44 <= reset ? 256'd44 : v0_45; + always @ (posedge clk) v0_45 <= reset ? 256'd45 : v0_46; + always @ (posedge clk) v0_46 <= reset ? 256'd46 : v0_47; + always @ (posedge clk) v0_47 <= reset ? 256'd47 : v0_40; + always @ (posedge clk) v0_48 <= reset ? 256'd48 : v0_49; + always @ (posedge clk) v0_49 <= reset ? 256'd49 : v0_50; + always @ (posedge clk) v0_50 <= reset ? 256'd50 : v0_51; + always @ (posedge clk) v0_51 <= reset ? 256'd51 : v0_52; + always @ (posedge clk) v0_52 <= reset ? 256'd52 : v0_53; + always @ (posedge clk) v0_53 <= reset ? 256'd53 : v0_54; + always @ (posedge clk) v0_54 <= reset ? 256'd54 : v0_55; + always @ (posedge clk) v0_55 <= reset ? 256'd55 : v0_48; + always @ (posedge clk) v0_56 <= reset ? 256'd56 : v0_57; + always @ (posedge clk) v0_57 <= reset ? 256'd57 : v0_58; + always @ (posedge clk) v0_58 <= reset ? 256'd58 : v0_59; + always @ (posedge clk) v0_59 <= reset ? 256'd59 : v0_60; + always @ (posedge clk) v0_60 <= reset ? 256'd60 : v0_61; + always @ (posedge clk) v0_61 <= reset ? 256'd61 : v0_62; + always @ (posedge clk) v0_62 <= reset ? 256'd62 : v0_63; + always @ (posedge clk) v0_63 <= reset ? 
256'd63 : v0_56; + + always @ (posedge clk) v1_0 <= v0_0 + v0_1 + v0_2 + v0_3 + v0_4 + v0_5 + v0_6 + v0_7; + always @ (posedge clk) v1_1 <= v0_8 + v0_9 + v0_10 + v0_11 + v0_12 + v0_13 + v0_14 + v0_15; + always @ (posedge clk) v1_2 <= v0_16 + v0_17 + v0_18 + v0_19 + v0_20 + v0_21 + v0_22 + v0_23; + always @ (posedge clk) v1_3 <= v0_24 + v0_25 + v0_26 + v0_27 + v0_28 + v0_29 + v0_30 + v0_31; + always @ (posedge clk) v1_4 <= v0_32 + v0_33 + v0_34 + v0_35 + v0_36 + v0_37 + v0_38 + v0_39; + always @ (posedge clk) v1_5 <= v0_40 + v0_41 + v0_42 + v0_43 + v0_44 + v0_45 + v0_46 + v0_47; + always @ (posedge clk) v1_6 <= v0_48 + v0_49 + v0_50 + v0_51 + v0_52 + v0_53 + v0_54 + v0_55; + always @ (posedge clk) v1_7 <= v0_56 + v0_57 + v0_58 + v0_59 + v0_60 + v0_61 + v0_62 + v0_63; +endmodule diff --git a/test_regress/t/t_hier_block_chained.vlt b/test_regress/t/t_hier_block_chained.vlt new file mode 100644 index 000000000..9740d5a91 --- /dev/null +++ b/test_regress/t/t_hier_block_chained.vlt @@ -0,0 +1,14 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty, 2025 by Antmicro. 
+// SPDX-License-Identifier: CC0-1.0 + +`verilator_config + +hier_block -module "Calculate" +hier_block -module "Check" + +`ifdef WORKERS +hier_workers -module "Calculate" -workers `WORKERS +`endif diff --git a/test_regress/t/t_hier_block_perf.py b/test_regress/t/t_hier_block_perf.py index b36132b41..5847c7588 100755 --- a/test_regress/t/t_hier_block_perf.py +++ b/test_regress/t/t_hier_block_perf.py @@ -14,9 +14,9 @@ test.init_benchmarksim() test.cycles = (int(test.benchmark) if test.benchmark else 100000) test.sim_time = test.cycles * 10 + 1000 -THREADS = int(os.environ["THREADS"]) if "THREADS" in os.environ else 4 -HIER_BLOCK_THREADS = int( - os.environ["HIER_BLOCK_THREADS"]) if "HIER_BLOCK_THREADS" in os.environ else 2 +THREADS = 2 +HIER_BLOCK_THREADS = 2 +HIER_THREADS = 4 config_file = test.t_dir + "/" + test.name + ".vlt" @@ -25,18 +25,20 @@ test.compile( v_flags2=[ config_file, "+define+SIM_CYCLES=" + str(test.cycles), "--prof-exec", "--hierarchical", "--stats", "-Wno-UNOPTFLAT", - (f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else "") + (f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else ""), + (f"--hierarchical-threads {HIER_THREADS}" if test.vltmt and HIER_THREADS > 1 else "") ], - threads=(THREADS if test.vltmt else 1)) + threads=(THREADS if test.vltmt else 1), + context_threads=(HIER_THREADS if test.vltmt else 1)) test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", r'Optimizations, Hierarchical DPI wrappers with costs\s+(\d+)', 6) if test.vltmt: test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", - r'Optimizations, Thread schedule count\s+(\d+)', 4) + r'Optimizations, Thread schedule count\s+(\d+)', 1) test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", - r'Optimizations, Thread schedule total tasks\s+(\d+)', 12) + r'Optimizations, Thread schedule total tasks\s+(\d+)', 2) 
test.execute(all_run_flags=[ "+verilator+prof+exec+start+2",