Add `--hierarchical-threads` (#6037)
This commit is contained in:
parent
c9fa54536e
commit
9cc4cc0efd
|
|
@ -383,6 +383,7 @@ detailed descriptions of these arguments.
|
||||||
--help Show this help
|
--help Show this help
|
||||||
--hierarchical Enable hierarchical Verilation
|
--hierarchical Enable hierarchical Verilation
|
||||||
--hierarchical-params-file <name> Internal option that specifies parameters file for hier blocks
|
--hierarchical-params-file <name> Internal option that specifies parameters file for hier blocks
|
||||||
|
--hierarchical-threads <threads> Number of threads for hierarchical scheduling
|
||||||
-I<dir> Directory to search for includes
|
-I<dir> Directory to search for includes
|
||||||
--if-depth <value> Tune IFDEPTH warning
|
--if-depth <value> Tune IFDEPTH warning
|
||||||
+incdir+<dir> Directory to search for includes
|
+incdir+<dir> Directory to search for includes
|
||||||
|
|
|
||||||
|
|
@ -777,6 +777,16 @@ Summary:
|
||||||
for deparametrized modules with :option:`/*verilator&32;hier_block*/`
|
for deparametrized modules with :option:`/*verilator&32;hier_block*/`
|
||||||
metacomment. See :ref:`Hierarchical Verilation`.
|
metacomment. See :ref:`Hierarchical Verilation`.
|
||||||
|
|
||||||
|
.. option:: --hierarchical-threads <threads>
|
||||||
|
|
||||||
|
Specifies the number of threads used for scheduling hierarchical blocks.
|
||||||
|
The main use-case of this option is to provide the possibility of scheduling
|
||||||
|
multi-thread hierarchical blocks on multiple threads without increasing
|
||||||
|
parallelism of the whole design.
|
||||||
|
|
||||||
|
Set to :vlopt:`--threads` by default. For optimal performance, it should not exceed
|
||||||
|
CPU core count.
|
||||||
|
|
||||||
.. option:: -I<dir>
|
.. option:: -I<dir>
|
||||||
|
|
||||||
See :vlopt:`-y`.
|
See :vlopt:`-y`.
|
||||||
|
|
|
||||||
|
|
@ -503,8 +503,11 @@ class EmitCModel final : public EmitCFunc {
|
||||||
+ "::hierName() const { return vlSymsp->name(); }\n");
|
+ "::hierName() const { return vlSymsp->name(); }\n");
|
||||||
putns(modp, "const char* " + topClassName() + "::modelName() const { return \""
|
putns(modp, "const char* " + topClassName() + "::modelName() const { return \""
|
||||||
+ topClassName() + "\"; }\n");
|
+ topClassName() + "\"; }\n");
|
||||||
|
const int threads = v3Global.opt.hierChild()
|
||||||
|
? v3Global.opt.threads()
|
||||||
|
: std::max(v3Global.opt.threads(), v3Global.opt.hierThreads());
|
||||||
putns(modp, "unsigned " + topClassName() + "::threads() const { return "
|
putns(modp, "unsigned " + topClassName() + "::threads() const { return "
|
||||||
+ cvtToStr(v3Global.opt.threads()) + "; }\n");
|
+ cvtToStr(threads) + "; }\n");
|
||||||
putns(modp, "void " + topClassName()
|
putns(modp, "void " + topClassName()
|
||||||
+ "::prepareClone() const { contextp()->prepareClone(); }\n");
|
+ "::prepareClone() const { contextp()->prepareClone(); }\n");
|
||||||
putns(modp, "void " + topClassName() + "::atClone() const {\n");
|
putns(modp, "void " + topClassName() + "::atClone() const {\n");
|
||||||
|
|
|
||||||
|
|
@ -341,13 +341,16 @@ class PackThreads final {
|
||||||
|
|
||||||
// MEMBERS
|
// MEMBERS
|
||||||
const uint32_t m_nThreads; // Number of threads
|
const uint32_t m_nThreads; // Number of threads
|
||||||
|
const uint32_t m_nHierThreads; // Number of threads used for hierarchical tasks
|
||||||
const uint32_t m_sandbagNumerator; // Numerator padding for est runtime
|
const uint32_t m_sandbagNumerator; // Numerator padding for est runtime
|
||||||
const uint32_t m_sandbagDenom; // Denominator padding for est runtime
|
const uint32_t m_sandbagDenom; // Denominator padding for est runtime
|
||||||
|
|
||||||
// CONSTRUCTORS
|
// CONSTRUCTORS
|
||||||
explicit PackThreads(uint32_t nThreads = v3Global.opt.threads(),
|
explicit PackThreads(uint32_t nThreads = v3Global.opt.threads(),
|
||||||
|
uint32_t nHierThreads = v3Global.opt.hierThreads(),
|
||||||
unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100)
|
unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100)
|
||||||
: m_nThreads{nThreads}
|
: m_nThreads{nThreads}
|
||||||
|
, m_nHierThreads{nHierThreads}
|
||||||
, m_sandbagNumerator{sandbagNumerator}
|
, m_sandbagNumerator{sandbagNumerator}
|
||||||
, m_sandbagDenom{sandbagDenom} {}
|
, m_sandbagDenom{sandbagDenom} {}
|
||||||
~PackThreads() = default;
|
~PackThreads() = default;
|
||||||
|
|
@ -419,7 +422,7 @@ class PackThreads final {
|
||||||
SchedulingMode mode = SchedulingMode::SCHEDULING;
|
SchedulingMode mode = SchedulingMode::SCHEDULING;
|
||||||
|
|
||||||
// Time each thread is occupied until
|
// Time each thread is occupied until
|
||||||
std::vector<uint32_t> busyUntil(m_nThreads, 0);
|
std::vector<uint32_t> busyUntil(std::max(m_nThreads, m_nHierThreads), 0);
|
||||||
|
|
||||||
// MTasks ready to be assigned next. All their dependencies are already assigned.
|
// MTasks ready to be assigned next. All their dependencies are already assigned.
|
||||||
std::set<ExecMTask*, MTaskCmp> readyMTasks;
|
std::set<ExecMTask*, MTaskCmp> readyMTasks;
|
||||||
|
|
@ -479,7 +482,7 @@ class PackThreads final {
|
||||||
|
|
||||||
if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_DISCOVERED) {
|
if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_DISCOVERED) {
|
||||||
mode = SchedulingMode::WIDE_TASK_SCHEDULING;
|
mode = SchedulingMode::WIDE_TASK_SCHEDULING;
|
||||||
const uint32_t size = m_nThreads / maxThreadWorkers;
|
const uint32_t size = m_nHierThreads / maxThreadWorkers;
|
||||||
UASSERT(size, "Thread pool size should be bigger than 0");
|
UASSERT(size, "Thread pool size should be bigger than 0");
|
||||||
// If no tasks were added to the normal thread schedule, clear it.
|
// If no tasks were added to the normal thread schedule, clear it.
|
||||||
if (schedule.mtaskState.empty()) result.clear();
|
if (schedule.mtaskState.empty()) result.clear();
|
||||||
|
|
@ -584,8 +587,9 @@ public:
|
||||||
new V3GraphEdge{&graph, t3, t5, 1};
|
new V3GraphEdge{&graph, t3, t5, 1};
|
||||||
new V3GraphEdge{&graph, t4, t6, 1};
|
new V3GraphEdge{&graph, t4, t6, 1};
|
||||||
|
|
||||||
constexpr uint32_t threads = 6;
|
constexpr uint32_t threads = 2;
|
||||||
PackThreads packer{threads,
|
constexpr uint32_t hierThreads = 6;
|
||||||
|
PackThreads packer{threads, hierThreads,
|
||||||
3, // Sandbag numerator
|
3, // Sandbag numerator
|
||||||
10}; // Sandbag denom
|
10}; // Sandbag denom
|
||||||
|
|
||||||
|
|
@ -599,7 +603,7 @@ public:
|
||||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][0], t0);
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][0], t0);
|
||||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][1], t1);
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][1], t1);
|
||||||
|
|
||||||
UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), threads / 3);
|
UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), hierThreads / 3);
|
||||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t2);
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t2);
|
||||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][1], t3);
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][1], t3);
|
||||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][0], t4);
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][0], t4);
|
||||||
|
|
@ -689,14 +693,15 @@ public:
|
||||||
*/
|
*/
|
||||||
new V3GraphEdge{&graph, t0, t1, 1};
|
new V3GraphEdge{&graph, t0, t1, 1};
|
||||||
|
|
||||||
constexpr uint32_t threads = 2;
|
constexpr uint32_t threads = 1;
|
||||||
PackThreads packer{threads,
|
constexpr uint32_t hierThreads = 2;
|
||||||
|
PackThreads packer{threads, hierThreads,
|
||||||
3, // Sandbag numerator
|
3, // Sandbag numerator
|
||||||
10}; // Sandbag denom
|
10}; // Sandbag denom
|
||||||
|
|
||||||
const std::vector<ThreadSchedule> scheduled = packer.pack(graph);
|
const std::vector<ThreadSchedule> scheduled = packer.pack(graph);
|
||||||
UASSERT_SELFTEST(size_t, scheduled.size(), 2);
|
UASSERT_SELFTEST(size_t, scheduled.size(), 2);
|
||||||
UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), threads / 2);
|
UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), hierThreads / 2);
|
||||||
UASSERT_SELFTEST(size_t, scheduled[0].threads[0].size(), 1);
|
UASSERT_SELFTEST(size_t, scheduled[0].threads[0].size(), 1);
|
||||||
for (size_t i = 1; i < scheduled[0].threads.size(); ++i)
|
for (size_t i = 1; i < scheduled[0].threads.size(); ++i)
|
||||||
UASSERT_SELFTEST(size_t, scheduled[0].threads[i].size(), 0);
|
UASSERT_SELFTEST(size_t, scheduled[0].threads[i].size(), 0);
|
||||||
|
|
|
||||||
|
|
@ -1602,6 +1602,10 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
|
||||||
m_threads = 1;
|
m_threads = 1;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
DECL_OPTION("-hierarchical-threads", CbVal, [this, fl](const char* valp) {
|
||||||
|
m_hierThreads = std::atoi(valp);
|
||||||
|
if (m_hierThreads < 0) fl->v3fatal("--hierarchical-threads must be >= 0: " << valp);
|
||||||
|
});
|
||||||
DECL_OPTION("-threads-coarsen", OnOff, &m_threadsCoarsen).undocumented(); // Debug
|
DECL_OPTION("-threads-coarsen", OnOff, &m_threadsCoarsen).undocumented(); // Debug
|
||||||
DECL_OPTION("-threads-dpi", CbVal, [this, fl](const char* valp) {
|
DECL_OPTION("-threads-dpi", CbVal, [this, fl](const char* valp) {
|
||||||
if (!std::strcmp(valp, "all")) {
|
if (!std::strcmp(valp, "all")) {
|
||||||
|
|
|
||||||
|
|
@ -316,6 +316,7 @@ private:
|
||||||
int m_expandLimit = 64; // main switch: --expand-limit
|
int m_expandLimit = 64; // main switch: --expand-limit
|
||||||
int m_gateStmts = 100; // main switch: --gate-stmts
|
int m_gateStmts = 100; // main switch: --gate-stmts
|
||||||
int m_hierChild = 0; // main switch: --hierarchical-child
|
int m_hierChild = 0; // main switch: --hierarchical-child
|
||||||
|
int m_hierThreads = 0; // main switch: --hierarchical-threads
|
||||||
int m_ifDepth = 0; // main switch: --if-depth
|
int m_ifDepth = 0; // main switch: --if-depth
|
||||||
int m_inlineMult = 2000; // main switch: --inline-mult
|
int m_inlineMult = 2000; // main switch: --inline-mult
|
||||||
int m_instrCountDpi = 200; // main switch: --instr-count-dpi
|
int m_instrCountDpi = 200; // main switch: --instr-count-dpi
|
||||||
|
|
@ -741,6 +742,7 @@ public:
|
||||||
|
|
||||||
bool hierarchical() const { return m_hierarchical; }
|
bool hierarchical() const { return m_hierarchical; }
|
||||||
int hierChild() const VL_MT_SAFE { return m_hierChild; }
|
int hierChild() const VL_MT_SAFE { return m_hierChild; }
|
||||||
|
int hierThreads() const VL_MT_SAFE { return m_hierThreads == 0 ? m_threads : m_hierThreads; }
|
||||||
bool hierTop() const VL_MT_SAFE { return !m_hierChild && !m_hierBlocks.empty(); }
|
bool hierTop() const VL_MT_SAFE { return !m_hierChild && !m_hierBlocks.empty(); }
|
||||||
const V3HierBlockOptSet& hierBlocks() const { return m_hierBlocks; }
|
const V3HierBlockOptSet& hierBlocks() const { return m_hierBlocks; }
|
||||||
// Directory to save .tree, .dot, .dat, .vpp for hierarchical block top
|
// Directory to save .tree, .dot, .dat, .vpp for hierarchical block top
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
%Error: --hierarchical-threads must be >= 0: -2
|
||||||
|
... See the manual at https://verilator.org/verilator_doc.html?v=latest for more assistance.
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2025 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
import vltest_bootstrap
|
||||||
|
|
||||||
|
test.scenarios('vlt')
|
||||||
|
|
||||||
|
test.lint(fails=True,
|
||||||
|
verilator_flags2=['--hierarchical-threads -2'],
|
||||||
|
expect_filename=test.golden_filename)
|
||||||
|
|
||||||
|
test.passes()
|
||||||
|
|
@ -0,0 +1,42 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2025 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
import vltest_bootstrap
|
||||||
|
|
||||||
|
test.scenarios('vlt_all')
|
||||||
|
test.init_benchmarksim()
|
||||||
|
test.cycles = (int(test.benchmark) if test.benchmark else 100000)
|
||||||
|
test.sim_time = test.cycles * 10 + 1000
|
||||||
|
|
||||||
|
THREADS = 2
|
||||||
|
HIER_BLOCK_THREADS = 2
|
||||||
|
HIER_THREADS = 4
|
||||||
|
|
||||||
|
config_file = test.t_dir + "/" + test.name + ".vlt"
|
||||||
|
|
||||||
|
test.compile(
|
||||||
|
benchmarksim=1,
|
||||||
|
v_flags2=[
|
||||||
|
config_file, "+define+SIM_CYCLES=" + str(test.cycles), "--hierarchical", "--stats",
|
||||||
|
"-Wno-UNOPTFLAT",
|
||||||
|
(f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else ""),
|
||||||
|
(f"--hierarchical-threads {HIER_THREADS}" if test.vltmt and HIER_THREADS > 1 else "")
|
||||||
|
],
|
||||||
|
threads=(THREADS if test.vltmt else 1),
|
||||||
|
context_threads=(max(HIER_THREADS, THREADS) if test.vltmt else 1))
|
||||||
|
|
||||||
|
if test.vltmt:
|
||||||
|
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||||
|
r'Optimizations, Thread schedule count\s+(\d+)', 1)
|
||||||
|
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||||
|
r'Optimizations, Thread schedule total tasks\s+(\d+)', 2)
|
||||||
|
|
||||||
|
test.execute()
|
||||||
|
|
||||||
|
test.passes()
|
||||||
|
|
@ -0,0 +1,212 @@
|
||||||
|
// DESCRIPTION: Verilator: Verilog Test module
|
||||||
|
//
|
||||||
|
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||||
|
// any use, without warranty, 2025 by Antmicro.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
// Based on tests emitted by t_gate_tree.py
|
||||||
|
|
||||||
|
module t (clk);
|
||||||
|
input clk;
|
||||||
|
|
||||||
|
logic reset;
|
||||||
|
|
||||||
|
reg [255:0] v2_0;
|
||||||
|
reg [255:0] v1_0;
|
||||||
|
reg [255:0] v1_1;
|
||||||
|
reg [255:0] v1_2;
|
||||||
|
reg [255:0] v1_3;
|
||||||
|
reg [255:0] v1_4;
|
||||||
|
reg [255:0] v1_5;
|
||||||
|
reg [255:0] v1_6;
|
||||||
|
reg [255:0] v1_7;
|
||||||
|
reg [255:0] dummy;
|
||||||
|
|
||||||
|
Calculate calc0(.clk(clk), .reset(reset), .v1_0(v1_0), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||||
|
Calculate calc1(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(v1_1), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||||
|
Calculate calc2(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(v1_2), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||||
|
Calculate calc3(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(v1_3), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||||
|
Calculate calc4(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(v1_4), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||||
|
Calculate calc5(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(v1_5), .v1_6(dummy), .v1_7(dummy));
|
||||||
|
Calculate calc6(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(v1_6), .v1_7(dummy));
|
||||||
|
Calculate calc7(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(v1_7));
|
||||||
|
always @ (posedge clk) v2_0 <= v1_0 + v1_1 + v1_2 + v1_3 + v1_4 + v1_5 + v1_6 + v1_7;
|
||||||
|
Check chk(.clk(clk), .reset(reset), .v2_0(v2_0));
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
module Check(input clk, output logic reset, input reg [255:0] v2_0);
|
||||||
|
integer cyc=0;
|
||||||
|
always @ (posedge clk) begin
|
||||||
|
cyc <= cyc + 1;
|
||||||
|
`ifdef TEST_VERBOSE
|
||||||
|
// Verbose trace: print only signals visible inside Check (v0_0/v1_0 live in other modules)
$write("[%0t] rst=%0x result=%0x\n", $time, reset, v2_0);
|
||||||
|
`endif
|
||||||
|
if (cyc==0) begin
|
||||||
|
reset <= 1;
|
||||||
|
end
|
||||||
|
else if (cyc==10) begin
|
||||||
|
reset <= 0;
|
||||||
|
end
|
||||||
|
`ifndef SIM_CYCLES
|
||||||
|
`define SIM_CYCLES 99
|
||||||
|
`endif
|
||||||
|
else if (cyc==`SIM_CYCLES) begin
|
||||||
|
if (v2_0 != 256'd2017) $stop;
|
||||||
|
$write("VARS=64 WIDTH=256 WORKINGSET=2KB\n");
|
||||||
|
$write("*-* All Finished *-*\n");
|
||||||
|
$finish;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
module Calculate(input clk,
|
||||||
|
input reset,
|
||||||
|
output reg [255:0] v1_0,
|
||||||
|
output reg [255:0] v1_1,
|
||||||
|
output reg [255:0] v1_2,
|
||||||
|
output reg [255:0] v1_3,
|
||||||
|
output reg [255:0] v1_4,
|
||||||
|
output reg [255:0] v1_5,
|
||||||
|
output reg [255:0] v1_6,
|
||||||
|
output reg [255:0] v1_7
|
||||||
|
);
|
||||||
|
reg [255:0] v0_0;
|
||||||
|
reg [255:0] v0_1;
|
||||||
|
reg [255:0] v0_2;
|
||||||
|
reg [255:0] v0_3;
|
||||||
|
reg [255:0] v0_4;
|
||||||
|
reg [255:0] v0_5;
|
||||||
|
reg [255:0] v0_6;
|
||||||
|
reg [255:0] v0_7;
|
||||||
|
reg [255:0] v0_8;
|
||||||
|
reg [255:0] v0_9;
|
||||||
|
reg [255:0] v0_10;
|
||||||
|
reg [255:0] v0_11;
|
||||||
|
reg [255:0] v0_12;
|
||||||
|
reg [255:0] v0_13;
|
||||||
|
reg [255:0] v0_14;
|
||||||
|
reg [255:0] v0_15;
|
||||||
|
reg [255:0] v0_16;
|
||||||
|
reg [255:0] v0_17;
|
||||||
|
reg [255:0] v0_18;
|
||||||
|
reg [255:0] v0_19;
|
||||||
|
reg [255:0] v0_20;
|
||||||
|
reg [255:0] v0_21;
|
||||||
|
reg [255:0] v0_22;
|
||||||
|
reg [255:0] v0_23;
|
||||||
|
reg [255:0] v0_24;
|
||||||
|
reg [255:0] v0_25;
|
||||||
|
reg [255:0] v0_26;
|
||||||
|
reg [255:0] v0_27;
|
||||||
|
reg [255:0] v0_28;
|
||||||
|
reg [255:0] v0_29;
|
||||||
|
reg [255:0] v0_30;
|
||||||
|
reg [255:0] v0_31;
|
||||||
|
reg [255:0] v0_32;
|
||||||
|
reg [255:0] v0_33;
|
||||||
|
reg [255:0] v0_34;
|
||||||
|
reg [255:0] v0_35;
|
||||||
|
reg [255:0] v0_36;
|
||||||
|
reg [255:0] v0_37;
|
||||||
|
reg [255:0] v0_38;
|
||||||
|
reg [255:0] v0_39;
|
||||||
|
reg [255:0] v0_40;
|
||||||
|
reg [255:0] v0_41;
|
||||||
|
reg [255:0] v0_42;
|
||||||
|
reg [255:0] v0_43;
|
||||||
|
reg [255:0] v0_44;
|
||||||
|
reg [255:0] v0_45;
|
||||||
|
reg [255:0] v0_46;
|
||||||
|
reg [255:0] v0_47;
|
||||||
|
reg [255:0] v0_48;
|
||||||
|
reg [255:0] v0_49;
|
||||||
|
reg [255:0] v0_50;
|
||||||
|
reg [255:0] v0_51;
|
||||||
|
reg [255:0] v0_52;
|
||||||
|
reg [255:0] v0_53;
|
||||||
|
reg [255:0] v0_54;
|
||||||
|
reg [255:0] v0_55;
|
||||||
|
reg [255:0] v0_56;
|
||||||
|
reg [255:0] v0_57;
|
||||||
|
reg [255:0] v0_58;
|
||||||
|
reg [255:0] v0_59;
|
||||||
|
reg [255:0] v0_60;
|
||||||
|
reg [255:0] v0_61;
|
||||||
|
reg [255:0] v0_62;
|
||||||
|
reg [255:0] v0_63;
|
||||||
|
|
||||||
|
always @ (posedge clk) v0_0 <= reset ? 256'd1 : v0_1;
|
||||||
|
always @ (posedge clk) v0_1 <= reset ? 256'd1 : v0_2;
|
||||||
|
always @ (posedge clk) v0_2 <= reset ? 256'd2 : v0_3;
|
||||||
|
always @ (posedge clk) v0_3 <= reset ? 256'd3 : v0_4;
|
||||||
|
always @ (posedge clk) v0_4 <= reset ? 256'd4 : v0_5;
|
||||||
|
always @ (posedge clk) v0_5 <= reset ? 256'd5 : v0_6;
|
||||||
|
always @ (posedge clk) v0_6 <= reset ? 256'd6 : v0_7;
|
||||||
|
always @ (posedge clk) v0_7 <= reset ? 256'd7 : v0_0;
|
||||||
|
always @ (posedge clk) v0_8 <= reset ? 256'd8 : v0_9;
|
||||||
|
always @ (posedge clk) v0_9 <= reset ? 256'd9 : v0_10;
|
||||||
|
always @ (posedge clk) v0_10 <= reset ? 256'd10 : v0_11;
|
||||||
|
always @ (posedge clk) v0_11 <= reset ? 256'd11 : v0_12;
|
||||||
|
always @ (posedge clk) v0_12 <= reset ? 256'd12 : v0_13;
|
||||||
|
always @ (posedge clk) v0_13 <= reset ? 256'd13 : v0_14;
|
||||||
|
always @ (posedge clk) v0_14 <= reset ? 256'd14 : v0_15;
|
||||||
|
always @ (posedge clk) v0_15 <= reset ? 256'd15 : v0_8;
|
||||||
|
always @ (posedge clk) v0_16 <= reset ? 256'd16 : v0_17;
|
||||||
|
always @ (posedge clk) v0_17 <= reset ? 256'd17 : v0_18;
|
||||||
|
always @ (posedge clk) v0_18 <= reset ? 256'd18 : v0_19;
|
||||||
|
always @ (posedge clk) v0_19 <= reset ? 256'd19 : v0_20;
|
||||||
|
always @ (posedge clk) v0_20 <= reset ? 256'd20 : v0_21;
|
||||||
|
always @ (posedge clk) v0_21 <= reset ? 256'd21 : v0_22;
|
||||||
|
always @ (posedge clk) v0_22 <= reset ? 256'd22 : v0_23;
|
||||||
|
always @ (posedge clk) v0_23 <= reset ? 256'd23 : v0_16;
|
||||||
|
always @ (posedge clk) v0_24 <= reset ? 256'd24 : v0_25;
|
||||||
|
always @ (posedge clk) v0_25 <= reset ? 256'd25 : v0_26;
|
||||||
|
always @ (posedge clk) v0_26 <= reset ? 256'd26 : v0_27;
|
||||||
|
always @ (posedge clk) v0_27 <= reset ? 256'd27 : v0_28;
|
||||||
|
always @ (posedge clk) v0_28 <= reset ? 256'd28 : v0_29;
|
||||||
|
always @ (posedge clk) v0_29 <= reset ? 256'd29 : v0_30;
|
||||||
|
always @ (posedge clk) v0_30 <= reset ? 256'd30 : v0_31;
|
||||||
|
always @ (posedge clk) v0_31 <= reset ? 256'd31 : v0_24;
|
||||||
|
always @ (posedge clk) v0_32 <= reset ? 256'd32 : v0_33;
|
||||||
|
always @ (posedge clk) v0_33 <= reset ? 256'd33 : v0_34;
|
||||||
|
always @ (posedge clk) v0_34 <= reset ? 256'd34 : v0_35;
|
||||||
|
always @ (posedge clk) v0_35 <= reset ? 256'd35 : v0_36;
|
||||||
|
always @ (posedge clk) v0_36 <= reset ? 256'd36 : v0_37;
|
||||||
|
always @ (posedge clk) v0_37 <= reset ? 256'd37 : v0_38;
|
||||||
|
always @ (posedge clk) v0_38 <= reset ? 256'd38 : v0_39;
|
||||||
|
always @ (posedge clk) v0_39 <= reset ? 256'd39 : v0_32;
|
||||||
|
always @ (posedge clk) v0_40 <= reset ? 256'd40 : v0_41;
|
||||||
|
always @ (posedge clk) v0_41 <= reset ? 256'd41 : v0_42;
|
||||||
|
always @ (posedge clk) v0_42 <= reset ? 256'd42 : v0_43;
|
||||||
|
always @ (posedge clk) v0_43 <= reset ? 256'd43 : v0_44;
|
||||||
|
always @ (posedge clk) v0_44 <= reset ? 256'd44 : v0_45;
|
||||||
|
always @ (posedge clk) v0_45 <= reset ? 256'd45 : v0_46;
|
||||||
|
always @ (posedge clk) v0_46 <= reset ? 256'd46 : v0_47;
|
||||||
|
always @ (posedge clk) v0_47 <= reset ? 256'd47 : v0_40;
|
||||||
|
always @ (posedge clk) v0_48 <= reset ? 256'd48 : v0_49;
|
||||||
|
always @ (posedge clk) v0_49 <= reset ? 256'd49 : v0_50;
|
||||||
|
always @ (posedge clk) v0_50 <= reset ? 256'd50 : v0_51;
|
||||||
|
always @ (posedge clk) v0_51 <= reset ? 256'd51 : v0_52;
|
||||||
|
always @ (posedge clk) v0_52 <= reset ? 256'd52 : v0_53;
|
||||||
|
always @ (posedge clk) v0_53 <= reset ? 256'd53 : v0_54;
|
||||||
|
always @ (posedge clk) v0_54 <= reset ? 256'd54 : v0_55;
|
||||||
|
always @ (posedge clk) v0_55 <= reset ? 256'd55 : v0_48;
|
||||||
|
always @ (posedge clk) v0_56 <= reset ? 256'd56 : v0_57;
|
||||||
|
always @ (posedge clk) v0_57 <= reset ? 256'd57 : v0_58;
|
||||||
|
always @ (posedge clk) v0_58 <= reset ? 256'd58 : v0_59;
|
||||||
|
always @ (posedge clk) v0_59 <= reset ? 256'd59 : v0_60;
|
||||||
|
always @ (posedge clk) v0_60 <= reset ? 256'd60 : v0_61;
|
||||||
|
always @ (posedge clk) v0_61 <= reset ? 256'd61 : v0_62;
|
||||||
|
always @ (posedge clk) v0_62 <= reset ? 256'd62 : v0_63;
|
||||||
|
always @ (posedge clk) v0_63 <= reset ? 256'd63 : v0_56;
|
||||||
|
|
||||||
|
always @ (posedge clk) v1_0 <= v0_0 + v0_1 + v0_2 + v0_3 + v0_4 + v0_5 + v0_6 + v0_7;
|
||||||
|
always @ (posedge clk) v1_1 <= v0_8 + v0_9 + v0_10 + v0_11 + v0_12 + v0_13 + v0_14 + v0_15;
|
||||||
|
always @ (posedge clk) v1_2 <= v0_16 + v0_17 + v0_18 + v0_19 + v0_20 + v0_21 + v0_22 + v0_23;
|
||||||
|
always @ (posedge clk) v1_3 <= v0_24 + v0_25 + v0_26 + v0_27 + v0_28 + v0_29 + v0_30 + v0_31;
|
||||||
|
always @ (posedge clk) v1_4 <= v0_32 + v0_33 + v0_34 + v0_35 + v0_36 + v0_37 + v0_38 + v0_39;
|
||||||
|
always @ (posedge clk) v1_5 <= v0_40 + v0_41 + v0_42 + v0_43 + v0_44 + v0_45 + v0_46 + v0_47;
|
||||||
|
always @ (posedge clk) v1_6 <= v0_48 + v0_49 + v0_50 + v0_51 + v0_52 + v0_53 + v0_54 + v0_55;
|
||||||
|
always @ (posedge clk) v1_7 <= v0_56 + v0_57 + v0_58 + v0_59 + v0_60 + v0_61 + v0_62 + v0_63;
|
||||||
|
endmodule
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
// DESCRIPTION: Verilator: Verilog Test module
|
||||||
|
//
|
||||||
|
// This file ONLY is placed into the Public Domain, for any use,
|
||||||
|
// without warranty, 2025 by Antmicro.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
`verilator_config
|
||||||
|
|
||||||
|
hier_block -module "Calculate"
|
||||||
|
hier_block -module "Check"
|
||||||
|
|
||||||
|
`ifdef WORKERS
|
||||||
|
hier_workers -module "Calculate" -workers `WORKERS
|
||||||
|
`endif
|
||||||
|
|
@ -14,9 +14,9 @@ test.init_benchmarksim()
|
||||||
test.cycles = (int(test.benchmark) if test.benchmark else 100000)
|
test.cycles = (int(test.benchmark) if test.benchmark else 100000)
|
||||||
test.sim_time = test.cycles * 10 + 1000
|
test.sim_time = test.cycles * 10 + 1000
|
||||||
|
|
||||||
THREADS = int(os.environ["THREADS"]) if "THREADS" in os.environ else 4
|
THREADS = 2
|
||||||
HIER_BLOCK_THREADS = int(
|
HIER_BLOCK_THREADS = 2
|
||||||
os.environ["HIER_BLOCK_THREADS"]) if "HIER_BLOCK_THREADS" in os.environ else 2
|
HIER_THREADS = 4
|
||||||
|
|
||||||
config_file = test.t_dir + "/" + test.name + ".vlt"
|
config_file = test.t_dir + "/" + test.name + ".vlt"
|
||||||
|
|
||||||
|
|
@ -25,18 +25,20 @@ test.compile(
|
||||||
v_flags2=[
|
v_flags2=[
|
||||||
config_file, "+define+SIM_CYCLES=" + str(test.cycles), "--prof-exec", "--hierarchical",
|
config_file, "+define+SIM_CYCLES=" + str(test.cycles), "--prof-exec", "--hierarchical",
|
||||||
"--stats", "-Wno-UNOPTFLAT",
|
"--stats", "-Wno-UNOPTFLAT",
|
||||||
(f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else "")
|
(f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else ""),
|
||||||
|
(f"--hierarchical-threads {HIER_THREADS}" if test.vltmt and HIER_THREADS > 1 else "")
|
||||||
],
|
],
|
||||||
threads=(THREADS if test.vltmt else 1))
|
threads=(THREADS if test.vltmt else 1),
|
||||||
|
context_threads=(HIER_THREADS if test.vltmt else 1))
|
||||||
|
|
||||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||||
r'Optimizations, Hierarchical DPI wrappers with costs\s+(\d+)', 6)
|
r'Optimizations, Hierarchical DPI wrappers with costs\s+(\d+)', 6)
|
||||||
|
|
||||||
if test.vltmt:
|
if test.vltmt:
|
||||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||||
r'Optimizations, Thread schedule count\s+(\d+)', 4)
|
r'Optimizations, Thread schedule count\s+(\d+)', 1)
|
||||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 12)
|
r'Optimizations, Thread schedule total tasks\s+(\d+)', 2)
|
||||||
|
|
||||||
test.execute(all_run_flags=[
|
test.execute(all_run_flags=[
|
||||||
"+verilator+prof+exec+start+2",
|
"+verilator+prof+exec+start+2",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue