Fix PGO profiling for multi-threaded hierarchical scenarios (#5888)
This commit is contained in:
parent
b9a571916c
commit
8965401d10
|
|
@ -214,7 +214,7 @@ public:
|
||||||
// METHODS
|
// METHODS
|
||||||
VlPgoProfiler() = default;
|
VlPgoProfiler() = default;
|
||||||
~VlPgoProfiler() = default;
|
~VlPgoProfiler() = default;
|
||||||
void write(const char* modelp, const std::string& filename) VL_MT_SAFE;
|
void write(const char* modelp, const std::string& filename, bool firstHierCall) VL_MT_SAFE;
|
||||||
void addCounter(size_t counter, const std::string& name) {
|
void addCounter(size_t counter, const std::string& name) {
|
||||||
VL_DEBUG_IF(assert(counter < N_Entries););
|
VL_DEBUG_IF(assert(counter < N_Entries););
|
||||||
m_records.emplace_back(Record{name, counter});
|
m_records.emplace_back(Record{name, counter});
|
||||||
|
|
@ -228,7 +228,8 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
template <std::size_t N_Entries>
|
template <std::size_t N_Entries>
|
||||||
void VlPgoProfiler<N_Entries>::write(const char* modelp, const std::string& filename) VL_MT_SAFE {
|
void VlPgoProfiler<N_Entries>::write(const char* modelp, const std::string& filename,
|
||||||
|
bool firstHierCall) VL_MT_SAFE {
|
||||||
static VerilatedMutex s_mutex;
|
static VerilatedMutex s_mutex;
|
||||||
const VerilatedLockGuard lock{s_mutex};
|
const VerilatedLockGuard lock{s_mutex};
|
||||||
|
|
||||||
|
|
@ -238,7 +239,7 @@ void VlPgoProfiler<N_Entries>::write(const char* modelp, const std::string& file
|
||||||
// each will collect its own data correctly. However when each is
|
// each will collect its own data correctly. However when each is
|
||||||
// destroyed we need to get all the data, not keep overwriting and only
|
// destroyed we need to get all the data, not keep overwriting and only
|
||||||
// get the last model's data.
|
// get the last model's data.
|
||||||
static bool s_firstCall = true;
|
static bool s_firstCall = firstHierCall;
|
||||||
|
|
||||||
VL_DEBUG_IF(VL_DBG_MSGF("+prof+vlt+file writing to '%s'\n", filename.c_str()););
|
VL_DEBUG_IF(VL_DBG_MSGF("+prof+vlt+file writing to '%s'\n", filename.c_str()););
|
||||||
|
|
||||||
|
|
@ -246,12 +247,14 @@ void VlPgoProfiler<N_Entries>::write(const char* modelp, const std::string& file
|
||||||
if (VL_UNLIKELY(!fp)) {
|
if (VL_UNLIKELY(!fp)) {
|
||||||
VL_FATAL_MT(filename.c_str(), 0, "", "+prof+vlt+file file not writable");
|
VL_FATAL_MT(filename.c_str(), 0, "", "+prof+vlt+file file not writable");
|
||||||
}
|
}
|
||||||
s_firstCall = false;
|
if (s_firstCall) {
|
||||||
|
|
||||||
// TODO Perhaps merge with verilated_coverage output format, so can
|
// TODO Perhaps merge with verilated_coverage output format, so can
|
||||||
// have a common merging and reporting tool, etc.
|
// have a common merging and reporting tool, etc.
|
||||||
fprintf(fp, "// Verilated model profile-guided optimization data dump file\n");
|
fprintf(fp, "// Verilated model profile-guided optimization data dump file\n");
|
||||||
fprintf(fp, "`verilator_config\n");
|
fprintf(fp, "`verilator_config\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
s_firstCall = false;
|
||||||
|
|
||||||
for (const Record& rec : m_records) {
|
for (const Record& rec : m_records) {
|
||||||
fprintf(fp, "profile_data -model \"%s\" -mtask \"%s\" -cost 64'd%" PRIu64 "\n", modelp,
|
fprintf(fp, "profile_data -model \"%s\" -mtask \"%s\" -cost 64'd%" PRIu64 "\n", modelp,
|
||||||
|
|
|
||||||
|
|
@ -731,8 +731,11 @@ void EmitCSyms::emitSymImp() {
|
||||||
puts("#endif // VM_TRACE\n");
|
puts("#endif // VM_TRACE\n");
|
||||||
}
|
}
|
||||||
if (v3Global.opt.profPgo()) {
|
if (v3Global.opt.profPgo()) {
|
||||||
|
// Do not overwrite data during the last hierarchical stage.
|
||||||
|
const string firstHierCall
|
||||||
|
= (v3Global.opt.hierBlocks().empty() || v3Global.opt.hierChild()) ? "true" : "false";
|
||||||
puts("_vm_pgoProfiler.write(\"" + topClassName()
|
puts("_vm_pgoProfiler.write(\"" + topClassName()
|
||||||
+ "\", _vm_contextp__->profVltFilename());\n");
|
+ "\", _vm_contextp__->profVltFilename(), " + firstHierCall + ");\n");
|
||||||
}
|
}
|
||||||
puts("}\n");
|
puts("}\n");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
||||||
|
|
||||||
test.scenarios('vlt_all')
|
test.scenarios('vlt_all')
|
||||||
test.init_benchmarksim()
|
test.init_benchmarksim()
|
||||||
test.cycles = (int(test.benchmark) if test.benchmark else 1000000)
|
test.cycles = (int(test.benchmark) if test.benchmark else 100000)
|
||||||
test.sim_time = test.cycles * 10 + 1000
|
test.sim_time = test.cycles * 10 + 1000
|
||||||
|
|
||||||
THREADS = int(os.environ["THREADS"]) if "THREADS" in os.environ else 4
|
THREADS = int(os.environ["THREADS"]) if "THREADS" in os.environ else 4
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2025 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
import vltest_bootstrap
|
||||||
|
|
||||||
|
test.scenarios('vltmt')
|
||||||
|
test.top_filename = "t/t_hier_block_perf.v"
|
||||||
|
cycles = 100000
|
||||||
|
test.sim_time = cycles * 10 + 1000
|
||||||
|
|
||||||
|
threads = 2
|
||||||
|
flags = ["--hierarchical", "-Wno-UNOPTFLAT", "-DSIM_CYCLES=" + str(cycles)]
|
||||||
|
|
||||||
|
test.compile(benchmarksim=1, v_flags2=["--prof-pgo"] + flags, threads=threads)
|
||||||
|
|
||||||
|
test.execute(all_run_flags=[
|
||||||
|
"+verilator+prof+exec+start+0",
|
||||||
|
" +verilator+prof+exec+file+/dev/null",
|
||||||
|
" +verilator+prof+vlt+file+" + test.obj_dir + "/profile.vlt"]) # yapf:disable
|
||||||
|
|
||||||
|
test.file_grep(test.obj_dir + "/profile.vlt", r'profile_data -model "V' + test.name + '"')
|
||||||
|
|
||||||
|
# Differentiate benchmarksim results
|
||||||
|
test.name = test.name + "_optimized"
|
||||||
|
test.compile(
|
||||||
|
benchmarksim=1,
|
||||||
|
# Intentionally no --prof-pgo here to make sure profile data can be read in
|
||||||
|
# without it (that is: --prof-pgo has no effect on profile_data hash names)
|
||||||
|
v_flags2=flags,
|
||||||
|
threads=threads)
|
||||||
|
|
||||||
|
test.execute()
|
||||||
|
|
||||||
|
test.passes()
|
||||||
|
|
@ -0,0 +1,42 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2025 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
import vltest_bootstrap
|
||||||
|
|
||||||
|
test.scenarios('vltmt')
|
||||||
|
test.top_filename = "t/t_hier_block_perf.v"
|
||||||
|
cycles = 100000
|
||||||
|
test.sim_time = cycles * 10 + 1000
|
||||||
|
|
||||||
|
threads = 2
|
||||||
|
config_file = test.t_dir + "/" + test.name + ".vlt"
|
||||||
|
flags = [config_file, "--hierarchical", "-Wno-UNOPTFLAT", "-DSIM_CYCLES=" + str(cycles)]
|
||||||
|
|
||||||
|
test.compile(benchmarksim=1, v_flags2=["--prof-pgo"] + flags, threads=threads)
|
||||||
|
|
||||||
|
test.execute(all_run_flags=[
|
||||||
|
"+verilator+prof+exec+start+0",
|
||||||
|
" +verilator+prof+exec+file+/dev/null",
|
||||||
|
" +verilator+prof+vlt+file+" + test.obj_dir + "/profile.vlt"]) # yapf:disable
|
||||||
|
|
||||||
|
test.file_grep(test.obj_dir + "/profile.vlt", r'profile_data -model "VTest"')
|
||||||
|
test.file_grep(test.obj_dir + "/profile.vlt", r'profile_data -model "V' + test.name + '"')
|
||||||
|
|
||||||
|
# Differentiate benchmarksim results
|
||||||
|
test.name = test.name + "_optimized"
|
||||||
|
test.compile(
|
||||||
|
benchmarksim=1,
|
||||||
|
# Intentionally no --prof-pgo here to make sure profile data can be read in
|
||||||
|
# without it (that is: --prof-pgo has no effect on profile_data hash names)
|
||||||
|
v_flags2=flags,
|
||||||
|
threads=threads)
|
||||||
|
|
||||||
|
test.execute()
|
||||||
|
|
||||||
|
test.passes()
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
// DESCRIPTION: Verilator: Verilog Test module
|
||||||
|
//
|
||||||
|
// This file ONLY is placed into the Public Domain, for any use,
|
||||||
|
// without warranty, 2025 by Antmicro.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
`verilator_config
|
||||||
|
hier_workers -module "Test" -workers 2
|
||||||
Loading…
Reference in New Issue