This API is used if the user copies the process using `fork` or similar OS-level mechanisms. The `atClone` member function ensures that all model-allocated resources are re-allocated, so that the copied child process/model can simulate correctly. A typical such resource is the thread pool, since every model has its own pool.
This commit is contained in:
parent
ca6ab584d0
commit
b4b74d72f0
|
|
@ -128,6 +128,43 @@ in the distribution. These headers use Doxygen comments, `///` and `//<`,
|
||||||
to indicate and document those functions that are part of the Verilated
|
to indicate and document those functions that are part of the Verilated
|
||||||
public API.
|
public API.
|
||||||
|
|
||||||
|
Process-Level Clone APIs
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Modern operating systems support process-level clone (a.k.a. copying, forking)
|
||||||
|
with system call interfaces in C/C++, e.g., :code:`fork()` in Linux.
|
||||||
|
|
||||||
|
However, after cloning a parent process, some resources cannot be inherited
|
||||||
|
in the child process. For example, in POSIX systems, when you fork a process,
|
||||||
|
the child process inherits all the memory of the parent process. However,
|
||||||
|
only the thread that called fork is replicated in the child process. Other
|
||||||
|
threads are not.
|
||||||
|
|
||||||
|
Therefore, to support the process-level clone mechanisms, Verilator supports
|
||||||
|
:code:`prepareClone()` and :code:`atClone()` APIs to allow the user to manually
|
||||||
|
re-construct the model in the child process. The two APIs handle all necessary
|
||||||
|
resources required for releasing and re-initializing before and after cloning.
|
||||||
|
|
||||||
|
The two APIs are supported in the Verilated models. Here is an example of usage
|
||||||
|
with Linux :code:`fork()` and :code:`pthread_atfork` APIs:
|
||||||
|
|
||||||
|
.. code-block:: C++
|
||||||
|
|
||||||
|
// static function pointers to fit pthread_atfork
|
||||||
|
static auto prepareClone = [](){ topp->prepareClone(); };
|
||||||
|
static auto atClone = [](){ topp->atClone(); };
|
||||||
|
|
||||||
|
// in main function, register the handlers:
|
||||||
|
pthread_atfork(prepareClone, atClone, atClone);
|
||||||
|
|
||||||
|
For better flexibility, you can also manually call the handlers before and
|
||||||
|
after :code:`fork()`.
|
||||||
|
|
||||||
|
With the process-level clone APIs, users can create process-level snapshots
|
||||||
|
for the Verilated models. While the Verilator save/restore option provides
|
||||||
|
persistent and circuit-independent snapshots, the process-level clone APIs
|
||||||
|
enable in-memory, circuit-transparent, and highly efficient snapshots.
|
||||||
|
|
||||||
|
|
||||||
Direct Programming Interface (DPI)
|
Direct Programming Interface (DPI)
|
||||||
==================================
|
==================================
|
||||||
|
|
|
||||||
|
|
@ -2608,6 +2608,14 @@ VerilatedVirtualBase* VerilatedContext::threadPoolp() {
|
||||||
return m_threadPool.get();
|
return m_threadPool.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Release the context's thread pool ahead of a process-level clone (e.g. fork).
// threadPoolpOnClone() constructs a replacement pool afterwards.
void VerilatedContext::prepareClone() {
    // reset() destroys the owned pool and leaves the pointer empty; this is the
    // same effect as `delete release()` without a raw delete.
    m_threadPool.reset();
}
|
||||||
|
|
||||||
|
// Construct a fresh thread pool after a process-level clone and return it.
VerilatedVirtualBase* VerilatedContext::threadPoolpOnClone() {
    // A pool pointer is still held here only if prepareClone() was not called
    // before cloning. Give up ownership WITHOUT destroying the object, so the
    // assignment below cannot delete the stale pool; the old object is leaked.
    // NOTE(review): presumably destruction is avoided because the pool's worker
    // threads are not replicated into the cloned process - confirm.
    if (VL_UNLIKELY(m_threadPool)) {
        static_cast<void>(m_threadPool.release());
    }
    // Same sizing expression as used here originally: m_threads - 1 workers
    m_threadPool = std::make_unique<VlThreadPool>(this, m_threads - 1);
    VerilatedVirtualBase* const newPoolp = m_threadPool.get();
    return newPoolp;
}
|
||||||
|
|
||||||
VerilatedVirtualBase*
|
VerilatedVirtualBase*
|
||||||
VerilatedContext::enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)) {
|
VerilatedContext::enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)) {
|
||||||
if (!m_executionProfiler) m_executionProfiler.reset(construct(*this));
|
if (!m_executionProfiler) m_executionProfiler.reset(construct(*this));
|
||||||
|
|
|
||||||
|
|
@ -568,6 +568,8 @@ public:
|
||||||
void addModel(VerilatedModel*);
|
void addModel(VerilatedModel*);
|
||||||
|
|
||||||
VerilatedVirtualBase* threadPoolp();
|
VerilatedVirtualBase* threadPoolp();
|
||||||
|
void prepareClone();
|
||||||
|
VerilatedVirtualBase* threadPoolpOnClone();
|
||||||
VerilatedVirtualBase*
|
VerilatedVirtualBase*
|
||||||
enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&));
|
enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -234,6 +234,12 @@ class EmitCModel final : public EmitCFunc {
|
||||||
puts("const char* hierName() const override final;\n");
|
puts("const char* hierName() const override final;\n");
|
||||||
puts("const char* modelName() const override final;\n");
|
puts("const char* modelName() const override final;\n");
|
||||||
puts("unsigned threads() const override final;\n");
|
puts("unsigned threads() const override final;\n");
|
||||||
|
puts("/// Prepare for cloning the model at the process level (e.g. fork in Linux)\n");
|
||||||
|
puts("/// Release necessary resources. Called before cloning.\n");
|
||||||
|
puts("void prepareClone() const;\n");
|
||||||
|
puts("/// Re-init after cloning the model at the process level (e.g. fork in Linux)\n");
|
||||||
|
puts("/// Re-allocate necessary resources. Called after cloning.\n");
|
||||||
|
puts("void atClone() const;\n");
|
||||||
if (v3Global.opt.trace()) {
|
if (v3Global.opt.trace()) {
|
||||||
puts("std::unique_ptr<VerilatedTraceConfig> traceConfig() const override final;\n");
|
puts("std::unique_ptr<VerilatedTraceConfig> traceConfig() const override final;\n");
|
||||||
}
|
}
|
||||||
|
|
@ -479,6 +485,15 @@ class EmitCModel final : public EmitCFunc {
|
||||||
+ "\"; }\n");
|
+ "\"; }\n");
|
||||||
puts("unsigned " + topClassName() + "::threads() const { return "
|
puts("unsigned " + topClassName() + "::threads() const { return "
|
||||||
+ cvtToStr(std::max(1, v3Global.opt.threads())) + "; }\n");
|
+ cvtToStr(std::max(1, v3Global.opt.threads())) + "; }\n");
|
||||||
|
puts("void " + topClassName()
|
||||||
|
+ "::prepareClone() const { contextp()->prepareClone(); }\n");
|
||||||
|
puts("void " + topClassName() + "::atClone() const {\n");
|
||||||
|
if (v3Global.opt.threads() > 1) {
|
||||||
|
puts("vlSymsp->__Vm_threadPoolp = static_cast<VlThreadPool*>(");
|
||||||
|
}
|
||||||
|
puts("contextp()->threadPoolpOnClone()");
|
||||||
|
if (v3Global.opt.threads() > 1) puts(")");
|
||||||
|
puts(";\n}\n");
|
||||||
|
|
||||||
if (v3Global.opt.trace()) {
|
if (v3Global.opt.trace()) {
|
||||||
puts("std::unique_ptr<VerilatedTraceConfig> " + topClassName()
|
puts("std::unique_ptr<VerilatedTraceConfig> " + topClassName()
|
||||||
|
|
|
||||||
|
|
@ -468,7 +468,7 @@ void EmitCSyms::emitSymHdr() {
|
||||||
|
|
||||||
if (v3Global.opt.mtasks()) {
|
if (v3Global.opt.mtasks()) {
|
||||||
puts("\n// MULTI-THREADING\n");
|
puts("\n// MULTI-THREADING\n");
|
||||||
puts("VlThreadPool* const __Vm_threadPoolp;\n");
|
puts("VlThreadPool* __Vm_threadPoolp;\n");
|
||||||
puts("bool __Vm_even_cycle__ico = false;\n");
|
puts("bool __Vm_even_cycle__ico = false;\n");
|
||||||
puts("bool __Vm_even_cycle__act = false;\n");
|
puts("bool __Vm_even_cycle__act = false;\n");
|
||||||
puts("bool __Vm_even_cycle__nba = false;\n");
|
puts("bool __Vm_even_cycle__nba = false;\n");
|
||||||
|
|
|
||||||
|
|
@ -97,6 +97,8 @@ sub check_cpp {
|
||||||
&& $func !~ /::traceInit$/
|
&& $func !~ /::traceInit$/
|
||||||
&& $func !~ /::traceFull$/
|
&& $func !~ /::traceFull$/
|
||||||
&& $func !~ /::final$/
|
&& $func !~ /::final$/
|
||||||
|
&& $func !~ /::prepareClone$/
|
||||||
|
&& $func !~ /::atClone$/
|
||||||
) {
|
) {
|
||||||
push @funcs, $func;
|
push @funcs, $func;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,89 @@
|
||||||
|
//
|
||||||
|
// DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs
|
||||||
|
//
|
||||||
|
// This file ONLY is placed into the Public Domain, for any use,
|
||||||
|
// without warranty, 2023 by Yinan Xu.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
#include <verilated.h>
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <sys/wait.h>
|
||||||
|
|
||||||
|
// These require the above. Comment prevents clang-format moving them
|
||||||
|
#include "TestCheck.h"
|
||||||
|
|
||||||
|
#include VM_PREFIX_INCLUDE
|
||||||
|
|
||||||
|
// This test does not advance time; the reported time stamp is always zero.
double sc_time_stamp() {
    return 0.0;
}
|
||||||
|
|
||||||
|
// Note: Since the pthread_atfork API accepts only function pointers,
|
||||||
|
// we are using a static variable for the TOP just for a simple example.
|
||||||
|
// Without using the pthread_atfork API, the user can instead manually call
|
||||||
|
// prepareClone and atClone before and after calling fork, and topp can be
|
||||||
|
// allocated dynamically.
|
||||||
|
static VM_PREFIX* topp = nullptr;
|
||||||
|
static auto prepareClone = []() { topp->prepareClone(); };
|
||||||
|
static auto atClone = []() { topp->atClone(); };
|
||||||
|
|
||||||
|
// Advance the model by one full clock period: drive clock high and evaluate,
// then drive clock low and evaluate.
void single_cycle(VM_PREFIX* topp) {
    for (int level = 1; level >= 0; --level) {
        topp->clock = level;
        topp->eval();
    }
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
// We disable the buffering for stdout in this test.
|
||||||
|
// Redirecting the stdout to files with buffering causes duplicated stdout
|
||||||
|
// outputs in both parent and child processes, even if they are actually
|
||||||
|
// called before the fork.
|
||||||
|
setvbuf(stdout, nullptr, _IONBF, 0);
|
||||||
|
|
||||||
|
VerilatedContext* contextp = new VerilatedContext;
|
||||||
|
topp = new VM_PREFIX{contextp};
|
||||||
|
|
||||||
|
// To avoid resource leaks, prepareClone must be called before fork to
|
||||||
|
// free all the allocated resources. Though this would bring performance
|
||||||
|
// overhead to the parent process, we believe that fork should not be
|
||||||
|
// called frequently, and the overhead is minor compared to simulation.
|
||||||
|
pthread_atfork(prepareClone, atClone, atClone);
|
||||||
|
|
||||||
|
// If you care about critical performance, prepareClone can be avoided,
|
||||||
|
// with atClone being called only at the child process, as follows.
|
||||||
|
// It has the same functionality as the previous one, but has memory leaks.
|
||||||
|
// According to the sanitizer, 288 bytes are leaked for one fork call.
|
||||||
|
// pthread_atfork(nullptr, nullptr, atClone);
|
||||||
|
|
||||||
|
topp->reset = 1;
|
||||||
|
topp->is_parent = 0;
|
||||||
|
for (int i = 0; i < 5; i++) { single_cycle(topp); }
|
||||||
|
|
||||||
|
topp->reset = 0;
|
||||||
|
while (!contextp->gotFinish()) {
|
||||||
|
single_cycle(topp);
|
||||||
|
|
||||||
|
if (topp->do_clone) {
|
||||||
|
const int pid = fork();
|
||||||
|
if (pid < 0) {
|
||||||
|
printf("fork failed\n");
|
||||||
|
} else if (pid == 0) {
|
||||||
|
printf("child: here we go\n");
|
||||||
|
} else {
|
||||||
|
while (wait(nullptr) > 0)
|
||||||
|
;
|
||||||
|
printf("parent: here we go\n");
|
||||||
|
topp->is_parent = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
topp->final();
|
||||||
|
|
||||||
|
VL_DO_DANGLING(delete topp, topp);
|
||||||
|
VL_DO_DANGLING(delete contextp, contextp);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
counter = 0
|
||||||
|
counter = 1
|
||||||
|
counter = 2
|
||||||
|
counter = 3
|
||||||
|
counter = 4
|
||||||
|
counter = 5
|
||||||
|
child: here we go
|
||||||
|
counter = 6
|
||||||
|
counter = 7
|
||||||
|
counter = 8
|
||||||
|
parent: here we go
|
||||||
|
counter = 6
|
||||||
|
counter = 7
|
||||||
|
counter = 8
|
||||||
|
*-* All Finished *-*
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
#!/usr/bin/env perl
|
||||||
|
if (!$::Driver) { use strict; use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs
|
||||||
|
#
|
||||||
|
# This file ONLY is placed into the Public Domain, for any use,
|
||||||
|
# without warranty, 2023 by Yinan Xu.
|
||||||
|
# SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
scenarios(vlt_all => 1);
|
||||||
|
|
||||||
|
compile(
|
||||||
|
make_top_shell => 0,
|
||||||
|
make_main => 0,
|
||||||
|
verilator_flags2 => ["--exe $Self->{t_dir}/$Self->{name}.cpp",
|
||||||
|
"-cc"],
|
||||||
|
threads => $Self->{vltmt} ? 2 : 1,
|
||||||
|
);
|
||||||
|
|
||||||
|
execute(
|
||||||
|
check_finished => 1,
|
||||||
|
expect_filename => $Self->{golden_filename},
|
||||||
|
);
|
||||||
|
|
||||||
|
ok(1);
|
||||||
|
1;
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
// DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs
|
||||||
|
//
|
||||||
|
// This model counts from 0 to 8. It forks a child process (in C++) at 6
|
||||||
|
// and waits for the child to simulate and exit for resumption (of the parent).
|
||||||
|
//
|
||||||
|
// This file ONLY is placed into the Public Domain, for any use,
|
||||||
|
// without warranty, 2023 by Yinan Xu.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
module top(
|
||||||
|
input clock,
|
||||||
|
input reset,
|
||||||
|
input is_parent,
|
||||||
|
output do_clone
|
||||||
|
);
|
||||||
|
|
||||||
|
reg [3:0] counter;
|
||||||
|
|
||||||
|
assign do_clone = counter == 4'h6;
|
||||||
|
|
||||||
|
always @(posedge clock) begin
|
||||||
|
if (reset) begin
|
||||||
|
counter <= 4'h0;
|
||||||
|
end
|
||||||
|
else begin
|
||||||
|
counter <= counter + 4'h1;
|
||||||
|
$write("counter = %d\n", counter);
|
||||||
|
end
|
||||||
|
|
||||||
|
if (counter[3]) begin
|
||||||
|
if (is_parent) begin
|
||||||
|
$write("*-* All Finished *-*\n");
|
||||||
|
end
|
||||||
|
$finish(0);
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
Loading…
Reference in New Issue