Add verilator_gantt profiling of DPI imports (#3084).
This commit is contained in:
parent
53b8a5b027
commit
9697a5ce6d
1
Changes
1
Changes
|
|
@ -14,6 +14,7 @@ Verilator 5.041 devel
|
||||||
**Other:**
|
**Other:**
|
||||||
|
|
||||||
* Add error on zero/negative unpacked dimensions (#1642). [Stefan Wallentowitz]
|
* Add error on zero/negative unpacked dimensions (#1642). [Stefan Wallentowitz]
|
||||||
|
* Add verilator_gantt profiling of DPI imports (#3084). [Geza Lore]
|
||||||
* Add error on non-packed struct randc (#5999). [Seth Pellegrino]
|
* Add error on non-packed struct randc (#5999). [Seth Pellegrino]
|
||||||
* Add configure `--enable-asan` to compile verilator_bin with the address sanitizer (#6404). [Geza Lore]
|
* Add configure `--enable-asan` to compile verilator_bin with the address sanitizer (#6404). [Geza Lore]
|
||||||
* Add $(LDFLAGS) and $(LIBS) when building shared libraries (#6425) (#6426). [Ahmed El-Mahmoudy]
|
* Add $(LDFLAGS) and $(LIBS) when building shared libraries (#6425) (#6426). [Ahmed El-Mahmoudy]
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@ def read_data(filename):
|
||||||
re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
|
re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
|
||||||
cpu = None
|
cpu = None
|
||||||
thread = 0
|
thread = 0
|
||||||
|
thread_last_ecpu = None
|
||||||
|
|
||||||
global LongestVcdStrValueLength
|
global LongestVcdStrValueLength
|
||||||
global ExecGraphTime
|
global ExecGraphTime
|
||||||
|
|
@ -109,9 +110,13 @@ def read_data(filename):
|
||||||
tick)
|
tick)
|
||||||
elif kind == "THREAD_SCHEDULE_WAIT_BEGIN":
|
elif kind == "THREAD_SCHEDULE_WAIT_BEGIN":
|
||||||
ecpu = int(re_payload_wait.match(payload).groups()[0])
|
ecpu = int(re_payload_wait.match(payload).groups()[0])
|
||||||
|
thread_last_ecpu = ecpu
|
||||||
ThreadScheduleWait[ecpu].append(tick)
|
ThreadScheduleWait[ecpu].append(tick)
|
||||||
elif kind == "THREAD_SCHEDULE_WAIT_END":
|
elif kind == "THREAD_SCHEDULE_WAIT_END":
|
||||||
ecpu = int(re_payload_wait.match(payload).groups()[0])
|
# Might have ended on a different CPU than the one where we got THREAD_SCHEDULE_WAIT_BEGIN
|
||||||
|
assert thread_last_ecpu is not None, "THREAD_SCHEDULE_WAIT_END without BEGIN"
|
||||||
|
ecpu = thread_last_ecpu
|
||||||
|
thread_last_ecpu = None
|
||||||
start = ThreadScheduleWait[ecpu].pop()
|
start = ThreadScheduleWait[ecpu].pop()
|
||||||
WaitingTime += tick - start
|
WaitingTime += tick - start
|
||||||
ThreadScheduleWaitIntervals.append((start, tick, ecpu))
|
ThreadScheduleWaitIntervals.append((start, tick, ecpu))
|
||||||
|
|
|
||||||
|
|
@ -446,7 +446,7 @@ void EmitCSyms::emitSymHdr() {
|
||||||
}
|
}
|
||||||
|
|
||||||
puts("\n// SYMS CLASS (contains all model state)\n");
|
puts("\n// SYMS CLASS (contains all model state)\n");
|
||||||
puts("class alignas(VL_CACHE_LINE_BYTES)" + EmitCUtil::symClassName()
|
puts("class alignas(VL_CACHE_LINE_BYTES) " + EmitCUtil::symClassName()
|
||||||
+ " final : public VerilatedSyms {\n");
|
+ " final : public VerilatedSyms {\n");
|
||||||
ofp()->putsPrivate(false); // public:
|
ofp()->putsPrivate(false); // public:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -681,7 +681,7 @@ class TaskVisitor final : public VNVisitor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// First argument is symbol table, then output if a function
|
// First argument is symbol table, then output if a function
|
||||||
const bool needSyms = !refp->taskp()->dpiImport();
|
const bool needSyms = !refp->taskp()->dpiImport() || v3Global.opt.profExec();
|
||||||
if (needSyms) ccallp->argTypes("vlSymsp");
|
if (needSyms) ccallp->argTypes("vlSymsp");
|
||||||
|
|
||||||
if (refp->taskp()->dpiContext()) {
|
if (refp->taskp()->dpiContext()) {
|
||||||
|
|
@ -972,7 +972,7 @@ class TaskVisitor final : public VNVisitor {
|
||||||
if (rtnvarp) {
|
if (rtnvarp) {
|
||||||
funcp->addStmtsp(createDpiTemp(rtnvarp, ""));
|
funcp->addStmtsp(createDpiTemp(rtnvarp, ""));
|
||||||
funcp->addStmtsp(createAssignInternalToDpi(rtnvarp, false, tmpSuffixp, ""));
|
funcp->addStmtsp(createAssignInternalToDpi(rtnvarp, false, tmpSuffixp, ""));
|
||||||
string stmt = "return " + rtnvarp->name();
|
string stmt = "return " + rtnvarp->name(); // TODO use AstCReturn?
|
||||||
stmt += rtnvarp->basicp()->isDpiPrimitive() ? ";\n" : "[0];\n";
|
stmt += rtnvarp->basicp()->isDpiPrimitive() ? ";\n" : "[0];\n";
|
||||||
funcp->addStmtsp(new AstCStmt{nodep->fileline(), stmt});
|
funcp->addStmtsp(new AstCStmt{nodep->fileline(), stmt});
|
||||||
}
|
}
|
||||||
|
|
@ -1077,6 +1077,12 @@ class TaskVisitor final : public VNVisitor {
|
||||||
void bodyDpiImportFunc(AstNodeFTask* nodep, AstVarScope* rtnvscp, AstCFunc* cfuncp,
|
void bodyDpiImportFunc(AstNodeFTask* nodep, AstVarScope* rtnvscp, AstCFunc* cfuncp,
|
||||||
AstCFunc* dpiFuncp) {
|
AstCFunc* dpiFuncp) {
|
||||||
const char* const tmpSuffixp = V3Task::dpiTemporaryVarSuffix();
|
const char* const tmpSuffixp = V3Task::dpiTemporaryVarSuffix();
|
||||||
|
|
||||||
|
if (v3Global.opt.profExec())
|
||||||
|
cfuncp->addStmtsp(
|
||||||
|
new AstCStmt{nodep->fileline(),
|
||||||
|
"VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"dpiimports\");\n"});
|
||||||
|
|
||||||
// Convert input/inout arguments to DPI types
|
// Convert input/inout arguments to DPI types
|
||||||
string args;
|
string args;
|
||||||
for (AstNode* stmtp = cfuncp->argsp(); stmtp; stmtp = stmtp->nextp()) {
|
for (AstNode* stmtp = cfuncp->argsp(); stmtp; stmtp = stmtp->nextp()) {
|
||||||
|
|
@ -1162,6 +1168,10 @@ class TaskVisitor final : public VNVisitor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (v3Global.opt.profExec())
|
||||||
|
cfuncp->addStmtsp(new AstCStmt{nodep->fileline(),
|
||||||
|
"VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPop();\n"});
|
||||||
}
|
}
|
||||||
|
|
||||||
AstVarScope* getDpiExporTrigger() {
|
AstVarScope* getDpiExporTrigger() {
|
||||||
|
|
@ -1285,9 +1295,12 @@ class TaskVisitor final : public VNVisitor {
|
||||||
|
|
||||||
if (cfuncp->dpiImportWrapper()) cfuncp->cname(nodep->cname());
|
if (cfuncp->dpiImportWrapper()) cfuncp->cname(nodep->cname());
|
||||||
|
|
||||||
|
const bool needSyms
|
||||||
|
= (!nodep->dpiImport() && !nodep->taskPublic()) || v3Global.opt.profExec();
|
||||||
|
if (needSyms) cfuncp->argTypes(EmitCUtil::symClassVar());
|
||||||
|
|
||||||
if (!nodep->dpiImport() && !nodep->taskPublic()) {
|
if (!nodep->dpiImport() && !nodep->taskPublic()) {
|
||||||
// Need symbol table
|
// Need symbol table
|
||||||
cfuncp->argTypes(EmitCUtil::symClassVar());
|
|
||||||
if (cfuncp->name() == "new") {
|
if (cfuncp->name() == "new") {
|
||||||
const string stmt = VIdProtect::protect("_ctor_var_reset") + "(vlSymsp);\n";
|
const string stmt = VIdProtect::protect("_ctor_var_reset") + "(vlSymsp);\n";
|
||||||
cfuncp->addInitsp(new AstCStmt{nodep->fileline(), stmt});
|
cfuncp->addInitsp(new AstCStmt{nodep->fileline(), stmt});
|
||||||
|
|
|
||||||
|
|
@ -12,10 +12,11 @@
|
||||||
import vltest_bootstrap
|
import vltest_bootstrap
|
||||||
|
|
||||||
test.scenarios('vlt_all')
|
test.scenarios('vlt_all')
|
||||||
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
test.top_filename = "t/t_gantt.v"
|
||||||
|
test.pli_filename = "t/t_gantt_c.cpp"
|
||||||
|
|
||||||
test.compile(
|
test.compile(
|
||||||
v_flags2=["--prof-exec"],
|
verilator_flags2=["--prof-exec", test.pli_filename],
|
||||||
# Checks below care about thread count, so use 2 (minimum reasonable)
|
# Checks below care about thread count, so use 2 (minimum reasonable)
|
||||||
threads=(2 if test.vltmt else 1))
|
threads=(2 if test.vltmt else 1))
|
||||||
|
|
||||||
|
|
@ -35,13 +36,13 @@ test.run(cmd=[
|
||||||
])
|
])
|
||||||
|
|
||||||
if test.vltmt:
|
if test.vltmt:
|
||||||
test.file_grep(gantt_log, r'Total threads += 2')
|
test.file_grep(gantt_log, r'Total threads += +(\d+)', 2)
|
||||||
test.file_grep(gantt_log, r'Total mtasks += 7')
|
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 6)
|
||||||
# Predicted thread utilization should be less than 100%
|
# Predicted thread utilization should be less than 100%
|
||||||
test.file_grep_not(gantt_log, r'Thread utilization =\s*\d\d\d+\.\d+%')
|
test.file_grep_not(gantt_log, r'Thread utilization =\s*\d\d\d+\.\d+%')
|
||||||
else:
|
else:
|
||||||
test.file_grep(gantt_log, r'Total threads += 1')
|
test.file_grep(gantt_log, r'Total threads += +(\d+)', 1)
|
||||||
test.file_grep(gantt_log, r'Total mtasks += 0')
|
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 0)
|
||||||
|
|
||||||
test.file_grep(gantt_log, r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')
|
test.file_grep(gantt_log, r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
// DESCRIPTION: Verilator: Verilog Test module
|
||||||
|
//
|
||||||
|
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||||
|
// any use, without warranty, 2021 by Wilson Snyder.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
module t(
|
||||||
|
input clk
|
||||||
|
);
|
||||||
|
|
||||||
|
integer cyc = 0;
|
||||||
|
wire [63:0] result;
|
||||||
|
|
||||||
|
Test test(/*AUTOINST*/
|
||||||
|
// Outputs
|
||||||
|
.result (result[63:0]),
|
||||||
|
// Inputs
|
||||||
|
.clk (clk),
|
||||||
|
.cyc (cyc));
|
||||||
|
|
||||||
|
reg [63:0] sum;
|
||||||
|
|
||||||
|
always @ (posedge clk) begin
|
||||||
|
`ifdef TEST_VERBOSE
|
||||||
|
$write("[%0t] cyc==%0d result=%x\n", $time, cyc, result);
|
||||||
|
`endif
|
||||||
|
cyc <= cyc + 1;
|
||||||
|
sum <= result ^ {sum[62:0], sum[63] ^ sum[2] ^ sum[0]};
|
||||||
|
if (cyc == 0) begin
|
||||||
|
// Setup
|
||||||
|
sum <= '0;
|
||||||
|
end
|
||||||
|
else if (cyc < 10) begin
|
||||||
|
sum <= '0;
|
||||||
|
end
|
||||||
|
else if (cyc == 99) begin
|
||||||
|
$write("[%0t] cyc==%0d sum=%x\n", $time, cyc, sum);
|
||||||
|
// What checksum will we end up with (above print should match)
|
||||||
|
`define EXPECTED_SUM 64'haf665a181ead5e12
|
||||||
|
if (sum !== `EXPECTED_SUM) $stop;
|
||||||
|
$write("*-* All Finished *-*\n");
|
||||||
|
$finish;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
module Test(/*AUTOARG*/
|
||||||
|
// Outputs
|
||||||
|
result,
|
||||||
|
// Inputs
|
||||||
|
clk, cyc
|
||||||
|
);
|
||||||
|
|
||||||
|
input clk;
|
||||||
|
input int cyc;
|
||||||
|
output reg [63:0] result;
|
||||||
|
|
||||||
|
logic [63:0] adder;
|
||||||
|
|
||||||
|
import "DPI-C" pure function int dpii_return(input int i);
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
adder = 0;
|
||||||
|
for (int i = 0; i < 100000; ++i)
|
||||||
|
adder += {32'h0, (cyc+i)} ** 3 + {32'h0, dpii_return(1)};
|
||||||
|
|
||||||
|
result <= adder;
|
||||||
|
end
|
||||||
|
endmodule
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||||
|
//*************************************************************************
|
||||||
|
//
|
||||||
|
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||||
|
// any use, without warranty, 2025 by Wilson Snyder.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
//
|
||||||
|
//*************************************************************************
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
// DPI-C import target for the test: hands its argument straight back to the caller.
int dpii_return(int i) {
    return i;
}
|
||||||
|
}
|
||||||
|
|
@ -12,10 +12,11 @@
|
||||||
import vltest_bootstrap
|
import vltest_bootstrap
|
||||||
|
|
||||||
test.scenarios('vlt_all')
|
test.scenarios('vlt_all')
|
||||||
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
test.top_filename = "t/t_gantt.v"
|
||||||
|
test.pli_filename = "t/t_gantt_c.cpp"
|
||||||
|
|
||||||
test.compile(
|
test.compile(
|
||||||
v_flags2=["--prof-exec", "--hierarchical"],
|
verilator_flags2=["--prof-exec", "--hierarchical", test.pli_filename],
|
||||||
# Checks below care about thread count, so use 2 (minimum reasonable)
|
# Checks below care about thread count, so use 2 (minimum reasonable)
|
||||||
threads=(2 if test.vltmt else 1))
|
threads=(2 if test.vltmt else 1))
|
||||||
|
|
||||||
|
|
@ -35,13 +36,13 @@ test.run(cmd=[
|
||||||
])
|
])
|
||||||
|
|
||||||
if test.vltmt:
|
if test.vltmt:
|
||||||
test.file_grep(gantt_log, r'Total threads += 2')
|
test.file_grep(gantt_log, r'Total threads += +(\d+)', 2)
|
||||||
test.file_grep(gantt_log, r'Total mtasks += 11')
|
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 6)
|
||||||
# Predicted thread utilization should be less than 100%
|
# Predicted thread utilization should be less than 100%
|
||||||
test.file_grep_not(gantt_log, r'Thread utilization =\s*\d\d\d+\.\d+%')
|
test.file_grep_not(gantt_log, r'Thread utilization =\s*\d\d\d+\.\d+%')
|
||||||
else:
|
else:
|
||||||
test.file_grep(gantt_log, r'Total threads += 1')
|
test.file_grep(gantt_log, r'Total threads += +(\d+)', 1)
|
||||||
test.file_grep(gantt_log, r'Total mtasks += 0')
|
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 0)
|
||||||
|
|
||||||
test.file_grep(gantt_log, r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')
|
test.file_grep(gantt_log, r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,10 +12,11 @@
|
||||||
import vltest_bootstrap
|
import vltest_bootstrap
|
||||||
|
|
||||||
test.scenarios('vltmt')
|
test.scenarios('vltmt')
|
||||||
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
test.top_filename = "t/t_gantt.v"
|
||||||
|
test.pli_filename = "t/t_gantt_c.cpp"
|
||||||
|
|
||||||
test.compile(
|
test.compile(
|
||||||
v_flags2=["--prof-exec"],
|
verilator_flags2=["--prof-exec", test.pli_filename],
|
||||||
# Checks below care about thread count
|
# Checks below care about thread count
|
||||||
threads=4)
|
threads=4)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,14 +12,15 @@
|
||||||
import vltest_bootstrap
|
import vltest_bootstrap
|
||||||
|
|
||||||
test.scenarios('vlt_all')
|
test.scenarios('vlt_all')
|
||||||
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
test.top_filename = "t/t_gantt.v"
|
||||||
|
test.pli_filename = "t/t_gantt_c.cpp"
|
||||||
|
|
||||||
threads_num = (2 if test.vltmt else 1)
|
threads_num = (2 if test.vltmt else 1)
|
||||||
|
|
||||||
test.compile(
|
test.compile(
|
||||||
make_top_shell=False,
|
make_top_shell=False,
|
||||||
make_main=False,
|
make_main=False,
|
||||||
v_flags2=["--prof-exec --exe", test.pli_filename],
|
verilator_flags2=["--prof-exec --exe", test.pli_filename, "t/t_gantt_two.cpp"],
|
||||||
# Checks below care about thread count, so use 2 (minimum reasonable)
|
# Checks below care about thread count, so use 2 (minimum reasonable)
|
||||||
threads=threads_num,
|
threads=threads_num,
|
||||||
make_flags=["CPPFLAGS_ADD=\"-DVL_NO_LEGACY -DTEST_USE_THREADS=" + str(threads_num) + "\""])
|
make_flags=["CPPFLAGS_ADD=\"-DVL_NO_LEGACY -DTEST_USE_THREADS=" + str(threads_num) + "\""])
|
||||||
|
|
@ -41,11 +42,11 @@ test.run(cmd=[
|
||||||
"| tee " + gantt_log]) # yapf:disable
|
"| tee " + gantt_log]) # yapf:disable
|
||||||
|
|
||||||
if test.vltmt:
|
if test.vltmt:
|
||||||
test.file_grep(gantt_log, r'Total threads += 2')
|
test.file_grep(gantt_log, r'Total threads += +(\d+)', 2)
|
||||||
test.file_grep(gantt_log, r'Total mtasks += 7')
|
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 6)
|
||||||
else:
|
else:
|
||||||
test.file_grep(gantt_log, r'Total threads += 1')
|
test.file_grep(gantt_log, r'Total threads += +(\d+)', 1)
|
||||||
test.file_grep(gantt_log, r'Total mtasks += 0')
|
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 0)
|
||||||
|
|
||||||
test.file_grep(gantt_log, r'\|\s+4\s+\|\s+4\.0+\s+\|\s+eval')
|
test.file_grep(gantt_log, r'\|\s+4\s+\|\s+4\.0+\s+\|\s+eval')
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue