Add verilator_gantt profiling of DPI imports (#3084).
This commit is contained in:
parent
53b8a5b027
commit
9697a5ce6d
1
Changes
1
Changes
|
|
@ -14,6 +14,7 @@ Verilator 5.041 devel
|
|||
**Other:**
|
||||
|
||||
* Add error on zero/negative unpacked dimensions (#1642). [Stefan Wallentowitz]
|
||||
* Add verilator_gantt profiling of DPI imports (#3084). [Geza Lore]
|
||||
* Add error on non-packed struct randc (#5999). [Seth Pellegrino]
|
||||
* Add configure `--enable-asan` to compile verilator_bin with the address sanitizer (#6404). [Geza Lore]
|
||||
* Add $(LDFLAGS) and $(LIBS) when building shared libraries (#6425) (#6426). [Ahmed El-Mahmoudy]
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ def read_data(filename):
|
|||
re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
|
||||
cpu = None
|
||||
thread = 0
|
||||
thread_last_ecpu = None
|
||||
|
||||
global LongestVcdStrValueLength
|
||||
global ExecGraphTime
|
||||
|
|
@ -109,9 +110,13 @@ def read_data(filename):
|
|||
tick)
|
||||
elif kind == "THREAD_SCHEDULE_WAIT_BEGIN":
|
||||
ecpu = int(re_payload_wait.match(payload).groups()[0])
|
||||
thread_last_ecpu = ecpu
|
||||
ThreadScheduleWait[ecpu].append(tick)
|
||||
elif kind == "THREAD_SCHEDULE_WAIT_END":
|
||||
ecpu = int(re_payload_wait.match(payload).groups()[0])
|
||||
# Might have ended on a different CPU than the one where we got THREAD_SCHEDULE_WAIT_BEGIN
|
||||
assert thread_last_ecpu is not None, "THREAD_SCHEDULE_WAIT_END without BEGIN"
|
||||
ecpu = thread_last_ecpu
|
||||
thread_last_ecpu = None
|
||||
start = ThreadScheduleWait[ecpu].pop()
|
||||
WaitingTime += tick - start
|
||||
ThreadScheduleWaitIntervals.append((start, tick, ecpu))
|
||||
|
|
|
|||
|
|
@ -446,7 +446,7 @@ void EmitCSyms::emitSymHdr() {
|
|||
}
|
||||
|
||||
puts("\n// SYMS CLASS (contains all model state)\n");
|
||||
puts("class alignas(VL_CACHE_LINE_BYTES)" + EmitCUtil::symClassName()
|
||||
puts("class alignas(VL_CACHE_LINE_BYTES) " + EmitCUtil::symClassName()
|
||||
+ " final : public VerilatedSyms {\n");
|
||||
ofp()->putsPrivate(false); // public:
|
||||
|
||||
|
|
|
|||
|
|
@ -681,7 +681,7 @@ class TaskVisitor final : public VNVisitor {
|
|||
}
|
||||
}
|
||||
// First argument is symbol table, then output if a function
|
||||
const bool needSyms = !refp->taskp()->dpiImport();
|
||||
const bool needSyms = !refp->taskp()->dpiImport() || v3Global.opt.profExec();
|
||||
if (needSyms) ccallp->argTypes("vlSymsp");
|
||||
|
||||
if (refp->taskp()->dpiContext()) {
|
||||
|
|
@ -972,7 +972,7 @@ class TaskVisitor final : public VNVisitor {
|
|||
if (rtnvarp) {
|
||||
funcp->addStmtsp(createDpiTemp(rtnvarp, ""));
|
||||
funcp->addStmtsp(createAssignInternalToDpi(rtnvarp, false, tmpSuffixp, ""));
|
||||
string stmt = "return " + rtnvarp->name();
|
||||
string stmt = "return " + rtnvarp->name(); // TODO use AstCReturn?
|
||||
stmt += rtnvarp->basicp()->isDpiPrimitive() ? ";\n" : "[0];\n";
|
||||
funcp->addStmtsp(new AstCStmt{nodep->fileline(), stmt});
|
||||
}
|
||||
|
|
@ -1077,6 +1077,12 @@ class TaskVisitor final : public VNVisitor {
|
|||
void bodyDpiImportFunc(AstNodeFTask* nodep, AstVarScope* rtnvscp, AstCFunc* cfuncp,
|
||||
AstCFunc* dpiFuncp) {
|
||||
const char* const tmpSuffixp = V3Task::dpiTemporaryVarSuffix();
|
||||
|
||||
if (v3Global.opt.profExec())
|
||||
cfuncp->addStmtsp(
|
||||
new AstCStmt{nodep->fileline(),
|
||||
"VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"dpiimports\");\n"});
|
||||
|
||||
// Convert input/inout arguments to DPI types
|
||||
string args;
|
||||
for (AstNode* stmtp = cfuncp->argsp(); stmtp; stmtp = stmtp->nextp()) {
|
||||
|
|
@ -1162,6 +1168,10 @@ class TaskVisitor final : public VNVisitor {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (v3Global.opt.profExec())
|
||||
cfuncp->addStmtsp(new AstCStmt{nodep->fileline(),
|
||||
"VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPop();\n"});
|
||||
}
|
||||
|
||||
AstVarScope* getDpiExporTrigger() {
|
||||
|
|
@ -1285,9 +1295,12 @@ class TaskVisitor final : public VNVisitor {
|
|||
|
||||
if (cfuncp->dpiImportWrapper()) cfuncp->cname(nodep->cname());
|
||||
|
||||
const bool needSyms
|
||||
= (!nodep->dpiImport() && !nodep->taskPublic()) || v3Global.opt.profExec();
|
||||
if (needSyms) cfuncp->argTypes(EmitCUtil::symClassVar());
|
||||
|
||||
if (!nodep->dpiImport() && !nodep->taskPublic()) {
|
||||
// Need symbol table
|
||||
cfuncp->argTypes(EmitCUtil::symClassVar());
|
||||
if (cfuncp->name() == "new") {
|
||||
const string stmt = VIdProtect::protect("_ctor_var_reset") + "(vlSymsp);\n";
|
||||
cfuncp->addInitsp(new AstCStmt{nodep->fileline(), stmt});
|
||||
|
|
|
|||
|
|
@ -12,10 +12,11 @@
|
|||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt_all')
|
||||
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
||||
test.top_filename = "t/t_gantt.v"
|
||||
test.pli_filename = "t/t_gantt_c.cpp"
|
||||
|
||||
test.compile(
|
||||
v_flags2=["--prof-exec"],
|
||||
verilator_flags2=["--prof-exec", test.pli_filename],
|
||||
# Checks below care about thread count, so use 2 (minimum reasonable)
|
||||
threads=(2 if test.vltmt else 1))
|
||||
|
||||
|
|
@ -35,13 +36,13 @@ test.run(cmd=[
|
|||
])
|
||||
|
||||
if test.vltmt:
|
||||
test.file_grep(gantt_log, r'Total threads += 2')
|
||||
test.file_grep(gantt_log, r'Total mtasks += 7')
|
||||
test.file_grep(gantt_log, r'Total threads += +(\d+)', 2)
|
||||
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 6)
|
||||
# Predicted thread utilization should be less than 100%
|
||||
test.file_grep_not(gantt_log, r'Thread utilization =\s*\d\d\d+\.\d+%')
|
||||
else:
|
||||
test.file_grep(gantt_log, r'Total threads += 1')
|
||||
test.file_grep(gantt_log, r'Total mtasks += 0')
|
||||
test.file_grep(gantt_log, r'Total threads += +(\d+)', 1)
|
||||
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 0)
|
||||
|
||||
test.file_grep(gantt_log, r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,70 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||
// any use, without warranty, 2021 by Wilson Snyder.
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
module t(
|
||||
input clk
|
||||
);
|
||||
|
||||
integer cyc = 0;
|
||||
wire [63:0] result;
|
||||
|
||||
Test test(/*AUTOINST*/
|
||||
// Outputs
|
||||
.result (result[63:0]),
|
||||
// Inputs
|
||||
.clk (clk),
|
||||
.cyc (cyc));
|
||||
|
||||
reg [63:0] sum;
|
||||
|
||||
always @ (posedge clk) begin
|
||||
`ifdef TEST_VERBOSE
|
||||
$write("[%0t] cyc==%0d result=%x\n", $time, cyc, result);
|
||||
`endif
|
||||
cyc <= cyc + 1;
|
||||
sum <= result ^ {sum[62:0], sum[63] ^ sum[2] ^ sum[0]};
|
||||
if (cyc == 0) begin
|
||||
// Setup
|
||||
sum <= '0;
|
||||
end
|
||||
else if (cyc < 10) begin
|
||||
sum <= '0;
|
||||
end
|
||||
else if (cyc == 99) begin
|
||||
$write("[%0t] cyc==%0d sum=%x\n", $time, cyc, sum);
|
||||
// What checksum will we end up with (above print should match)
|
||||
`define EXPECTED_SUM 64'haf665a181ead5e12
|
||||
if (sum !== `EXPECTED_SUM) $stop;
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
module Test(/*AUTOARG*/
|
||||
// Outputs
|
||||
result,
|
||||
// Inputs
|
||||
clk, cyc
|
||||
);
|
||||
|
||||
input clk;
|
||||
input int cyc;
|
||||
output reg [63:0] result;
|
||||
|
||||
logic [63:0] adder;
|
||||
|
||||
import "DPI-C" pure function int dpii_return(input int i);
|
||||
|
||||
always @(posedge clk) begin
|
||||
adder = 0;
|
||||
for (int i = 0; i < 100000; ++i)
|
||||
adder += {32'h0, (cyc+i)} ** 3 + {32'h0, dpii_return(1)};
|
||||
|
||||
result <= adder;
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||
// any use, without warranty, 2025 by Wilson Snyder.
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
extern "C" {
|
||||
// DPI-C import target for the gantt profiling test: hands the argument
// straight back to the caller, unchanged.
int dpii_return(int i) {
    const int echoed = i;
    return echoed;
}
|
||||
}
|
||||
|
|
@ -12,10 +12,11 @@
|
|||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt_all')
|
||||
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
||||
test.top_filename = "t/t_gantt.v"
|
||||
test.pli_filename = "t/t_gantt_c.cpp"
|
||||
|
||||
test.compile(
|
||||
v_flags2=["--prof-exec", "--hierarchical"],
|
||||
verilator_flags2=["--prof-exec", "--hierarchical", test.pli_filename],
|
||||
# Checks below care about thread count, so use 2 (minimum reasonable)
|
||||
threads=(2 if test.vltmt else 1))
|
||||
|
||||
|
|
@ -35,13 +36,13 @@ test.run(cmd=[
|
|||
])
|
||||
|
||||
if test.vltmt:
|
||||
test.file_grep(gantt_log, r'Total threads += 2')
|
||||
test.file_grep(gantt_log, r'Total mtasks += 11')
|
||||
test.file_grep(gantt_log, r'Total threads += +(\d+)', 2)
|
||||
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 6)
|
||||
# Predicted thread utilization should be less than 100%
|
||||
test.file_grep_not(gantt_log, r'Thread utilization =\s*\d\d\d+\.\d+%')
|
||||
else:
|
||||
test.file_grep(gantt_log, r'Total threads += 1')
|
||||
test.file_grep(gantt_log, r'Total mtasks += 0')
|
||||
test.file_grep(gantt_log, r'Total threads += +(\d+)', 1)
|
||||
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 0)
|
||||
|
||||
test.file_grep(gantt_log, r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval')
|
||||
|
||||
|
|
|
|||
|
|
@ -12,10 +12,11 @@
|
|||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vltmt')
|
||||
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
||||
test.top_filename = "t/t_gantt.v"
|
||||
test.pli_filename = "t/t_gantt_c.cpp"
|
||||
|
||||
test.compile(
|
||||
v_flags2=["--prof-exec"],
|
||||
verilator_flags2=["--prof-exec", test.pli_filename],
|
||||
# Checks below care about thread count
|
||||
threads=4)
|
||||
|
||||
|
|
|
|||
|
|
@ -12,14 +12,15 @@
|
|||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt_all')
|
||||
test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles
|
||||
test.top_filename = "t/t_gantt.v"
|
||||
test.pli_filename = "t/t_gantt_c.cpp"
|
||||
|
||||
threads_num = (2 if test.vltmt else 1)
|
||||
|
||||
test.compile(
|
||||
make_top_shell=False,
|
||||
make_main=False,
|
||||
v_flags2=["--prof-exec --exe", test.pli_filename],
|
||||
verilator_flags2=["--prof-exec --exe", test.pli_filename, "t/t_gantt_two.cpp"],
|
||||
# Checks below care about thread count, so use 2 (minimum reasonable)
|
||||
threads=threads_num,
|
||||
make_flags=["CPPFLAGS_ADD=\"-DVL_NO_LEGACY -DTEST_USE_THREADS=" + str(threads_num) + "\""])
|
||||
|
|
@ -41,11 +42,11 @@ test.run(cmd=[
|
|||
"| tee " + gantt_log]) # yapf:disable
|
||||
|
||||
if test.vltmt:
|
||||
test.file_grep(gantt_log, r'Total threads += 2')
|
||||
test.file_grep(gantt_log, r'Total mtasks += 7')
|
||||
test.file_grep(gantt_log, r'Total threads += +(\d+)', 2)
|
||||
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 6)
|
||||
else:
|
||||
test.file_grep(gantt_log, r'Total threads += 1')
|
||||
test.file_grep(gantt_log, r'Total mtasks += 0')
|
||||
test.file_grep(gantt_log, r'Total threads += +(\d+)', 1)
|
||||
test.file_grep(gantt_log, r'Total mtasks += +(\d+)', 0)
|
||||
|
||||
test.file_grep(gantt_log, r'\|\s+4\s+\|\s+4\.0+\s+\|\s+eval')
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue