Add visualization of multi-threaded waiting time with verilator_gantt (#5929)

This commit is contained in:
Bartłomiej Chmiel 2025-04-10 16:03:58 +02:00 committed by GitHub
parent fc8e1b5a2e
commit 84f26ed3d7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 156 additions and 69 deletions

View File

@ -26,7 +26,9 @@ Global = {
} }
ElapsedTime = None # total elapsed time ElapsedTime = None # total elapsed time
ExecGraphTime = 0 # total elapsed time executing an exec graph ExecGraphTime = 0 # total elapsed time executing an exec graph
WaitingTime = 0 # total elapsed time waiting for mtasks
ExecGraphIntervals = [] # list of (start, end) pairs ExecGraphIntervals = [] # list of (start, end) pairs
ThreadScheduleWaitIntervals = [] # list of (start, tick, ecpu) pairs
###################################################################### ######################################################################
@ -37,6 +39,7 @@ def read_data(filename):
re_record = re.compile(r'^VLPROFEXEC (\S+) (\d+)(.*)$') re_record = re.compile(r'^VLPROFEXEC (\S+) (\d+)(.*)$')
re_payload_mtaskBegin = re.compile(r'id (\d+) predictStart (\d+) cpu (\d+)') re_payload_mtaskBegin = re.compile(r'id (\d+) predictStart (\d+) cpu (\d+)')
re_payload_mtaskEnd = re.compile(r'id (\d+) predictCost (\d+)') re_payload_mtaskEnd = re.compile(r'id (\d+) predictCost (\d+)')
re_payload_wait = re.compile(r'cpu (\d+)')
re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.]*)\s*') re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.]*)\s*')
re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.]*)\s*$') re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.]*)\s*$')
@ -49,9 +52,11 @@ def read_data(filename):
global LongestVcdStrValueLength global LongestVcdStrValueLength
global ExecGraphTime global ExecGraphTime
global WaitingTime
ExecGraphStack = [] ExecGraphStack = []
SectionStack = [] SectionStack = []
ThreadScheduleWait = collections.defaultdict(list)
mTaskThread = {} mTaskThread = {}
for line in fh: for line in fh:
@ -96,6 +101,14 @@ def read_data(filename):
Mtasks[mtask]['elapsed'] += tick - begin Mtasks[mtask]['elapsed'] += tick - begin
Mtasks[mtask]['predict_cost'] = predict_cost Mtasks[mtask]['predict_cost'] = predict_cost
Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], tick) Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], tick)
elif kind == "THREAD_SCHEDULE_WAIT_BEGIN":
ecpu = int(re_payload_wait.match(payload).groups()[0])
ThreadScheduleWait[ecpu].append(tick)
elif kind == "THREAD_SCHEDULE_WAIT_END":
ecpu = int(re_payload_wait.match(payload).groups()[0])
start = ThreadScheduleWait[ecpu].pop()
WaitingTime += tick - start
ThreadScheduleWaitIntervals.append((start, tick, ecpu))
elif kind == "EXEC_GRAPH_BEGIN": elif kind == "EXEC_GRAPH_BEGIN":
ExecGraphStack.append(tick) ExecGraphStack.append(tick)
elif kind == "EXEC_GRAPH_END": elif kind == "EXEC_GRAPH_END":
@ -169,6 +182,7 @@ def report():
print("\nSummary:") print("\nSummary:")
print(" Total elapsed time = {} rdtsc ticks".format(ElapsedTime)) print(" Total elapsed time = {} rdtsc ticks".format(ElapsedTime))
print(" Parallelized code = {:.2%} of elapsed time".format(ExecGraphTime / ElapsedTime)) print(" Parallelized code = {:.2%} of elapsed time".format(ExecGraphTime / ElapsedTime))
print(" Waiting time = {:.2%} of elapsed time".format(WaitingTime / ElapsedTime))
print(" Total threads = %d" % nthreads) print(" Total threads = %d" % nthreads)
print(" Total CPUs used = %d" % ncpus) print(" Total CPUs used = %d" % ncpus)
print(" Total mtasks = %d" % len(Mtasks)) print(" Total mtasks = %d" % len(Mtasks))
@ -422,6 +436,9 @@ def write_vcd(filename):
val = buf.ljust(LongestVcdStrValueLength * 8, "0") val = buf.ljust(LongestVcdStrValueLength * 8, "0")
values[time][code] = val values[time][code] = val
def addXValue(code, time):
values[time][code] = 'x'
# Measured graph # Measured graph
for thread in sorted(Threads): for thread in sorted(Threads):
mcode = getCode(32, 'measured', 't%d_mtask' % thread) mcode = getCode(32, 'measured', 't%d_mtask' % thread)
@ -446,6 +463,12 @@ def write_vcd(filename):
tStart = sorted(_['start'] for records in Threads.values() for _ in records) tStart = sorted(_['start'] for records in Threads.values() for _ in records)
tEnd = sorted(_['end'] for records in Threads.values() for _ in records) tEnd = sorted(_['end'] for records in Threads.values() for _ in records)
# Measured waiting time
for (start, end, cpu) in ThreadScheduleWaitIntervals:
mcode = getCode(32, 'measured', 'cpu%d_waiting' % cpu)
addXValue(mcode, start)
addValue(mcode, end, None)
# Predicted graph # Predicted graph
for start, end in ExecGraphIntervals: for start, end in ExecGraphIntervals:
# Find the earliest MTask start after the start point, and the # Find the earliest MTask start after the start point, and the
@ -532,6 +555,8 @@ def write_vcd(filename):
value = values[time][code] value = values[time][code]
if value is None: if value is None:
fh.write("bz v%x\n" % code) fh.write("bz v%x\n" % code)
elif value == 'x':
fh.write("bx v%x\n" % code)
elif isinstance(value, str): elif isinstance(value, str):
fh.write("b%s v%x\n" % (value, code)) fh.write("b%s v%x\n" % (value, code))
else: else:

View File

@ -51,6 +51,10 @@ predicted_parallelism
cpu#_thread cpu#_thread
For the given CPU number, the thread number measured to be executing. For the given CPU number, the thread number measured to be executing.
cpu#_waiting
For the given CPU number, aggregated waiting time for mtask dependencies.
Visualized as `X` values.
mtask#_cpu mtask#_cpu
For the given mtask id, the CPU it was measured to execute on. For the given mtask id, the CPU it was measured to execute on.

View File

@ -199,6 +199,12 @@ void VlExecutionProfiler::dump(const char* filenamep, uint64_t tickEnd)
fprintf(fp, " id %u predictCost %u\n", payload.m_id, payload.m_predictCost); fprintf(fp, " id %u predictCost %u\n", payload.m_id, payload.m_predictCost);
break; break;
} }
case VlExecutionRecord::Type::THREAD_SCHEDULE_WAIT_BEGIN:
case VlExecutionRecord::Type::THREAD_SCHEDULE_WAIT_END: {
const auto& payload = er.m_payload.threadScheduleWait;
fprintf(fp, " cpu %u\n", payload.m_cpu);
break;
}
case VlExecutionRecord::Type::SECTION_PUSH: { case VlExecutionRecord::Type::SECTION_PUSH: {
const auto& payload = er.m_payload.sectionPush; const auto& payload = er.m_payload.sectionPush;
fprintf(fp, " %s\n", payload.m_name); fprintf(fp, " %s\n", payload.m_name);

View File

@ -62,6 +62,8 @@ VL_ATTR_ALWINLINE QData VL_CPU_TICK() {
_VL_FOREACH_APPLY(macro, SECTION_POP) \ _VL_FOREACH_APPLY(macro, SECTION_POP) \
_VL_FOREACH_APPLY(macro, MTASK_BEGIN) \ _VL_FOREACH_APPLY(macro, MTASK_BEGIN) \
_VL_FOREACH_APPLY(macro, MTASK_END) \ _VL_FOREACH_APPLY(macro, MTASK_END) \
_VL_FOREACH_APPLY(macro, THREAD_SCHEDULE_WAIT_BEGIN) \
_VL_FOREACH_APPLY(macro, THREAD_SCHEDULE_WAIT_END) \
_VL_FOREACH_APPLY(macro, EXEC_GRAPH_BEGIN) \ _VL_FOREACH_APPLY(macro, EXEC_GRAPH_BEGIN) \
_VL_FOREACH_APPLY(macro, EXEC_GRAPH_END) _VL_FOREACH_APPLY(macro, EXEC_GRAPH_END)
// clang-format on // clang-format on
@ -95,6 +97,9 @@ class VlExecutionRecord final {
uint32_t m_id; // MTask id uint32_t m_id; // MTask id
uint32_t m_predictCost; // How long scheduler predicted would take uint32_t m_predictCost; // How long scheduler predicted would take
} mtaskEnd; } mtaskEnd;
struct {
uint32_t m_cpu; // Executing CPU id
} threadScheduleWait;
}; };
// STATE // STATE
@ -126,6 +131,14 @@ public:
m_payload.mtaskEnd.m_predictCost = predictCost; m_payload.mtaskEnd.m_predictCost = predictCost;
m_type = Type::MTASK_END; m_type = Type::MTASK_END;
} }
void threadScheduleWaitBegin() {
m_payload.threadScheduleWait.m_cpu = VlOs::getcpu();
m_type = Type::THREAD_SCHEDULE_WAIT_BEGIN;
}
void threadScheduleWaitEnd() {
m_payload.threadScheduleWait.m_cpu = VlOs::getcpu();
m_type = Type::THREAD_SCHEDULE_WAIT_END;
}
void execGraphBegin() { m_type = Type::EXEC_GRAPH_BEGIN; } void execGraphBegin() { m_type = Type::EXEC_GRAPH_BEGIN; }
void execGraphEnd() { m_type = Type::EXEC_GRAPH_END; } void execGraphEnd() { m_type = Type::EXEC_GRAPH_END; }
}; };

View File

@ -726,7 +726,13 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId,
varp->protect(false); // Do not protect as we still have references in AstText varp->protect(false); // Do not protect as we still have references in AstText
modp->addStmtsp(varp); modp->addStmtsp(varp);
// For now, reference is still via text bashing // For now, reference is still via text bashing
if (v3Global.opt.profExec()) {
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitBegin();\n");
}
addStrStmt("vlSelf->" + name + +".waitUntilUpstreamDone(even_cycle);\n"); addStrStmt("vlSelf->" + name + +".waitUntilUpstreamDone(even_cycle);\n");
if (v3Global.opt.profExec()) {
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitEnd();\n");
}
} }
if (v3Global.opt.profPgo()) { if (v3Global.opt.profPgo()) {
@ -878,8 +884,14 @@ void addThreadStartToExecGraph(AstExecGraph* const execGraphp,
} }
V3Stats::addStatSum("Optimizations, Thread schedule total tasks", i); V3Stats::addStatSum("Optimizations, Thread schedule total tasks", i);
if (v3Global.opt.profExec()) {
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitBegin();\n");
}
addStrStmt("vlSelf->__Vm_mtaskstate_final__" + std::to_string(scheduleId) + tag addStrStmt("vlSelf->__Vm_mtaskstate_final__" + std::to_string(scheduleId) + tag
+ ".waitUntilUpstreamDone(vlSymsp->__Vm_even_cycle__" + tag + ");\n"); + ".waitUntilUpstreamDone(vlSymsp->__Vm_even_cycle__" + tag + ");\n");
if (v3Global.opt.profExec()) {
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitEnd();\n");
}
// Free all assigned worker indices in this section // Free all assigned worker indices in this section
if (!v3Global.opt.hierBlocks().empty() && last > 0) { if (!v3Global.opt.hierBlocks().empty() && last > 0) {
addStrStmt("vlSymsp->__Vm_threadPoolp->freeWorkerIndexes(indexes);\n"); addStrStmt("vlSymsp->__Vm_threadPoolp->freeWorkerIndexes(indexes);\n");

View File

@ -910,6 +910,8 @@ VLPROFEXEC EXEC_GRAPH_END 12180
VLPROFEXEC EXEC_GRAPH_BEGIN 14000 VLPROFEXEC EXEC_GRAPH_BEGIN 14000
VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19 VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19
VLPROFEXEC MTASK_END 15820 id 6 predictCost 30 VLPROFEXEC MTASK_END 15820 id 6 predictCost 30
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 20000 cpu 19
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 21000 cpu 19
VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19 VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19
VLPROFEXEC MTASK_END 21875 id 10 predictCost 30 VLPROFEXEC MTASK_END 21875 id 10 predictCost 30
VLPROFEXEC EXEC_GRAPH_END 22085 VLPROFEXEC EXEC_GRAPH_END 22085
@ -934,4 +936,6 @@ VLPROFEXEC MTASK_BEGIN 20650 id 9 predictStart 167 cpu 10
VLPROFEXEC MTASK_END 20720 id 9 predictCost 30 VLPROFEXEC MTASK_END 20720 id 9 predictCost 30
VLPROFEXEC MTASK_BEGIN 21140 id 11 predictStart 197 cpu 10 VLPROFEXEC MTASK_BEGIN 21140 id 11 predictStart 197 cpu 10
VLPROFEXEC MTASK_END 21245 id 11 predictCost 30 VLPROFEXEC MTASK_END 21245 id 11 predictCost 30
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 22000 cpu 10
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 23000 cpu 10
VLPROF stat ticks 23415 VLPROF stat ticks 23415

View File

@ -7,6 +7,7 @@ Argument settings:
Summary: Summary:
Total elapsed time = 23415 rdtsc ticks Total elapsed time = 23415 rdtsc ticks
Parallelized code = 82.51% of elapsed time Parallelized code = 82.51% of elapsed time
Waiting time = 8.54% of elapsed time
Total threads = 2 Total threads = 2
Total CPUs used = 2 Total CPUs used = 2
Total mtasks = 7 Total mtasks = 7

View File

@ -5,18 +5,20 @@ $timescale 1ns $end
$scope module measured $end $scope module measured $end
$var wire 32 v5 cpu10_mtask [31:0] $end $var wire 32 v5 cpu10_mtask [31:0] $end
$var wire 32 v4 cpu10_thread [31:0] $end $var wire 32 v4 cpu10_thread [31:0] $end
$var wire 32 v7 cpu10_waiting [31:0] $end
$var wire 32 v2 cpu19_mtask [31:0] $end $var wire 32 v2 cpu19_mtask [31:0] $end
$var wire 32 v1 cpu19_thread [31:0] $end $var wire 32 v1 cpu19_thread [31:0] $end
$var wire 32 v6 cpu19_waiting [31:0] $end
$var wire 32 v0 t0_mtask [31:0] $end $var wire 32 v0 t0_mtask [31:0] $end
$var wire 32 v3 t1_mtask [31:0] $end $var wire 32 v3 t1_mtask [31:0] $end
$upscope $end $upscope $end
$scope module predicted $end $scope module predicted $end
$var wire 32 v6 t0_mtask [31:0] $end $var wire 32 v8 t0_mtask [31:0] $end
$var wire 32 v7 t1_mtask [31:0] $end $var wire 32 v9 t1_mtask [31:0] $end
$upscope $end $upscope $end
$scope module stats $end $scope module stats $end
$var wire 32 v8 measured_parallelism [31:0] $end $var wire 32 va measured_parallelism [31:0] $end
$var wire 32 v9 predicted_parallelism [31:0] $end $var wire 32 vb predicted_parallelism [31:0] $end
$upscope $end $upscope $end
$upscope $end $upscope $end
$enddefinitions $end $enddefinitions $end
@ -30,193 +32,203 @@ bz v4
bz v5 bz v5
bz v6 bz v6
bz v7 bz v7
b0 v8 bz v8
b0 v9 bz v9
b0 va
b0 vb
#2695 #2695
b110 v0 b110 v0
b0 v1 b0 v1
b110 v2 b110 v2
b110 v6 b110 v8
b101 v7 b101 v9
b1 v8 b1 va
b10 v9 b10 vb
#2905 #2905
bz v0 bz v0
bz v1 bz v1
bz v2 bz v2
b0 v8 b0 va
#3800 #3800
bz v6 bz v8
b111 v7 b111 v9
b1 v9 b1 vb
#4906 #4906
b1000 v7 b1000 v9
b1 v9 b1 vb
#5495 #5495
b101 v3 b101 v3
b1 v4 b1 v4
b101 v5 b101 v5
b1 v8 b1 va
#6090 #6090
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b0 v8 b0 va
#6300 #6300
b111 v3 b111 v3
b1 v4 b1 v4
b111 v5 b111 v5
b1 v8 b1 va
#6895 #6895
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b0 v8 b0 va
#7490 #7490
b1000 v3 b1000 v3
b1 v4 b1 v4
b1000 v5 b1000 v5
b1 v8 b1 va
#8540 #8540
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b0 v8 b0 va
#8848 #8848
b1001 v7 b1001 v9
b1 v9 b1 vb
#9135 #9135
b1001 v3 b1001 v3
b1 v4 b1 v4
b1001 v5 b1001 v5
b1 v8 b1 va
#9695 #9695
b1010 v0 b1010 v0
b0 v1 b0 v1
b1010 v2 b1010 v2
b10 v8 b10 va
#9730 #9730
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b1 v8 b1 va
#9870 #9870
bz v0 bz v0
bz v1 bz v1
bz v2 bz v2
b0 v8 b0 va
#9917 #9917
b1010 v6 b1010 v8
b10 v9 b10 vb
#9954 #9954
b1011 v7 b1011 v9
b10 v9 b10 vb
#10255 #10255
b1011 v3 b1011 v3
b1 v4 b1 v4
b1011 v5 b1011 v5
b1 v8 b1 va
#11023 #11023
bz v6 bz v8
b1 v9 b1 vb
#11060 #11060
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
bz v7 bz v9
b0 v8 b0 va
b0 v9 b0 vb
#15610 #15610
b110 v0 b110 v0
b0 v1 b0 v1
b110 v2 b110 v2
b110 v6 b110 v8
b101 v7 b101 v9
b1 v8 b1 va
b10 v9 b10 vb
#15820 #15820
bz v0 bz v0
bz v1 bz v1
bz v2 bz v2
b0 v8 b0 va
#16437 #16437
bz v6 bz v8
b111 v7 b111 v9
b1 v9 b1 vb
#17265 #17265
b1000 v7 b1000 v9
b1 v9 b1 vb
#18375 #18375
b101 v3 b101 v3
b1 v4 b1 v4
b101 v5 b101 v5
b1 v8 b1 va
#18970 #18970
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b0 v8 b0 va
#19145 #19145
b111 v3 b111 v3
b1 v4 b1 v4
b111 v5 b111 v5
b1 v8 b1 va
#19320 #19320
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b0 v8 b0 va
#19670 #19670
b1000 v3 b1000 v3
b1 v4 b1 v4
b1000 v5 b1000 v5
b1 v8 b1 va
#19810 #19810
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b0 v8 b0 va
#20000
bx v6
#20219 #20219
b1001 v7 b1001 v9
b1 v9 b1 vb
#20650 #20650
b1001 v3 b1001 v3
b1 v4 b1 v4
b1001 v5 b1001 v5
b1 v8 b1 va
#20720 #20720
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b0 v8 b0 va
#21000
bz v6
#21019 #21019
b1010 v6 b1010 v8
b10 v9 b10 vb
#21047 #21047
b1011 v7 b1011 v9
b10 v9 b10 vb
#21140 #21140
b1011 v3 b1011 v3
b1 v4 b1 v4
b1011 v5 b1011 v5
b1 v8 b1 va
#21245 #21245
bz v3 bz v3
bz v4 bz v4
bz v5 bz v5
b0 v8 b0 va
#21700 #21700
b1010 v0 b1010 v0
b0 v1 b0 v1
b1010 v2 b1010 v2
b1 v8 b1 va
#21847 #21847
bz v6 bz v8
b1 v9 b1 vb
#21875 #21875
bz v0 bz v0
bz v1 bz v1
bz v2 bz v2
bz v9
b0 va
b0 vb
#22000
bx v7
#23000
bz v7 bz v7
b0 v8
b0 v9

View File

@ -50,6 +50,8 @@ VLPROFEXEC MTASK_BEGIN 90465 id 85 predictStart 14315 cpu 2
VLPROFEXEC MTASK_END 155034 id 85 predictCost 30533 VLPROFEXEC MTASK_END 155034 id 85 predictCost 30533
VLPROFEXEC MTASK_BEGIN 156555 id 79 predictStart 44848 cpu 2 VLPROFEXEC MTASK_BEGIN 156555 id 79 predictStart 44848 cpu 2
VLPROFEXEC MTASK_END 294309 id 79 predictCost 48001 VLPROFEXEC MTASK_END 294309 id 79 predictCost 48001
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 295000 cpu 2
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 296000 cpu 2
VLPROFEXEC EXEC_GRAPH_END 300000 VLPROFEXEC EXEC_GRAPH_END 300000
VLPROFTHREAD 1 VLPROFTHREAD 1
VLPROFEXEC MTASK_BEGIN 77352 id 90 predictStart 14315 cpu 3 VLPROFEXEC MTASK_BEGIN 77352 id 90 predictStart 14315 cpu 3
@ -58,4 +60,6 @@ VLPROFEXEC MTASK_BEGIN 79799 id 81 predictStart 35907 cpu 3
VLPROFEXEC MTASK_END 80667 id 81 predictCost 29215 VLPROFEXEC MTASK_END 80667 id 81 predictCost 29215
VLPROFEXEC MTASK_BEGIN 81746 id 87 predictStart 65147 cpu 3 VLPROFEXEC MTASK_BEGIN 81746 id 87 predictStart 65147 cpu 3
VLPROFEXEC MTASK_END 82633 id 87 predictCost 33809 VLPROFEXEC MTASK_END 82633 id 87 predictCost 33809
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 83000 cpu 3
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 84000 cpu 3
VLPROF stat ticks 300000 VLPROF stat ticks 300000

View File

@ -7,6 +7,7 @@ Argument settings:
Summary: Summary:
Total elapsed time = 300000 rdtsc ticks Total elapsed time = 300000 rdtsc ticks
Parallelized code = 80.49% of elapsed time Parallelized code = 80.49% of elapsed time
Waiting time = 0.67% of elapsed time
Total threads = 2 Total threads = 2
Total CPUs used = 2 Total CPUs used = 2
Total mtasks = 5 Total mtasks = 5

View File

@ -13,6 +13,8 @@ VLPROFEXEC EXEC_GRAPH_END 12180
VLPROFEXEC EXEC_GRAPH_BEGIN 14000 VLPROFEXEC EXEC_GRAPH_BEGIN 14000
VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19 VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19
VLPROFEXEC MTASK_END 15820 id 6 predictCost 30 VLPROFEXEC MTASK_END 15820 id 6 predictCost 30
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 16000 cpu 19
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 17000 cpu 19
VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19 VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19
VLPROFEXEC MTASK_END 21875 id 10 predictCost 30 VLPROFEXEC MTASK_END 21875 id 10 predictCost 30
VLPROFEXEC EXEC_GRAPH_END 22085 VLPROFEXEC EXEC_GRAPH_END 22085
@ -27,6 +29,8 @@ VLPROFEXEC MTASK_BEGIN 9135 id 9 predictStart 167 cpu 10
VLPROFEXEC MTASK_END 9730 id 9 predictCost 30 VLPROFEXEC MTASK_END 9730 id 9 predictCost 30
VLPROFEXEC MTASK_BEGIN 10255 id 11 predictStart 197 cpu 10 VLPROFEXEC MTASK_BEGIN 10255 id 11 predictStart 197 cpu 10
VLPROFEXEC MTASK_END 11060 id 11 predictCost 30 VLPROFEXEC MTASK_END 11060 id 11 predictCost 30
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 17000 cpu 10
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 18000 cpu 10
VLPROFEXEC MTASK_BEGIN 18375 id 5 predictStart 0 cpu 10 VLPROFEXEC MTASK_BEGIN 18375 id 5 predictStart 0 cpu 10
VLPROFEXEC MTASK_END 18970 id 5 predictCost 30 VLPROFEXEC MTASK_END 18970 id 5 predictCost 30
VLPROFEXEC MTASK_BEGIN 19145 id 7 predictStart 30 cpu 10 VLPROFEXEC MTASK_BEGIN 19145 id 7 predictStart 30 cpu 10

View File

@ -7,6 +7,7 @@ Argument settings:
Summary: Summary:
Total elapsed time = 23415 rdtsc ticks Total elapsed time = 23415 rdtsc ticks
Parallelized code = 82.51% of elapsed time Parallelized code = 82.51% of elapsed time
Waiting time = 8.54% of elapsed time
Total threads = 2 Total threads = 2
Total CPUs used = 2 Total CPUs used = 2
Total mtasks = 7 Total mtasks = 7