Add visualization of multi-threaded waiting time with verilator_gantt (#5929)
This commit is contained in:
parent
fc8e1b5a2e
commit
84f26ed3d7
|
|
@ -26,7 +26,9 @@ Global = {
|
||||||
}
|
}
|
||||||
ElapsedTime = None # total elapsed time
|
ElapsedTime = None # total elapsed time
|
||||||
ExecGraphTime = 0 # total elapsed time executing an exec graph
|
ExecGraphTime = 0 # total elapsed time executing an exec graph
|
||||||
|
WaitingTime = 0 # total elapsed time waiting for mtasks
|
||||||
ExecGraphIntervals = [] # list of (start, end) pairs
|
ExecGraphIntervals = [] # list of (start, end) pairs
|
||||||
|
ThreadScheduleWaitIntervals = [] # list of (start, tick, ecpu) pairs
|
||||||
|
|
||||||
######################################################################
|
######################################################################
|
||||||
|
|
||||||
|
|
@ -37,6 +39,7 @@ def read_data(filename):
|
||||||
re_record = re.compile(r'^VLPROFEXEC (\S+) (\d+)(.*)$')
|
re_record = re.compile(r'^VLPROFEXEC (\S+) (\d+)(.*)$')
|
||||||
re_payload_mtaskBegin = re.compile(r'id (\d+) predictStart (\d+) cpu (\d+)')
|
re_payload_mtaskBegin = re.compile(r'id (\d+) predictStart (\d+) cpu (\d+)')
|
||||||
re_payload_mtaskEnd = re.compile(r'id (\d+) predictCost (\d+)')
|
re_payload_mtaskEnd = re.compile(r'id (\d+) predictCost (\d+)')
|
||||||
|
re_payload_wait = re.compile(r'cpu (\d+)')
|
||||||
|
|
||||||
re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.]*)\s*')
|
re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.]*)\s*')
|
||||||
re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.]*)\s*$')
|
re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.]*)\s*$')
|
||||||
|
|
@ -49,9 +52,11 @@ def read_data(filename):
|
||||||
|
|
||||||
global LongestVcdStrValueLength
|
global LongestVcdStrValueLength
|
||||||
global ExecGraphTime
|
global ExecGraphTime
|
||||||
|
global WaitingTime
|
||||||
|
|
||||||
ExecGraphStack = []
|
ExecGraphStack = []
|
||||||
SectionStack = []
|
SectionStack = []
|
||||||
|
ThreadScheduleWait = collections.defaultdict(list)
|
||||||
mTaskThread = {}
|
mTaskThread = {}
|
||||||
|
|
||||||
for line in fh:
|
for line in fh:
|
||||||
|
|
@ -96,6 +101,14 @@ def read_data(filename):
|
||||||
Mtasks[mtask]['elapsed'] += tick - begin
|
Mtasks[mtask]['elapsed'] += tick - begin
|
||||||
Mtasks[mtask]['predict_cost'] = predict_cost
|
Mtasks[mtask]['predict_cost'] = predict_cost
|
||||||
Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], tick)
|
Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], tick)
|
||||||
|
elif kind == "THREAD_SCHEDULE_WAIT_BEGIN":
|
||||||
|
ecpu = int(re_payload_wait.match(payload).groups()[0])
|
||||||
|
ThreadScheduleWait[ecpu].append(tick)
|
||||||
|
elif kind == "THREAD_SCHEDULE_WAIT_END":
|
||||||
|
ecpu = int(re_payload_wait.match(payload).groups()[0])
|
||||||
|
start = ThreadScheduleWait[ecpu].pop()
|
||||||
|
WaitingTime += tick - start
|
||||||
|
ThreadScheduleWaitIntervals.append((start, tick, ecpu))
|
||||||
elif kind == "EXEC_GRAPH_BEGIN":
|
elif kind == "EXEC_GRAPH_BEGIN":
|
||||||
ExecGraphStack.append(tick)
|
ExecGraphStack.append(tick)
|
||||||
elif kind == "EXEC_GRAPH_END":
|
elif kind == "EXEC_GRAPH_END":
|
||||||
|
|
@ -169,6 +182,7 @@ def report():
|
||||||
print("\nSummary:")
|
print("\nSummary:")
|
||||||
print(" Total elapsed time = {} rdtsc ticks".format(ElapsedTime))
|
print(" Total elapsed time = {} rdtsc ticks".format(ElapsedTime))
|
||||||
print(" Parallelized code = {:.2%} of elapsed time".format(ExecGraphTime / ElapsedTime))
|
print(" Parallelized code = {:.2%} of elapsed time".format(ExecGraphTime / ElapsedTime))
|
||||||
|
print(" Waiting time = {:.2%} of elapsed time".format(WaitingTime / ElapsedTime))
|
||||||
print(" Total threads = %d" % nthreads)
|
print(" Total threads = %d" % nthreads)
|
||||||
print(" Total CPUs used = %d" % ncpus)
|
print(" Total CPUs used = %d" % ncpus)
|
||||||
print(" Total mtasks = %d" % len(Mtasks))
|
print(" Total mtasks = %d" % len(Mtasks))
|
||||||
|
|
@ -422,6 +436,9 @@ def write_vcd(filename):
|
||||||
val = buf.ljust(LongestVcdStrValueLength * 8, "0")
|
val = buf.ljust(LongestVcdStrValueLength * 8, "0")
|
||||||
values[time][code] = val
|
values[time][code] = val
|
||||||
|
|
||||||
|
def addXValue(code, time):
|
||||||
|
values[time][code] = 'x'
|
||||||
|
|
||||||
# Measured graph
|
# Measured graph
|
||||||
for thread in sorted(Threads):
|
for thread in sorted(Threads):
|
||||||
mcode = getCode(32, 'measured', 't%d_mtask' % thread)
|
mcode = getCode(32, 'measured', 't%d_mtask' % thread)
|
||||||
|
|
@ -446,6 +463,12 @@ def write_vcd(filename):
|
||||||
tStart = sorted(_['start'] for records in Threads.values() for _ in records)
|
tStart = sorted(_['start'] for records in Threads.values() for _ in records)
|
||||||
tEnd = sorted(_['end'] for records in Threads.values() for _ in records)
|
tEnd = sorted(_['end'] for records in Threads.values() for _ in records)
|
||||||
|
|
||||||
|
# Measured waiting time
|
||||||
|
for (start, end, cpu) in ThreadScheduleWaitIntervals:
|
||||||
|
mcode = getCode(32, 'measured', 'cpu%d_waiting' % cpu)
|
||||||
|
addXValue(mcode, start)
|
||||||
|
addValue(mcode, end, None)
|
||||||
|
|
||||||
# Predicted graph
|
# Predicted graph
|
||||||
for start, end in ExecGraphIntervals:
|
for start, end in ExecGraphIntervals:
|
||||||
# Find the earliest MTask start after the start point, and the
|
# Find the earliest MTask start after the start point, and the
|
||||||
|
|
@ -532,6 +555,8 @@ def write_vcd(filename):
|
||||||
value = values[time][code]
|
value = values[time][code]
|
||||||
if value is None:
|
if value is None:
|
||||||
fh.write("bz v%x\n" % code)
|
fh.write("bz v%x\n" % code)
|
||||||
|
elif value == 'x':
|
||||||
|
fh.write("bx v%x\n" % code)
|
||||||
elif isinstance(value, str):
|
elif isinstance(value, str):
|
||||||
fh.write("b%s v%x\n" % (value, code))
|
fh.write("b%s v%x\n" % (value, code))
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,10 @@ predicted_parallelism
|
||||||
cpu#_thread
|
cpu#_thread
|
||||||
For the given CPU number, the thread number measured to be executing.
|
For the given CPU number, the thread number measured to be executing.
|
||||||
|
|
||||||
|
cpu#_waiting
|
||||||
|
For the given CPU number, aggregated waiting time for mtask dependencies.
|
||||||
|
Visualized as `X` values.
|
||||||
|
|
||||||
mtask#_cpu
|
mtask#_cpu
|
||||||
For the given mtask id, the CPU it was measured to execute on.
|
For the given mtask id, the CPU it was measured to execute on.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -199,6 +199,12 @@ void VlExecutionProfiler::dump(const char* filenamep, uint64_t tickEnd)
|
||||||
fprintf(fp, " id %u predictCost %u\n", payload.m_id, payload.m_predictCost);
|
fprintf(fp, " id %u predictCost %u\n", payload.m_id, payload.m_predictCost);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case VlExecutionRecord::Type::THREAD_SCHEDULE_WAIT_BEGIN:
|
||||||
|
case VlExecutionRecord::Type::THREAD_SCHEDULE_WAIT_END: {
|
||||||
|
const auto& payload = er.m_payload.threadScheduleWait;
|
||||||
|
fprintf(fp, " cpu %u\n", payload.m_cpu);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case VlExecutionRecord::Type::SECTION_PUSH: {
|
case VlExecutionRecord::Type::SECTION_PUSH: {
|
||||||
const auto& payload = er.m_payload.sectionPush;
|
const auto& payload = er.m_payload.sectionPush;
|
||||||
fprintf(fp, " %s\n", payload.m_name);
|
fprintf(fp, " %s\n", payload.m_name);
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,8 @@ VL_ATTR_ALWINLINE QData VL_CPU_TICK() {
|
||||||
_VL_FOREACH_APPLY(macro, SECTION_POP) \
|
_VL_FOREACH_APPLY(macro, SECTION_POP) \
|
||||||
_VL_FOREACH_APPLY(macro, MTASK_BEGIN) \
|
_VL_FOREACH_APPLY(macro, MTASK_BEGIN) \
|
||||||
_VL_FOREACH_APPLY(macro, MTASK_END) \
|
_VL_FOREACH_APPLY(macro, MTASK_END) \
|
||||||
|
_VL_FOREACH_APPLY(macro, THREAD_SCHEDULE_WAIT_BEGIN) \
|
||||||
|
_VL_FOREACH_APPLY(macro, THREAD_SCHEDULE_WAIT_END) \
|
||||||
_VL_FOREACH_APPLY(macro, EXEC_GRAPH_BEGIN) \
|
_VL_FOREACH_APPLY(macro, EXEC_GRAPH_BEGIN) \
|
||||||
_VL_FOREACH_APPLY(macro, EXEC_GRAPH_END)
|
_VL_FOREACH_APPLY(macro, EXEC_GRAPH_END)
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
@ -95,6 +97,9 @@ class VlExecutionRecord final {
|
||||||
uint32_t m_id; // MTask id
|
uint32_t m_id; // MTask id
|
||||||
uint32_t m_predictCost; // How long scheduler predicted would take
|
uint32_t m_predictCost; // How long scheduler predicted would take
|
||||||
} mtaskEnd;
|
} mtaskEnd;
|
||||||
|
struct {
|
||||||
|
uint32_t m_cpu; // Executing CPU id
|
||||||
|
} threadScheduleWait;
|
||||||
};
|
};
|
||||||
|
|
||||||
// STATE
|
// STATE
|
||||||
|
|
@ -126,6 +131,14 @@ public:
|
||||||
m_payload.mtaskEnd.m_predictCost = predictCost;
|
m_payload.mtaskEnd.m_predictCost = predictCost;
|
||||||
m_type = Type::MTASK_END;
|
m_type = Type::MTASK_END;
|
||||||
}
|
}
|
||||||
|
void threadScheduleWaitBegin() {
|
||||||
|
m_payload.threadScheduleWait.m_cpu = VlOs::getcpu();
|
||||||
|
m_type = Type::THREAD_SCHEDULE_WAIT_BEGIN;
|
||||||
|
}
|
||||||
|
void threadScheduleWaitEnd() {
|
||||||
|
m_payload.threadScheduleWait.m_cpu = VlOs::getcpu();
|
||||||
|
m_type = Type::THREAD_SCHEDULE_WAIT_END;
|
||||||
|
}
|
||||||
void execGraphBegin() { m_type = Type::EXEC_GRAPH_BEGIN; }
|
void execGraphBegin() { m_type = Type::EXEC_GRAPH_BEGIN; }
|
||||||
void execGraphEnd() { m_type = Type::EXEC_GRAPH_END; }
|
void execGraphEnd() { m_type = Type::EXEC_GRAPH_END; }
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -726,7 +726,13 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId,
|
||||||
varp->protect(false); // Do not protect as we still have references in AstText
|
varp->protect(false); // Do not protect as we still have references in AstText
|
||||||
modp->addStmtsp(varp);
|
modp->addStmtsp(varp);
|
||||||
// For now, reference is still via text bashing
|
// For now, reference is still via text bashing
|
||||||
|
if (v3Global.opt.profExec()) {
|
||||||
|
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitBegin();\n");
|
||||||
|
}
|
||||||
addStrStmt("vlSelf->" + name + +".waitUntilUpstreamDone(even_cycle);\n");
|
addStrStmt("vlSelf->" + name + +".waitUntilUpstreamDone(even_cycle);\n");
|
||||||
|
if (v3Global.opt.profExec()) {
|
||||||
|
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitEnd();\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (v3Global.opt.profPgo()) {
|
if (v3Global.opt.profPgo()) {
|
||||||
|
|
@ -878,8 +884,14 @@ void addThreadStartToExecGraph(AstExecGraph* const execGraphp,
|
||||||
}
|
}
|
||||||
V3Stats::addStatSum("Optimizations, Thread schedule total tasks", i);
|
V3Stats::addStatSum("Optimizations, Thread schedule total tasks", i);
|
||||||
|
|
||||||
|
if (v3Global.opt.profExec()) {
|
||||||
|
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitBegin();\n");
|
||||||
|
}
|
||||||
addStrStmt("vlSelf->__Vm_mtaskstate_final__" + std::to_string(scheduleId) + tag
|
addStrStmt("vlSelf->__Vm_mtaskstate_final__" + std::to_string(scheduleId) + tag
|
||||||
+ ".waitUntilUpstreamDone(vlSymsp->__Vm_even_cycle__" + tag + ");\n");
|
+ ".waitUntilUpstreamDone(vlSymsp->__Vm_even_cycle__" + tag + ");\n");
|
||||||
|
if (v3Global.opt.profExec()) {
|
||||||
|
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitEnd();\n");
|
||||||
|
}
|
||||||
// Free all assigned worker indices in this section
|
// Free all assigned worker indices in this section
|
||||||
if (!v3Global.opt.hierBlocks().empty() && last > 0) {
|
if (!v3Global.opt.hierBlocks().empty() && last > 0) {
|
||||||
addStrStmt("vlSymsp->__Vm_threadPoolp->freeWorkerIndexes(indexes);\n");
|
addStrStmt("vlSymsp->__Vm_threadPoolp->freeWorkerIndexes(indexes);\n");
|
||||||
|
|
|
||||||
|
|
@ -910,6 +910,8 @@ VLPROFEXEC EXEC_GRAPH_END 12180
|
||||||
VLPROFEXEC EXEC_GRAPH_BEGIN 14000
|
VLPROFEXEC EXEC_GRAPH_BEGIN 14000
|
||||||
VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19
|
VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19
|
||||||
VLPROFEXEC MTASK_END 15820 id 6 predictCost 30
|
VLPROFEXEC MTASK_END 15820 id 6 predictCost 30
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 20000 cpu 19
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 21000 cpu 19
|
||||||
VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19
|
VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19
|
||||||
VLPROFEXEC MTASK_END 21875 id 10 predictCost 30
|
VLPROFEXEC MTASK_END 21875 id 10 predictCost 30
|
||||||
VLPROFEXEC EXEC_GRAPH_END 22085
|
VLPROFEXEC EXEC_GRAPH_END 22085
|
||||||
|
|
@ -934,4 +936,6 @@ VLPROFEXEC MTASK_BEGIN 20650 id 9 predictStart 167 cpu 10
|
||||||
VLPROFEXEC MTASK_END 20720 id 9 predictCost 30
|
VLPROFEXEC MTASK_END 20720 id 9 predictCost 30
|
||||||
VLPROFEXEC MTASK_BEGIN 21140 id 11 predictStart 197 cpu 10
|
VLPROFEXEC MTASK_BEGIN 21140 id 11 predictStart 197 cpu 10
|
||||||
VLPROFEXEC MTASK_END 21245 id 11 predictCost 30
|
VLPROFEXEC MTASK_END 21245 id 11 predictCost 30
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 22000 cpu 10
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 23000 cpu 10
|
||||||
VLPROF stat ticks 23415
|
VLPROF stat ticks 23415
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ Argument settings:
|
||||||
Summary:
|
Summary:
|
||||||
Total elapsed time = 23415 rdtsc ticks
|
Total elapsed time = 23415 rdtsc ticks
|
||||||
Parallelized code = 82.51% of elapsed time
|
Parallelized code = 82.51% of elapsed time
|
||||||
|
Waiting time = 8.54% of elapsed time
|
||||||
Total threads = 2
|
Total threads = 2
|
||||||
Total CPUs used = 2
|
Total CPUs used = 2
|
||||||
Total mtasks = 7
|
Total mtasks = 7
|
||||||
|
|
|
||||||
|
|
@ -5,18 +5,20 @@ $timescale 1ns $end
|
||||||
$scope module measured $end
|
$scope module measured $end
|
||||||
$var wire 32 v5 cpu10_mtask [31:0] $end
|
$var wire 32 v5 cpu10_mtask [31:0] $end
|
||||||
$var wire 32 v4 cpu10_thread [31:0] $end
|
$var wire 32 v4 cpu10_thread [31:0] $end
|
||||||
|
$var wire 32 v7 cpu10_waiting [31:0] $end
|
||||||
$var wire 32 v2 cpu19_mtask [31:0] $end
|
$var wire 32 v2 cpu19_mtask [31:0] $end
|
||||||
$var wire 32 v1 cpu19_thread [31:0] $end
|
$var wire 32 v1 cpu19_thread [31:0] $end
|
||||||
|
$var wire 32 v6 cpu19_waiting [31:0] $end
|
||||||
$var wire 32 v0 t0_mtask [31:0] $end
|
$var wire 32 v0 t0_mtask [31:0] $end
|
||||||
$var wire 32 v3 t1_mtask [31:0] $end
|
$var wire 32 v3 t1_mtask [31:0] $end
|
||||||
$upscope $end
|
$upscope $end
|
||||||
$scope module predicted $end
|
$scope module predicted $end
|
||||||
$var wire 32 v6 t0_mtask [31:0] $end
|
$var wire 32 v8 t0_mtask [31:0] $end
|
||||||
$var wire 32 v7 t1_mtask [31:0] $end
|
$var wire 32 v9 t1_mtask [31:0] $end
|
||||||
$upscope $end
|
$upscope $end
|
||||||
$scope module stats $end
|
$scope module stats $end
|
||||||
$var wire 32 v8 measured_parallelism [31:0] $end
|
$var wire 32 va measured_parallelism [31:0] $end
|
||||||
$var wire 32 v9 predicted_parallelism [31:0] $end
|
$var wire 32 vb predicted_parallelism [31:0] $end
|
||||||
$upscope $end
|
$upscope $end
|
||||||
$upscope $end
|
$upscope $end
|
||||||
$enddefinitions $end
|
$enddefinitions $end
|
||||||
|
|
@ -30,193 +32,203 @@ bz v4
|
||||||
bz v5
|
bz v5
|
||||||
bz v6
|
bz v6
|
||||||
bz v7
|
bz v7
|
||||||
b0 v8
|
bz v8
|
||||||
b0 v9
|
bz v9
|
||||||
|
b0 va
|
||||||
|
b0 vb
|
||||||
#2695
|
#2695
|
||||||
b110 v0
|
b110 v0
|
||||||
b0 v1
|
b0 v1
|
||||||
b110 v2
|
b110 v2
|
||||||
b110 v6
|
b110 v8
|
||||||
b101 v7
|
b101 v9
|
||||||
b1 v8
|
b1 va
|
||||||
b10 v9
|
b10 vb
|
||||||
#2905
|
#2905
|
||||||
bz v0
|
bz v0
|
||||||
bz v1
|
bz v1
|
||||||
bz v2
|
bz v2
|
||||||
b0 v8
|
b0 va
|
||||||
#3800
|
#3800
|
||||||
bz v6
|
bz v8
|
||||||
b111 v7
|
b111 v9
|
||||||
b1 v9
|
b1 vb
|
||||||
#4906
|
#4906
|
||||||
b1000 v7
|
b1000 v9
|
||||||
b1 v9
|
b1 vb
|
||||||
#5495
|
#5495
|
||||||
b101 v3
|
b101 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b101 v5
|
b101 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#6090
|
#6090
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b0 v8
|
b0 va
|
||||||
#6300
|
#6300
|
||||||
b111 v3
|
b111 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b111 v5
|
b111 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#6895
|
#6895
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b0 v8
|
b0 va
|
||||||
#7490
|
#7490
|
||||||
b1000 v3
|
b1000 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b1000 v5
|
b1000 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#8540
|
#8540
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b0 v8
|
b0 va
|
||||||
#8848
|
#8848
|
||||||
b1001 v7
|
b1001 v9
|
||||||
b1 v9
|
b1 vb
|
||||||
#9135
|
#9135
|
||||||
b1001 v3
|
b1001 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b1001 v5
|
b1001 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#9695
|
#9695
|
||||||
b1010 v0
|
b1010 v0
|
||||||
b0 v1
|
b0 v1
|
||||||
b1010 v2
|
b1010 v2
|
||||||
b10 v8
|
b10 va
|
||||||
#9730
|
#9730
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b1 v8
|
b1 va
|
||||||
#9870
|
#9870
|
||||||
bz v0
|
bz v0
|
||||||
bz v1
|
bz v1
|
||||||
bz v2
|
bz v2
|
||||||
b0 v8
|
b0 va
|
||||||
#9917
|
#9917
|
||||||
b1010 v6
|
b1010 v8
|
||||||
b10 v9
|
b10 vb
|
||||||
#9954
|
#9954
|
||||||
b1011 v7
|
b1011 v9
|
||||||
b10 v9
|
b10 vb
|
||||||
#10255
|
#10255
|
||||||
b1011 v3
|
b1011 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b1011 v5
|
b1011 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#11023
|
#11023
|
||||||
bz v6
|
bz v8
|
||||||
b1 v9
|
b1 vb
|
||||||
#11060
|
#11060
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
bz v7
|
bz v9
|
||||||
b0 v8
|
b0 va
|
||||||
b0 v9
|
b0 vb
|
||||||
#15610
|
#15610
|
||||||
b110 v0
|
b110 v0
|
||||||
b0 v1
|
b0 v1
|
||||||
b110 v2
|
b110 v2
|
||||||
b110 v6
|
b110 v8
|
||||||
b101 v7
|
b101 v9
|
||||||
b1 v8
|
b1 va
|
||||||
b10 v9
|
b10 vb
|
||||||
#15820
|
#15820
|
||||||
bz v0
|
bz v0
|
||||||
bz v1
|
bz v1
|
||||||
bz v2
|
bz v2
|
||||||
b0 v8
|
b0 va
|
||||||
#16437
|
#16437
|
||||||
bz v6
|
bz v8
|
||||||
b111 v7
|
b111 v9
|
||||||
b1 v9
|
b1 vb
|
||||||
#17265
|
#17265
|
||||||
b1000 v7
|
b1000 v9
|
||||||
b1 v9
|
b1 vb
|
||||||
#18375
|
#18375
|
||||||
b101 v3
|
b101 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b101 v5
|
b101 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#18970
|
#18970
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b0 v8
|
b0 va
|
||||||
#19145
|
#19145
|
||||||
b111 v3
|
b111 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b111 v5
|
b111 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#19320
|
#19320
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b0 v8
|
b0 va
|
||||||
#19670
|
#19670
|
||||||
b1000 v3
|
b1000 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b1000 v5
|
b1000 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#19810
|
#19810
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b0 v8
|
b0 va
|
||||||
|
#20000
|
||||||
|
bx v6
|
||||||
#20219
|
#20219
|
||||||
b1001 v7
|
b1001 v9
|
||||||
b1 v9
|
b1 vb
|
||||||
#20650
|
#20650
|
||||||
b1001 v3
|
b1001 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b1001 v5
|
b1001 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#20720
|
#20720
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b0 v8
|
b0 va
|
||||||
|
#21000
|
||||||
|
bz v6
|
||||||
#21019
|
#21019
|
||||||
b1010 v6
|
b1010 v8
|
||||||
b10 v9
|
b10 vb
|
||||||
#21047
|
#21047
|
||||||
b1011 v7
|
b1011 v9
|
||||||
b10 v9
|
b10 vb
|
||||||
#21140
|
#21140
|
||||||
b1011 v3
|
b1011 v3
|
||||||
b1 v4
|
b1 v4
|
||||||
b1011 v5
|
b1011 v5
|
||||||
b1 v8
|
b1 va
|
||||||
#21245
|
#21245
|
||||||
bz v3
|
bz v3
|
||||||
bz v4
|
bz v4
|
||||||
bz v5
|
bz v5
|
||||||
b0 v8
|
b0 va
|
||||||
#21700
|
#21700
|
||||||
b1010 v0
|
b1010 v0
|
||||||
b0 v1
|
b0 v1
|
||||||
b1010 v2
|
b1010 v2
|
||||||
b1 v8
|
b1 va
|
||||||
#21847
|
#21847
|
||||||
bz v6
|
bz v8
|
||||||
b1 v9
|
b1 vb
|
||||||
#21875
|
#21875
|
||||||
bz v0
|
bz v0
|
||||||
bz v1
|
bz v1
|
||||||
bz v2
|
bz v2
|
||||||
|
bz v9
|
||||||
|
b0 va
|
||||||
|
b0 vb
|
||||||
|
#22000
|
||||||
|
bx v7
|
||||||
|
#23000
|
||||||
bz v7
|
bz v7
|
||||||
b0 v8
|
|
||||||
b0 v9
|
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,8 @@ VLPROFEXEC MTASK_BEGIN 90465 id 85 predictStart 14315 cpu 2
|
||||||
VLPROFEXEC MTASK_END 155034 id 85 predictCost 30533
|
VLPROFEXEC MTASK_END 155034 id 85 predictCost 30533
|
||||||
VLPROFEXEC MTASK_BEGIN 156555 id 79 predictStart 44848 cpu 2
|
VLPROFEXEC MTASK_BEGIN 156555 id 79 predictStart 44848 cpu 2
|
||||||
VLPROFEXEC MTASK_END 294309 id 79 predictCost 48001
|
VLPROFEXEC MTASK_END 294309 id 79 predictCost 48001
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 295000 cpu 2
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 296000 cpu 2
|
||||||
VLPROFEXEC EXEC_GRAPH_END 300000
|
VLPROFEXEC EXEC_GRAPH_END 300000
|
||||||
VLPROFTHREAD 1
|
VLPROFTHREAD 1
|
||||||
VLPROFEXEC MTASK_BEGIN 77352 id 90 predictStart 14315 cpu 3
|
VLPROFEXEC MTASK_BEGIN 77352 id 90 predictStart 14315 cpu 3
|
||||||
|
|
@ -58,4 +60,6 @@ VLPROFEXEC MTASK_BEGIN 79799 id 81 predictStart 35907 cpu 3
|
||||||
VLPROFEXEC MTASK_END 80667 id 81 predictCost 29215
|
VLPROFEXEC MTASK_END 80667 id 81 predictCost 29215
|
||||||
VLPROFEXEC MTASK_BEGIN 81746 id 87 predictStart 65147 cpu 3
|
VLPROFEXEC MTASK_BEGIN 81746 id 87 predictStart 65147 cpu 3
|
||||||
VLPROFEXEC MTASK_END 82633 id 87 predictCost 33809
|
VLPROFEXEC MTASK_END 82633 id 87 predictCost 33809
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 83000 cpu 3
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 84000 cpu 3
|
||||||
VLPROF stat ticks 300000
|
VLPROF stat ticks 300000
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ Argument settings:
|
||||||
Summary:
|
Summary:
|
||||||
Total elapsed time = 300000 rdtsc ticks
|
Total elapsed time = 300000 rdtsc ticks
|
||||||
Parallelized code = 80.49% of elapsed time
|
Parallelized code = 80.49% of elapsed time
|
||||||
|
Waiting time = 0.67% of elapsed time
|
||||||
Total threads = 2
|
Total threads = 2
|
||||||
Total CPUs used = 2
|
Total CPUs used = 2
|
||||||
Total mtasks = 5
|
Total mtasks = 5
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,8 @@ VLPROFEXEC EXEC_GRAPH_END 12180
|
||||||
VLPROFEXEC EXEC_GRAPH_BEGIN 14000
|
VLPROFEXEC EXEC_GRAPH_BEGIN 14000
|
||||||
VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19
|
VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19
|
||||||
VLPROFEXEC MTASK_END 15820 id 6 predictCost 30
|
VLPROFEXEC MTASK_END 15820 id 6 predictCost 30
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 16000 cpu 19
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 17000 cpu 19
|
||||||
VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19
|
VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19
|
||||||
VLPROFEXEC MTASK_END 21875 id 10 predictCost 30
|
VLPROFEXEC MTASK_END 21875 id 10 predictCost 30
|
||||||
VLPROFEXEC EXEC_GRAPH_END 22085
|
VLPROFEXEC EXEC_GRAPH_END 22085
|
||||||
|
|
@ -27,6 +29,8 @@ VLPROFEXEC MTASK_BEGIN 9135 id 9 predictStart 167 cpu 10
|
||||||
VLPROFEXEC MTASK_END 9730 id 9 predictCost 30
|
VLPROFEXEC MTASK_END 9730 id 9 predictCost 30
|
||||||
VLPROFEXEC MTASK_BEGIN 10255 id 11 predictStart 197 cpu 10
|
VLPROFEXEC MTASK_BEGIN 10255 id 11 predictStart 197 cpu 10
|
||||||
VLPROFEXEC MTASK_END 11060 id 11 predictCost 30
|
VLPROFEXEC MTASK_END 11060 id 11 predictCost 30
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 17000 cpu 10
|
||||||
|
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 18000 cpu 10
|
||||||
VLPROFEXEC MTASK_BEGIN 18375 id 5 predictStart 0 cpu 10
|
VLPROFEXEC MTASK_BEGIN 18375 id 5 predictStart 0 cpu 10
|
||||||
VLPROFEXEC MTASK_END 18970 id 5 predictCost 30
|
VLPROFEXEC MTASK_END 18970 id 5 predictCost 30
|
||||||
VLPROFEXEC MTASK_BEGIN 19145 id 7 predictStart 30 cpu 10
|
VLPROFEXEC MTASK_BEGIN 19145 id 7 predictStart 30 cpu 10
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ Argument settings:
|
||||||
Summary:
|
Summary:
|
||||||
Total elapsed time = 23415 rdtsc ticks
|
Total elapsed time = 23415 rdtsc ticks
|
||||||
Parallelized code = 82.51% of elapsed time
|
Parallelized code = 82.51% of elapsed time
|
||||||
|
Waiting time = 8.54% of elapsed time
|
||||||
Total threads = 2
|
Total threads = 2
|
||||||
Total CPUs used = 2
|
Total CPUs used = 2
|
||||||
Total mtasks = 7
|
Total mtasks = 7
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue