Ignore dependencies from different hierarchical schedules (#5954)
Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com>
This commit is contained in:
parent
a80aa07de6
commit
5f4646f617
|
|
@ -66,6 +66,7 @@ class ThreadSchedule final {
|
|||
uint32_t m_id; // Unique ID of a schedule
|
||||
static uint32_t s_nextId; // Next ID number to use
|
||||
std::unordered_set<const ExecMTask*> mtasks; // Mtasks in this schedule
|
||||
uint32_t m_endTime = 0; // Latest task end time in this schedule
|
||||
|
||||
public:
|
||||
// CONSTANTS
|
||||
|
|
@ -196,6 +197,7 @@ public:
|
|||
uint32_t scheduleOn(const ExecMTask* mtaskp, uint32_t bestThreadId) {
|
||||
mtasks.emplace(mtaskp);
|
||||
const uint32_t bestEndTime = mtaskp->predictStart() + mtaskp->cost();
|
||||
m_endTime = std::max(m_endTime, bestEndTime);
|
||||
mtaskState[mtaskp].completionTime = bestEndTime;
|
||||
mtaskState[mtaskp].threadId = bestThreadId;
|
||||
|
||||
|
|
@ -208,6 +210,7 @@ public:
|
|||
return bestEndTime;
|
||||
}
|
||||
bool contains(const ExecMTask* mtaskp) const { return mtasks.count(mtaskp); }
|
||||
uint32_t endTime() const { return m_endTime; }
|
||||
};
|
||||
|
||||
uint32_t ThreadSchedule::s_nextId = 0;
|
||||
|
|
@ -256,6 +259,8 @@ class PackThreads final {
|
|||
// METHODS
|
||||
uint32_t completionTime(const ThreadSchedule& schedule, const ExecMTask* mtaskp,
|
||||
uint32_t threadId) {
|
||||
// Ignore tasks that were scheduled on a different schedule
|
||||
if (!schedule.contains(mtaskp)) return 0;
|
||||
const ThreadSchedule::MTaskState& state = schedule.mtaskState.at(mtaskp);
|
||||
UASSERT(state.threadId != ThreadSchedule::UNASSIGNED, "Mtask should have assigned thread");
|
||||
if (threadId == state.threadId) {
|
||||
|
|
@ -373,19 +378,24 @@ class PackThreads final {
|
|||
}
|
||||
}
|
||||
|
||||
const uint32_t endTime = schedule.endTime();
|
||||
|
||||
if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_DISCOVERED) {
|
||||
mode = SchedulingMode::WIDE_TASK_SCHEDULING;
|
||||
const uint32_t size = m_nThreads / maxThreadWorkers;
|
||||
UASSERT(size, "Thread pool size should be bigger than 0");
|
||||
// If no tasks were added to the normal thread schedule, remove it.
|
||||
if (schedule.mtaskState.empty()) result.erase(result.begin());
|
||||
// If no tasks were added to the normal thread schedule, clear it.
|
||||
if (schedule.mtaskState.empty()) result.clear();
|
||||
result.emplace_back(ThreadSchedule{size});
|
||||
std::fill(busyUntil.begin(), busyUntil.end(), endTime);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_SCHEDULING) {
|
||||
mode = SchedulingMode::SCHEDULING;
|
||||
if (!schedule.mtaskState.empty()) result.emplace_back(ThreadSchedule{m_nThreads});
|
||||
UASSERT(!schedule.mtaskState.empty(), "Mtask should be added");
|
||||
result.emplace_back(ThreadSchedule{m_nThreads});
|
||||
std::fill(busyUntil.begin(), busyUntil.end(), endTime);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -393,24 +403,7 @@ class PackThreads final {
|
|||
|
||||
bestMtaskp->predictStart(bestTime);
|
||||
const uint32_t bestEndTime = schedule.scheduleOn(bestMtaskp, bestThreadId);
|
||||
|
||||
// Populate busyUntil timestamps. For multi-worker tasks, set timestamps for
|
||||
// offsetted threads.
|
||||
if (mode != SchedulingMode::WIDE_TASK_SCHEDULING) {
|
||||
busyUntil[bestThreadId] = bestEndTime;
|
||||
} else {
|
||||
for (int i = 0; i < maxThreadWorkers; ++i) {
|
||||
const size_t threadId = bestThreadId + (i * schedule.threads.size());
|
||||
UASSERT(threadId < busyUntil.size(),
|
||||
"Incorrect busyUntil offset: threadId=" + cvtToStr(threadId)
|
||||
+ " bestThreadId=" + cvtToStr(bestThreadId) + " i=" + cvtToStr(i)
|
||||
+ " schedule-size=" + cvtToStr(schedule.threads.size())
|
||||
+ " maxThreadWorkers=" + cvtToStr(maxThreadWorkers));
|
||||
busyUntil[threadId] = bestEndTime;
|
||||
UINFO(6, "Will schedule " << bestMtaskp->name() << " onto thread " << threadId
|
||||
<< endl);
|
||||
}
|
||||
}
|
||||
busyUntil[bestThreadId] = bestEndTime;
|
||||
|
||||
// Update the ready list
|
||||
const size_t erased = readyMTasks.erase(bestMtaskp);
|
||||
|
|
@ -439,6 +432,10 @@ class PackThreads final {
|
|||
public:
|
||||
// SELF TEST
|
||||
static void selfTest() {
|
||||
selfTestHierFirst();
|
||||
selfTestNormalFirst();
|
||||
}
|
||||
static void selfTestNormalFirst() {
|
||||
V3Graph graph;
|
||||
FileLine* const flp = v3Global.rootp()->fileline();
|
||||
std::vector<AstMTaskBody*> mTaskBodyps;
|
||||
|
|
@ -466,18 +463,28 @@ public:
|
|||
t4->cost(100);
|
||||
t4->priority(100);
|
||||
t4->threads(3);
|
||||
ExecMTask* const t5 = new ExecMTask{&graph, makeBody()};
|
||||
t5->cost(100);
|
||||
t5->priority(100);
|
||||
ExecMTask* const t6 = new ExecMTask{&graph, makeBody()};
|
||||
t6->cost(100);
|
||||
t6->priority(100);
|
||||
|
||||
/*
|
||||
0
|
||||
/ \
|
||||
1 2
|
||||
/ \
|
||||
3 4
|
||||
3 4
|
||||
/ \
|
||||
5 6
|
||||
*/
|
||||
new V3GraphEdge{&graph, t0, t1, 1};
|
||||
new V3GraphEdge{&graph, t0, t2, 1};
|
||||
new V3GraphEdge{&graph, t2, t3, 1};
|
||||
new V3GraphEdge{&graph, t2, t4, 1};
|
||||
new V3GraphEdge{&graph, t3, t5, 1};
|
||||
new V3GraphEdge{&graph, t4, t6, 1};
|
||||
|
||||
constexpr uint32_t threads = 6;
|
||||
PackThreads packer{threads,
|
||||
|
|
@ -485,6 +492,7 @@ public:
|
|||
10}; // Sandbag denom
|
||||
|
||||
const std::vector<ThreadSchedule> scheduled = packer.pack(graph);
|
||||
UASSERT_SELFTEST(size_t, scheduled.size(), 3);
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), threads);
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads[0].size(), 2);
|
||||
for (size_t i = 1; i < scheduled[0].threads.size(); ++i)
|
||||
|
|
@ -494,17 +502,23 @@ public:
|
|||
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][1], t1);
|
||||
|
||||
UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), threads / 3);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][0], t2);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][1], t3);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t4);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t2);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][1], t3);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][0], t4);
|
||||
|
||||
UASSERT_SELFTEST(size_t, ThreadSchedule::mtaskState.size(), 5);
|
||||
UASSERT_SELFTEST(size_t, scheduled[2].threads.size(), threads);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[2].threads[0][0], t5);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[2].threads[1][0], t6);
|
||||
|
||||
UASSERT_SELFTEST(size_t, ThreadSchedule::mtaskState.size(), 7);
|
||||
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t0), 0);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t1), 0);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t2), 1);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t3), 1);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t4), 0);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t2), 0);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t3), 0);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t4), 1);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t5), 0);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t6), 1);
|
||||
|
||||
// On its native thread, we see the actual end time for t0:
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t0, 0), 1000);
|
||||
|
|
@ -518,14 +532,97 @@ public:
|
|||
// with t0's sandbagged time; compounding caused trouble in
|
||||
// practice.
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t1, 1), 1130);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 0), 1229);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 1), 1199);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 0), 1329);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 1), 1299);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 0), 1329);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 1), 1359);
|
||||
|
||||
// Wide task scheduling
|
||||
|
||||
// Task does not depend on previous or future schedules
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t2, 0), 0);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[2], t2, 0), 0);
|
||||
|
||||
// We allow sandbagging for hierarchical children tasks, this does not affect
|
||||
// wide task scheduling. When the next schedule is created it doesn't matter
|
||||
// anyway.
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 0), 1200);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 1), 1230);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 2), 1230);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 3), 1230);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 4), 1230);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 5), 1230);
|
||||
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 0), 1300);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 1), 1330);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 2), 1330);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 3), 1330);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 4), 1330);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 5), 1330);
|
||||
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 0), 1360);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 1), 1330);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 2), 1360);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 3), 1360);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 4), 1360);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 5), 1360);
|
||||
|
||||
for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree();
|
||||
ThreadSchedule::mtaskState.clear();
|
||||
}
|
||||
static void selfTestHierFirst() {
|
||||
V3Graph graph;
|
||||
FileLine* const flp = v3Global.rootp()->fileline();
|
||||
std::vector<AstMTaskBody*> mTaskBodyps;
|
||||
const auto makeBody = [&]() {
|
||||
AstMTaskBody* const bodyp = new AstMTaskBody{flp};
|
||||
mTaskBodyps.push_back(bodyp);
|
||||
bodyp->addStmtsp(new AstComment{flp, ""});
|
||||
return bodyp;
|
||||
};
|
||||
ExecMTask* const t0 = new ExecMTask{&graph, makeBody()};
|
||||
t0->cost(1000);
|
||||
t0->priority(1100);
|
||||
t0->threads(2);
|
||||
ExecMTask* const t1 = new ExecMTask{&graph, makeBody()};
|
||||
t1->cost(100);
|
||||
t1->priority(100);
|
||||
|
||||
/*
|
||||
0
|
||||
|
|
||||
1
|
||||
*/
|
||||
new V3GraphEdge{&graph, t0, t1, 1};
|
||||
|
||||
constexpr uint32_t threads = 2;
|
||||
PackThreads packer{threads,
|
||||
3, // Sandbag numerator
|
||||
10}; // Sandbag denom
|
||||
|
||||
const std::vector<ThreadSchedule> scheduled = packer.pack(graph);
|
||||
UASSERT_SELFTEST(size_t, scheduled.size(), 2);
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), threads / 2);
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads[0].size(), 1);
|
||||
for (size_t i = 1; i < scheduled[0].threads.size(); ++i)
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads[i].size(), 0);
|
||||
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][0], t0);
|
||||
|
||||
UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), threads);
|
||||
UASSERT_SELFTEST(size_t, scheduled[1].threads[0].size(), 1);
|
||||
for (size_t i = 1; i < scheduled[1].threads.size(); ++i)
|
||||
UASSERT_SELFTEST(size_t, scheduled[1].threads[i].size(), 0);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t1);
|
||||
|
||||
UASSERT_SELFTEST(size_t, ThreadSchedule::mtaskState.size(), 2);
|
||||
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t0), 0);
|
||||
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t1), 0);
|
||||
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t0, 0), 1000);
|
||||
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 0), 1100);
|
||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 1), 1130);
|
||||
|
||||
for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree();
|
||||
ThreadSchedule::mtaskState.clear();
|
||||
}
|
||||
|
||||
static std::vector<ThreadSchedule> apply(V3Graph& mtaskGraph) {
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ if test.vltmt:
|
|||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule count\s+(\d+)', 4)
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 10)
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 12)
|
||||
|
||||
test.execute(all_run_flags=[
|
||||
"+verilator+prof+exec+start+2",
|
||||
|
|
|
|||
Loading…
Reference in New Issue