2024-03-10 16:58:58 +01:00
|
|
|
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
|
|
|
|
//*************************************************************************
|
|
|
|
|
// DESCRIPTION: Verilator: AstExecGraph code construction
|
|
|
|
|
//
|
|
|
|
|
// Code available from: https://verilator.org
|
|
|
|
|
//
|
|
|
|
|
//*************************************************************************
|
|
|
|
|
//
|
2025-01-01 14:30:25 +01:00
|
|
|
// Copyright 2003-2025 by Wilson Snyder. This program is free software; you
|
2024-03-10 16:58:58 +01:00
|
|
|
// can redistribute it and/or modify it under the terms of either the GNU
|
|
|
|
|
// Lesser General Public License Version 3 or the Perl Artistic License
|
|
|
|
|
// Version 2.0.
|
|
|
|
|
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
|
|
|
|
//
|
|
|
|
|
//*************************************************************************
|
|
|
|
|
|
|
|
|
|
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
|
|
|
|
|
|
|
|
|
|
#include "V3ExecGraph.h"
|
|
|
|
|
|
2025-06-28 02:38:01 +02:00
|
|
|
#include "V3Control.h"
|
2024-03-10 16:58:58 +01:00
|
|
|
#include "V3EmitCBase.h"
|
|
|
|
|
#include "V3File.h"
|
|
|
|
|
#include "V3GraphStream.h"
|
2024-03-16 15:02:17 +01:00
|
|
|
#include "V3Hasher.h"
|
2024-03-10 16:58:58 +01:00
|
|
|
#include "V3InstrCount.h"
|
|
|
|
|
#include "V3Os.h"
|
|
|
|
|
#include "V3Stats.h"
|
|
|
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
|
|
VL_DEFINE_DEBUG_FUNCTIONS;
|
|
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
// Create the AstCFunc that holds the body of a single MTask.
//   execGraphp  Owning exec graph; supplies the name prefix and the FileLine
//   scopep      Scope the function is added to; may be null, in which case the
//               function is created but not attached to any scope
//   stmtsp      Statements forming the body of the function
//   id          MTask id, appended to the function name
// Returns the newly allocated function, named '<graph name>_mtask<id>'.
AstCFunc* ExecMTask::createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp,
                                 uint32_t id) {
    std::string funcName = execGraphp->name();
    funcName += "_mtask";
    funcName += std::to_string(id);

    AstCFunc* const funcp = new AstCFunc{execGraphp->fileline(), funcName, scopep};
    funcp->isLoose(true);
    funcp->dontCombine(true);
    funcp->addStmtsp(stmtsp);

    // Only attach to a scope when one was given
    if (scopep != nullptr) scopep->addBlocksp(funcp);
    return funcp;
}
|
|
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
ExecMTask::ExecMTask(AstExecGraph* execGraphp, AstScope* scopep,
|
|
|
|
|
AstNodeStmt* stmtsp) VL_MT_DISABLED //
|
|
|
|
|
: V3GraphVertex{execGraphp->depGraphp()},
|
|
|
|
|
m_id{s_nextId++},
|
|
|
|
|
m_funcp{createCFunc(execGraphp, scopep, stmtsp, m_id)},
|
|
|
|
|
m_hashName{V3Hasher::uncachedHash(m_funcp).toString()} {}
|
|
|
|
|
|
2024-03-16 15:02:17 +01:00
|
|
|
void ExecMTask::dump(std::ostream& str) const {
|
|
|
|
|
str << name() << "." << cvtToHex(this);
|
|
|
|
|
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
|
|
|
|
|
}
|
|
|
|
|
|
2024-09-06 14:04:26 +02:00
|
|
|
// Counter supplying MTask ids; the ExecMTask constructor takes the current value
// and post-increments it, so the first MTask created gets id 0.
std::atomic<uint32_t> ExecMTask::s_nextId{0};
|
2024-03-16 15:02:17 +01:00
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
namespace V3ExecGraph {
|
|
|
|
|
|
|
|
|
|
//######################################################################
|
|
|
|
|
// ThreadSchedule
|
|
|
|
|
|
|
|
|
|
// The thread schedule, containing all information needed later. Note that this is a simple
|
|
|
|
|
// aggregate data type and the only way to get hold of an instance of it is via
|
|
|
|
|
// PackThreads::pack, which is moved from there and is const, which means we can only acquire a
|
|
|
|
|
// const reference to it, so no further modifications are allowed, so all members are public
|
|
|
|
|
// (attributes).
|
|
|
|
|
class ThreadSchedule final {
|
|
|
|
|
friend class PackThreads;
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
uint32_t m_id; // Unique ID of a schedule
|
|
|
|
|
static uint32_t s_nextId; // Next ID number to use
|
|
|
|
|
std::unordered_set<const ExecMTask*> mtasks; // Mtasks in this schedule
|
2025-05-08 12:45:10 +02:00
|
|
|
uint32_t m_endTime = 0; // Latest task end time in this schedule
|
2025-03-24 23:39:29 +01:00
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
public:
|
|
|
|
|
// CONSTANTS
|
|
|
|
|
static constexpr uint32_t UNASSIGNED = 0xffffffff;
|
|
|
|
|
|
|
|
|
|
// TYPES
|
|
|
|
|
struct MTaskState final {
|
|
|
|
|
uint32_t completionTime = 0; // Estimated time this mtask will complete
|
|
|
|
|
uint32_t threadId = UNASSIGNED; // Thread id this MTask is assigned to
|
|
|
|
|
const ExecMTask* nextp = nullptr; // Next MTask on same thread after this
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// MEMBERS
|
|
|
|
|
// Allocation of sequence of MTasks to threads. Can be considered a map from thread ID to
|
|
|
|
|
// the sequence of MTasks to be executed by that thread.
|
2025-10-28 01:49:41 +01:00
|
|
|
std::vector<std::vector<const ExecMTask*>> m_threads;
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
// Global state for each mtask.
|
2025-10-28 01:49:41 +01:00
|
|
|
static std::unordered_map<const ExecMTask*, MTaskState> s_mtaskState;
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
explicit ThreadSchedule(uint32_t nThreads)
|
2025-07-04 00:59:32 +02:00
|
|
|
: m_id{s_nextId++}
|
2025-10-28 01:49:41 +01:00
|
|
|
, m_threads{nThreads} {}
|
2024-03-10 16:58:58 +01:00
|
|
|
ThreadSchedule(ThreadSchedule&&) = default;
|
|
|
|
|
ThreadSchedule& operator=(ThreadSchedule&&) = default;
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
private:
|
|
|
|
|
VL_UNCOPYABLE(ThreadSchedule);
|
|
|
|
|
|
2025-05-20 18:15:09 +02:00
|
|
|
static constexpr double s_threadBoxWidth = 2.5;
|
|
|
|
|
static constexpr double s_threadBoxHeight = 1.5;
|
|
|
|
|
static constexpr double s_horizontalGap = s_threadBoxWidth / 2;
|
|
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
// Debugging
|
2025-05-20 18:15:09 +02:00
|
|
|
// Variant of dumpDotFilePrefixed without --dump option check
|
|
|
|
|
static void dumpDotFilePrefixedAlways(const std::vector<ThreadSchedule>& schedules,
|
|
|
|
|
const string& nameComment, uint32_t nThreads) {
|
|
|
|
|
dumpDotFile(schedules, v3Global.debugFilename(nameComment) + ".dot", nThreads);
|
|
|
|
|
}
|
|
|
|
|
static void dumpDotFile(const std::vector<ThreadSchedule>& schedules, const string& filename,
|
|
|
|
|
uint32_t nThreads) {
|
2024-03-10 16:58:58 +01:00
|
|
|
// This generates a file used by graphviz, https://www.graphviz.org
|
|
|
|
|
const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream(filename)};
|
2025-03-24 00:51:54 +01:00
|
|
|
if (logp->fail()) v3fatal("Can't write file: " << filename);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// Header
|
|
|
|
|
*logp << "digraph v3graph {\n";
|
|
|
|
|
*logp << " graph[layout=\"neato\" labelloc=t labeljust=l label=\"" << filename << "\"]\n";
|
|
|
|
|
*logp << " node[shape=\"rect\" ratio=\"fill\" fixedsize=true]\n";
|
|
|
|
|
|
|
|
|
|
// Thread labels
|
|
|
|
|
*logp << "\n // Threads\n";
|
2025-05-20 18:15:09 +02:00
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < nThreads; ++i) {
|
|
|
|
|
const string name = "t" + std::to_string(i);
|
|
|
|
|
const string label = "Thread " + std::to_string(i);
|
|
|
|
|
constexpr double posX = -s_horizontalGap;
|
|
|
|
|
const double posY = -static_cast<double>(i) * s_threadBoxHeight;
|
|
|
|
|
dumpDotFileEmitBlock(logp, name, label, s_threadBoxWidth, s_threadBoxHeight, posX,
|
|
|
|
|
posY, "grey");
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// MTask nodes
|
|
|
|
|
*logp << "\n // MTasks\n";
|
|
|
|
|
|
2025-05-20 18:15:09 +02:00
|
|
|
uint32_t maxCost = 0;
|
2025-10-28 01:49:41 +01:00
|
|
|
for (const auto& state : ThreadSchedule::s_mtaskState) {
|
2025-05-20 18:15:09 +02:00
|
|
|
const ExecMTask* const mtaskp = state.first;
|
|
|
|
|
maxCost = std::max(maxCost, mtaskp->cost());
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
2025-05-20 18:15:09 +02:00
|
|
|
// To avoid segments getting too large, limit maximal mtask length.
|
|
|
|
|
// Based on the mtask cost, normalize it using segment cost
|
|
|
|
|
constexpr uint32_t segmentsPerLongestMtask = 4;
|
|
|
|
|
const uint32_t segmentCost = maxCost / segmentsPerLongestMtask;
|
|
|
|
|
|
|
|
|
|
// Create columns of tasks whose execution intervals overlaps.
|
|
|
|
|
// Keep offset for each column for correctly aligned tasks.
|
|
|
|
|
std::vector<double> offsets(nThreads, 0.0);
|
|
|
|
|
for (const ThreadSchedule& schedule : schedules) {
|
|
|
|
|
if (schedule.mtasks.empty()) continue;
|
|
|
|
|
using Column = std::vector<const ExecMTask*>;
|
|
|
|
|
std::vector<Column> columns = {{}};
|
|
|
|
|
|
|
|
|
|
// Order tasks based on their start time
|
|
|
|
|
struct Cmp final {
|
|
|
|
|
bool operator()(const ExecMTask* const a, const ExecMTask* const b) const {
|
|
|
|
|
if (startTime(a) == startTime(b)) return threadId(a) < threadId(b);
|
|
|
|
|
return startTime(a) < startTime(b);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
const std::multiset<const ExecMTask*, Cmp> tasks(schedule.mtasks.begin(),
|
|
|
|
|
schedule.mtasks.end());
|
|
|
|
|
|
|
|
|
|
for (const ExecMTask* const mtaskp : tasks) {
|
|
|
|
|
Column& column = columns.back();
|
|
|
|
|
UASSERT(column.size() <= nThreads, "Invalid partitioning");
|
|
|
|
|
|
|
|
|
|
bool intersects = true;
|
|
|
|
|
for (const ExecMTask* const earlierMtask : column) {
|
|
|
|
|
if (endTime(mtaskp) <= startTime(earlierMtask)
|
|
|
|
|
|| startTime(mtaskp) >= endTime(earlierMtask)) {
|
|
|
|
|
intersects = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (intersects) {
|
|
|
|
|
column.emplace_back(mtaskp);
|
|
|
|
|
} else {
|
|
|
|
|
columns.emplace_back(Column{mtaskp});
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-05-20 18:15:09 +02:00
|
|
|
UASSERT(!columns.front().empty(), "Should be populated by mtasks");
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-05-20 18:15:09 +02:00
|
|
|
for (const Column& column : columns) {
|
|
|
|
|
double lastColumnOffset = 0;
|
|
|
|
|
for (const ExecMTask* const mtaskp : column) {
|
|
|
|
|
dumpDotFileEmitMTask(logp, mtaskp, schedule, segmentCost, offsets);
|
|
|
|
|
lastColumnOffset = std::max(lastColumnOffset, offsets[threadId(mtaskp)]);
|
|
|
|
|
}
|
|
|
|
|
// Even out column offset
|
|
|
|
|
std::fill(offsets.begin(), offsets.end(), lastColumnOffset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dumpDotFileEmitFork(logp, offsets.front(), nThreads);
|
|
|
|
|
|
|
|
|
|
// Emit MTask dependency edges
|
|
|
|
|
*logp << "\n // MTask dependencies\n";
|
|
|
|
|
|
2025-10-28 01:49:41 +01:00
|
|
|
for (const std::vector<const ExecMTask*>& thread : schedule.m_threads) {
|
2025-05-20 18:15:09 +02:00
|
|
|
if (thread.empty()) break; // No more threads
|
|
|
|
|
|
|
|
|
|
// Show that schedule ends when all tasks are finished
|
|
|
|
|
*logp << " " << thread.back()->name() << " -> fork_"
|
|
|
|
|
<< static_cast<int>(offsets.front()) << "\n";
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-05-20 18:15:09 +02:00
|
|
|
// Show that tasks from the same thread are executed in a sequence
|
|
|
|
|
for (size_t i = 1; i < thread.size(); ++i)
|
|
|
|
|
*logp << " " << thread[i - 1]->name() << " -> " << thread[i]->name() << "\n";
|
|
|
|
|
|
|
|
|
|
// Emit cross-task dependencies
|
|
|
|
|
for (const ExecMTask* const mtaskp : thread) {
|
|
|
|
|
for (const V3GraphEdge& edge : mtaskp->outEdges()) {
|
|
|
|
|
const ExecMTask* const topMTaskp = edge.top()->cast<const ExecMTask>();
|
|
|
|
|
if (topMTaskp && schedule.contains(topMTaskp)
|
|
|
|
|
&& threadId(topMTaskp) != threadId(mtaskp))
|
|
|
|
|
*logp << " " << mtaskp->name() << " -> " << topMTaskp->name() << "\n";
|
|
|
|
|
}
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Trailer
|
|
|
|
|
*logp << "}\n";
|
|
|
|
|
logp->close();
|
|
|
|
|
}
|
2025-05-20 18:15:09 +02:00
|
|
|
static void dumpDotFileEmitBlock(const std::unique_ptr<std::ofstream>& logp,
|
|
|
|
|
const string& name, const string& label, double width,
|
|
|
|
|
double height, double xPos, double yPos,
|
|
|
|
|
const string& fillColor) {
|
|
|
|
|
*logp << " " << name << " [label=\"" << label << "\" width=" << width
|
|
|
|
|
<< " height=" << height << " pos=\"" << xPos << "," << yPos
|
|
|
|
|
<< "!\" style=\"filled\" fillcolor=\"" << fillColor << "\"]\n";
|
|
|
|
|
}
|
|
|
|
|
static void dumpDotFileEmitMTask(const std::unique_ptr<std::ofstream>& logp,
|
|
|
|
|
const ExecMTask* const mtaskp, const ThreadSchedule& schedule,
|
|
|
|
|
uint32_t segmentCost, std::vector<double>& offsets) {
|
|
|
|
|
for (int i = 0; i < mtaskp->threads(); ++i) {
|
|
|
|
|
// Keep original name for the original thread of hierarchical task to keep
|
|
|
|
|
// dependency tracking, add '_' for the rest to differentiate them.
|
|
|
|
|
const string name = i == 0 ? mtaskp->name() : mtaskp->name() + '_' + std::to_string(i);
|
|
|
|
|
const string label = mtaskp->name() + " (" + std::to_string(startTime(mtaskp)) + ':'
|
|
|
|
|
+ std::to_string(endTime(mtaskp)) + ')'
|
|
|
|
|
+ "\\ncost=" + std::to_string(mtaskp->cost())
|
|
|
|
|
+ "\\npriority=" + std::to_string(mtaskp->priority());
|
|
|
|
|
const double width
|
|
|
|
|
= std::max(s_threadBoxWidth,
|
|
|
|
|
s_threadBoxWidth * static_cast<double>(mtaskp->cost()) / segmentCost);
|
2025-10-28 01:49:41 +01:00
|
|
|
const uint32_t mtaskThreadId = threadId(mtaskp) + i * schedule.m_threads.size();
|
2025-05-20 18:15:09 +02:00
|
|
|
const double xPos = width / 2 + offsets[mtaskThreadId];
|
|
|
|
|
offsets[mtaskThreadId] += width + s_horizontalGap;
|
|
|
|
|
const double yPos = -s_threadBoxHeight * static_cast<double>(mtaskThreadId);
|
|
|
|
|
const string fillColor = i == 0 ? "white" : "lightgreen";
|
|
|
|
|
dumpDotFileEmitBlock(logp, name, label, width, s_threadBoxHeight, xPos, yPos,
|
|
|
|
|
fillColor);
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-05-20 18:15:09 +02:00
|
|
|
static void dumpDotFileEmitFork(const std::unique_ptr<std::ofstream>& logp, double offset,
|
|
|
|
|
uint32_t nThreads) {
|
|
|
|
|
const string& name = "fork_" + std::to_string(static_cast<int>(offset));
|
|
|
|
|
constexpr double width = s_threadBoxWidth / 8;
|
|
|
|
|
const double height = s_threadBoxHeight * nThreads;
|
|
|
|
|
const double xPos = offset - s_horizontalGap / 2;
|
|
|
|
|
const double yPos
|
|
|
|
|
= -static_cast<double>(nThreads) / 2 * s_threadBoxHeight + s_threadBoxHeight / 2;
|
|
|
|
|
dumpDotFileEmitBlock(logp, name, "", width, height, xPos, yPos, "black");
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public:
|
2025-03-24 23:39:29 +01:00
|
|
|
static uint32_t threadId(const ExecMTask* mtaskp) {
|
2025-10-28 01:49:41 +01:00
|
|
|
const auto& it = s_mtaskState.find(mtaskp);
|
|
|
|
|
return it != s_mtaskState.end() ? it->second.threadId : UNASSIGNED;
|
2025-03-24 23:39:29 +01:00
|
|
|
}
|
|
|
|
|
static uint32_t startTime(const ExecMTask* mtaskp) {
|
2025-10-28 01:49:41 +01:00
|
|
|
return s_mtaskState.at(mtaskp).completionTime - mtaskp->cost();
|
2025-03-24 23:39:29 +01:00
|
|
|
}
|
|
|
|
|
static uint32_t endTime(const ExecMTask* mtaskp) {
|
2025-10-28 01:49:41 +01:00
|
|
|
return s_mtaskState.at(mtaskp).completionTime;
|
2025-03-24 23:39:29 +01:00
|
|
|
}
|
|
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
// Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must
|
|
|
|
|
// test whether its dependencies are ready before starting, and therefore may need to block.
|
|
|
|
|
uint32_t crossThreadDependencies(const ExecMTask* mtaskp) const {
|
|
|
|
|
const uint32_t thisThreadId = threadId(mtaskp);
|
|
|
|
|
uint32_t result = 0;
|
2024-03-26 00:06:25 +01:00
|
|
|
for (const V3GraphEdge& edge : mtaskp->inEdges()) {
|
|
|
|
|
const ExecMTask* const prevp = edge.fromp()->as<ExecMTask>();
|
2025-03-24 23:39:29 +01:00
|
|
|
if (threadId(prevp) != thisThreadId && contains(prevp)) ++result;
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
uint32_t id() const { return m_id; }
|
|
|
|
|
uint32_t scheduleOn(const ExecMTask* mtaskp, uint32_t bestThreadId) {
|
|
|
|
|
mtasks.emplace(mtaskp);
|
|
|
|
|
const uint32_t bestEndTime = mtaskp->predictStart() + mtaskp->cost();
|
2025-05-08 12:45:10 +02:00
|
|
|
m_endTime = std::max(m_endTime, bestEndTime);
|
2025-10-28 01:49:41 +01:00
|
|
|
s_mtaskState[mtaskp].completionTime = bestEndTime;
|
|
|
|
|
s_mtaskState[mtaskp].threadId = bestThreadId;
|
2025-03-24 23:39:29 +01:00
|
|
|
|
|
|
|
|
// Reference to thread in schedule we are assigning this MTask to.
|
2025-10-28 01:49:41 +01:00
|
|
|
std::vector<const ExecMTask*>& bestThread = m_threads[bestThreadId];
|
|
|
|
|
if (!bestThread.empty()) s_mtaskState[bestThread.back()].nextp = mtaskp;
|
2025-03-24 23:39:29 +01:00
|
|
|
|
|
|
|
|
// Add the MTask to the schedule
|
|
|
|
|
bestThread.push_back(mtaskp);
|
|
|
|
|
return bestEndTime;
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
2025-03-24 23:39:29 +01:00
|
|
|
bool contains(const ExecMTask* mtaskp) const { return mtasks.count(mtaskp); }
|
2025-05-08 12:45:10 +02:00
|
|
|
uint32_t endTime() const { return m_endTime; }
|
2024-03-10 16:58:58 +01:00
|
|
|
};
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
// Next schedule ID to hand out; the ThreadSchedule constructor post-increments it
uint32_t ThreadSchedule::s_nextId = 0;
|
2025-10-28 01:49:41 +01:00
|
|
|
// Per-MTask scheduling state (completion time, thread id, next MTask on the thread).
// A static member, so a single map is shared by every ThreadSchedule instance.
std::unordered_map<const ExecMTask*, ThreadSchedule::MTaskState> ThreadSchedule::s_mtaskState{};
|
2025-05-27 16:11:56 +02:00
|
|
|
// Out-of-class definition of the constexpr static member initialized in the class body.
// NOTE(review): presumably required because std::max binds it by reference (an odr-use),
// which needs a definition before C++17 inline variables - confirm before removing.
constexpr double V3ExecGraph::ThreadSchedule::s_threadBoxWidth;
|
2025-03-24 23:39:29 +01:00
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
//######################################################################
|
|
|
|
|
// PackThreads
|
|
|
|
|
|
|
|
|
|
// Statically pack tasks into threads.
|
|
|
|
|
//
|
|
|
|
|
// The simplest thing that could possibly work would be to assume that our
|
|
|
|
|
// predictions of task runtimes are precise, and that every thread will
|
2025-02-25 18:44:14 +01:00
|
|
|
// make progress at an equal rate. Simulate a single "clock", pack the
|
2024-03-10 16:58:58 +01:00
|
|
|
// highest priority ready task into whatever thread becomes ready earliest,
|
|
|
|
|
// repeating until no tasks remain.
|
|
|
|
|
//
|
|
|
|
|
// That doesn't work well, as our predictions of task runtimes have wide
|
|
|
|
|
// error bars (+/- 60% is typical.)
|
|
|
|
|
//
|
|
|
|
|
// So be a little more clever: let each task have a different end time,
|
|
|
|
|
// depending on which thread is looking. Be a little bit pessimistic when
|
|
|
|
|
// thread A checks the end time of an mtask running on thread B. This extra
|
|
|
|
|
// "padding" avoids tight "layovers" at cross-thread dependencies.
|
|
|
|
|
class PackThreads final {
|
|
|
|
|
// TYPES
|
|
|
|
|
struct MTaskCmp final {
|
|
|
|
|
bool operator()(const ExecMTask* ap, const ExecMTask* bp) const {
|
|
|
|
|
return ap->id() < bp->id();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// MEMBERS
|
|
|
|
|
const uint32_t m_nThreads; // Number of threads
|
2025-05-26 15:37:35 +02:00
|
|
|
const uint32_t m_nHierThreads; // Number of threads used for hierarchical tasks
|
2024-03-10 16:58:58 +01:00
|
|
|
const uint32_t m_sandbagNumerator; // Numerator padding for est runtime
|
|
|
|
|
const uint32_t m_sandbagDenom; // Denominator padding for est runtime
|
|
|
|
|
|
|
|
|
|
// CONSTRUCTORS
|
|
|
|
|
explicit PackThreads(uint32_t nThreads = v3Global.opt.threads(),
|
2025-05-26 15:37:35 +02:00
|
|
|
uint32_t nHierThreads = v3Global.opt.hierThreads(),
|
2024-03-10 16:58:58 +01:00
|
|
|
unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100)
|
|
|
|
|
: m_nThreads{nThreads}
|
2025-05-26 15:37:35 +02:00
|
|
|
, m_nHierThreads{nHierThreads}
|
2024-03-10 16:58:58 +01:00
|
|
|
, m_sandbagNumerator{sandbagNumerator}
|
|
|
|
|
, m_sandbagDenom{sandbagDenom} {}
|
|
|
|
|
~PackThreads() = default;
|
2024-03-16 13:17:24 +01:00
|
|
|
VL_UNCOPYABLE(PackThreads);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// METHODS
|
|
|
|
|
uint32_t completionTime(const ThreadSchedule& schedule, const ExecMTask* mtaskp,
|
|
|
|
|
uint32_t threadId) {
|
2025-05-08 12:45:10 +02:00
|
|
|
// Ignore tasks that were scheduled on a different schedule
|
|
|
|
|
if (!schedule.contains(mtaskp)) return 0;
|
2025-10-28 01:49:41 +01:00
|
|
|
const ThreadSchedule::MTaskState& state = schedule.s_mtaskState.at(mtaskp);
|
2024-03-10 16:58:58 +01:00
|
|
|
UASSERT(state.threadId != ThreadSchedule::UNASSIGNED, "Mtask should have assigned thread");
|
|
|
|
|
if (threadId == state.threadId) {
|
|
|
|
|
// No overhead on same thread
|
|
|
|
|
return state.completionTime;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add some padding to the estimated runtime when looking from
|
|
|
|
|
// another thread
|
|
|
|
|
uint32_t sandbaggedEndTime
|
|
|
|
|
= state.completionTime + (m_sandbagNumerator * mtaskp->cost()) / m_sandbagDenom;
|
|
|
|
|
|
|
|
|
|
// If task B is packed after task A on thread 0, don't let thread 1
|
|
|
|
|
// think that A finishes earlier than thread 0 thinks that B
|
|
|
|
|
// finishes, otherwise we get priority inversions and fail the self
|
|
|
|
|
// test.
|
|
|
|
|
if (state.nextp) {
|
|
|
|
|
const uint32_t successorEndTime
|
|
|
|
|
= completionTime(schedule, state.nextp, state.threadId);
|
|
|
|
|
if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) {
|
|
|
|
|
sandbaggedEndTime = successorEndTime - 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
UINFO(6, "Sandbagged end time for " << mtaskp->name() << " on th " << threadId << " = "
|
2025-05-23 02:29:32 +02:00
|
|
|
<< sandbaggedEndTime);
|
2024-03-10 16:58:58 +01:00
|
|
|
return sandbaggedEndTime;
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
// An MTask is ready to be scheduled once every MTask it depends on (the source of each
// of its incoming edges) has been assigned to a thread.
static bool isReady(ThreadSchedule& schedule, const ExecMTask* mtaskp) {
    bool allPredecessorsAssigned = true;
    for (const V3GraphEdge& edgeIn : mtaskp->inEdges()) {
        const ExecMTask* const predp = edgeIn.fromp()->as<const ExecMTask>();
        if (schedule.threadId(predp) == ThreadSchedule::UNASSIGNED) {
            // Found a predecessor that is still waiting for a thread, no need to look further
            allPredecessorsAssigned = false;
            break;
        }
    }
    return allPredecessorsAssigned;
}
|
|
|
|
|
|
|
|
|
|
// Pack an MTasks from given graph into m_nThreads threads, return the schedule.
|
2025-03-24 23:39:29 +01:00
|
|
|
std::vector<ThreadSchedule> pack(V3Graph& mtaskGraph) {
|
|
|
|
|
std::vector<ThreadSchedule> result;
|
|
|
|
|
result.emplace_back(ThreadSchedule{m_nThreads});
|
|
|
|
|
|
|
|
|
|
// To support scheduling tasks that utilize more than one thread, we introduce a wide
|
|
|
|
|
// task (ExecMTask with threads() > 1). Those tasks are scheduled on a separate thread
|
|
|
|
|
// schedule to ensure that indexes for simulation-time thread pool workers are not shadowed
|
|
|
|
|
// by another tasks.
|
|
|
|
|
// For retaining control over thread schedules, we distinguish SchedulingModes:
|
|
|
|
|
enum class SchedulingMode {
|
|
|
|
|
SCHEDULING // Schedule normal tasks
|
|
|
|
|
,
|
|
|
|
|
WIDE_TASK_DISCOVERED // We found a wide task, if this is the only one available,
|
|
|
|
|
// switch to WIDE_TASK_SCHEDULING
|
|
|
|
|
,
|
|
|
|
|
WIDE_TASK_SCHEDULING // Schedule wide tasks
|
|
|
|
|
};
|
|
|
|
|
SchedulingMode mode = SchedulingMode::SCHEDULING;
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// Time each thread is occupied until
|
2025-05-26 15:37:35 +02:00
|
|
|
std::vector<uint32_t> busyUntil(std::max(m_nThreads, m_nHierThreads), 0);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// MTasks ready to be assigned next. All their dependencies are already assigned.
|
|
|
|
|
std::set<ExecMTask*, MTaskCmp> readyMTasks;
|
2025-03-24 23:39:29 +01:00
|
|
|
int maxThreadWorkers = 1;
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// Build initial ready list
|
2024-03-26 00:06:25 +01:00
|
|
|
for (V3GraphVertex& vtx : mtaskGraph.vertices()) {
|
|
|
|
|
ExecMTask* const mtaskp = vtx.as<ExecMTask>();
|
2025-03-24 23:39:29 +01:00
|
|
|
if (isReady(result.back(), mtaskp)) readyMTasks.insert(mtaskp);
|
|
|
|
|
// TODO right now we schedule tasks assuming they take the same number of threads for
|
|
|
|
|
// simplification.
|
|
|
|
|
maxThreadWorkers = std::max(maxThreadWorkers, mtaskp->threads());
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (!readyMTasks.empty()) {
|
|
|
|
|
// For each task in the ready set, compute when it might start
|
|
|
|
|
// on each thread (in that thread's local time frame.)
|
|
|
|
|
uint32_t bestTime = 0xffffffff;
|
|
|
|
|
uint32_t bestThreadId = 0;
|
|
|
|
|
ExecMTask* bestMtaskp = nullptr; // Todo: const ExecMTask*
|
2025-03-24 23:39:29 +01:00
|
|
|
ThreadSchedule& schedule = result.back();
|
2025-10-28 01:49:41 +01:00
|
|
|
for (uint32_t threadId = 0; threadId < schedule.m_threads.size(); ++threadId) {
|
2024-03-10 16:58:58 +01:00
|
|
|
for (ExecMTask* const mtaskp : readyMTasks) {
|
2025-03-24 23:39:29 +01:00
|
|
|
if (mode != SchedulingMode::WIDE_TASK_SCHEDULING && mtaskp->threads() > 1) {
|
|
|
|
|
mode = SchedulingMode::WIDE_TASK_DISCOVERED;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (mode == SchedulingMode::WIDE_TASK_SCHEDULING && mtaskp->threads() <= 1)
|
|
|
|
|
continue;
|
|
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
uint32_t timeBegin = busyUntil[threadId];
|
|
|
|
|
if (timeBegin > bestTime) {
|
|
|
|
|
UINFO(6, "th " << threadId << " busy until " << timeBegin
|
|
|
|
|
<< ", later than bestTime " << bestTime
|
2025-05-23 02:29:32 +02:00
|
|
|
<< ", skipping thread.");
|
2024-03-10 16:58:58 +01:00
|
|
|
break;
|
|
|
|
|
}
|
2024-03-26 00:06:25 +01:00
|
|
|
for (const V3GraphEdge& edge : mtaskp->inEdges()) {
|
|
|
|
|
const ExecMTask* const priorp = edge.fromp()->as<ExecMTask>();
|
2024-03-10 16:58:58 +01:00
|
|
|
const uint32_t priorEndTime = completionTime(schedule, priorp, threadId);
|
|
|
|
|
if (priorEndTime > timeBegin) timeBegin = priorEndTime;
|
|
|
|
|
}
|
|
|
|
|
UINFO(6, "Task " << mtaskp->name() << " start at " << timeBegin
|
2025-05-23 02:29:32 +02:00
|
|
|
<< " on thread " << threadId);
|
2024-03-10 16:58:58 +01:00
|
|
|
if ((timeBegin < bestTime)
|
|
|
|
|
|| ((timeBegin == bestTime)
|
|
|
|
|
&& bestMtaskp // Redundant, but appeases static analysis tools
|
|
|
|
|
&& (mtaskp->priority() > bestMtaskp->priority()))) {
|
|
|
|
|
bestTime = timeBegin;
|
|
|
|
|
bestThreadId = threadId;
|
|
|
|
|
bestMtaskp = mtaskp;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-08 12:45:10 +02:00
|
|
|
const uint32_t endTime = schedule.endTime();
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_DISCOVERED) {
|
|
|
|
|
mode = SchedulingMode::WIDE_TASK_SCHEDULING;
|
2025-05-26 15:37:35 +02:00
|
|
|
const uint32_t size = m_nHierThreads / maxThreadWorkers;
|
2025-03-24 23:39:29 +01:00
|
|
|
UASSERT(size, "Thread pool size should be bigger than 0");
|
2025-05-08 12:45:10 +02:00
|
|
|
// If no tasks were added to the normal thread schedule, clear it.
|
2025-10-28 01:49:41 +01:00
|
|
|
if (schedule.s_mtaskState.empty()) result.clear();
|
2025-03-24 23:39:29 +01:00
|
|
|
result.emplace_back(ThreadSchedule{size});
|
2025-05-08 12:45:10 +02:00
|
|
|
std::fill(busyUntil.begin(), busyUntil.end(), endTime);
|
2025-03-24 23:39:29 +01:00
|
|
|
continue;
|
|
|
|
|
}
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_SCHEDULING) {
|
|
|
|
|
mode = SchedulingMode::SCHEDULING;
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT(!schedule.s_mtaskState.empty(), "Mtask should be added");
|
2025-05-08 12:45:10 +02:00
|
|
|
result.emplace_back(ThreadSchedule{m_nThreads});
|
|
|
|
|
std::fill(busyUntil.begin(), busyUntil.end(), endTime);
|
2025-03-24 23:39:29 +01:00
|
|
|
continue;
|
|
|
|
|
}
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
UASSERT(bestMtaskp, "Should have found some task");
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
bestMtaskp->predictStart(bestTime);
|
|
|
|
|
const uint32_t bestEndTime = schedule.scheduleOn(bestMtaskp, bestThreadId);
|
2025-05-08 12:45:10 +02:00
|
|
|
busyUntil[bestThreadId] = bestEndTime;
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// Update the ready list
|
|
|
|
|
const size_t erased = readyMTasks.erase(bestMtaskp);
|
|
|
|
|
UASSERT_OBJ(erased > 0, bestMtaskp, "Should have erased something?");
|
2024-03-26 00:06:25 +01:00
|
|
|
for (V3GraphEdge& edgeOut : bestMtaskp->outEdges()) {
|
|
|
|
|
ExecMTask* const nextp = edgeOut.top()->as<ExecMTask>();
|
2024-03-10 16:58:58 +01:00
|
|
|
// Dependent MTask should not yet be assigned to a thread
|
|
|
|
|
UASSERT(schedule.threadId(nextp) == ThreadSchedule::UNASSIGNED,
|
|
|
|
|
"Tasks after one being assigned should not be assigned yet");
|
|
|
|
|
// Dependent MTask should not be ready yet, since dependency is just being assigned
|
|
|
|
|
UASSERT_OBJ(readyMTasks.find(nextp) == readyMTasks.end(), nextp,
|
|
|
|
|
"Tasks after one being assigned should not be ready");
|
|
|
|
|
if (isReady(schedule, nextp)) {
|
|
|
|
|
readyMTasks.insert(nextp);
|
2025-05-23 02:29:32 +02:00
|
|
|
UINFO(6, "Inserted " << nextp->name() << " into ready");
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
// All schedules are combined on a single graph
|
2025-05-20 18:15:09 +02:00
|
|
|
if (dumpGraphLevel() >= 4)
|
|
|
|
|
ThreadSchedule::dumpDotFilePrefixedAlways(result, "schedule", m_nThreads);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
return result;
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
2024-03-16 13:17:24 +01:00
|
|
|
public:
|
2024-03-10 16:58:58 +01:00
|
|
|
// SELF TEST
|
|
|
|
|
static void selfTest() {
|
2025-05-08 12:45:10 +02:00
|
|
|
selfTestHierFirst();
|
|
|
|
|
selfTestNormalFirst();
|
|
|
|
|
}
|
|
|
|
|
static void selfTestNormalFirst() {
|
2024-03-16 15:02:17 +01:00
|
|
|
FileLine* const flp = v3Global.rootp()->fileline();
|
2025-11-03 07:32:03 +01:00
|
|
|
AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"};
|
|
|
|
|
V3Graph& graph = *execGraphp->depGraphp();
|
|
|
|
|
const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; };
|
|
|
|
|
ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2024-03-10 16:58:58 +01:00
|
|
|
t0->cost(1000);
|
|
|
|
|
t0->priority(1100);
|
2025-11-03 07:32:03 +01:00
|
|
|
ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2024-03-10 16:58:58 +01:00
|
|
|
t1->cost(100);
|
|
|
|
|
t1->priority(100);
|
2025-11-03 07:32:03 +01:00
|
|
|
ExecMTask* const t2 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2024-03-10 16:58:58 +01:00
|
|
|
t2->cost(100);
|
|
|
|
|
t2->priority(100);
|
2025-03-24 23:39:29 +01:00
|
|
|
t2->threads(2);
|
2025-11-03 07:32:03 +01:00
|
|
|
ExecMTask* const t3 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2025-03-24 23:39:29 +01:00
|
|
|
t3->cost(100);
|
|
|
|
|
t3->priority(100);
|
|
|
|
|
t3->threads(3);
|
2025-11-03 07:32:03 +01:00
|
|
|
ExecMTask* const t4 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2025-03-24 23:39:29 +01:00
|
|
|
t4->cost(100);
|
|
|
|
|
t4->priority(100);
|
|
|
|
|
t4->threads(3);
|
2025-11-03 07:32:03 +01:00
|
|
|
ExecMTask* const t5 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2025-05-08 12:45:10 +02:00
|
|
|
t5->cost(100);
|
|
|
|
|
t5->priority(100);
|
2025-11-03 07:32:03 +01:00
|
|
|
ExecMTask* const t6 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2025-05-08 12:45:10 +02:00
|
|
|
t6->cost(100);
|
|
|
|
|
t6->priority(100);
|
2025-03-24 23:39:29 +01:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
0
|
|
|
|
|
/ \
|
|
|
|
|
1 2
|
|
|
|
|
/ \
|
2025-05-08 12:45:10 +02:00
|
|
|
3 4
|
|
|
|
|
/ \
|
|
|
|
|
5 6
|
2025-03-24 23:39:29 +01:00
|
|
|
*/
|
2024-03-10 16:58:58 +01:00
|
|
|
new V3GraphEdge{&graph, t0, t1, 1};
|
|
|
|
|
new V3GraphEdge{&graph, t0, t2, 1};
|
2025-03-24 23:39:29 +01:00
|
|
|
new V3GraphEdge{&graph, t2, t3, 1};
|
|
|
|
|
new V3GraphEdge{&graph, t2, t4, 1};
|
2025-05-08 12:45:10 +02:00
|
|
|
new V3GraphEdge{&graph, t3, t5, 1};
|
|
|
|
|
new V3GraphEdge{&graph, t4, t6, 1};
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-05-26 15:37:35 +02:00
|
|
|
constexpr uint32_t threads = 2;
|
|
|
|
|
constexpr uint32_t hierThreads = 6;
|
|
|
|
|
PackThreads packer{threads, hierThreads,
|
2024-03-10 16:58:58 +01:00
|
|
|
3, // Sandbag numerator
|
|
|
|
|
10}; // Sandbag denom
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
const std::vector<ThreadSchedule> scheduled = packer.pack(graph);
|
2025-05-08 12:45:10 +02:00
|
|
|
UASSERT_SELFTEST(size_t, scheduled.size(), 3);
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(size_t, scheduled[0].m_threads.size(), threads);
|
|
|
|
|
UASSERT_SELFTEST(size_t, scheduled[0].m_threads[0].size(), 2);
|
|
|
|
|
for (size_t i = 1; i < scheduled[0].m_threads.size(); ++i)
|
|
|
|
|
UASSERT_SELFTEST(size_t, scheduled[0].m_threads[i].size(), 0);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].m_threads[0][0], t0);
|
|
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].m_threads[0][1], t1);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(size_t, scheduled[1].m_threads.size(), hierThreads / 3);
|
|
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].m_threads[0][0], t2);
|
|
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].m_threads[0][1], t3);
|
|
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].m_threads[1][0], t4);
|
2025-05-08 12:45:10 +02:00
|
|
|
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(size_t, scheduled[2].m_threads.size(), threads);
|
|
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[2].m_threads[0][0], t5);
|
|
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[2].m_threads[1][0], t6);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(size_t, ThreadSchedule::s_mtaskState.size(), 7);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t0), 0);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t1), 0);
|
2025-05-08 12:45:10 +02:00
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t2), 0);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t3), 0);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t4), 1);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t5), 0);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t6), 1);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// On its native thread, we see the actual end time for t0:
|
2025-03-24 23:39:29 +01:00
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t0, 0), 1000);
|
2024-03-10 16:58:58 +01:00
|
|
|
// On the other thread, we see a sandbagged end time which does not
|
|
|
|
|
// exceed the t1 end time:
|
2025-03-24 23:39:29 +01:00
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t0, 1), 1099);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// Actual end time on native thread:
|
2025-03-24 23:39:29 +01:00
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t1, 0), 1100);
|
2024-03-10 16:58:58 +01:00
|
|
|
// Sandbagged end time seen on thread 1. Note it does not compound
|
|
|
|
|
// with t0's sandbagged time; compounding caused trouble in
|
|
|
|
|
// practice.
|
2025-03-24 23:39:29 +01:00
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t1, 1), 1130);
|
2025-05-08 12:45:10 +02:00
|
|
|
|
|
|
|
|
// Wide task scheduling
|
|
|
|
|
|
|
|
|
|
// Task does not depend on previous or future schedules
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t2, 0), 0);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[2], t2, 0), 0);
|
|
|
|
|
|
|
|
|
|
// We allow sandbagging for hierarchical children tasks, this does not affect
|
|
|
|
|
// wide task scheduling. When the next schedule is created it doesn't matter
|
|
|
|
|
// anyway.
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 0), 1200);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 1), 1230);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 2), 1230);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 3), 1230);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 4), 1230);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 5), 1230);
|
|
|
|
|
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 0), 1300);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 1), 1330);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 2), 1330);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 3), 1330);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 4), 1330);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 5), 1330);
|
|
|
|
|
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 0), 1360);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 1), 1330);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 2), 1360);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 3), 1360);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 4), 1360);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 5), 1360);
|
|
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
for (V3GraphVertex& vtx : graph.vertices()) vtx.as<ExecMTask>()->funcp()->deleteTree();
|
|
|
|
|
VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp);
|
2025-10-28 01:49:41 +01:00
|
|
|
ThreadSchedule::s_mtaskState.clear();
|
2025-05-08 12:45:10 +02:00
|
|
|
}
|
|
|
|
|
static void selfTestHierFirst() {
|
|
|
|
|
FileLine* const flp = v3Global.rootp()->fileline();
|
2025-11-03 07:32:03 +01:00
|
|
|
AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"};
|
|
|
|
|
V3Graph& graph = *execGraphp->depGraphp();
|
|
|
|
|
const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; };
|
|
|
|
|
ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2025-05-08 12:45:10 +02:00
|
|
|
t0->cost(1000);
|
|
|
|
|
t0->priority(1100);
|
|
|
|
|
t0->threads(2);
|
2025-11-03 07:32:03 +01:00
|
|
|
ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()};
|
2025-05-08 12:45:10 +02:00
|
|
|
t1->cost(100);
|
|
|
|
|
t1->priority(100);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
|
|
1
|
|
|
|
|
*/
|
|
|
|
|
new V3GraphEdge{&graph, t0, t1, 1};
|
|
|
|
|
|
2025-05-26 15:37:35 +02:00
|
|
|
constexpr uint32_t threads = 1;
|
|
|
|
|
constexpr uint32_t hierThreads = 2;
|
|
|
|
|
PackThreads packer{threads, hierThreads,
|
2025-05-08 12:45:10 +02:00
|
|
|
3, // Sandbag numerator
|
|
|
|
|
10}; // Sandbag denom
|
|
|
|
|
|
|
|
|
|
const std::vector<ThreadSchedule> scheduled = packer.pack(graph);
|
|
|
|
|
UASSERT_SELFTEST(size_t, scheduled.size(), 2);
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(size_t, scheduled[0].m_threads.size(), hierThreads / 2);
|
|
|
|
|
UASSERT_SELFTEST(size_t, scheduled[0].m_threads[0].size(), 1);
|
|
|
|
|
for (size_t i = 1; i < scheduled[0].m_threads.size(); ++i)
|
|
|
|
|
UASSERT_SELFTEST(size_t, scheduled[0].m_threads[i].size(), 0);
|
2025-05-08 12:45:10 +02:00
|
|
|
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].m_threads[0][0], t0);
|
2025-05-08 12:45:10 +02:00
|
|
|
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(size_t, scheduled[1].m_threads.size(), threads);
|
|
|
|
|
UASSERT_SELFTEST(size_t, scheduled[1].m_threads[0].size(), 1);
|
|
|
|
|
for (size_t i = 1; i < scheduled[1].m_threads.size(); ++i)
|
|
|
|
|
UASSERT_SELFTEST(size_t, scheduled[1].m_threads[i].size(), 0);
|
|
|
|
|
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].m_threads[0][0], t1);
|
2025-05-08 12:45:10 +02:00
|
|
|
|
2025-10-28 01:49:41 +01:00
|
|
|
UASSERT_SELFTEST(size_t, ThreadSchedule::s_mtaskState.size(), 2);
|
2025-05-08 12:45:10 +02:00
|
|
|
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t0), 0);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t1), 0);
|
|
|
|
|
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t0, 0), 1000);
|
|
|
|
|
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 0), 1100);
|
|
|
|
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 1), 1130);
|
2024-03-23 23:12:43 +01:00
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
for (V3GraphVertex& vtx : graph.vertices()) vtx.as<ExecMTask>()->funcp()->deleteTree();
|
|
|
|
|
VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp);
|
2025-10-28 01:49:41 +01:00
|
|
|
ThreadSchedule::s_mtaskState.clear();
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
// Pack the given mtask graph into thread schedules using a
// default-constructed PackThreads, and return the resulting schedules.
static std::vector<ThreadSchedule> apply(V3Graph& mtaskGraph) {
    PackThreads packer{};
    return packer.pack(mtaskGraph);
}
|
2024-03-10 16:58:58 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
using EstimateAndProfiled = std::pair<uint64_t, uint64_t>; // cost est, cost profiled
|
|
|
|
|
using Costs = std::unordered_map<uint32_t, EstimateAndProfiled>;
|
|
|
|
|
|
|
|
|
|
void normalizeCosts(Costs& costs) {
|
|
|
|
|
const auto scaleCost = [](uint64_t value, double multiplier) {
|
|
|
|
|
double scaled = static_cast<double>(value) * multiplier;
|
|
|
|
|
if (value && scaled < 1) scaled = 1;
|
|
|
|
|
return static_cast<uint64_t>(scaled);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// For all costs with a profile, compute sum
|
|
|
|
|
uint64_t sumCostProfiled = 0; // For data with estimate and profile
|
|
|
|
|
uint64_t sumCostEstimate = 0; // For data with estimate and profile
|
|
|
|
|
for (const auto& est : costs) {
|
|
|
|
|
if (est.second.second) {
|
|
|
|
|
sumCostEstimate += est.second.first;
|
|
|
|
|
sumCostProfiled += est.second.second;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (sumCostEstimate) {
|
|
|
|
|
// For data where we don't have profiled data, compute how much to
|
|
|
|
|
// scale up/down the estimate to make on same relative scale as
|
|
|
|
|
// profiled data. (Improves results if only a few profiles missing.)
|
|
|
|
|
const double estToProfile
|
|
|
|
|
= static_cast<double>(sumCostProfiled) / static_cast<double>(sumCostEstimate);
|
2025-05-23 02:29:32 +02:00
|
|
|
UINFO(5, "Estimated data needs scaling by " << estToProfile
|
|
|
|
|
<< ", sumCostProfiled=" << sumCostProfiled
|
|
|
|
|
<< " sumCostEstimate=" << sumCostEstimate);
|
2024-03-10 16:58:58 +01:00
|
|
|
for (auto& est : costs) {
|
|
|
|
|
uint64_t& costEstimate = est.second.first;
|
|
|
|
|
costEstimate = scaleCost(costEstimate, estToProfile);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// COSTS can overflow a uint32. Using maximum value of costs, scale all down
|
|
|
|
|
uint64_t maxCost = 0;
|
|
|
|
|
for (auto& est : costs) {
|
|
|
|
|
const uint64_t& costEstimate = est.second.first;
|
|
|
|
|
const uint64_t& costProfiled = est.second.second;
|
|
|
|
|
if (maxCost < costEstimate) maxCost = costEstimate;
|
|
|
|
|
if (maxCost < costProfiled) maxCost = costProfiled;
|
2025-05-23 02:29:32 +02:00
|
|
|
UINFO(9, "Post uint scale: ce = " << est.second.first << " cp=" << est.second.second);
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
const uint64_t scaleDownTo = 10000000; // Extra room for future algorithms to add costs
|
|
|
|
|
if (maxCost > scaleDownTo) {
|
|
|
|
|
const double scaleup = static_cast<double>(scaleDownTo) / static_cast<double>(maxCost);
|
2025-05-23 02:29:32 +02:00
|
|
|
UINFO(5, "Scaling data to within 32-bits by multiply by=" << scaleup
|
|
|
|
|
<< ", maxCost=" << maxCost);
|
2024-03-10 16:58:58 +01:00
|
|
|
for (auto& est : costs) {
|
|
|
|
|
est.second.first = scaleCost(est.second.first, scaleup);
|
|
|
|
|
est.second.second = scaleCost(est.second.second, scaleup);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
// Remove every mtask whose function has no statements: its edges are
// rerouted, then the function and the vertex are deleted. Afterwards,
// redundant edges are removed from the graph.
void removeEmptyMTasks(V3Graph* execMTaskGraphp) {
    // Iterate via unlinkable() as vertices are deleted inside the loop
    for (V3GraphVertex* const vtxp : execMTaskGraphp->vertices().unlinkable()) {
        ExecMTask* const candidatep = vtxp->as<ExecMTask>();
        AstCFunc* const bodyFuncp = candidatep->funcp();
        if (!bodyFuncp->stmtsp()) {
            UINFO(6, "Removing empty MTask " << candidatep->name());
            // Redirect edges
            candidatep->rerouteEdges(execMTaskGraphp);
            // Delete the MTask function
            VL_DO_DANGLING(bodyFuncp->unlinkFrBack()->deleteTree(), bodyFuncp);
            // Delete the MTask vertex
            VL_DO_DANGLING(candidatep->unlinkDelete(execMTaskGraphp), candidatep);
        }
    }
    // Remove redundant dependencies
    execMTaskGraphp->removeRedundantEdgesMax(&V3GraphEdge::followAlwaysTrue);
}
|
|
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
// Assign a cost and an initial priority to every mtask in the graph.
//
// For each mtask an instruction-count estimate is computed and any profile
// data is looked up; the costs are then normalized via normalizeCosts().
// The profiled cost is used when present (non-zero), otherwise the estimate.
// If some mtasks with a non-zero estimate had no profile data while mtask
// profile data was supplied, a PROFOUTOFDATE warning is issued.
void fillinCosts(V3Graph* execMTaskGraphp) {
    // Pass 1: See what profiling data applies
    Costs costs;  // For each mtask, costs

    for (V3GraphVertex& vtx : execMTaskGraphp->vertices()) {
        ExecMTask* const mtp = vtx.as<ExecMTask>();
        // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits
        const uint64_t costEstimate = V3InstrCount::count(mtp->funcp(), false);
        const uint64_t costProfiled
            = V3Control::getProfileData(v3Global.opt.prefix(), mtp->hashName());
        if (costProfiled) {
            UINFO(5, "Profile data for mtask " << mtp->id() << " " << mtp->hashName()
                                               << " cost override " << costProfiled);
        }
        costs[mtp->id()] = std::make_pair(costEstimate, costProfiled);
    }

    normalizeCosts(costs /*ref*/);

    // Pass 2: Apply the normalized costs to the mtasks
    int totalEstimates = 0;
    int missingProfiles = 0;
    for (V3GraphVertex& vtx : execMTaskGraphp->vertices()) {
        ExecMTask* const mtp = vtx.as<ExecMTask>();
        const EstimateAndProfiled& cost = costs[mtp->id()];
        // Keep both values 64 bits wide until after the range checks, so the
        // checks see the real value rather than one already wrapped to 32 bits
        const uint64_t costEstimate = cost.first;
        const uint64_t costProfiled = cost.second;
        UINFO(9, "ce = " << costEstimate << " cp=" << costProfiled);
        UASSERT(costEstimate <= (1UL << 31), "cost scaling math would overflow uint32");
        UASSERT(costProfiled <= (1UL << 31), "cost scaling math would overflow uint32");
        // Safe to narrow now that both are known to fit
        const uint32_t costEstimate32 = static_cast<uint32_t>(costEstimate);
        const uint32_t costProfiled32 = static_cast<uint32_t>(costProfiled);
        uint32_t costToUse = costProfiled32;
        if (!costProfiled32) {
            // No profile data for this mtask, fall back to the estimate
            costToUse = costEstimate32;
            if (costEstimate32 != 0) ++missingProfiles;
        }
        if (costEstimate32 != 0) ++totalEstimates;
        mtp->cost(costToUse);
        mtp->priority(costToUse);
    }

    if (missingProfiles) {
        if (FileLine* const fl = V3Control::getProfileDataFileLine()) {
            if (V3Control::containsMTaskProfileData()) {
                fl->v3warn(PROFOUTOFDATE, "Profile data for mtasks may be out of date. "
                                              << missingProfiles << " of " << totalEstimates
                                              << " mtasks had no data");
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
// Finalize mtask priorities, prune transitive edges, and record summary
// statistics for the final mtask graph.
void finalizeCosts(V3Graph* execMTaskGraphp) {
    // Visit the mtasks in reverse dependency order, raising each mtask's
    // priority to (its own cost + the priority of a downstream mtask)
    // whenever that is larger. "Priority" thus becomes the critical path
    // from the start of the mtask to the end of the graph reachable from
    // it. Given a choice among several ready mtasks, the highest priority
    // one should start first, so we are always working on the "long pole".
    GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE);
    for (const V3GraphVertex* vxp = ser.nextp(); vxp; vxp = ser.nextp()) {
        ExecMTask* const mtp = const_cast<V3GraphVertex*>(vxp)->as<ExecMTask>();
        for (V3GraphEdge& edge : mtp->outEdges()) {
            const ExecMTask* const followp = edge.top()->as<ExecMTask>();
            const auto throughFollow = followp->priority() + mtp->cost();
            if (throughFollow > mtp->priority()) mtp->priority(throughFollow);
        }
    }

    // Removing tasks may have turned formerly non-transitive edges into
    // transitive ones, and edges created around removed tasks could be
    // transitive as well. Prune out all transitive edges.
    execMTaskGraphp->removeTransitiveEdges();

    // Record summary stats for final m_tasks graph.
    const auto report = execMTaskGraphp->parallelismReport(
        [](const V3GraphVertex* vertexp) { return vertexp->as<const ExecMTask>()->cost(); });
    V3Stats::addStat("MTask graph, final, critical path cost", report.criticalPathCost());
    V3Stats::addStat("MTask graph, final, total graph cost", report.totalGraphCost());
    V3Stats::addStat("MTask graph, final, mtask count", report.vertexCount());
    V3Stats::addStat("MTask graph, final, edge count", report.edgeCount());
    V3Stats::addStat("MTask graph, final, parallelism factor", report.parallelismFactor());

    // The textual report is only dumped at debug level 3 and above
    if (debug() < 3) return;
    UINFO(0, "\n");
    UINFO(0, " Final mtask parallelism report:");
    UINFO(0, " Critical path cost = " << report.criticalPathCost());
    UINFO(0, " Total graph cost = " << report.totalGraphCost());
    UINFO(0, " MTask vertex count = " << report.vertexCount());
    UINFO(0, " Edge count = " << report.edgeCount());
    UINFO(0, " Parallelism factor = " << report.parallelismFactor());
}
|
|
|
|
|
|
|
|
|
|
void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, AstCFunc* funcp,
|
|
|
|
|
const ExecMTask* mtaskp) {
|
2025-11-03 07:32:03 +01:00
|
|
|
AstScope* const scopep = v3Global.rootp()->topScopep()->scopep();
|
2024-03-10 16:58:58 +01:00
|
|
|
AstNodeModule* const modp = v3Global.rootp()->topModulep();
|
|
|
|
|
FileLine* const fl = modp->fileline();
|
|
|
|
|
|
|
|
|
|
// Helper function to make the code a bit more legible
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generaged expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Use by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
const auto addCStmt = [=](const string& stmt) -> void { //
|
2024-03-10 16:58:58 +01:00
|
|
|
funcp->addStmtsp(new AstCStmt{fl, stmt});
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (const uint32_t nDependencies = schedule.crossThreadDependencies(mtaskp)) {
|
|
|
|
|
// This mtask has dependencies executed on another thread, so it may block. Create the task
|
|
|
|
|
// state variable and wait to be notified.
|
|
|
|
|
const string name = "__Vm_mtaskstate_" + cvtToStr(mtaskp->id());
|
2025-10-28 01:49:41 +01:00
|
|
|
AstBasicDType* const s_mtaskStateDtypep
|
2024-03-10 16:58:58 +01:00
|
|
|
= v3Global.rootp()->typeTablep()->findBasicDType(fl, VBasicDTypeKwd::MTASKSTATE);
|
2025-10-28 01:49:41 +01:00
|
|
|
AstVar* const varp = new AstVar{fl, VVarType::MODULETEMP, name, s_mtaskStateDtypep};
|
2024-03-10 16:58:58 +01:00
|
|
|
varp->valuep(new AstConst{fl, nDependencies});
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generaged expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Use by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
varp->protect(false); // Do not protect as we have references in text
|
2024-03-10 16:58:58 +01:00
|
|
|
modp->addStmtsp(varp);
|
|
|
|
|
// For now, reference is still via text bashing
|
2025-04-10 16:03:58 +02:00
|
|
|
if (v3Global.opt.profExec()) {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generaged expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Use by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitBegin();");
|
2025-04-10 16:03:58 +02:00
|
|
|
}
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generaged expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Use by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("vlSelf->" + name + +".waitUntilUpstreamDone(even_cycle);");
|
2025-04-10 16:03:58 +02:00
|
|
|
if (v3Global.opt.profExec()) {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generaged expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Use by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitEnd();");
|
2025-04-10 16:03:58 +02:00
|
|
|
}
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (v3Global.opt.profPgo()) {
|
|
|
|
|
// No lock around startCounter, as counter numbers are unique per thread
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generaged expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Use by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->id()) + ");");
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
// Call the MTask function
|
|
|
|
|
AstCCall* const callp = new AstCCall{fl, mtaskp->funcp()};
|
|
|
|
|
callp->selfPointer(VSelfPointerText{VSelfPointerText::VlSyms{}, scopep->nameDotless()});
|
|
|
|
|
callp->dtypeSetVoid();
|
|
|
|
|
funcp->addStmtsp(callp->makeStmt());
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
if (v3Global.opt.profPgo()) {
|
|
|
|
|
// No lock around stopCounter, as counter numbers are unique per thread
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("vlSymsp->_vm_pgoProfiler.stopCounter(" + std::to_string(mtaskp->id()) + ");");
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// For any dependent mtask that's on another thread, signal one dependency completion.
|
2024-03-26 00:06:25 +01:00
|
|
|
for (const V3GraphEdge& edge : mtaskp->outEdges()) {
|
|
|
|
|
const ExecMTask* const nextp = edge.top()->as<ExecMTask>();
|
2025-03-24 23:39:29 +01:00
|
|
|
if (schedule.threadId(nextp) != threadId && schedule.contains(nextp)) {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("vlSelf->__Vm_mtaskstate_" + cvtToStr(nextp->id())
|
|
|
|
|
+ ".signalUpstreamDone(even_cycle);");
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule,
|
|
|
|
|
const string& tag) {
|
|
|
|
|
AstNodeModule* const modp = v3Global.rootp()->topModulep();
|
|
|
|
|
FileLine* const fl = modp->fileline();
|
|
|
|
|
|
|
|
|
|
std::vector<AstCFunc*> funcps;
|
|
|
|
|
|
|
|
|
|
// For each thread, create a function representing its entry point
|
2025-10-28 01:49:41 +01:00
|
|
|
for (const std::vector<const ExecMTask*>& thread : schedule.m_threads) {
|
2024-03-10 16:58:58 +01:00
|
|
|
if (thread.empty()) continue;
|
|
|
|
|
const uint32_t threadId = schedule.threadId(thread.front());
|
2025-05-05 11:25:39 +02:00
|
|
|
const string name{"__Vthread__" + tag + "__s" + cvtToStr(schedule.id()) + "__t"
|
|
|
|
|
+ cvtToStr(threadId)};
|
2024-03-10 16:58:58 +01:00
|
|
|
AstCFunc* const funcp = new AstCFunc{fl, name, nullptr, "void"};
|
|
|
|
|
modp->addStmtsp(funcp);
|
|
|
|
|
funcps.push_back(funcp);
|
|
|
|
|
funcp->isStatic(true); // Uses void self pointer, so static and hand rolled
|
|
|
|
|
funcp->isLoose(true);
|
|
|
|
|
funcp->entryPoint(true);
|
|
|
|
|
funcp->argTypes("void* voidSelf, bool even_cycle");
|
|
|
|
|
|
2025-02-25 18:44:14 +01:00
|
|
|
// Setup vlSelf and vlSyms
|
2025-08-26 04:05:40 +02:00
|
|
|
funcp->addStmtsp(new AstCStmt{fl, EmitCUtil::voidSelfAssign(modp)});
|
|
|
|
|
funcp->addStmtsp(new AstCStmt{fl, EmitCUtil::symClassAssign()});
|
2024-03-10 16:58:58 +01:00
|
|
|
|
|
|
|
|
// Invoke each mtask scheduled to this thread from the thread function
|
|
|
|
|
for (const ExecMTask* const mtaskp : thread) {
|
|
|
|
|
addMTaskToFunction(schedule, threadId, funcp, mtaskp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Unblock the fake "final" mtask when this thread is finished
|
2025-03-24 23:39:29 +01:00
|
|
|
funcp->addStmtsp(new AstCStmt{fl, "vlSelf->__Vm_mtaskstate_final__"
|
|
|
|
|
+ cvtToStr(schedule.id()) + tag
|
2025-09-26 14:25:47 +02:00
|
|
|
+ ".signalUpstreamDone(even_cycle);"});
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create the fake "final" mtask state variable
|
2025-10-28 01:49:41 +01:00
|
|
|
AstBasicDType* const s_mtaskStateDtypep
|
2024-03-10 16:58:58 +01:00
|
|
|
= v3Global.rootp()->typeTablep()->findBasicDType(fl, VBasicDTypeKwd::MTASKSTATE);
|
2025-10-28 01:49:41 +01:00
|
|
|
AstVar* const varp = new AstVar{fl, VVarType::MODULETEMP,
|
|
|
|
|
"__Vm_mtaskstate_final__" + cvtToStr(schedule.id()) + tag,
|
|
|
|
|
s_mtaskStateDtypep};
|
2024-03-10 16:58:58 +01:00
|
|
|
varp->valuep(new AstConst(fl, funcps.size()));
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generaged expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Use by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
varp->protect(false); // Do not protect as we have references in text
|
2024-03-10 16:58:58 +01:00
|
|
|
modp->addStmtsp(varp);
|
|
|
|
|
|
|
|
|
|
return funcps;
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
// Append the statements that open an exec graph invocation to 'execGraphp':
// an optional exec-profiling begin record, the toggle of the graph's
// even/odd cycle flag, and (when hierarchical blocks are in use) the
// declaration of the 'indexes' vector.
void addThreadStartWrapper(AstExecGraph* const execGraphp) {
    FileLine* const flp = v3Global.rootp()->fileline();
    // Textual reference to this graph's even/odd cycle flag; the graph name is the tag
    const string evenCycleFlag = "vlSymsp->__Vm_even_cycle__" + execGraphp->name();

    // Helper: append a raw C++ statement to the exec graph
    const auto emit = [execGraphp, flp](const string& text) {
        execGraphp->addStmtsp(new AstCStmt{flp, text});
    };

    if (v3Global.opt.profExec()) emit("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).execGraphBegin();");

    // Flip the cycle flag on every invocation
    emit(evenCycleFlag + " = !" + evenCycleFlag + ";");

    if (!v3Global.opt.hierBlocks().empty()) emit("std::vector<size_t> indexes;");
}
|
|
|
|
|
|
|
|
|
|
// Append the statements that close an exec graph invocation to 'execGraphp':
// reset the current mtask id to 0, then, if exec profiling is enabled,
// add the exec graph end record.
void addThreadEndWrapper(AstExecGraph* const execGraphp) {
    FileLine* const flp = v3Global.rootp()->fileline();

    // Helper: append a raw C++ statement to the exec graph
    const auto emit = [execGraphp, flp](const string& text) {
        execGraphp->addStmtsp(new AstCStmt{flp, text});
    };

    emit("Verilated::mtaskId(0);");
    if (v3Global.opt.profExec()) emit("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).execGraphEnd();");
}
|
|
|
|
|
void addThreadStartToExecGraph(AstExecGraph* const execGraphp,
|
|
|
|
|
const std::vector<AstCFunc*>& funcps, uint32_t scheduleId) {
|
|
|
|
|
// FileLine used for constructing nodes below
|
|
|
|
|
FileLine* const fl = v3Global.rootp()->fileline();
|
|
|
|
|
const string& tag = execGraphp->name();
|
|
|
|
|
|
|
|
|
|
// Add thread function invocations to execGraph
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
const auto addCStmt = [=](const string& stmt) -> void { //
|
2025-03-24 23:39:29 +01:00
|
|
|
execGraphp->addStmtsp(new AstCStmt{fl, stmt});
|
|
|
|
|
};
|
|
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
const uint32_t last = funcps.size() - 1;
|
2025-03-24 23:39:29 +01:00
|
|
|
if (!v3Global.opt.hierBlocks().empty() && last > 0) {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("for (size_t i = 0; i < " + std::to_string(last) + "; ++i) {\n" //
|
|
|
|
|
+ "indexes.push_back(vlSymsp->__Vm_threadPoolp->assignWorkerIndex());\n" //
|
|
|
|
|
+ "}");
|
2025-03-24 23:39:29 +01:00
|
|
|
}
|
|
|
|
|
uint32_t i = 0;
|
|
|
|
|
for (AstCFunc* const funcp : funcps) {
|
2024-03-10 16:58:58 +01:00
|
|
|
if (i != last) {
|
|
|
|
|
// The first N-1 will run on the thread pool.
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
AstCStmt* const cstmtp = new AstCStmt{fl};
|
|
|
|
|
execGraphp->addStmtsp(cstmtp);
|
|
|
|
|
cstmtp->add("vlSymsp->__Vm_threadPoolp->workerp(");
|
2025-03-24 23:39:29 +01:00
|
|
|
if (v3Global.opt.hierChild() || !v3Global.opt.hierBlocks().empty()) {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
cstmtp->add("indexes[" + std::to_string(i) + "]");
|
2025-03-24 23:39:29 +01:00
|
|
|
} else {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
cstmtp->add(std::to_string(i));
|
2025-03-24 23:39:29 +01:00
|
|
|
}
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally, it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
cstmtp->add(")->addTask(");
|
|
|
|
|
cstmtp->add(new AstAddrOfCFunc{fl, funcp});
|
|
|
|
|
cstmtp->add(", vlSelf, vlSymsp->__Vm_even_cycle__" + tag + ");");
|
2024-03-10 16:58:58 +01:00
|
|
|
} else {
|
|
|
|
|
// The last will run on the main thread.
|
|
|
|
|
AstCCall* const callp = new AstCCall{fl, funcp};
|
|
|
|
|
callp->dtypeSetVoid();
|
|
|
|
|
callp->argTypes("vlSelf, vlSymsp->__Vm_even_cycle__" + tag);
|
|
|
|
|
execGraphp->addStmtsp(callp->makeStmt());
|
|
|
|
|
}
|
2025-03-24 23:39:29 +01:00
|
|
|
++i;
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
2025-03-24 23:39:29 +01:00
|
|
|
V3Stats::addStatSum("Optimizations, Thread schedule total tasks", i);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-04-10 16:03:58 +02:00
|
|
|
if (v3Global.opt.profExec()) {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally; it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitBegin();");
|
2025-04-10 16:03:58 +02:00
|
|
|
}
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally; it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("vlSelf->__Vm_mtaskstate_final__" + std::to_string(scheduleId) + tag
|
|
|
|
|
+ ".waitUntilUpstreamDone(vlSymsp->__Vm_even_cycle__" + tag + ");");
|
2025-04-10 16:03:58 +02:00
|
|
|
if (v3Global.opt.profExec()) {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally; it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).threadScheduleWaitEnd();");
|
2025-04-10 16:03:58 +02:00
|
|
|
}
|
2025-03-24 23:39:29 +01:00
|
|
|
// Free all assigned worker indices in this section
|
|
|
|
|
if (!v3Global.opt.hierBlocks().empty() && last > 0) {
|
Internals: Refactor text based Ast constructs (#6280) (#6571)
Remove the large variety of ways raw "text" is represented in the Ast.
Particularly, the only thing that represents a string to be emitted in
the output is AstText.
There are 5 AstNodes that can contain AstText, and V3Emit will throw an
error if an AstText is encountered anywhere else:
- AstCStmt: Internally generated procedural statements involving raw
text.
- AstCStmtUser: This is the old AstUCStmt, renamed so it sorts next to
AstCStmt, as it's largely equivalent. We should never create this
internally unless used to represent user input. It is used for $c,
statements in the input, and for some 'systemc_* blocks.
- AstCExpr: Internally generated expression involving raw text.
- AstCExprUser: This is the old AstUCFunc, renamed so it sorts next to
AstCExpr. It is largely equivalent, but also has more optimizations
disabled. This should never be created internally; it is only used for
$c expressions in the input.
- AstTextBlock: Used by V3ProtectLib only, to generate the hierarchical
wrappers.
Text "tracking" for indentation is always on for AstCStmt, AstCExpr, and
AstTextBlock, as these are always generated by us, and should always be
well formed.
Tracking is always off for AstCStmtUser and AstCExprUser, as these
contain arbitrary user input that might not be safe to parse for
indentation.
Remove subsequently redundant AstNodeSimpleText and AstNodeText types.
This patch also fixes incorrect indentation in emitted waveform tracing
functions, and makes the output more readable for hier block SV stubs.
With that, all raw text nodes are handled as a proper AstNodeStmt or
AstNodeExpr as required for #6280.
2025-10-21 13:41:29 +02:00
|
|
|
addCStmt("vlSymsp->__Vm_threadPoolp->freeWorkerIndexes(indexes);");
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
// Surround the body of every MTask function in the dependency graph of
// 'execGraphp' with raw C++ statements: an optional profiling 'mtaskBegin'
// record, the 'Verilated::mtaskId' call, then (after the original body) the
// 'Verilated::endOfThreadMTask' call and an optional profiling 'mtaskEnd'
// record. The profiling records are only added when profExec is enabled.
void processMTaskBodies(AstExecGraph* const execGraphp) {
    const bool profiling = v3Global.opt.profExec();

    for (V3GraphVertex* const vertexp : execGraphp->depGraphp()->vertices().unlinkable()) {
        ExecMTask* const taskp = vertexp->as<ExecMTask>();
        AstCFunc* const cfuncp = taskp->funcp();
        const std::string idStr = std::to_string(taskp->id());

        // Appends one raw text statement at the current end of the function
        const auto emit = [&](const std::string& text) -> void {
            cfuncp->addStmtsp(new AstCStmt{execGraphp->fileline(), text});
        };

        // Detach the existing body for now, so the prologue statements
        // emitted below end up in front of it
        AstNode* const bodyp = cfuncp->stmtsp()->unlinkFrBackWithNext();

        // Prologue: profiling record for the start of the mtask
        if (profiling) {
            // The quoted name argument is only filled in for hierarchical children
            const std::string hierName
                = v3Global.opt.hierChild() ? v3Global.opt.topModule() : std::string{};
            emit("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(" + idStr + ", "
                 + std::to_string(taskp->predictStart()) + ", \"" + hierName + "\");");
        }

        // Prologue: tell the run-time system which mtask is executing
        emit("Verilated::mtaskId(" + idStr + ");");

        // Re-attach the original body after the prologue
        cfuncp->addStmtsp(bodyp);

        // Epilogue: flush the message queue
        emit("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);");

        // Epilogue: profiling record for the end of the mtask
        if (profiling) {
            emit("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + std::to_string(taskp->cost())
                 + ");");
        }
    }
}
|
|
|
|
|
|
2024-03-16 13:17:24 +01:00
|
|
|
// Implement one thread schedule of the given ExecGraph: create the per-thread
// functions for 'schedule' and add the statements that start them to
// 'execGraphp'. Does nothing when the dependency graph holds no MTasks.
void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& schedule) {
    const bool hasMTasks = !execGraphp->depGraphp()->empty();
    if (hasMTasks) {
        // One function per thread of the schedule
        const std::vector<AstCFunc*>& threadFuncps
            = createThreadFunctions(schedule, execGraphp->name());
        UASSERT(!threadFuncps.empty(), "Non-empty ExecGraph yields no threads?");

        // Kick off the thread functions from where this AstExecGraph sits in the tree
        addThreadStartToExecGraph(execGraphp, threadFuncps, schedule.id());
    }
}
|
|
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
// Called by Verilator top stage
|
2024-03-10 16:58:58 +01:00
|
|
|
void implement(AstNetlist* netlistp) {
|
2025-11-03 07:32:03 +01:00
|
|
|
// Gather all ExecGraphs
|
|
|
|
|
std::vector<AstExecGraph*> execGraphps;
|
|
|
|
|
netlistp->topModulep()->foreach([&](AstExecGraph* egp) { execGraphps.emplace_back(egp); });
|
|
|
|
|
|
|
|
|
|
// Process each
|
|
|
|
|
for (AstExecGraph* const execGraphp : execGraphps) {
|
|
|
|
|
// We can delete the placeholder calls to the MTask functions that
|
|
|
|
|
// were used for code analysis until now. We will replace them with
|
|
|
|
|
// statements that dispatch execution to the thread pool.
|
|
|
|
|
if (execGraphp->stmtsp()) execGraphp->stmtsp()->unlinkFrBackWithNext()->deleteTree();
|
|
|
|
|
|
|
|
|
|
// Some MTasks may have become empty after scheduling due to
|
|
|
|
|
// optimizations after scheduling. Remove those.
|
|
|
|
|
removeEmptyMTasks(execGraphp->depGraphp());
|
|
|
|
|
|
|
|
|
|
// In some very small test cases, we might end up with a completely
|
|
|
|
|
// empty ExecGraph, if so just delete it.
|
|
|
|
|
if (execGraphp->depGraphp()->empty()) {
|
|
|
|
|
VL_DO_DANGLING(execGraphp->unlinkFrBack()->deleteTree(), execGraphp);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-10 16:58:58 +01:00
|
|
|
// Back in V3Order, we partitioned mtasks using provisional cost
|
|
|
|
|
// estimates. However, V3Order precedes some optimizations (notably
|
|
|
|
|
// V3LifePost) that can change the cost of logic within each mtask.
|
|
|
|
|
// Now that logic is final, recompute the cost and priority of each
|
|
|
|
|
// ExecMTask.
|
|
|
|
|
fillinCosts(execGraphp->depGraphp());
|
|
|
|
|
finalizeCosts(execGraphp->depGraphp());
|
|
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
if (dumpGraphLevel() >= 4) execGraphp->depGraphp()->dumpDotFilePrefixedAlways("pack");
|
|
|
|
|
|
|
|
|
|
addThreadStartWrapper(execGraphp);
|
|
|
|
|
|
2024-03-16 13:17:24 +01:00
|
|
|
// Schedule the mtasks: statically associate each mtask with a thread,
|
2025-02-25 18:44:14 +01:00
|
|
|
// and determine the order in which each thread will run its mtasks.
|
2025-03-24 23:39:29 +01:00
|
|
|
const std::vector<ThreadSchedule> packed = PackThreads::apply(*execGraphp->depGraphp());
|
|
|
|
|
V3Stats::addStatSum("Optimizations, Thread schedule count",
|
|
|
|
|
static_cast<double>(packed.size()));
|
2024-03-16 13:17:24 +01:00
|
|
|
|
2025-11-03 07:32:03 +01:00
|
|
|
// Process MTask function bodies to add additional code
|
|
|
|
|
processMTaskBodies(execGraphp);
|
2024-03-10 16:58:58 +01:00
|
|
|
|
2025-03-24 23:39:29 +01:00
|
|
|
for (const ThreadSchedule& schedule : packed) {
|
|
|
|
|
// Replace the graph body with its multi-threaded implementation.
|
|
|
|
|
implementExecGraph(execGraphp, schedule);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
addThreadEndWrapper(execGraphp);
|
2025-11-03 07:32:03 +01:00
|
|
|
}
|
2024-03-10 16:58:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void selfTest() {
|
|
|
|
|
{ // Test that omitted profile data correctly scales estimates
|
|
|
|
|
Costs costs({// id est prof
|
|
|
|
|
{1, {10, 1000}},
|
|
|
|
|
{2, {20, 0}}, // Note no profile
|
|
|
|
|
{3, {30, 3000}}});
|
|
|
|
|
normalizeCosts(costs);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[1].first, 1000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[1].second, 1000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[2].first, 2000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[2].second, 0);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[3].first, 3000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[3].second, 3000);
|
|
|
|
|
}
|
|
|
|
|
{ // Test that very large profile data properly scales
|
|
|
|
|
Costs costs({// id est prof
|
|
|
|
|
{1, {10, 100000000000}},
|
|
|
|
|
{2, {20, 200000000000}},
|
|
|
|
|
{3, {30, 1}}}); // Make sure doesn't underflow
|
|
|
|
|
normalizeCosts(costs);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[1].first, 2500000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[1].second, 5000000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[2].first, 5000000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[2].second, 10000000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[3].first, 7500000);
|
|
|
|
|
UASSERT_SELFTEST(uint64_t, costs[3].second, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
PackThreads::selfTest();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace V3ExecGraph
|