Set runtime worker thread stack sizes on macOS (#6721)

The default stack size of secondary threads on macOS is 512k, which is
too small even to run some of the tests. Unfortunately, changing the
stack size must happen via `pthread_create` attributes, which are not
available via the C++ threading APIs. Use pthreads directly on macOS,
and set the worker thread stack sizes to the same as the main thread stack.
This commit is contained in:
Geza Lore 2025-11-23 01:13:46 +00:00 committed by GitHub
parent 9632c614be
commit ea9cc0e4c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 58 additions and 18 deletions

View File

@ -56,12 +56,38 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount)
VlWorkerThread::VlWorkerThread(VerilatedContext* contextp) VlWorkerThread::VlWorkerThread(VerilatedContext* contextp)
: m_ready_size{0} : m_ready_size{0}
, m_cthread{startWorker, this, contextp} {} , m_contextp{contextp} {
#ifdef VL_USE_PTHREADS
// Init attributes
pthread_attr_t attr;
pthread_attr_init(&attr);
// Attempt to use the same stack size as the current (main) thread if possible
const size_t stacksize = pthread_get_stacksize_np(pthread_self());
if (!stacksize || pthread_attr_setstacksize(&attr, stacksize)) {
// Fall back on default attributes if failed to get/set stack size
pthread_attr_destroy(&attr);
pthread_attr_init(&attr);
}
// Create thread
if (pthread_create(&m_pthread, &attr, &VlWorkerThread::start, this)) {
std::cerr << "pthread_create failed" << std::endl;
std::abort();
}
// Destroy attributes
pthread_attr_destroy(&attr);
#else
m_cthread = std::thread(start, this);
#endif
}
VlWorkerThread::~VlWorkerThread() { VlWorkerThread::~VlWorkerThread() {
shutdown(); shutdown();
// The thread should exit; join it. // The thread should exit; join it.
#ifdef VL_USE_PTHREADS
pthread_join(m_pthread, nullptr);
#else
m_cthread.join(); m_cthread.join();
#endif
} }
static void shutdownTask(void*, bool) { // LCOV_EXCL_LINE static void shutdownTask(void*, bool) { // LCOV_EXCL_LINE
@ -83,23 +109,24 @@ void VlWorkerThread::wait() {
while (!flag.load()) std::this_thread::yield(); while (!flag.load()) std::this_thread::yield();
} }
void VlWorkerThread::workerLoop() { void VlWorkerThread::main() {
// Initialize thread_locals
Verilated::threadContextp(m_contextp);
// One work item
ExecRec work; ExecRec work;
// Wait for the first task without spinning, in case the thread is never actually used. // Wait for the first task without spinning, in case the thread is never actually used.
dequeWork</* SpinWait: */ false>(&work); dequeWork</* SpinWait: */ false>(&work);
// Loop until shutdown task is received
while (true) { while (VL_UNLIKELY(work.m_fnp != shutdownTask)) {
if (VL_UNLIKELY(work.m_fnp == shutdownTask)) break;
work.m_fnp(work.m_selfp, work.m_evenCycle); work.m_fnp(work.m_selfp, work.m_evenCycle);
// Wait for next task with spinning. // Wait for next task with spinning.
dequeWork</* SpinWait: */ true>(&work); dequeWork</* SpinWait: */ true>(&work);
} }
} }
void VlWorkerThread::startWorker(VlWorkerThread* workerp, VerilatedContext* contextp) { void* VlWorkerThread::start(void* argp) {
Verilated::threadContextp(contextp); reinterpret_cast<VlWorkerThread*>(argp)->main();
workerp->workerLoop(); return nullptr;
} }
//============================================================================= //=============================================================================

View File

@ -34,6 +34,15 @@
#include <thread> #include <thread>
#include <vector> #include <vector>
// Use pthreads directly on macOS (could do this on Linux too if needing APIs unavailable via C++)
#if defined(_POSIX_THREADS) && defined(__APPLE__)
#define VL_USE_PTHREADS
#endif
#ifdef VL_USE_PTHREADS
#include <pthread.h>
#endif
class VlExecutionProfiler; class VlExecutionProfiler;
class VlThreadPool; class VlThreadPool;
@ -117,7 +126,8 @@ public:
}; };
class VlWorkerThread final { class VlWorkerThread final {
private: friend class VlThreadPool;
// TYPES // TYPES
struct ExecRec final { struct ExecRec final {
VlExecFnp m_fnp = nullptr; // Function to execute VlExecFnp m_fnp = nullptr; // Function to execute
@ -142,15 +152,21 @@ private:
std::vector<ExecRec> m_ready VL_GUARDED_BY(m_mutex); std::vector<ExecRec> m_ready VL_GUARDED_BY(m_mutex);
// Store the size atomically, so we can spin wait // Store the size atomically, so we can spin wait
std::atomic<size_t> m_ready_size; std::atomic<size_t> m_ready_size;
// Thread context
VerilatedContext* const m_contextp;
// Underlying thread record
#ifdef VL_USE_PTHREADS
pthread_t m_pthread{};
#else
std::thread m_cthread{};
#endif
std::thread m_cthread; // Underlying C++ thread record // METHODS
static void* start(void*); // Static entry point, invokes 'main'
void main(); // 'main' loop of thread
VL_UNCOPYABLE(VlWorkerThread); VL_UNCOPYABLE(VlWorkerThread);
protected:
friend class VlThreadPool;
const std::thread& cthread() const { return m_cthread; }
public: public:
// CONSTRUCTORS // CONSTRUCTORS
explicit VlWorkerThread(VerilatedContext* contextp); explicit VlWorkerThread(VerilatedContext* contextp);
@ -192,9 +208,6 @@ public:
void shutdown(); // Finish current tasks, then terminate thread void shutdown(); // Finish current tasks, then terminate thread
void wait(); // Blocks calling thread until all tasks complete in this thread void wait(); // Blocks calling thread until all tasks complete in this thread
void workerLoop();
static void startWorker(VlWorkerThread* workerp, VerilatedContext* contextp);
}; };
class VlThreadPool final : public VerilatedVirtualBase { class VlThreadPool final : public VerilatedVirtualBase {