Set runtime worker thread stack sizes on macOS (#6721)

The default stack size of secondary threads on macOS is 512k, which is too small even to run some of the tests. Unfortunately, changing the thread stack size must happen via `pthread_create` attributes, which are not available via the C++ threading APIs. Use pthreads directly on macOS, and set the worker thread stack sizes to the same as the main thread stack.
2025-11-23 01:13:46 +00:00 · 2025-11-23 01:13:46 +00:00 · ea9cc0e4c1
parent 9632c614be
commit ea9cc0e4c1
2 changed files with 58 additions and 18 deletions
--- a/include/verilated_threads.cpp
+++ b/include/verilated_threads.cpp
@ -56,12 +56,38 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount)
 // Construct a worker and immediately launch its underlying thread, which
 // enters VlWorkerThread::start with this object as its argument.
 VlWorkerThread::VlWorkerThread(VerilatedContext* contextp)
    : m_ready_size{0}
-    , m_cthread{startWorker, this, contextp} {}
+    , m_contextp{contextp} {
 #ifdef VL_USE_PTHREADS
    // pthreads are used directly here because the stack size can only be
    // requested through the attributes passed to pthread_create.
    // Init attributes
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    // Attempt to use the same stack size as the current (main) thread if possible
    const size_t stacksize = pthread_get_stacksize_np(pthread_self());
    // A zero size or a non-zero result from pthread_attr_setstacksize is treated as failure
    if (!stacksize || pthread_attr_setstacksize(&attr, stacksize)) {
        // Fall back on default attributes if failed to get/set stack size
        pthread_attr_destroy(&attr);
        pthread_attr_init(&attr);
    }
    // Create thread
    if (pthread_create(&m_pthread, &attr, &VlWorkerThread::start, this)) {
        // Failure to create the worker thread is treated as fatal
        std::cerr << "pthread_create failed" << std::endl;
        std::abort();
    }
    // Destroy attributes
    pthread_attr_destroy(&attr);
 #else
    // Without VL_USE_PTHREADS, launch via the C++ thread API using the same static entry point
    m_cthread = std::thread(start, this);
 #endif
 }
 // Ask the worker to shut down, then block until its underlying thread has finished.
 VlWorkerThread::~VlWorkerThread() {
    shutdown();
    // The thread should exit; join it.
 #ifdef VL_USE_PTHREADS
    // The thread's return value is not collected (nullptr result pointer)
    pthread_join(m_pthread, nullptr);
 #else
    m_cthread.join();
 #endif
 }
 static void shutdownTask(void*, bool) {  // LCOV_EXCL_LINE
@ -83,23 +109,24 @@ void VlWorkerThread::wait() {
    while (!flag.load()) std::this_thread::yield();
 }
-void VlWorkerThread::workerLoop() {
+void VlWorkerThread::main() {
    // Body of the worker thread: binds the thread to its context, then
    // executes dequeued work items until the shutdown task arrives.
    // Initialize thread_locals
    Verilated::threadContextp(m_contextp);
    // One work item
    ExecRec work;
    // Wait for the first task without spinning, in case the thread is never actually used.
    dequeWork</* SpinWait: */ false>(&work);
-
+    // Loop until shutdown task is received
    // NOTE(review): VL_UNLIKELY wraps a condition that holds for every
    // non-shutdown task - confirm the hint direction is intended.
-    while (true) {
+    while (VL_UNLIKELY(work.m_fnp != shutdownTask)) {
        // NOTE(review): this check duplicates the new loop condition; it looks like a
        // removed line whose '-' marker was lost in this rendering - confirm upstream.
        if (VL_UNLIKELY(work.m_fnp == shutdownTask)) break;
        // Run the task with the arguments recorded alongside it
        work.m_fnp(work.m_selfp, work.m_evenCycle);
        // Wait for next task with spinning.
        dequeWork</* SpinWait: */ true>(&work);
    }
 }
-void VlWorkerThread::startWorker(VlWorkerThread* workerp, VerilatedContext* contextp) {
+void* VlWorkerThread::start(void* argp) {
    // Static thread entry point handed to pthread_create / std::thread by the
    // constructor; 'argp' is the VlWorkerThread instance ('this') to run.
-    Verilated::threadContextp(contextp);
+    reinterpret_cast<VlWorkerThread*>(argp)->main();
-    workerp->workerLoop();
    // No result is reported back to the joining thread
+    return nullptr;
 }
 //=============================================================================
--- a/include/verilated_threads.h
+++ b/include/verilated_threads.h
@ -34,6 +34,15 @@
 #include <thread>
 #include <vector>
 // Use pthreads directly on macOS (could do this on Linux too if needing APIs unavailable via C++)
 // When VL_USE_PTHREADS is defined, VlWorkerThread holds a pthread_t instead of a std::thread.
 #if defined(_POSIX_THREADS) && defined(__APPLE__)
 #define VL_USE_PTHREADS
 #endif
 // Only pull in the pthread API when it is actually used
 #ifdef VL_USE_PTHREADS
 #include <pthread.h>
 #endif
 class VlExecutionProfiler;
 class VlThreadPool;
@ -117,7 +126,8 @@ public:
 };
 class VlWorkerThread final {
-private:
+    friend class VlThreadPool;
    // TYPES
    struct ExecRec final {
        VlExecFnp m_fnp = nullptr;  // Function to execute
@ -142,15 +152,21 @@ private:
    std::vector<ExecRec> m_ready VL_GUARDED_BY(m_mutex);
    // Store the size atomically, so we can spin wait
    std::atomic<size_t> m_ready_size;
    // Thread context
    VerilatedContext* const m_contextp;
    // Underlying thread record
 #ifdef VL_USE_PTHREADS
    pthread_t m_pthread{};
 #else
    std::thread m_cthread{};
 #endif
-    std::thread m_cthread;  // Underlying C++ thread record
+    // METHODS
    static void* start(void*);  // Static entry point, invokes 'main'
    void main();  // 'main' loop of thread
    VL_UNCOPYABLE(VlWorkerThread);
 protected:
    friend class VlThreadPool;
    const std::thread& cthread() const { return m_cthread; }
 public:
    // CONSTRUCTORS
    explicit VlWorkerThread(VerilatedContext* contextp);
@ -192,9 +208,6 @@ public:
    void shutdown();  // Finish current tasks, then terminate thread
    void wait();  // Blocks calling thread until all tasks complete in this thread
    void workerLoop();
    static void startWorker(VlWorkerThread* workerp, VerilatedContext* contextp);
 };
 class VlThreadPool final : public VerilatedVirtualBase {