Set runtime worker thread stack sizes on macOS (#6721)

The default stack size of secondary threads on macOS is 512k, which is too small even to run some of the tests. Unfortunately, changing the stack size must happen via `pthread_create` attributes, which are not available via the C++ threading APIs. Use pthreads directly on macOS, and set the worker thread stack sizes to the same as the main thread stack.
2025-11-23 01:13:46 +00:00 · 2025-11-23 01:13:46 +00:00 · ea9cc0e4c1
parent 9632c614be
commit ea9cc0e4c1
2 changed files with 58 additions and 18 deletions
--- a/include/verilated_threads.cpp
+++ b/include/verilated_threads.cpp
@ -56,12 +56,38 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount)

 VlWorkerThread::VlWorkerThread(VerilatedContext* contextp)
    : m_ready_size{0}
-    , m_cthread{startWorker, this, contextp} {}
+    , m_contextp{contextp} {
+#ifdef VL_USE_PTHREADS
+    // Init attributes
+    pthread_attr_t attr;
+    pthread_attr_init(&attr);
+    // Attempt to use the same stack size as the current (main) thread if possible
+    const size_t stacksize = pthread_get_stacksize_np(pthread_self());
+    if (!stacksize || pthread_attr_setstacksize(&attr, stacksize)) {
+        // Fall back on default attributes if failed to get/set stack size
+        pthread_attr_destroy(&attr);
+        pthread_attr_init(&attr);
+    }
+    // Create thread
+    if (pthread_create(&m_pthread, &attr, &VlWorkerThread::start, this)) {
+        std::cerr << "pthread_create failed" << std::endl;
+        std::abort();
+    }
+    // Destroy attributes
+    pthread_attr_destroy(&attr);
+#else
+    m_cthread = std::thread(start, this);
+#endif
+}

 VlWorkerThread::~VlWorkerThread() {
    shutdown();
    // The thread should exit; join it.
+#ifdef VL_USE_PTHREADS
+    pthread_join(m_pthread, nullptr);
+#else
    m_cthread.join();
+#endif
 }

 static void shutdownTask(void*, bool) {  // LCOV_EXCL_LINE
@ -83,23 +109,24 @@ void VlWorkerThread::wait() {
    while (!flag.load()) std::this_thread::yield();
 }

-void VlWorkerThread::workerLoop() {
+void VlWorkerThread::main() {
+    // Initialize thread_locals
+    Verilated::threadContextp(m_contextp);
+    // One work item
    ExecRec work;
-
    // Wait for the first task without spinning, in case the thread is never actually used.
    dequeWork</* SpinWait: */ false>(&work);
-
-    while (true) {
-        if (VL_UNLIKELY(work.m_fnp == shutdownTask)) break;
+    // Loop until shutdown task is received; a regular task is the expected (hot) case
+    while (VL_LIKELY(work.m_fnp != shutdownTask)) {
        work.m_fnp(work.m_selfp, work.m_evenCycle);
        // Wait for next task with spinning.
        dequeWork</* SpinWait: */ true>(&work);
    }
 }

-void VlWorkerThread::startWorker(VlWorkerThread* workerp, VerilatedContext* contextp) {
-    Verilated::threadContextp(contextp);
-    workerp->workerLoop();
+void* VlWorkerThread::start(void* argp) {
+    reinterpret_cast<VlWorkerThread*>(argp)->main();
+    return nullptr;
 }

 //=============================================================================
--- a/include/verilated_threads.h
+++ b/include/verilated_threads.h
@ -34,6 +34,15 @@
 #include <thread>
 #include <vector>

+// Use pthreads directly on macOS (could do this on Linux too if needing APIs unavailable via C++)
+#if defined(_POSIX_THREADS) && defined(__APPLE__)
+#define VL_USE_PTHREADS
+#endif
+
+#ifdef VL_USE_PTHREADS
+#include <pthread.h>
+#endif
+
 class VlExecutionProfiler;
 class VlThreadPool;

@ -117,7 +126,8 @@ public:
 };

 class VlWorkerThread final {
-private:
+    friend class VlThreadPool;
+
    // TYPES
    struct ExecRec final {
        VlExecFnp m_fnp = nullptr;  // Function to execute
@ -142,15 +152,21 @@ private:
    std::vector<ExecRec> m_ready VL_GUARDED_BY(m_mutex);
    // Store the size atomically, so we can spin wait
    std::atomic<size_t> m_ready_size;
+    // Thread context
+    VerilatedContext* const m_contextp;
+    // Underlying thread record
+#ifdef VL_USE_PTHREADS
+    pthread_t m_pthread{};
+#else
+    std::thread m_cthread{};
+#endif

-    std::thread m_cthread;  // Underlying C++ thread record
+    // METHODS
+    static void* start(void*);  // Static entry point, invokes 'main'
+    void main();  // 'main' loop of thread

    VL_UNCOPYABLE(VlWorkerThread);

-protected:
-    friend class VlThreadPool;
-    const std::thread& cthread() const { return m_cthread; }
-
 public:
    // CONSTRUCTORS
    explicit VlWorkerThread(VerilatedContext* contextp);
@ -192,9 +208,6 @@ public:

    void shutdown();  // Finish current tasks, then terminate thread
    void wait();  // Blocks calling thread until all tasks complete in this thread
-
-    void workerLoop();
-    static void startWorker(VlWorkerThread* workerp, VerilatedContext* contextp);
 };

 class VlThreadPool final : public VerilatedVirtualBase {