From 5b84635bde5d281bdfd144f2efd62a47104c5ec2 Mon Sep 17 00:00:00 2001
From: Yangyu Chen <cyy@cyyself.name>
Date: Wed, 28 Jan 2026 00:05:28 +0800
Subject: [PATCH] Add VerilatedContext::useNumaAssign and set on threads() call
 (#6954)

---
 include/verilated.cpp                         |  3 ++
 include/verilated.h                           |  9 ++++
 include/verilated_threads.cpp                 |  5 +-
 include/verilated_threads.h                   |  2 +-
 .../t/t_gantt_numa_default_threads.cpp        | 53 +++++++++++++++++++
 .../t/t_gantt_numa_default_threads.py         | 51 ++++++++++++++++++
 6 files changed, 120 insertions(+), 3 deletions(-)
 create mode 100644 test_regress/t/t_gantt_numa_default_threads.cpp
 create mode 100755 test_regress/t/t_gantt_numa_default_threads.py

diff --git a/include/verilated.cpp b/include/verilated.cpp
index 9cb8f222b..9fb1ba055 100644
--- a/include/verilated.cpp
+++ b/include/verilated.cpp
@@ -2816,6 +2816,7 @@ void VerilatedContext::threads(unsigned n) {
             "%Error: Cannot set simulation threads after the thread pool has been created.");
     }
 
+    m_useNumaAssign = true;
     if (m_threads == n) return;  // To avoid unnecessary warnings
     m_threads = n;
     const unsigned threadsAvailableToProcess = VlOs::getProcessDefaultParallelism();
@@ -2826,6 +2827,8 @@ void VerilatedContext::threads(unsigned n) {
     }
 }
 
+void VerilatedContext::useNumaAssign(bool flag) { m_useNumaAssign = flag; }
+
 void VerilatedContext::commandArgs(int argc, const char** argv) VL_MT_SAFE_EXCLUDES(m_argMutex) {
     // Not locking m_argMutex here, it is done in impp()->commandArgsAddGuts
     // m_argMutex here is the same as in impp()->commandArgsAddGuts;
diff --git a/include/verilated.h b/include/verilated.h
index a8e795058..9dee17abf 100644
--- a/include/verilated.h
+++ b/include/verilated.h
@@ -433,6 +433,8 @@ protected:
     const std::unique_ptr<VerilatedContextImpData> m_impdatap;
     // Number of threads to use for simulation (size of m_threadPool + 1 for main thread)
     unsigned m_threads = VlOs::getProcessDefaultParallelism();
+    // Use NUMA automatic CPU-to-thread assignment when the thread pool is created
+    bool m_useNumaAssign = false;
     // Number of threads in added models
     unsigned m_threadsInModels = 0;
     // The thread pool shared by all models added to this context
@@ -599,6 +601,13 @@ public:
     /// Can only be called before the thread pool is created (before first model is added).
     void threads(unsigned n);
 
+    /// Return true if automatic NUMA CPU-to-thread assignment is enabled.
+    bool useNumaAssign() const VL_MT_SAFE { return m_useNumaAssign; }
+    /// Enable or disable automatic NUMA assignment of threads to cores.
+    /// Defaults to false; every threads() call sets it true, so call this
+    /// after threads() to override back to false if NUMA assignment is not wanted.
+    void useNumaAssign(bool flag);
+
     /// Trace signals in models within the context; called by application code
     void trace(VerilatedTraceBaseC* tfp, int levels, int options = 0);
     /// Allow traces to at some point be enabled (disables some optimizations)
diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp
index bb0c1513d..d814b585c 100644
--- a/include/verilated_threads.cpp
+++ b/include/verilated_threads.cpp
@@ -137,7 +137,7 @@ VlThreadPool::VlThreadPool(VerilatedContext* contextp, unsigned nThreads) {
         m_workers.push_back(new VlWorkerThread{contextp});
         m_unassignedWorkers.push(i);
     }
-    m_numaStatus = numaAssign();
+    m_numaStatus = numaAssign(contextp);
 }
 
 VlThreadPool::~VlThreadPool() {
@@ -145,8 +145,9 @@ VlThreadPool::~VlThreadPool() {
     for (auto& i : m_workers) delete i;
 }
 
-std::string VlThreadPool::numaAssign() {
+std::string VlThreadPool::numaAssign(VerilatedContext* contextp) {
 #if defined(__linux) || defined(CPU_ZERO) || defined(VL_CPPCHECK)  // Linux-like pthreads
+    if (contextp && !contextp->useNumaAssign()) { return "NUMA assignment not requested"; }
     std::string numa_strategy = VlOs::getenvStr("VERILATOR_NUMA_STRATEGY", "default");
     if (numa_strategy == "none") {
         return "no NUMA assignment requested";
diff --git a/include/verilated_threads.h b/include/verilated_threads.h
index 44e6f9d3c..9ac23392b 100644
--- a/include/verilated_threads.h
+++ b/include/verilated_threads.h
@@ -254,7 +254,7 @@ public:
 private:
     VL_UNCOPYABLE(VlThreadPool);
 
-    std::string numaAssign();
+    std::string numaAssign(VerilatedContext* contextp);
 };
 
 #endif
diff --git a/test_regress/t/t_gantt_numa_default_threads.cpp b/test_regress/t/t_gantt_numa_default_threads.cpp
new file mode 100644
index 000000000..88140ff82
--- /dev/null
+++ b/test_regress/t/t_gantt_numa_default_threads.cpp
@@ -0,0 +1,53 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+//
+// Copyright 2026 by Wilson Snyder. This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+//
+//*************************************************************************
+
+// Generated header
+#include "Vt_gantt_numa_default_threads.h"
+// General headers
+#include "verilated.h"
+
+#include "TestCheck.h"
+
+int errors = 0;
+
+std::unique_ptr<Vt_gantt_numa_default_threads> topp;
+
+int main(int argc, char** argv) {
+    const uint64_t sim_time = 1100;  // Simulation time limit before declaring a timeout
+    const std::unique_ptr<VerilatedContext> contextp{new VerilatedContext};
+    contextp->debug(0);
+    contextp->commandArgs(argc, argv);
+    srand48(5);
+    TEST_CHECK_EQ(contextp->useNumaAssign(), false);  // Off until threads() is called
+    contextp->threads(3);  // Also switches NUMA assignment on
+    TEST_CHECK_EQ(contextp->useNumaAssign(), true);
+    contextp->useNumaAssign(false);  // Override back off before the model is created
+    TEST_CHECK_EQ(contextp->useNumaAssign(), false);
+    topp.reset(new VM_PREFIX{contextp.get(), "top"});  // Bind model to the context set up above
+
+    topp->clk = 0;
+    topp->eval();
+    contextp->timeInc(10);
+
+    while ((contextp->time() < sim_time) && !contextp->gotFinish()) {
+        topp->eval();
+        topp->clk = !topp->clk;
+        topp->eval();
+        contextp->timeInc(5);
+    }
+    if (!contextp->gotFinish()) {
+        vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish");
+    }
+    topp->final();
+
+    topp.reset();  // Destroy the model before contextp goes out of scope
+    return (errors ? 10 : 0);
+}
diff --git a/test_regress/t/t_gantt_numa_default_threads.py b/test_regress/t/t_gantt_numa_default_threads.py
new file mode 100755
index 000000000..494172677
--- /dev/null
+++ b/test_regress/t/t_gantt_numa_default_threads.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2026 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+import os
+import sys
+import vltest_bootstrap
+
+test.scenarios('vltmt')
+
+test.top_filename = "t/t_gantt.v"
+test.pli_filename = "t/t_gantt_numa_default_threads.cpp"
+
+# Require enough cores for the threaded model; the C++ main calls
+# contextp->threads(3) and then useNumaAssign(false) to turn NUMA assignment off
+test.skip_if_too_few_cores()
+
+test.compile(
+    make_main=False,
+    verilator_flags2=[
+        "--prof-exec",
+        "--exe",
+        test.pli_filename,
+        test.t_dir + "/t_gantt_c.cpp",
+    ],
+    threads=test.get_default_vltmt_threads,
+)
+
+test.execute(all_run_flags=[
+    "+verilator+prof+exec+start+2",
+    " +verilator+prof+exec+window+2",
+    " +verilator+prof+exec+file+" + test.obj_dir + "/profile_exec.dat",
+])
+
+gantt_log = test.obj_dir + "/gantt_default_threads.log"
+test.run(cmd=[
+    os.environ["VERILATOR_ROOT"] + "/bin/verilator_gantt",
+    "--no-vcd",
+    test.obj_dir + "/profile_exec.dat",
+    "| tee " + gantt_log,
+])
+
+if sys.platform != "darwin":
+    test.file_grep(gantt_log, r"NUMA status += NUMA assignment not requested")
+
+test.passes()