diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst
index 22ee03181..10dfcd611 100644
--- a/docs/guide/exe_verilator.rst
+++ b/docs/guide/exe_verilator.rst
@@ -168,10 +168,11 @@ Summary:
 
 .. option:: --build-jobs <value>
 
-   Specify the level of parallelism for :vlopt:`--build`. If zero, uses the
-   number of threads in the current hardware. Otherwise, the <value> must
-   be a positive integer specifying the maximum number of parallel build
-   jobs.
+   Specify the level of parallelism for :vlopt:`--build`.  If zero, uses the
+   number of threads available to the process, which is the number of threads
+   assigned by processor affinity (e.g. using ``numactl``), or the number of
+   threads in the host hardware if unspecified.  Otherwise, the <value> must be
+   a positive integer specifying the maximum number of parallel build jobs.
 
    If not provided, and :vlopt:`-j` is provided, the :vlopt:`-j` value is
    used.
@@ -881,9 +882,10 @@ Summary:
    of Verilator if :vlopt:`--verilate-jobs` isn't provided. Also sets
    :vlopt:`--output-groups` if isn't provided.
 
-   If zero, uses the number of threads in the current hardware. Otherwise,
-   must be a positive integer specifying the maximum number of parallel
-   build jobs.
+   If zero, uses the number of threads available to the process, which is the
+   number of threads assigned by processor affinity (e.g. using ``numactl``), or
+   the number of threads in the host hardware if unspecified.  Otherwise, must
+   be a positive integer specifying the maximum number of parallel build jobs.
 
 .. option:: --no-json-edit-nums
 
@@ -1831,7 +1833,9 @@ Summary:
 .. option:: --verilate-jobs <value>
 
    Specify the level of parallelism for the internal compilation steps of
-   Verilator. If zero, uses the number of threads in the current hardware.
+   Verilator.  If zero, uses the number of threads available to the process,
+   which is the number of threads assigned by processor affinity (e.g. using
+   ``numactl``), or the number of threads in the host hardware if unspecified.
    Otherwise, must be a positive integer specifying the maximum number of
    parallel build jobs.
 
diff --git a/include/verilated.cpp b/include/verilated.cpp
index d2bf2d93c..185c01838 100644
--- a/include/verilated.cpp
+++ b/include/verilated.cpp
@@ -2807,11 +2807,11 @@ void VerilatedContext::threads(unsigned n) {
 
     if (m_threads == n) return;  // To avoid unnecessary warnings
     m_threads = n;
-    const unsigned hardwareThreadsAvailable = std::thread::hardware_concurrency();
-    if (m_threads > hardwareThreadsAvailable) {
-        VL_PRINTF_MT("%%Warning: System has %u hardware threads but simulation thread count set "
-                     "to %u. This will likely cause significant slowdown.\n",
-                     hardwareThreadsAvailable, m_threads);
+    const unsigned threadsAvailableToProcess = VlOs::getProcessDefaultParallelism();
+    if (m_threads > threadsAvailableToProcess) {
+        VL_PRINTF_MT("%%Warning: Process has %u hardware threads available, but simulation thread "
+                     "count set to %u. This will likely cause significant slowdown.\n",
+                     threadsAvailableToProcess, m_threads);
     }
 }
 
diff --git a/include/verilated.h b/include/verilated.h
index 294ab94a1..fd784b48f 100644
--- a/include/verilated.h
+++ b/include/verilated.h
@@ -451,7 +451,7 @@ protected:
     // Implementation details
     const std::unique_ptr<VerilatedContextImpData> m_impdatap;
     // Number of threads to use for simulation (size of m_threadPool + 1 for main thread)
-    unsigned m_threads = std::thread::hardware_concurrency();
+    unsigned m_threads = VlOs::getProcessDefaultParallelism();
     // Number of threads in added models
     unsigned m_threadsInModels = 0;
     // The thread pool shared by all models added to this context
diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp
index acb8a11eb..e6ffa7e3f 100644
--- a/include/verilated_threads.cpp
+++ b/include/verilated_threads.cpp
@@ -118,30 +118,18 @@ VlThreadPool::~VlThreadPool() {
     for (auto& i : m_workers) delete i;
 }
 
-bool VlThreadPool::isNumactlRunning() {
-    // We assume if current thread is CPU-masked, then under numactl, otherwise not.
-    // This shows that numactl is visible through the affinity mask
-#if defined(__linux) || defined(CPU_ZERO)  // Linux-like; assume we have pthreads etc
-    const unsigned num_cpus = std::thread::hardware_concurrency();
-    cpu_set_t cpuset;
-    CPU_ZERO(&cpuset);
-    const int rc = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
-    if (rc != 0) return true;  // Error; assuming returning true is the least-damage option
-    for (unsigned c = 0; c < std::min(num_cpus, static_cast<unsigned>(CPU_SETSIZE)); ++c) {
-        if (!CPU_ISSET(c, &cpuset)) return true;
-    }
-#endif
-    return false;
-}
-
 std::string VlThreadPool::numaAssign() {
 #if defined(__linux) || defined(CPU_ZERO) || defined(VL_CPPCHECK)  // Linux-like pthreads
-    // If not under numactl, make a reasonable processor affinity selection
-    if (isNumactlRunning()) return "running under numactl";  // User presumably set affinity
+    // Get number of processors available to the current process
+    const unsigned num_proc = VlOs::getProcessAvailableParallelism();
+    if (!num_proc) return "Can't determine number of available threads";
+    // If fewer than hardware threads in the host, user presumably set affinity
+    if (num_proc < std::thread::hardware_concurrency()) return "processor affinity already set";
+
+    // Make a reasonable processor affinity selection
     const int num_threads = static_cast<int>(m_workers.size());
-    const int num_proc = static_cast<int>(std::thread::hardware_concurrency());
     if (num_threads < 2) return "too few threads";
-    if (num_threads > num_proc) return "too many threads";
+    if (static_cast<unsigned>(num_threads) > num_proc) return "too many threads";
 
     // Read CPU info.
     // Uncertain if any modern system has gaps in the processor id (Solaris
diff --git a/include/verilated_threads.h b/include/verilated_threads.h
index 643ebcf0b..f39015971 100644
--- a/include/verilated_threads.h
+++ b/include/verilated_threads.h
@@ -241,8 +241,6 @@ public:
 private:
     VL_UNCOPYABLE(VlThreadPool);
 
-    // cppcheck-suppress unusedPrivateFunction
-    static bool isNumactlRunning();
     std::string numaAssign();
 };
 
diff --git a/include/verilatedos.h b/include/verilatedos.h
index 2049eb0cf..7fd03f1e8 100644
--- a/include/verilatedos.h
+++ b/include/verilatedos.h
@@ -643,6 +643,16 @@ extern std::string getenvStr(const std::string& envvar,
 /// Return currently executing processor number; may do an OS call underneath so slow
 extern uint16_t getcpu() VL_MT_SAFE;
 
+/// Return number of processors available to the current process. This might be
+/// less than the number of logical processors in the machine, if a processor
+/// affinity mask was used, e.g. via 'numactl -C 0-3'. Returns 0 if cannot
+/// be determined.
+extern unsigned getProcessAvailableParallelism() VL_MT_SAFE;
+
+/// Return getProcessAvailableParallelism if non-zero, otherwise the number of
+/// hardware threads in the host machine.
+extern unsigned getProcessDefaultParallelism() VL_MT_SAFE;
+
 /// Return memory usage in bytes, or 0 if unknown
 extern void memUsageBytes(uint64_t& peakr, uint64_t& currentr) VL_MT_SAFE;
 
diff --git a/include/verilatedos_c.h b/include/verilatedos_c.h
index 849419701..cf1588971 100644
--- a/include/verilatedos_c.h
+++ b/include/verilatedos_c.h
@@ -104,6 +104,34 @@ uint16_t getcpu() VL_MT_SAFE {
 #endif
 }
 
+//=============================================================================
+// VlOs::getProcessAvailableParallelism implementation
+
+unsigned getProcessAvailableParallelism() VL_MT_SAFE {
+#if defined(__linux) || defined(CPU_ZERO)  // Linux-like; assume we have pthreads etc
+    cpu_set_t cpuset;
+    CPU_ZERO(&cpuset);
+    const int rc = pthread_getaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+    if (rc == 0) {
+        unsigned nCpus = 0;
+        for (int i = 0; i < CPU_SETSIZE; ++i) {
+            if (CPU_ISSET(i, &cpuset)) ++nCpus;
+        }
+        return nCpus;
+    }
+#endif
+    // Cannot determine
+    return 0;
+}
+
+//=============================================================================
+// VlOs::getProcessDefaultParallelism implementation
+
+unsigned getProcessDefaultParallelism() VL_MT_SAFE {
+    const unsigned n = getProcessAvailableParallelism();
+    return n ? n : std::thread::hardware_concurrency();
+}
+
 //=========================================================================
 // VlOs::memPeakUsageBytes implementation
 
diff --git a/nodist/install_test b/nodist/install_test
index e345f9474..67c0fe68e 100755
--- a/nodist/install_test
+++ b/nodist/install_test
@@ -3,7 +3,6 @@
 ######################################################################
 
 import argparse
-import multiprocessing
 import os
 import shutil
 import subprocess
@@ -93,7 +92,7 @@ def cleanenv():
 
 
 def calc_jobs():
-    return multiprocessing.cpu_count() + 1
+    return len(os.sched_getaffinity(0)) + 1
 
 
 def run(command):
diff --git a/src/V3Options.cpp b/src/V3Options.cpp
index 46b1221c5..1b38796d0 100644
--- a/src/V3Options.cpp
+++ b/src/V3Options.cpp
@@ -1267,7 +1267,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
                                                                               << "' was passed");
             val = 1;
         } else if (val == 0) {
-            val = std::thread::hardware_concurrency();
+            val = VlOs::getProcessDefaultParallelism();
         }
         m_buildJobs = val;
     });
@@ -1781,7 +1781,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
                         << valp << "' was passed");
             val = 1;
         } else if (val == 0) {
-            val = std::thread::hardware_concurrency();
+            val = VlOs::getProcessDefaultParallelism();
         }
         m_verilateJobs = val;
     });
@@ -1932,7 +1932,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
             int val = 0;
             if (i < argc && std::isdigit(argv[i][0])) {
                 val = std::atoi(argv[i]);  // Can't be negative due to isdigit above
-                if (val == 0) val = std::thread::hardware_concurrency();
+                if (val == 0) val = VlOs::getProcessDefaultParallelism();
                 ++i;
             }
             if (m_buildJobs == -1) m_buildJobs = val;
diff --git a/test_regress/driver.py b/test_regress/driver.py
index 13f247e74..b7baec517 100755
--- a/test_regress/driver.py
+++ b/test_regress/driver.py
@@ -2763,7 +2763,7 @@ def _calc_hashset() -> list:
 
 @lru_cache(maxsize=1)
 def max_procs() -> int:
-    procs = multiprocessing.cpu_count()
+    procs = len(os.sched_getaffinity(0))
     if procs < 2:
         print("driver.py: Python didn't find at least two CPUs")
     return procs
diff --git a/test_regress/t/t_a7_hier_block_cmake.py b/test_regress/t/t_a7_hier_block_cmake.py
index 508e8b999..fac3ee154 100755
--- a/test_regress/t/t_a7_hier_block_cmake.py
+++ b/test_regress/t/t_a7_hier_block_cmake.py
@@ -8,7 +8,7 @@
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
 
 import vltest_bootstrap
-import multiprocessing
+import os
 
 # If a test fails, broken .cmake may disturb the next run
 test.clean_objs()
@@ -30,7 +30,7 @@ test.run(logfile=test.obj_dir + "/cmake.log",
 test.run(logfile=test.obj_dir + "/build.log",
          cmd=[
              'cd "' + test.obj_dir + '" && cmake --build', '.', ('-v' if test.verbose else ''),
-             '-j ' + str(multiprocessing.cpu_count()), '--', "CXX_FLAGS=" + str(threads)
+             '-j ' + str(len(os.sched_getaffinity(0))), '--', "CXX_FLAGS=" + str(threads)
          ])
 
 test.run(logfile=test.obj_dir + "/run.log",
diff --git a/test_regress/t/t_threads_crazy_context.py b/test_regress/t/t_threads_crazy_context.py
index 6ef9e273d..ae52d0316 100755
--- a/test_regress/t/t_threads_crazy_context.py
+++ b/test_regress/t/t_threads_crazy_context.py
@@ -19,7 +19,7 @@ test.execute()
 if test.vltmt:
     test.file_grep(
         test.run_log_filename,
-        r'System has \d+ hardware threads but simulation thread count set to 1024\. This will likely cause significant slowdown\.'
+        r'Process has \d+ hardware threads available, but simulation thread count set to 1024\. This will likely cause significant slowdown\.'
     )
 
 test.passes()