From 80088caa131498333b4379cdffc501c84e7deaa8 Mon Sep 17 00:00:00 2001 From: Yangyu Chen Date: Fri, 19 Dec 2025 16:50:01 +0800 Subject: [PATCH] VlThreadPool: Improve too many threads check logic As the thread pool is initialized with `m_threads - 1` workers, when `m_threads` equals `the number of processors`, `num_threads` will be equal to `the number of processors - 1`, in which case there is no need to set affinity. This also improves backward compatibility. As mentioned in issue #6826, when the new context API is not used, the default thread pool is created with the number of workers equal to `the number of processors - 1`. In this case, when using `--threads n` with `n` less than the number of processors, the thread pool is still created with `the number of processors - 1` workers, so we would even set the wrong affinity and let several SMT threads on the same core be used for emulator model threads, which degrades performance. With this fix, the affinity will not be set in this case. Signed-off-by: Yangyu Chen --- include/verilated_threads.cpp | 2 +- test_regress/t/t_gantt_numa.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index 96911eda6..cf503fda9 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -156,7 +156,7 @@ std::string VlThreadPool::numaAssign() { // Make a reasonable processor affinity selection const int num_threads = static_cast(m_workers.size()); if (num_threads < 2) return "too few threads"; - if (static_cast(num_threads) > num_proc) return "too many threads"; + if (static_cast(num_threads) >= num_proc - 1) return "too many threads"; // Read CPU info. 
// Uncertain if any modern system has gaps in the processor id (Solaris diff --git a/test_regress/t/t_gantt_numa.py b/test_regress/t/t_gantt_numa.py index a58c53bda..87f1f1f10 100755 --- a/test_regress/t/t_gantt_numa.py +++ b/test_regress/t/t_gantt_numa.py @@ -41,7 +41,8 @@ for trial in range(0, trials): if sys.platform != "darwin": test.file_grep(gantt_log, r'CPU info:') - test.file_grep(gantt_log, r'NUMA status += (assigned|%Warning: no /proc/cpuinfo)') + test.file_grep(gantt_log, + r'NUMA status += (assigned|too many threads|%Warning: no /proc/cpuinfo)') # False fails occasionally # test.file_grep_not(gantt_log, r'%Warning:') # e.g. There were fewer CPUs (1) than threads (3).