From 5b84635bde5d281bdfd144f2efd62a47104c5ec2 Mon Sep 17 00:00:00 2001 From: Yangyu Chen Date: Wed, 28 Jan 2026 00:05:28 +0800 Subject: [PATCH] Add VerilatedContext::useNumaAssign and set on threads() call (#6954) --- include/verilated.cpp | 3 ++ include/verilated.h | 9 ++++ include/verilated_threads.cpp | 5 +- include/verilated_threads.h | 2 +- .../t/t_gantt_numa_default_threads.cpp | 53 +++++++++++++++++++ .../t/t_gantt_numa_default_threads.py | 51 ++++++++++++++++++ 6 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 test_regress/t/t_gantt_numa_default_threads.cpp create mode 100755 test_regress/t/t_gantt_numa_default_threads.py diff --git a/include/verilated.cpp b/include/verilated.cpp index 9cb8f222b..9fb1ba055 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -2816,6 +2816,7 @@ void VerilatedContext::threads(unsigned n) { "%Error: Cannot set simulation threads after the thread pool has been created."); } + m_useNumaAssign = true; if (m_threads == n) return; // To avoid unnecessary warnings m_threads = n; const unsigned threadsAvailableToProcess = VlOs::getProcessDefaultParallelism(); @@ -2826,6 +2827,8 @@ void VerilatedContext::threads(unsigned n) { } } +void VerilatedContext::useNumaAssign(bool flag) { m_useNumaAssign = flag; } + void VerilatedContext::commandArgs(int argc, const char** argv) VL_MT_SAFE_EXCLUDES(m_argMutex) { // Not locking m_argMutex here, it is done in impp()->commandArgsAddGuts // m_argMutex here is the same as in impp()->commandArgsAddGuts; diff --git a/include/verilated.h b/include/verilated.h index a8e795058..9dee17abf 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -433,6 +433,8 @@ protected: const std::unique_ptr m_impdatap; // Number of threads to use for simulation (size of m_threadPool + 1 for main thread) unsigned m_threads = VlOs::getProcessDefaultParallelism(); + // Use numa automatic CPU-to-thread assignment + bool m_useNumaAssign = false; // Number of threads in added models 
unsigned m_threadsInModels = 0; // The thread pool shared by all models added to this context @@ -599,6 +601,13 @@ public: /// Can only be called before the thread pool is created (before first model is added). void threads(unsigned n); + /// Return true if numa automatic CPU-to-thread assignment is requested. + bool useNumaAssign() const VL_MT_SAFE { return m_useNumaAssign; } + /// Set numa assignment of threads to cores + /// Defaults false; set true automatically when threads() called; + /// call this to override back to false if numa assignment not wanted. + void useNumaAssign(bool flag); + /// Trace signals in models within the context; called by application code void trace(VerilatedTraceBaseC* tfp, int levels, int options = 0); /// Allow traces to at some point be enabled (disables some optimizations) diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index bb0c1513d..d814b585c 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -137,7 +137,7 @@ VlThreadPool::VlThreadPool(VerilatedContext* contextp, unsigned nThreads) { m_workers.push_back(new VlWorkerThread{contextp}); m_unassignedWorkers.push(i); } - m_numaStatus = numaAssign(); + m_numaStatus = numaAssign(contextp); } VlThreadPool::~VlThreadPool() { @@ -145,8 +145,9 @@ VlThreadPool::~VlThreadPool() { for (auto& i : m_workers) delete i; } -std::string VlThreadPool::numaAssign() { +std::string VlThreadPool::numaAssign(VerilatedContext* contextp) { #if defined(__linux) || defined(CPU_ZERO) || defined(VL_CPPCHECK) // Linux-like pthreads + if (contextp && !contextp->useNumaAssign()) { return "NUMA assignment not requested"; } std::string numa_strategy = VlOs::getenvStr("VERILATOR_NUMA_STRATEGY", "default"); if (numa_strategy == "none") { return "no NUMA assignment requested"; diff --git a/include/verilated_threads.h b/include/verilated_threads.h index 44e6f9d3c..9ac23392b 100644 --- a/include/verilated_threads.h +++ b/include/verilated_threads.h @@ -254,7 +254,7 @@ public: private: 
VL_UNCOPYABLE(VlThreadPool); - std::string numaAssign(); + std::string numaAssign(VerilatedContext* contextp); }; #endif diff --git a/test_regress/t/t_gantt_numa_default_threads.cpp b/test_regress/t/t_gantt_numa_default_threads.cpp new file mode 100644 index 000000000..88140ff82 --- /dev/null +++ b/test_regress/t/t_gantt_numa_default_threads.cpp @@ -0,0 +1,53 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// +// Copyright 2026 by Wilson Snyder. This program is free software; you can +// redistribute it and/or modify it under the terms of either the GNU +// Lesser General Public License Version 3 or the Perl Artistic License +// Version 2.0. +// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +// +//************************************************************************* + +// Generated header +#include "Vt_gantt_numa_default_threads.h" +// General headers +#include "verilated.h" + +#include "TestCheck.h" + +int errors = 0; + +std::unique_ptr<VM_PREFIX> topp; + +int main(int argc, char** argv) { + vluint64_t sim_time = 1100; + const std::unique_ptr<VerilatedContext> contextp{new VerilatedContext}; + contextp->debug(0); + contextp->commandArgs(argc, argv); + srand48(5); + TEST_CHECK_EQ(contextp->useNumaAssign(), false); + contextp->threads(3); + TEST_CHECK_EQ(contextp->useNumaAssign(), true); + contextp->useNumaAssign(false); + TEST_CHECK_EQ(contextp->useNumaAssign(), false); + topp.reset(new VM_PREFIX{"top"}); + + topp->clk = 0; + topp->eval(); + { contextp->timeInc(10); } + + while ((contextp->time() < sim_time) && !contextp->gotFinish()) { + topp->eval(); + topp->clk = !topp->clk; + topp->eval(); + contextp->timeInc(5); + } + if (!contextp->gotFinish()) { + vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); + } + topp->final(); + + topp.reset(); + return (errors ? 
10 : 0); +} diff --git a/test_regress/t/t_gantt_numa_default_threads.py b/test_regress/t/t_gantt_numa_default_threads.py new file mode 100755 index 000000000..494172677 --- /dev/null +++ b/test_regress/t/t_gantt_numa_default_threads.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2026 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import os +import sys +import vltest_bootstrap + +test.scenarios('vltmt') + +test.top_filename = "t/t_gantt.v" +test.pli_filename = "t/t_gantt_numa_default_threads.cpp" + +# Skip on hosts with too few cores for the multithreaded run +# (the C++ harness calls contextp->threads(3) before creating the model) +test.skip_if_too_few_cores() + +test.compile( + make_main=False, + verilator_flags2=[ + "--prof-exec", + "--exe", + test.pli_filename, + test.t_dir + "/t_gantt_c.cpp", + ], + threads=test.get_default_vltmt_threads, +) + +test.execute(all_run_flags=[ + "+verilator+prof+exec+start+2", + " +verilator+prof+exec+window+2", + " +verilator+prof+exec+file+" + test.obj_dir + "/profile_exec.dat", +]) + +gantt_log = test.obj_dir + "/gantt_default_threads.log" +test.run(cmd=[ + os.environ["VERILATOR_ROOT"] + "/bin/verilator_gantt", + "--no-vcd", + test.obj_dir + "/profile_exec.dat", + "| tee " + gantt_log, +]) + +if sys.platform != "darwin": + test.file_grep(gantt_log, r"NUMA status += NUMA assignment not requested") + +test.passes()