diff --git a/bin/verilator b/bin/verilator index 7d27dad5d..f2c858558 100755 --- a/bin/verilator +++ b/bin/verilator @@ -284,33 +284,34 @@ detailed descriptions of these arguments. --bbox-unsup Blackbox unsupported language features --bin Override Verilator binary --build Build model executable/library after Verilation - -CFLAGS C++ compiler arguments for makefile --cc Create C++ output --cdc Clock domain crossing analysis + -CFLAGS C++ compiler arguments for makefile --clk Mark specified signal as clock - --make Generate scripts for specified build tool + --no-clk Prevent marking specified signal as clock --compiler Tune for specified C++ compiler --converge-limit Tune convergence settle time --coverage Enable all coverage --coverage-line Enable line coverage --coverage-max-width Maximum array depth for coverage --coverage-toggle Enable toggle coverage - --coverage-user Enable SVL user coverage --coverage-underscore Enable coverage of _signals + --coverage-user Enable SVL user coverage -D[=] Set preprocessor define --debug Enable debugging --debug-check Enable debugging assertions --no-debug-leak Disable leaking memory in --debug mode --debugi Enable debugging at a specified level --debugi- Enable debugging a source file at a level + --no-decoration Disable comments and symbol decorations --default-language Default language to parse +define+= Set preprocessor define --dpi-hdr-only Only produce the DPI header file --dump-defines Show preprocessor defines with -E --dump-tree Enable dumping .tree files + --dump-tree-addrids Use short identifiers instead of addresses --dump-treei Enable dumping .tree files at a level --dump-treei- Enable dumping .tree file at a source file at a level - --dump-tree-addrids Use short identifiers instead of addresses -E Preprocess, but do not compile --error-limit Abort after this number of errors --exe Link to create executable @@ -321,6 +322,7 @@ detailed descriptions of these arguments. 
--flatten Force inlining of all modules, tasks and functions -fno- Disable internal optimization stage -G= Overwrite top-level parameter + --gate-stmts Tune gate optimizer depth --gdb Run Verilator under GDB interactively --gdbbt Run Verilator under GDB for backtrace --generate-key Create random key for --protect-key @@ -328,53 +330,51 @@ detailed descriptions of these arguments. --help Display this help --hierarchical Enable hierarchical Verilation -I Directory to search for includes - -j Parallelism for --build - --gate-stmts Tune gate optimizer depth --if-depth Tune IFDEPTH warning +incdir+ Directory to search for includes --inline-mult Tune module inlining --instr-count-dpi Assumed dynamic instruction count of DPI imports - -LDFLAGS Linker pre-object arguments for makefile + -j Parallelism for --build --l2-name Verilog scope name of the top module --language Default language standard to parse + -LDFLAGS Linker pre-object arguments for makefile --lib-create Create a DPI library +libext++[ext]... Extensions for finding modules --lint-only Lint, but do not make output + --make Generate scripts for specified build tool -MAKEFLAGS Arguments to pass to make during --build --max-num-width Maximum number width (default: 64K) - --MMD Create .d dependency files - --MP Create phony dependency targets --Mdir Name of output object directory + --MMD Create .d dependency files --mod-prefix Name to prepend to lower classes - --no-clk Prevent marking specified signal as clock - --no-decoration Disable comments and symbol decorations - --no-pins64 Don't use uint64_t's for 33-64 bit sigs - --no-skip-identical Disable skipping identical output + --MP Create phony dependency targets +notimingchecks Ignored -O0 Disable optimizations -O3 High performance optimizations -O Selectable optimizations -o Name of final executable + --no-order-clock-delay Disable ordering clock enable assignments --no-verilate Skip verilation and just compile previously Verilated code. 
--output-split Split .cpp files into pieces --output-split-cfuncs Split model functions --output-split-ctrace Split tracing functions -P Disable line numbers and blanks with -E --pins-bv Specify types for top level ports - --pins-sc-uint Specify types for top level ports --pins-sc-biguint Specify types for top level ports + --pins-sc-uint Specify types for top level ports --pins-uint8 Specify types for top level ports + --no-pins64 Don't use uint64_t's for 33-64 bit sigs --pipe-filter Filter all input through a script --pp-comments Show preprocessor comments with -E --prefix Name of top level class + --private Debugging; see docs --prof-c Compile C++ code with profiling --prof-cfuncs Name functions for profiling --prof-exec Enable generating execution profile for gantt chart --prof-pgo Enable generating profiling data for PGO - --protect-key Key for symbol protection --protect-ids Hash identifier names for obscurity + --protect-key Key for symbol protection --protect-lib Create a DPI protected library - --private Debugging; see docs --public Debugging; see docs --public-flat-rw Mark all variables, etc as public_flat_rw -pvalue+= Overwrite toplevel parameter @@ -385,6 +385,7 @@ detailed descriptions of these arguments. --rr Run Verilator and record with rr --savable Enable model save-restore --sc Create SystemC output + --no-skip-identical Disable skipping identical output --stats Create statistics file --stats-vars Provide statistics on variables -sv Enable SystemVerilog parsing @@ -412,6 +413,7 @@ detailed descriptions of these arguments. --unused-regexp Tune UNUSED lint signals -V Verbose version and config -v Verilog library + --no-verilate Skip verilation and just compile previously Verilated code. +verilog1995ext+ Synonym for +1364-1995ext+ +verilog2001ext+ Synonym for +1364-2001ext+ --version Displays program version and exits @@ -426,6 +428,9 @@ detailed descriptions of these arguments. 
-Wno-lint Disable all lint warnings -Wno-style Disable all style warnings -Wpedantic Warn on compliance-test issues + -Wwarn- Enable specified warning message + -Wwarn-lint Enable lint warning message + -Wwarn-style Enable style warning message --x-assign Assign non-initial Xs to this value --x-initial Assign initial Xs to this value --x-initial-edge Enable initial X->0 and X->1 edge triggers diff --git a/configure.ac b/configure.ac index 1dc039fac..281dd64cd 100644 --- a/configure.ac +++ b/configure.ac @@ -355,7 +355,7 @@ AC_SUBST(CFG_CXXFLAGS_PROFILE) #_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++20) #_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++20) case "$(which lsb_release 2>&1 > /dev/null && lsb_release -d)" in -*Ubuntu*22.04*) +*Arch*Linux* | *Ubuntu*22.04*) _MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++17) _MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++17) ;; diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS index d598cebd5..efd6749cc 100644 --- a/docs/CONTRIBUTORS +++ b/docs/CONTRIBUTORS @@ -68,6 +68,7 @@ Lukasz Dalek Maarten De Braekeleer Maciej Sobkowski Marco Widmer +Mariusz Glebocki Markus Krause Marlon James Marshal Qiao diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index 3375c613b..76fc1ed2d 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -129,16 +129,6 @@ Summary: is also used). Verilator manages the build itself, and for this --build requires GNU Make to be available on the platform. -.. option:: -CFLAGS - - Add specified C compiler argument to the generated makefiles. For - multiple flags either pass them as a single argument with space - separators quoted in the shell (:command:`-CFLAGS "-a -b"`), or use - multiple -CFLAGS options (:command:`-CFLAGS -a -CFLAGS -b`). - - When make is run on the generated makefile these will be passed to the - C++ compiler (g++/clang++/msvc++). - .. 
option:: --cc Specifies C++ without SystemC output mode; see also :vlopt:`--sc` @@ -156,6 +146,16 @@ Summary: have interest in adding more traditional CDC checks, please contact the authors. +.. option:: -CFLAGS + + Add specified C compiler argument to the generated makefiles. For + multiple flags either pass them as a single argument with space + separators quoted in the shell (:command:`-CFLAGS "-a -b"`), or use + multiple -CFLAGS options (:command:`-CFLAGS -a -CFLAGS -b`). + + When make is run on the generated makefile these will be passed to the + C++ compiler (g++/clang++/msvc++). + .. option:: --clk With :vlopt:`--clk`, the specified signal is marked as a clock signal. @@ -176,6 +176,11 @@ Summary: clock, and remove it from the combinatorial logic reevaluation checking code. This may greatly improve performance. +.. option:: --no-clk + + Prevent the specified signal from being marked as clock. See + :vlopt:`--clk`. + .. option:: --compiler Enables workarounds for the specified C++ compiler (list below). @@ -285,6 +290,13 @@ Summary: <--debugi>`). Higher levels produce more detailed messages. See :vlopt:`--debug` for other implications of enabling debug. +.. option:: --no-decoration + + When creating output Verilated code, minimize comments, white space, + symbol names and other decorative items, at the cost of greatly reduced + readability. This may assist C++ compile times. This will not typically + change the ultimate model's performance, but may in some cases. + .. option:: --default-language Select the language to be used by default when first processing each @@ -582,21 +594,6 @@ Summary: to limit the number of parallel build jobs but attempt to execute all independent build steps in parallel. -.. option:: -LDFLAGS - - Add specified C linker arguments to the generated makefiles. 
For multiple - flags either pass them as a single argument with space separators quoted - in the shell (``-LDFLAGS "-a -b"``), or use multiple -LDFLAGS arguments - (``-LDFLAGS -a -LDFLAGS -b``). - - When make is run on the generated makefile these will be passed to the - C++ linker (ld) **after** the primary file being linked. This flag is - called :vlopt:`-LDFLAGS` as that's the traditional name in simulators; - it's would have been better called LDLIBS as that's the Makefile - variable it controls. (In Make, LDFLAGS is before the first object, - LDLIBS after. -L libraries need to be in the Make variable LDLIBS, not - LDFLAGS.) - .. option:: --l2-name Instead of using the module name when showing Verilog scope, use the @@ -613,12 +610,20 @@ Summary: A synonym for :vlopt:`--default-language`, for compatibility with other tools and earlier versions of Verilator. -.. option:: +libext+[+][...] +.. option:: -LDFLAGS - Specify the extensions that should be used for finding modules. If for - example module "my" is referenced, look in :file:`my.`. Note - "+libext+" is fairly standard across Verilog tools. Defaults to - ".v+.sv". + Add specified C linker arguments to the generated makefiles. For multiple + flags either pass them as a single argument with space separators quoted + in the shell (``-LDFLAGS "-a -b"``), or use multiple -LDFLAGS arguments + (``-LDFLAGS -a -LDFLAGS -b``). + + When make is run on the generated makefile these will be passed to the + C++ linker (ld) **after** the primary file being linked. This flag is + called :vlopt:`-LDFLAGS` as that's the traditional name in simulators; + it would have been better called LDLIBS as that's the Makefile + variable it controls. (In Make, LDFLAGS is before the first object, + LDLIBS after. -L libraries need to be in the Make variable LDLIBS, not + LDFLAGS.) .. option:: --lib-create @@ -637,6 +642,13 @@ Summary: See also :vlopt:`--protect-lib`. +.. option:: +libext+[+][...]
+ + Specify the extensions that should be used for finding modules. If for + example module "my" is referenced, look in :file:`my.`. Note + "+libext+" is fairly standard across Verilog tools. Defaults to + ".v+.sv". + .. option:: --lint-only Check the files for lint violations only, do not create any other @@ -675,17 +687,6 @@ Summary: Set the maximum number literal width (e.g. in 1024'd22 this it the 1024). Defaults to 64K. -.. option:: --MMD =item --no-MMD - - Enable/disable creation of .d dependency files, used for make dependency - detection, similar to gcc -MMD option. By default this option is - enabled for :vlopt:`--cc` or :vlopt:`--sc` modes. - -.. option:: --MP - - When creating .d dependency files with :vlopt:`--MMD` option, make phony - targets. Similar to :command:`gcc -MP` option. - .. option:: --Mdir Specifies the name of the Make object directory. All generated files @@ -693,33 +694,23 @@ Summary: The directory is created if it does not exist and the parent directories exist; otherwise manually create the Mdir before calling Verilator. +.. option:: --MMD + +.. option:: --no-MMD + + Enable/disable creation of .d dependency files, used for make dependency + detection, similar to gcc -MMD option. By default this option is + enabled for :vlopt:`--cc` or :vlopt:`--sc` modes. + .. option:: --mod-prefix Specifies the name to prepend to all lower level classes. Defaults to the same as :vlopt:`--prefix`. -.. option:: --no-clk +.. option:: --MP - Prevent the specified signal from being marked as clock. See - :vlopt:`--clk`. - -.. option:: --no-decoration - - When creating output Verilated code, minimize comments, white space, - symbol names and other decorative items, at the cost of greatly reduced - readability. This may assist C++ compile times. This will not typically - change the ultimate model's performance, but may in some cases. - -.. option:: --no-pins64 - - Backward compatible alias for :vlopt:`--pins-bv 33 <--pins-bv>`. - -.. 
option:: --no-skip-identical =item --skip-identical - - Rarely needed. Disables or enables skipping execution of Verilator if - all source files are identical, and all output files exist with newer - dates. By default this option is enabled for :vlopt:`--cc` or - :vlopt:`--sc` modes only. + When creating .d dependency files with :vlopt:`--MMD` option, make phony + targets. Similar to :command:`gcc -MP` option. .. option:: +notimingchecks @@ -802,11 +793,6 @@ Summary: With :vlopt:`-E`, disable generation of :code:`&96;line` markers and blank lines, similar to :command:`gcc -P`. -.. option:: --pins64 - - Backward compatible alias for :vlopt:`--pins-bv 65 <--pins-bv>`. Note - that's a 65, not a 64. - .. option:: --pins-bv Specifies SystemC inputs/outputs of greater than or equal to @@ -839,6 +825,15 @@ Summary: of uint32_t. Likewise pins of width 9-16 will use uint16_t instead of uint32_t. +.. option:: --pins64 + + Backward compatible alias for :vlopt:`--pins-bv 65 <--pins-bv>`. Note + that's a 65, not a 64. + +.. option:: --no-pins64 + + Backward compatible alias for :vlopt:`--pins-bv 33 <--pins-bv>`. + .. option:: --pipe-filter Rarely needed. Verilator will spawn the specified command as a @@ -868,6 +863,11 @@ Summary: prepended to the name of the :vlopt:`--top` option, or V prepended to the first Verilog filename passed on the command line. +.. option:: --private + + Opposite of :vlopt:`--public`. Is the default; this option exists for + backwards compatibility. + .. option:: --prof-c When compiling the C++ code, enable the compiler's profiling flag @@ -901,23 +901,6 @@ Summary: Deprecated. Same as --prof-exec and --prof-pgo together. -.. option:: --protect-key - - Specifies the private key for :vlopt:`--protect-ids`. For best security - this key should be 16 or more random bytes, a reasonable secure choice - is the output of :command:`verilator --generate-key` . 
Typically, a key - would be created by the user once for a given protected design library, - then every Verilator run for subsequent versions of that library would - be passed the same :vlopt:`--protect-key`. Thus, if the input Verilog is - similar between library versions (Verilator runs), the Verilated code - will likewise be mostly similar. - - If :vlopt:`--protect-key` is not specified and a key is needed, - Verilator will generate a new key for every Verilator run. As the key is - not saved, this is best for security, but means every Verilator run will - give vastly different output even for identical input, perhaps harming - compile times (and certainly thrashing any "ccache"). - .. option:: --protect-ids Hash any private identifiers (variable, module, and assertion block @@ -938,6 +921,23 @@ Summary: prototypes. Use of the VPI is not recommended as many design details may be exposed, and an INSECURE warning will be issued. +.. option:: --protect-key + + Specifies the private key for :vlopt:`--protect-ids`. For best security + this key should be 16 or more random bytes, a reasonable secure choice + is the output of :command:`verilator --generate-key` . Typically, a key + would be created by the user once for a given protected design library, + then every Verilator run for subsequent versions of that library would + be passed the same :vlopt:`--protect-key`. Thus, if the input Verilog is + similar between library versions (Verilator runs), the Verilated code + will likewise be mostly similar. + + If :vlopt:`--protect-key` is not specified and a key is needed, + Verilator will generate a new key for every Verilator run. As the key is + not saved, this is best for security, but means every Verilator run will + give vastly different output even for identical input, perhaps harming + compile times (and certainly thrashing any "ccache"). + .. 
option:: --protect-lib Produces a DPI library similar to :vlopt:`--lib-create`, but hides @@ -949,11 +949,6 @@ Summary: in the distribution for a demonstration of how to build and use the DPI library. -.. option:: --private - - Opposite of :vlopt:`--public`. Is the default; this option exists for - backwards compatibility. - .. option:: --public This is only for historical debug use. Using it may result in @@ -1046,6 +1041,15 @@ Summary: Specifies SystemC output mode; see also :vlopt:`--cc` option. +.. option:: --skip-identical + +.. option:: --no-skip-identical + + Rarely needed. Disables or enables skipping execution of Verilator if + all source files are identical, and all output files exist with newer + dates. By default this option is enabled for :vlopt:`--cc` or + :vlopt:`--sc` modes only. + .. option:: --stats Creates a dump file with statistics on the design in diff --git a/include/verilated.mk.in b/include/verilated.mk.in index 34e975bcc..a5dc4bdb3 100644 --- a/include/verilated.mk.in +++ b/include/verilated.mk.in @@ -187,7 +187,7 @@ VM_SLOW += $(VM_CLASSES_SLOW) $(VM_SUPPORT_SLOW) VK_FAST_OBJS = $(addsuffix .o, $(VM_FAST)) VK_SLOW_OBJS = $(addsuffix .o, $(VM_SLOW)) -VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES)) +VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES)) # Note VM_GLOBAL_FAST and VM_GLOBAL_SLOW holds the files required from the # run-time library. 
In practice everything is actually in VM_GLOBAL_FAST, diff --git a/include/verilated_profiler.cpp b/include/verilated_profiler.cpp index ed25093d1..9f37addf9 100644 --- a/include/verilated_profiler.cpp +++ b/include/verilated_profiler.cpp @@ -99,6 +99,10 @@ void VlExecutionProfiler::configure(const VerilatedContext& context) { } } +void VlExecutionProfiler::startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId) { + profilep->setupThread(threadId); +} + void VlExecutionProfiler::setupThread(uint32_t threadId) { // Reserve some space in the thread-local profiling buffer, in order to try to avoid malloc // while profiling. diff --git a/include/verilated_profiler.h b/include/verilated_profiler.h index d47be4da4..f85c95528 100644 --- a/include/verilated_profiler.h +++ b/include/verilated_profiler.h @@ -23,11 +23,6 @@ #define VERILATOR_VERILATED_PROFILER_H_ #include "verilatedos.h" - -#ifndef VL_PROFILER -#error "verilated_profiler.h/cpp expects VL_PROFILER (from --prof-{exec, pgo}" -#endif - #include "verilated.h" #include @@ -186,6 +181,9 @@ public: void clear() VL_MT_SAFE_EXCLUDES(m_mutex); // Write profiling data into file void dump(const char* filenamep, uint64_t tickEnd) VL_MT_SAFE_EXCLUDES(m_mutex); + + // Called via VlStartWorkerCb in VlWorkerThread::startWorker + static void startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId); }; //============================================================================= diff --git a/include/verilated_save.cpp b/include/verilated_save.cpp index 777feccb0..20d4014ad 100644 --- a/include/verilated_save.cpp +++ b/include/verilated_save.cpp @@ -38,13 +38,13 @@ # include #endif -#ifndef O_LARGEFILE // For example on WIN32 +#ifndef O_LARGEFILE // WIN32 headers omit this # define O_LARGEFILE 0 #endif -#ifndef O_NONBLOCK +#ifndef O_NONBLOCK // WIN32 headers omit this # define O_NONBLOCK 0 #endif -#ifndef O_CLOEXEC +#ifndef O_CLOEXEC // WIN32 headers omit this # define O_CLOEXEC 0 #endif // clang-format on 
diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index 12a579026..a78ea9ae6 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -24,10 +24,6 @@ #include "verilatedos.h" #include "verilated_threads.h" -#ifdef VL_PROFILER -#include "verilated_profiler.h" -#endif - #include #include #include @@ -52,44 +48,35 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount) // VlWorkerThread VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp) + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) : m_ready_size{0} - , m_exiting{false} - , m_cthread{startWorker, this, threadId, profilerp} + , m_cthread{startWorker, this, threadId, profilerp, startCb} , m_contextp{contextp} {} VlWorkerThread::~VlWorkerThread() { - m_exiting.store(true, std::memory_order_release); - wakeUp(); + shutdown(); // The thread should exit; join it. m_cthread.join(); } +void VlWorkerThread::shutdownTask(void*, bool) { + // Deliberately empty, we use the address of this function as a magic number +} + void VlWorkerThread::workerLoop() { ExecRec work; - work.m_fnp = nullptr; while (true) { - if (VL_LIKELY(!work.m_fnp)) dequeWork(&work); - - // Do this here, not above, to avoid a race with the destructor. - if (VL_UNLIKELY(m_exiting.load(std::memory_order_acquire))) break; - - if (VL_LIKELY(work.m_fnp)) { - work.m_fnp(work.m_selfp, work.m_evenCycle); - work.m_fnp = nullptr; - } + dequeWork(&work); + if (VL_UNLIKELY(work.m_fnp == shutdownTask)) break; + work.m_fnp(work.m_selfp, work.m_evenCycle); } } void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp) { + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { Verilated::threadContextp(workerp->m_contextp); -#ifdef VL_PROFILER - // Note: setupThread is not defined without VL_PROFILER, hence the #ifdef. 
Still, we might - // not be profiling execution (e.g.: PGO only), so profilerp might still be nullptr. - if (profilerp) profilerp->setupThread(threadId); -#endif + if (VL_UNLIKELY(startCb)) startCb(profilerp, threadId); workerp->workerLoop(); } @@ -97,7 +84,7 @@ void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId, // VlThreadPool VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads, - VlExecutionProfiler* profiler) { + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { // --threads N passes nThreads=N-1, as the "main" threads counts as 1 ++nThreads; const unsigned cpus = std::thread::hardware_concurrency(); @@ -111,7 +98,7 @@ VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads, } // Create worker threads for (uint32_t threadId = 1; threadId < nThreads; ++threadId) { - m_workers.push_back(new VlWorkerThread{threadId, contextp, profiler}); + m_workers.push_back(new VlWorkerThread{threadId, contextp, profilerp, startCb}); } } diff --git a/include/verilated_threads.h b/include/verilated_threads.h index 257c87162..eeb8f9342 100644 --- a/include/verilated_threads.h +++ b/include/verilated_threads.h @@ -50,6 +50,9 @@ #endif // clang-format on +class VlExecutionProfiler; +class VlThreadPool; + // VlMTaskVertex and VlThreadpool will work with multiple model class types. // Since the type is opaque to VlMTaskVertex and VlThreadPool, represent it // as a void* here. @@ -57,6 +60,9 @@ using VlSelfP = void*; using VlExecFnp = void (*)(VlSelfP, bool); +// VlWorkerThread::startWorker callback, used to hook in VlExecutionProfiler +using VlStartWorkerCb = void (*)(VlExecutionProfiler*, uint32_t threadId); + // Track dependencies for a single MTask. 
class VlMTaskVertex final { // MEMBERS @@ -129,9 +135,6 @@ public: } }; -class VlExecutionProfiler; -class VlThreadPool; - class VlWorkerThread final { private: // TYPES @@ -162,7 +165,6 @@ private: // Store the size atomically, so we can spin wait std::atomic m_ready_size; - std::atomic m_exiting; // Worker thread should exit std::thread m_cthread; // Underlying C++ thread record VerilatedContext* const m_contextp; // Context for spawned thread @@ -171,7 +173,7 @@ private: public: // CONSTRUCTORS explicit VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp); + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); ~VlWorkerThread(); // METHODS @@ -195,7 +197,6 @@ public: m_ready.erase(m_ready.begin()); m_ready_size.fetch_sub(1, std::memory_order_relaxed); } - inline void wakeUp() { addTask(nullptr, nullptr, false); } inline void addTask(VlExecFnp fnp, VlSelfP selfp, bool evenCycle) VL_MT_SAFE_EXCLUDES(m_mutex) { bool notify; @@ -207,9 +208,13 @@ public: } if (notify) m_cv.notify_one(); } + + inline void shutdown() { addTask(shutdownTask, nullptr, false); } + static void shutdownTask(void*, bool); + void workerLoop(); static void startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp); + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); }; class VlThreadPool final { @@ -221,7 +226,8 @@ public: // Construct a thread pool with 'nThreads' dedicated threads. The thread // pool will create these threads and make them available to execute tasks // via this->workerp(index)->addTask(...) 
- VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp); + VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp, + VlStartWorkerCb startCb); ~VlThreadPool(); // METHODS diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index 9db71aabc..9b01ea4ce 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -38,13 +38,13 @@ # include #endif -#ifndef O_LARGEFILE // For example on WIN32 +#ifndef O_LARGEFILE // WIN32 headers omit this # define O_LARGEFILE 0 #endif -#ifndef O_NONBLOCK +#ifndef O_NONBLOCK // WIN32 headers omit this # define O_NONBLOCK 0 #endif -#ifndef O_CLOEXEC +#ifndef O_CLOEXEC // WIN32 headers omit this # define O_CLOEXEC 0 #endif diff --git a/src/V3Const.cpp b/src/V3Const.cpp index bfd6919df..9b96c88c8 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -79,14 +79,32 @@ class ConstBitOpTreeVisitor final : public VNVisitor { // bool indicating if the term is clean (0/1 value, or if the top bits might be dirty) using ResultTerm = std::tuple; - struct LeafInfo final { // Leaf node (either AstConst or AstVarRef) + class LeafInfo final { // Leaf node (either AstConst or AstVarRef) bool m_polarity = true; int m_lsb = 0; int m_wordIdx = -1; // -1 means AstWordSel is not used. 
AstVarRef* m_refp = nullptr; const AstConst* m_constp = nullptr; - int width() const { + public: + void setLeaf(AstVarRef* refp) { + UASSERT(!m_refp && !m_constp, "Must be called just once"); + m_refp = refp; + } + void setLeaf(const AstConst* constp) { + UASSERT(!m_refp && !m_constp, "Must be called just once"); + m_constp = constp; + } + AstVarRef* refp() const { return m_refp; } + const AstConst* constp() const { return m_constp; } + int wordIdx() const { return m_wordIdx; } + bool polarity() const { return m_polarity; } + int lsb() const { return m_lsb; } + + void wordIdx(int i) { m_wordIdx = i; } + void lsb(int l) { m_lsb = l; } + void polarity(bool p) { m_polarity = p; } + int varWidth() const { UASSERT(m_refp, "m_refp should be set"); const int width = m_refp->varp()->widthMin(); if (!m_refp->isWide()) { @@ -339,25 +357,25 @@ class ConstBitOpTreeVisitor final : public VNVisitor { UINFO(9, "Increment to " << m_ops << " " << nodep << " called from line " << line << "\n"); } VarInfo& getVarInfo(const LeafInfo& ref) { - UASSERT_OBJ(ref.m_refp, m_rootp, "null varref in And/Or/Xor optimization"); - AstNode* nodep = ref.m_refp->varScopep(); - if (!nodep) nodep = ref.m_refp->varp(); // Not scoped + UASSERT_OBJ(ref.refp(), m_rootp, "null varref in And/Or/Xor optimization"); + AstNode* nodep = ref.refp()->varScopep(); + if (!nodep) nodep = ref.refp()->varp(); // Not scoped int baseIdx = nodep->user4(); if (baseIdx == 0) { // Not set yet baseIdx = m_varInfos.size(); const int numWords - = ref.m_refp->dtypep()->isWide() ? ref.m_refp->dtypep()->widthWords() : 1; + = ref.refp()->dtypep()->isWide() ? 
ref.refp()->dtypep()->widthWords() : 1; m_varInfos.resize(m_varInfos.size() + numWords); nodep->user4(baseIdx); } - const size_t idx = baseIdx + std::max(0, ref.m_wordIdx); + const size_t idx = baseIdx + std::max(0, ref.wordIdx()); VarInfo* varInfop = m_varInfos[idx].get(); if (!varInfop) { - varInfop = new VarInfo{this, ref.m_refp, ref.width()}; + varInfop = new VarInfo{this, ref.refp(), ref.varWidth()}; m_varInfos[idx].reset(varInfop); } else { - if (!varInfop->sameVarAs(ref.m_refp)) - CONST_BITOP_SET_FAILED("different var (scope?)", ref.m_refp); + if (!varInfop->sameVarAs(ref.refp())) + CONST_BITOP_SET_FAILED("different var (scope?)", ref.refp()); } return *varInfop; } @@ -373,9 +391,9 @@ class ConstBitOpTreeVisitor final : public VNVisitor { bool ok = !m_failed; if (expectConst) { - ok &= !info.m_refp && info.m_constp; + ok &= !info.refp() && info.constp(); } else { - ok &= info.m_refp && !info.m_constp; + ok &= info.refp() && !info.constp(); } return ok ? info : LeafInfo{}; } @@ -411,22 +429,20 @@ class ConstBitOpTreeVisitor final : public VNVisitor { CONST_BITOP_RETURN_IF(!m_leafp, nodep); AstConst* const constp = VN_CAST(nodep->bitp(), Const); CONST_BITOP_RETURN_IF(!constp, nodep->rhsp()); - UASSERT_OBJ(m_leafp->m_wordIdx == -1, nodep, "Unexpected nested WordSel"); - m_leafp->m_wordIdx = constp->toSInt(); + UASSERT_OBJ(m_leafp->wordIdx() == -1, nodep, "Unexpected nested WordSel"); + m_leafp->wordIdx(constp->toSInt()); iterate(nodep->fromp()); } virtual void visit(AstVarRef* nodep) override { CONST_BITOP_RETURN_IF(!m_leafp, nodep); - UASSERT_OBJ(!m_leafp->m_refp, nodep, m_leafp->m_refp << " is already set"); - m_leafp->m_refp = nodep; - m_leafp->m_polarity = m_polarity; - m_leafp->m_lsb = m_lsb; + m_leafp->setLeaf(nodep); + m_leafp->polarity(m_polarity); + m_leafp->lsb(m_lsb); } virtual void visit(AstConst* nodep) override { CONST_BITOP_RETURN_IF(!m_leafp, nodep); - UASSERT_OBJ(!m_leafp->m_constp, nodep, m_leafp->m_constp << " is already set"); - 
m_leafp->m_constp = nodep; - m_leafp->m_lsb = m_lsb; + m_leafp->setLeaf(nodep); + m_leafp->lsb(m_lsb); } virtual void visit(AstRedXor* nodep) override { @@ -438,36 +454,36 @@ class ConstBitOpTreeVisitor final : public VNVisitor { CONST_BITOP_RETURN_IF(!andp, lhsp); const LeafInfo& mask = findLeaf(andp->lhsp(), true); - CONST_BITOP_RETURN_IF(!mask.m_constp || mask.m_lsb != 0, andp->lhsp()); + CONST_BITOP_RETURN_IF(!mask.constp() || mask.lsb() != 0, andp->lhsp()); const LeafInfo& ref = findLeaf(andp->rhsp(), false); - CONST_BITOP_RETURN_IF(!ref.m_refp, andp->rhsp()); + CONST_BITOP_RETURN_IF(!ref.refp(), andp->rhsp()); restorer.disableRestore(); // Now all subtree succeeded - const V3Number& maskNum = mask.m_constp->num(); + const V3Number& maskNum = mask.constp()->num(); incrOps(nodep, __LINE__); incrOps(andp, __LINE__); // Mark all bits checked in this reduction - const int maxBitIdx = std::min(ref.m_lsb + maskNum.width(), ref.width()); - for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) { - const int maskIdx = bitIdx - ref.m_lsb; + const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.varWidth()); + for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { + const int maskIdx = bitIdx - ref.lsb(); if (maskNum.bitIs0(maskIdx)) continue; // Set true, m_polarity takes care of the entire parity m_bitPolarities.emplace_back(ref, true, bitIdx); } } else { // '^leaf' const LeafInfo& ref = findLeaf(lhsp, false); - CONST_BITOP_RETURN_IF(!ref.m_refp, lhsp); + CONST_BITOP_RETURN_IF(!ref.refp(), lhsp); restorer.disableRestore(); // Now all checks passed incrOps(nodep, __LINE__); // Mark all bits checked by this comparison - for (int bitIdx = ref.m_lsb; bitIdx < ref.width(); ++bitIdx) { + for (int bitIdx = ref.lsb(); bitIdx < ref.varWidth(); ++bitIdx) { m_bitPolarities.emplace_back(ref, true, bitIdx); } } @@ -492,7 +508,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { AstNode* opp = right ? 
nodep->rhsp() : nodep->lhsp(); const bool origFailed = m_failed; iterate(opp); - if (leafInfo.m_constp || m_failed) { + if (leafInfo.constp() || m_failed) { // Revert changes in leaf restorer.restoreNow(); // Reach past a cast then add to frozen nodes to be added to final reduction @@ -502,14 +518,14 @@ class ConstBitOpTreeVisitor final : public VNVisitor { continue; } restorer.disableRestore(); // Now all checks passed - if (leafInfo.m_refp) { + if (leafInfo.refp()) { // The conditional on the lsb being in range is necessary for some degenerate // case, e.g.: (IData)((QData)wide[0] >> 32), or <1-bit-var> >> 1, which is // just zero - if (leafInfo.m_lsb < leafInfo.width()) { - m_bitPolarities.emplace_back(leafInfo, isXorTree() || leafInfo.m_polarity, - leafInfo.m_lsb); - } else if (isAndTree() && leafInfo.m_polarity) { + if (leafInfo.lsb() < leafInfo.varWidth()) { + m_bitPolarities.emplace_back(leafInfo, isXorTree() || leafInfo.polarity(), + leafInfo.lsb()); + } else if (isAndTree() && leafInfo.polarity()) { // If there is a constant 0 term in an And tree, we must include it. 
Fudge // this by adding a bit with both polarities, which will simplify to zero m_bitPolarities.emplace_back(leafInfo, true, 0); @@ -530,38 +546,38 @@ class ConstBitOpTreeVisitor final : public VNVisitor { if (const AstAnd* const andp = VN_CAST(nodep->rhsp(), And)) { // comp == (mask & v) const LeafInfo& mask = findLeaf(andp->lhsp(), true); - CONST_BITOP_RETURN_IF(!mask.m_constp || mask.m_lsb != 0, andp->lhsp()); + CONST_BITOP_RETURN_IF(!mask.constp() || mask.lsb() != 0, andp->lhsp()); const LeafInfo& ref = findLeaf(andp->rhsp(), false); - CONST_BITOP_RETURN_IF(!ref.m_refp, andp->rhsp()); + CONST_BITOP_RETURN_IF(!ref.refp(), andp->rhsp()); restorer.disableRestore(); // Now all checks passed - const V3Number& maskNum = mask.m_constp->num(); + const V3Number& maskNum = mask.constp()->num(); incrOps(nodep, __LINE__); incrOps(andp, __LINE__); // Mark all bits checked by this comparison - const int maxBitIdx = std::min(ref.m_lsb + maskNum.width(), ref.width()); - for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) { - const int maskIdx = bitIdx - ref.m_lsb; + const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.varWidth()); + for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { + const int maskIdx = bitIdx - ref.lsb(); if (maskNum.bitIs0(maskIdx)) continue; const bool polarity = compNum.bitIs1(maskIdx) != maskFlip; m_bitPolarities.emplace_back(ref, polarity, bitIdx); } } else { // comp == v const LeafInfo& ref = findLeaf(nodep->rhsp(), false); - CONST_BITOP_RETURN_IF(!ref.m_refp, nodep->rhsp()); + CONST_BITOP_RETURN_IF(!ref.refp(), nodep->rhsp()); restorer.disableRestore(); // Now all checks passed incrOps(nodep, __LINE__); // Mark all bits checked by this comparison - const int maxBitIdx = std::min(ref.m_lsb + compNum.width(), ref.width()); - for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) { - const int maskIdx = bitIdx - ref.m_lsb; + const int maxBitIdx = std::min(ref.lsb() + compNum.width(), ref.varWidth()); + for (int bitIdx = 
ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { + const int maskIdx = bitIdx - ref.lsb(); const bool polarity = compNum.bitIs1(maskIdx) != maskFlip; m_bitPolarities.emplace_back(ref, polarity, bitIdx); } diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index 6b67ece1e..73bb22059 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -694,8 +694,8 @@ void EmitCSyms::emitSymImp() { puts("}\n\n"); // Constructor - puts(symClassName() + "::" + symClassName() + "(VerilatedContext* contextp, const char* namep," - + topClassName() + "* modelp)\n"); + puts(symClassName() + "::" + symClassName() + + "(VerilatedContext* contextp, const char* namep, " + topClassName() + "* modelp)\n"); puts(" : VerilatedSyms{contextp}\n"); puts(" // Setup internal state of the Syms class\n"); puts(" , __Vm_modelp{modelp}\n"); @@ -724,7 +724,10 @@ void EmitCSyms::emitSymImp() { // duration of the eval call. puts(" , __Vm_threadPoolp{new VlThreadPool{_vm_contextp__, " + cvtToStr(v3Global.opt.threads() - 1) + ", " - + (v3Global.opt.profExec() ? "&__Vm_executionProfiler" : "nullptr") + "}}\n"); + + (v3Global.opt.profExec() + ? 
"&__Vm_executionProfiler, &VlExecutionProfiler::startWorkerSetup" + : "nullptr, nullptr") + + "}}\n"); } puts(" // Setup module instances\n"); @@ -965,7 +968,8 @@ void EmitCSyms::emitSymImp() { } closeSplit(); - VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr); + m_ofp = nullptr; + VL_DO_CLEAR(delete m_ofpBase, m_ofpBase = nullptr); } //###################################################################### diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index b748d9553..3fb3907be 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -197,7 +197,6 @@ public: of.puts("# User CFLAGS (from -CFLAGS on Verilator command line)\n"); of.puts("VM_USER_CFLAGS = \\\n"); if (!v3Global.opt.libCreate().empty()) of.puts("\t-fPIC \\\n"); - if (v3Global.opt.usesProfiler()) of.puts("\t-DVL_PROFILER \\\n"); const V3StringList& cFlags = v3Global.opt.cFlags(); for (const string& i : cFlags) of.puts("\t" + i + " \\\n"); of.puts("\n"); diff --git a/src/V3File.cpp b/src/V3File.cpp index f6b9cf11d..305d19c08 100644 --- a/src/V3File.cpp +++ b/src/V3File.cpp @@ -920,13 +920,16 @@ void V3OutFormatter::printf(const char* fmt...) { // V3OutFormatter: A class for printing to a file, with automatic indentation of C++ code. V3OutFile::V3OutFile(const string& filename, V3OutFormatter::Language lang) - : V3OutFormatter{filename, lang} { + : V3OutFormatter{filename, lang} + , m_bufferp{new std::array<char, WRITE_BUFFER_SIZE_BYTES>{}} { if ((m_fp = V3File::new_fopen_w(filename)) == nullptr) { v3fatal("Cannot write " << filename); } } V3OutFile::~V3OutFile() { + writeBlock(); + if (m_fp) fclose(m_fp); m_fp = nullptr; } diff --git a/src/V3File.h b/src/V3File.h index dd337b165..6c45a0456 100644 --- a/src/V3File.h +++ b/src/V3File.h @@ -22,6 +22,7 @@ #include "V3Error.h" +#include #include #include #include @@ -183,18 +184,56 @@ public: // V3OutFile: A class for printing to a file, with automatic indentation of C++ code. class V3OutFile VL_NOT_FINAL : public V3OutFormatter { + // Size of m_bufferp.
+ // 128kB has been experimentally determined to be in the zone of buffer sizes that work best. + // It is also considered to be the smallest I/O buffer size in GNU coreutils (io_blksize) that + // allows to best minimize syscall overhead. + // The hard boundaries are CPU L2/L3 cache size on the top and filesystem block size + // on the bottom. + static constexpr std::size_t WRITE_BUFFER_SIZE_BYTES = 128 * 1024; + // MEMBERS + std::unique_ptr<std::array<char, WRITE_BUFFER_SIZE_BYTES>> m_bufferp; // Write buffer + std::size_t m_usedBytes = 0; // Number of bytes stored in m_bufferp FILE* m_fp = nullptr; public: V3OutFile(const string& filename, V3OutFormatter::Language lang); + V3OutFile(const V3OutFile&) = delete; + V3OutFile& operator=(const V3OutFile&) = delete; + V3OutFile(V3OutFile&&) = delete; + V3OutFile& operator=(V3OutFile&&) = delete; + virtual ~V3OutFile() override; void putsForceIncs(); private: + void writeBlock() { + if (VL_LIKELY(m_usedBytes > 0)) fwrite(m_bufferp->data(), m_usedBytes, 1, m_fp); + m_usedBytes = 0; + } + // CALLBACKS - virtual void putcOutput(char chr) override { fputc(chr, m_fp); } - virtual void putsOutput(const char* str) override { fputs(str, m_fp); } + virtual void putcOutput(char chr) override { + m_bufferp->at(m_usedBytes++) = chr; + if (VL_UNLIKELY(m_usedBytes >= WRITE_BUFFER_SIZE_BYTES)) writeBlock(); + } + virtual void putsOutput(const char* str) override { + std::size_t len = strlen(str); + std::size_t availableBytes = WRITE_BUFFER_SIZE_BYTES - m_usedBytes; + while (VL_UNLIKELY(len >= availableBytes)) { + memcpy(m_bufferp->data() + m_usedBytes, str, availableBytes); + m_usedBytes = WRITE_BUFFER_SIZE_BYTES; + writeBlock(); + str += availableBytes; + len -= availableBytes; + availableBytes = WRITE_BUFFER_SIZE_BYTES; + } + if (len > 0) { + memcpy(m_bufferp->data() + m_usedBytes, str, len); + m_usedBytes += len; + } + } }; class V3OutCFile VL_NOT_FINAL : public V3OutFile { diff --git a/src/bisonpre b/src/bisonpre index 05b213d93..e03a7e41e 100755 --- a/src/bisonpre +++ 
b/src/bisonpre @@ -150,7 +150,7 @@ def clean_output(filename, outname, is_output, is_c): lines = out out = [] - with open(outname, "w") as fh: + with open(outname, "w", encoding="utf-8") as fh: for line in lines: # Fix filename refs line = re.sub(basename, newbase, line) diff --git a/test_regress/driver.pl b/test_regress/driver.pl index 541fb296f..968b89f81 100755 --- a/test_regress/driver.pl +++ b/test_regress/driver.pl @@ -1110,11 +1110,6 @@ sub compile { return 1; } - if ($self->{vltmt} && !$self->cfg_with_threaded) { - $self->skip("Test requires Verilator configured with threads\n"); - return 1; - } - if ($param{verilator_make_cmake} && !$self->have_cmake) { $self->skip("Test requires CMake; ignore error since not available or version too old\n"); return 1; @@ -2340,10 +2335,6 @@ sub cxx_version { return $_Cxx_Version; } -sub cfg_with_threaded { - return 1; # C++11 now always required -} - our $_Cfg_with_ccache; sub cfg_with_ccache { diff --git a/test_regress/t/t_verilated_all.pl b/test_regress/t/t_verilated_all.pl index 1a8bc4469..450b5bd9c 100755 --- a/test_regress/t/t_verilated_all.pl +++ b/test_regress/t/t_verilated_all.pl @@ -17,10 +17,8 @@ compile( verilator_flags2 => ["--cc", "--coverage-toggle --coverage-line --coverage-user", "--trace --vpi ", - ($Self->cfg_with_threaded - ? "--threads 2 $root/include/verilated_threads.cpp" : ""), - ($Self->cfg_with_threaded - ? "--trace-threads 1" : ""), + "--threads 2", + "--trace-threads 1", "--prof-exec", "--prof-pgo", "$root/include/verilated_save.cpp"], ); @@ -58,7 +56,7 @@ foreach my $file (sort keys %hit) { && $file !~ /_sc/ && $file !~ /_fst/ && $file !~ /_heavy/ - && ($file !~ /_thread/ || $Self->cfg_with_threaded)) { + && ($file !~ /_thread/)) { error("Include file not covered by t_verilated_all test: ", $file); } }