From f2fba51fe287d9f02aa052b2f1588662a4ce69cc Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sun, 19 Jun 2022 15:13:29 -0400 Subject: [PATCH 001/119] devel release --- Changes | 7 +++++++ configure.ac | 10 +++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Changes b/Changes index 984897289..b597d565f 100644 --- a/Changes +++ b/Changes @@ -8,6 +8,13 @@ The changes in each Verilator version are described below. The contributors that suggested a given feature are shown in []. Thanks! +Verilator 4.225 devel +========================== + +**Minor:** + + + Verilator 4.224 2022-06-19 ========================== diff --git a/configure.ac b/configure.ac index 03a6e424d..b86bb48f5 100644 --- a/configure.ac +++ b/configure.ac @@ -5,14 +5,14 @@ # General Public License Version 3 or the Perl Artistic License Version 2.0. # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 -#AC_INIT([Verilator],[#.### YYYY-MM-DD]) -#AC_INIT([Verilator],[#.### devel]) -AC_INIT([Verilator],[4.224 2022-06-19], - [https://verilator.org], - [verilator],[https://verilator.org]) # When releasing, also update header of Changes file # and commit using "devel release" or "Version bump" message # Then 'make maintainer-dist' +#AC_INIT([Verilator],[#.### YYYY-MM-DD]) +#AC_INIT([Verilator],[#.### devel]) +AC_INIT([Verilator],[4.225 devel], + [https://verilator.org], + [verilator],[https://verilator.org]) AC_CONFIG_HEADERS(src/config_build.h) AC_CONFIG_FILES(Makefile src/Makefile src/Makefile_obj include/verilated.mk include/verilated_config.h verilator.pc verilator-config.cmake verilator-config-version.cmake) From 11032b1936450f31d11e83983098df4c32a0ba22 Mon Sep 17 00:00:00 2001 From: Unai Martinez-Corral <38422348+umarcor@users.noreply.github.com> Date: Mon, 20 Jun 2022 16:59:27 +0100 Subject: [PATCH 002/119] Fix bisonpre for MSYS2 (#3471) --- src/bisonpre | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bisonpre b/src/bisonpre index 05b213d93..e03a7e41e 
100755 --- a/src/bisonpre +++ b/src/bisonpre @@ -150,7 +150,7 @@ def clean_output(filename, outname, is_output, is_c): lines = out out = [] - with open(outname, "w") as fh: + with open(outname, "w", encoding="utf-8") as fh: for line in lines: # Fix filename refs line = re.sub(basename, newbase, line) From 49455721a32c92eb7dea52c15d1e62d63588e9d4 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Tue, 21 Jun 2022 19:28:23 -0400 Subject: [PATCH 003/119] Commentary --- include/verilated_save.cpp | 6 +++--- include/verilated_vcd_c.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/verilated_save.cpp b/include/verilated_save.cpp index 777feccb0..20d4014ad 100644 --- a/include/verilated_save.cpp +++ b/include/verilated_save.cpp @@ -38,13 +38,13 @@ # include #endif -#ifndef O_LARGEFILE // For example on WIN32 +#ifndef O_LARGEFILE // WIN32 headers omit this # define O_LARGEFILE 0 #endif -#ifndef O_NONBLOCK +#ifndef O_NONBLOCK // WIN32 headers omit this # define O_NONBLOCK 0 #endif -#ifndef O_CLOEXEC +#ifndef O_CLOEXEC // WIN32 headers omit this # define O_CLOEXEC 0 #endif // clang-format on diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index 9db71aabc..9b01ea4ce 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -38,13 +38,13 @@ # include #endif -#ifndef O_LARGEFILE // For example on WIN32 +#ifndef O_LARGEFILE // WIN32 headers omit this # define O_LARGEFILE 0 #endif -#ifndef O_NONBLOCK +#ifndef O_NONBLOCK // WIN32 headers omit this # define O_NONBLOCK 0 #endif -#ifndef O_CLOEXEC +#ifndef O_CLOEXEC // WIN32 headers omit this # define O_CLOEXEC 0 #endif From fa99cbbc73ca17ca8cc8baecb6d0d2fdc569b73a Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Tue, 21 Jun 2022 19:28:26 -0400 Subject: [PATCH 004/119] Commentary: Fix mis-sorted option names. No functional change. 
--- bin/verilator | 37 +++---- docs/guide/exe_verilator.rst | 182 ++++++++++++++++++----------------- 2 files changed, 113 insertions(+), 106 deletions(-) diff --git a/bin/verilator b/bin/verilator index a50e353bb..62f7be232 100755 --- a/bin/verilator +++ b/bin/verilator @@ -284,33 +284,34 @@ detailed descriptions of these arguments. --bbox-unsup Blackbox unsupported language features --bin Override Verilator binary --build Build model executable/library after Verilation - -CFLAGS C++ compiler arguments for makefile --cc Create C++ output --cdc Clock domain crossing analysis + -CFLAGS C++ compiler arguments for makefile --clk Mark specified signal as clock - --make Generate scripts for specified build tool + --no-clk Prevent marking specified signal as clock --compiler Tune for specified C++ compiler --converge-limit Tune convergence settle time --coverage Enable all coverage --coverage-line Enable line coverage --coverage-max-width Maximum array depth for coverage --coverage-toggle Enable toggle coverage - --coverage-user Enable SVL user coverage --coverage-underscore Enable coverage of _signals + --coverage-user Enable SVL user coverage -D[=] Set preprocessor define --debug Enable debugging --debug-check Enable debugging assertions --no-debug-leak Disable leaking memory in --debug mode --debugi Enable debugging at a specified level --debugi- Enable debugging a source file at a level + --no-decoration Disable comments and symbol decorations --default-language Default language to parse +define+= Set preprocessor define --dpi-hdr-only Only produce the DPI header file --dump-defines Show preprocessor defines with -E --dump-tree Enable dumping .tree files + --dump-tree-addrids Use short identifiers instead of addresses --dump-treei Enable dumping .tree files at a level --dump-treei- Enable dumping .tree file at a source file at a level - --dump-tree-addrids Use short identifiers instead of addresses -E Preprocess, but do not compile --error-limit Abort after this 
number of errors --exe Link to create executable @@ -321,6 +322,7 @@ detailed descriptions of these arguments. --flatten Force inlining of all modules, tasks and functions -fno- Disable internal optimization stage -G= Overwrite top-level parameter + --gate-stmts Tune gate optimizer depth --gdb Run Verilator under GDB interactively --gdbbt Run Verilator under GDB for backtrace --generate-key Create random key for --protect-key @@ -328,54 +330,50 @@ detailed descriptions of these arguments. --help Display this help --hierarchical Enable hierarchical Verilation -I Directory to search for includes - -j Parallelism for --build - --gate-stmts Tune gate optimizer depth --if-depth Tune IFDEPTH warning +incdir+ Directory to search for includes --inline-mult Tune module inlining --instr-count-dpi Assumed dynamic instruction count of DPI imports - -LDFLAGS Linker pre-object arguments for makefile + -j Parallelism for --build --l2-name Verilog scope name of the top module --language Default language standard to parse + -LDFLAGS Linker pre-object arguments for makefile --lib-create Create a DPI library +libext++[ext]... 
Extensions for finding modules --lint-only Lint, but do not make output + --make Generate scripts for specified build tool -MAKEFLAGS Arguments to pass to make during --build --max-num-width Maximum number width (default: 64K) - --MMD Create .d dependency files - --MP Create phony dependency targets --Mdir Name of output object directory + --MMD Create .d dependency files --mod-prefix Name to prepend to lower classes - --no-clk Prevent marking specified signal as clock - --no-decoration Disable comments and symbol decorations - --no-pins64 Don't use uint64_t's for 33-64 bit sigs - --no-skip-identical Disable skipping identical output + --MP Create phony dependency targets +notimingchecks Ignored -O0 Disable optimizations -O3 High performance optimizations -O Selectable optimizations -o Name of final executable --no-order-clock-delay Disable ordering clock enable assignments - --no-verilate Skip verilation and just compile previously Verilated code. --output-split Split .cpp files into pieces --output-split-cfuncs Split model functions --output-split-ctrace Split tracing functions -P Disable line numbers and blanks with -E --pins-bv Specify types for top level ports - --pins-sc-uint Specify types for top level ports --pins-sc-biguint Specify types for top level ports + --pins-sc-uint Specify types for top level ports --pins-uint8 Specify types for top level ports + --no-pins64 Don't use uint64_t's for 33-64 bit sigs --pipe-filter Filter all input through a script --pp-comments Show preprocessor comments with -E --prefix Name of top level class + --private Debugging; see docs --prof-c Compile C++ code with profiling --prof-cfuncs Name functions for profiling --prof-exec Enable generating execution profile for gantt chart --prof-pgo Enable generating profiling data for PGO - --protect-key Key for symbol protection --protect-ids Hash identifier names for obscurity + --protect-key Key for symbol protection --protect-lib Create a DPI protected library - --private 
Debugging; see docs --public Debugging; see docs --public-flat-rw Mark all variables, etc as public_flat_rw -pvalue+= Overwrite toplevel parameter @@ -386,6 +384,7 @@ detailed descriptions of these arguments. --rr Run Verilator and record with rr --savable Enable model save-restore --sc Create SystemC output + --no-skip-identical Disable skipping identical output --stats Create statistics file --stats-vars Provide statistics on variables -sv Enable SystemVerilog parsing @@ -413,6 +412,7 @@ detailed descriptions of these arguments. --unused-regexp Tune UNUSED lint signals -V Verbose version and config -v Verilog library + --no-verilate Skip verilation and just compile previously Verilated code. +verilog1995ext+ Synonym for +1364-1995ext+ +verilog2001ext+ Synonym for +1364-2001ext+ --version Displays program version and exits @@ -427,6 +427,9 @@ detailed descriptions of these arguments. -Wno-lint Disable all lint warnings -Wno-style Disable all style warnings -Wpedantic Warn on compliance-test issues + -Wwarn- Enable specified warning message + -Wwarn-lint Enable lint warning message + -Wwarn-style Enable style warning message --x-assign Assign non-initial Xs to this value --x-initial Assign initial Xs to this value --x-initial-edge Enable initial X->0 and X->1 edge triggers diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index bf99a7d65..5e1c6282f 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -129,16 +129,6 @@ Summary: is also used). Verilator manages the build itself, and for this --build requires GNU Make to be available on the platform. -.. option:: -CFLAGS - - Add specified C compiler argument to the generated makefiles. For - multiple flags either pass them as a single argument with space - separators quoted in the shell (:command:`-CFLAGS "-a -b"`), or use - multiple -CFLAGS options (:command:`-CFLAGS -a -CFLAGS -b`). 
- - When make is run on the generated makefile these will be passed to the - C++ compiler (g++/clang++/msvc++). - .. option:: --cc Specifies C++ without SystemC output mode; see also :vlopt:`--sc` @@ -156,6 +146,16 @@ Summary: have interest in adding more traditional CDC checks, please contact the authors. +.. option:: -CFLAGS + + Add specified C compiler argument to the generated makefiles. For + multiple flags either pass them as a single argument with space + separators quoted in the shell (:command:`-CFLAGS "-a -b"`), or use + multiple -CFLAGS options (:command:`-CFLAGS -a -CFLAGS -b`). + + When make is run on the generated makefile these will be passed to the + C++ compiler (g++/clang++/msvc++). + .. option:: --clk With :vlopt:`--clk`, the specified signal-name is taken as a root clock @@ -179,6 +179,11 @@ Summary: remove it from the combinatorial logic reevaluation checking code. This may greatly improve performance. +.. option:: --no-clk + + Prevent the specified signal from being marked as clock. See + :vlopt:`--clk`. + .. option:: --compiler Enables workarounds for the specified C++ compiler (list below). @@ -288,6 +293,13 @@ Summary: <--debugi>`). Higher levels produce more detailed messages. See :vlopt:`--debug` for other implications of enabling debug. +.. option:: --no-decoration + + When creating output Verilated code, minimize comments, white space, + symbol names and other decorative items, at the cost of greatly reduced + readability. This may assist C++ compile times. This will not typically + change the ultimate model's performance, but may in some cases. + .. option:: --default-language Select the language to be used by default when first processing each @@ -585,21 +597,6 @@ Summary: to limit the number of parallel build jobs but attempt to execute all independent build steps in parallel. -.. option:: -LDFLAGS - - Add specified C linker arguments to the generated makefiles. 
For multiple - flags either pass them as a single argument with space separators quoted - in the shell (``-LDFLAGS "-a -b"``), or use multiple -LDFLAGS arguments - (``-LDFLAGS -a -LDFLAGS -b``). - - When make is run on the generated makefile these will be passed to the - C++ linker (ld) **after** the primary file being linked. This flag is - called :vlopt:`-LDFLAGS` as that's the traditional name in simulators; - it's would have been better called LDLIBS as that's the Makefile - variable it controls. (In Make, LDFLAGS is before the first object, - LDLIBS after. -L libraries need to be in the Make variable LDLIBS, not - LDFLAGS.) - .. option:: --l2-name Instead of using the module name when showing Verilog scope, use the @@ -616,12 +613,20 @@ Summary: A synonym for :vlopt:`--default-language`, for compatibility with other tools and earlier versions of Verilator. -.. option:: +libext+[+][...] +.. option:: -LDFLAGS - Specify the extensions that should be used for finding modules. If for - example module "my" is referenced, look in :file:`my.`. Note - "+libext+" is fairly standard across Verilog tools. Defaults to - ".v+.sv". + Add specified C linker arguments to the generated makefiles. For multiple + flags either pass them as a single argument with space separators quoted + in the shell (``-LDFLAGS "-a -b"``), or use multiple -LDFLAGS arguments + (``-LDFLAGS -a -LDFLAGS -b``). + + When make is run on the generated makefile these will be passed to the + C++ linker (ld) **after** the primary file being linked. This flag is + called :vlopt:`-LDFLAGS` as that's the traditional name in simulators; + it's would have been better called LDLIBS as that's the Makefile + variable it controls. (In Make, LDFLAGS is before the first object, + LDLIBS after. -L libraries need to be in the Make variable LDLIBS, not + LDFLAGS.) .. option:: --lib-create @@ -640,6 +645,13 @@ Summary: See also :vlopt:`--protect-lib`. +.. option:: +libext+[+][...] 
+ + Specify the extensions that should be used for finding modules. If for + example module "my" is referenced, look in :file:`my.`. Note + "+libext+" is fairly standard across Verilog tools. Defaults to + ".v+.sv". + .. option:: --lint-only Check the files for lint violations only, do not create any other @@ -678,17 +690,6 @@ Summary: Set the maximum number literal width (e.g. in 1024'd22 this it the 1024). Defaults to 64K. -.. option:: --MMD =item --no-MMD - - Enable/disable creation of .d dependency files, used for make dependency - detection, similar to gcc -MMD option. By default this option is - enabled for :vlopt:`--cc` or :vlopt:`--sc` modes. - -.. option:: --MP - - When creating .d dependency files with :vlopt:`--MMD` option, make phony - targets. Similar to :command:`gcc -MP` option. - .. option:: --Mdir Specifies the name of the Make object directory. All generated files @@ -696,33 +697,23 @@ Summary: The directory is created if it does not exist and the parent directories exist; otherwise manually create the Mdir before calling Verilator. +.. option:: --MMD + +.. option:: --no-MMD + + Enable/disable creation of .d dependency files, used for make dependency + detection, similar to gcc -MMD option. By default this option is + enabled for :vlopt:`--cc` or :vlopt:`--sc` modes. + .. option:: --mod-prefix Specifies the name to prepend to all lower level classes. Defaults to the same as :vlopt:`--prefix`. -.. option:: --no-clk +.. option:: --MP - Prevent the specified signal from being marked as clock. See - :vlopt:`--clk`. - -.. option:: --no-decoration - - When creating output Verilated code, minimize comments, white space, - symbol names and other decorative items, at the cost of greatly reduced - readability. This may assist C++ compile times. This will not typically - change the ultimate model's performance, but may in some cases. - -.. option:: --no-pins64 - - Backward compatible alias for :vlopt:`--pins-bv 33 <--pins-bv>`. - -.. 
option:: --no-skip-identical =item --skip-identical - - Rarely needed. Disables or enables skipping execution of Verilator if - all source files are identical, and all output files exist with newer - dates. By default this option is enabled for :vlopt:`--cc` or - :vlopt:`--sc` modes only. + When creating .d dependency files with :vlopt:`--MMD` option, make phony + targets. Similar to :command:`gcc -MP` option. .. option:: +notimingchecks @@ -801,11 +792,6 @@ Summary: With :vlopt:`-E`, disable generation of :code:`&96;line` markers and blank lines, similar to :command:`gcc -P`. -.. option:: --pins64 - - Backward compatible alias for :vlopt:`--pins-bv 65 <--pins-bv>`. Note - that's a 65, not a 64. - .. option:: --pins-bv Specifies SystemC inputs/outputs of greater than or equal to @@ -838,6 +824,15 @@ Summary: of uint32_t. Likewise pins of width 9-16 will use uint16_t instead of uint32_t. +.. option:: --pins64 + + Backward compatible alias for :vlopt:`--pins-bv 65 <--pins-bv>`. Note + that's a 65, not a 64. + +.. option:: --no-pins64 + + Backward compatible alias for :vlopt:`--pins-bv 33 <--pins-bv>`. + .. option:: --pipe-filter Rarely needed. Verilator will spawn the specified command as a @@ -867,6 +862,11 @@ Summary: prepended to the name of the :vlopt:`--top` option, or V prepended to the first Verilog filename passed on the command line. +.. option:: --private + + Opposite of :vlopt:`--public`. Is the default; this option exists for + backwards compatibility. + .. option:: --prof-c When compiling the C++ code, enable the compiler's profiling flag @@ -900,23 +900,6 @@ Summary: Deprecated. Same as --prof-exec and --prof-pgo together. -.. option:: --protect-key - - Specifies the private key for :vlopt:`--protect-ids`. For best security - this key should be 16 or more random bytes, a reasonable secure choice - is the output of :command:`verilator --generate-key` . 
Typically, a key - would be created by the user once for a given protected design library, - then every Verilator run for subsequent versions of that library would - be passed the same :vlopt:`--protect-key`. Thus, if the input Verilog is - similar between library versions (Verilator runs), the Verilated code - will likewise be mostly similar. - - If :vlopt:`--protect-key` is not specified and a key is needed, - Verilator will generate a new key for every Verilator run. As the key is - not saved, this is best for security, but means every Verilator run will - give vastly different output even for identical input, perhaps harming - compile times (and certainly thrashing any "ccache"). - .. option:: --protect-ids Hash any private identifiers (variable, module, and assertion block @@ -937,6 +920,23 @@ Summary: prototypes. Use of the VPI is not recommended as many design details may be exposed, and an INSECURE warning will be issued. +.. option:: --protect-key + + Specifies the private key for :vlopt:`--protect-ids`. For best security + this key should be 16 or more random bytes, a reasonable secure choice + is the output of :command:`verilator --generate-key` . Typically, a key + would be created by the user once for a given protected design library, + then every Verilator run for subsequent versions of that library would + be passed the same :vlopt:`--protect-key`. Thus, if the input Verilog is + similar between library versions (Verilator runs), the Verilated code + will likewise be mostly similar. + + If :vlopt:`--protect-key` is not specified and a key is needed, + Verilator will generate a new key for every Verilator run. As the key is + not saved, this is best for security, but means every Verilator run will + give vastly different output even for identical input, perhaps harming + compile times (and certainly thrashing any "ccache"). + .. 
option:: --protect-lib Produces a DPI library similar to :vlopt:`--lib-create`, but hides @@ -948,11 +948,6 @@ Summary: in the distribution for a demonstration of how to build and use the DPI library. -.. option:: --private - - Opposite of :vlopt:`--public`. Is the default; this option exists for - backwards compatibility. - .. option:: --public This is only for historical debug use. Using it may result in @@ -1045,6 +1040,15 @@ Summary: Specifies SystemC output mode; see also :vlopt:`--cc` option. +.. option:: --skip-identical + +.. option:: --no-skip-identical + + Rarely needed. Disables or enables skipping execution of Verilator if + all source files are identical, and all output files exist with newer + dates. By default this option is enabled for :vlopt:`--cc` or + :vlopt:`--sc` modes only. + .. option:: --stats Creates a dump file with statistics on the design in From fc4d6a62af538aac00b489033611f5e7f13eb7c6 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Wed, 22 Jun 2022 20:06:23 -0400 Subject: [PATCH 005/119] Remove VL_PROFILER ifdef. Partial (#3454). --- include/verilated_profiler.cpp | 4 ++++ include/verilated_profiler.h | 8 +++----- include/verilated_threads.cpp | 20 ++++++-------------- include/verilated_threads.h | 16 ++++++++++------ src/V3EmitCSyms.cpp | 9 ++++++--- src/V3EmitMk.cpp | 1 - 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/include/verilated_profiler.cpp b/include/verilated_profiler.cpp index ed25093d1..9f37addf9 100644 --- a/include/verilated_profiler.cpp +++ b/include/verilated_profiler.cpp @@ -99,6 +99,10 @@ void VlExecutionProfiler::configure(const VerilatedContext& context) { } } +void VlExecutionProfiler::startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId) { + profilep->setupThread(threadId); +} + void VlExecutionProfiler::setupThread(uint32_t threadId) { // Reserve some space in the thread-local profiling buffer, in order to try to avoid malloc // while profiling. 
diff --git a/include/verilated_profiler.h b/include/verilated_profiler.h index d47be4da4..f85c95528 100644 --- a/include/verilated_profiler.h +++ b/include/verilated_profiler.h @@ -23,11 +23,6 @@ #define VERILATOR_VERILATED_PROFILER_H_ #include "verilatedos.h" - -#ifndef VL_PROFILER -#error "verilated_profiler.h/cpp expects VL_PROFILER (from --prof-{exec, pgo}" -#endif - #include "verilated.h" #include @@ -186,6 +181,9 @@ public: void clear() VL_MT_SAFE_EXCLUDES(m_mutex); // Write profiling data into file void dump(const char* filenamep, uint64_t tickEnd) VL_MT_SAFE_EXCLUDES(m_mutex); + + // Called via VlStartWorkerCb in VlWorkerThread::startWorker + static void startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId); }; //============================================================================= diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index 12a579026..dbdd3330a 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -24,10 +24,6 @@ #include "verilatedos.h" #include "verilated_threads.h" -#ifdef VL_PROFILER -#include "verilated_profiler.h" -#endif - #include #include #include @@ -52,10 +48,10 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount) // VlWorkerThread VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp) + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) : m_ready_size{0} , m_exiting{false} - , m_cthread{startWorker, this, threadId, profilerp} + , m_cthread{startWorker, this, threadId, profilerp, startCb} , m_contextp{contextp} {} VlWorkerThread::~VlWorkerThread() { @@ -83,13 +79,9 @@ void VlWorkerThread::workerLoop() { } void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp) { + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { Verilated::threadContextp(workerp->m_contextp); -#ifdef VL_PROFILER - // Note: setupThread is not defined without 
VL_PROFILER, hence the #ifdef. Still, we might - // not be profiling execution (e.g.: PGO only), so profilerp might still be nullptr. - if (profilerp) profilerp->setupThread(threadId); -#endif + if (VL_UNLIKELY(startCb)) startCb(profilerp, threadId); workerp->workerLoop(); } @@ -97,7 +89,7 @@ void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId, // VlThreadPool VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads, - VlExecutionProfiler* profiler) { + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { // --threads N passes nThreads=N-1, as the "main" threads counts as 1 ++nThreads; const unsigned cpus = std::thread::hardware_concurrency(); @@ -111,7 +103,7 @@ VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads, } // Create worker threads for (uint32_t threadId = 1; threadId < nThreads; ++threadId) { - m_workers.push_back(new VlWorkerThread{threadId, contextp, profiler}); + m_workers.push_back(new VlWorkerThread{threadId, contextp, profilerp, startCb}); } } diff --git a/include/verilated_threads.h b/include/verilated_threads.h index 257c87162..805a73d2d 100644 --- a/include/verilated_threads.h +++ b/include/verilated_threads.h @@ -50,6 +50,9 @@ #endif // clang-format on +class VlExecutionProfiler; +class VlThreadPool; + // VlMTaskVertex and VlThreadpool will work with multiple model class types. // Since the type is opaque to VlMTaskVertex and VlThreadPool, represent it // as a void* here. @@ -57,6 +60,9 @@ using VlSelfP = void*; using VlExecFnp = void (*)(VlSelfP, bool); +// VlWorkerThread::startWorker callback, used to hook in VlExecutionProfiler +using VlStartWorkerCb = void (*)(VlExecutionProfiler*, uint32_t threadId); + // Track dependencies for a single MTask. 
class VlMTaskVertex final { // MEMBERS @@ -129,9 +135,6 @@ public: } }; -class VlExecutionProfiler; -class VlThreadPool; - class VlWorkerThread final { private: // TYPES @@ -171,7 +174,7 @@ private: public: // CONSTRUCTORS explicit VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp); + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); ~VlWorkerThread(); // METHODS @@ -209,7 +212,7 @@ public: } void workerLoop(); static void startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp); + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); }; class VlThreadPool final { @@ -221,7 +224,8 @@ public: // Construct a thread pool with 'nThreads' dedicated threads. The thread // pool will create these threads and make them available to execute tasks // via this->workerp(index)->addTask(...) - VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp); + VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp, + VlStartWorkerCb startCb); ~VlThreadPool(); // METHODS diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index f9baa3fd5..28d455fd9 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -677,8 +677,8 @@ void EmitCSyms::emitSymImp() { puts("}\n\n"); // Constructor - puts(symClassName() + "::" + symClassName() + "(VerilatedContext* contextp, const char* namep," - + topClassName() + "* modelp)\n"); + puts(symClassName() + "::" + symClassName() + + "(VerilatedContext* contextp, const char* namep, " + topClassName() + "* modelp)\n"); puts(" : VerilatedSyms{contextp}\n"); puts(" // Setup internal state of the Syms class\n"); puts(" , __Vm_modelp{modelp}\n"); @@ -707,7 +707,10 @@ void EmitCSyms::emitSymImp() { // duration of the eval call. puts(" , __Vm_threadPoolp{new VlThreadPool{_vm_contextp__, " + cvtToStr(v3Global.opt.threads() - 1) + ", " - + (v3Global.opt.profExec() ? 
"&__Vm_executionProfiler" : "nullptr") + "}}\n"); + + (v3Global.opt.profExec() + ? "&__Vm_executionProfiler, &VlExecutionProfiler::startWorkerSetup" + : "nullptr, nullptr") + + "}}\n"); } puts(" // Setup module instances\n"); diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index b748d9553..3fb3907be 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -197,7 +197,6 @@ public: of.puts("# User CFLAGS (from -CFLAGS on Verilator command line)\n"); of.puts("VM_USER_CFLAGS = \\\n"); if (!v3Global.opt.libCreate().empty()) of.puts("\t-fPIC \\\n"); - if (v3Global.opt.usesProfiler()) of.puts("\t-DVL_PROFILER \\\n"); const V3StringList& cFlags = v3Global.opt.cFlags(); for (const string& i : cFlags) of.puts("\t" + i + " \\\n"); of.puts("\n"); From ced39d0982a7d0e50d6208b3262504ac741d9c1f Mon Sep 17 00:00:00 2001 From: Yutetsu TAKATSUKASA Date: Mon, 27 Jun 2022 22:41:33 +0900 Subject: [PATCH 006/119] Internals: preparation for fixing #3470 (#3475) * Internals: Let LeafInfo class. No functional change is intended. * Internals: Rename LeafInfo::width -> LeafInfo::varWidth(). No functional change is intende. --- src/V3Const.cpp | 106 ++++++++++++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 45 deletions(-) diff --git a/src/V3Const.cpp b/src/V3Const.cpp index e246180a2..7dae3f014 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -79,14 +79,32 @@ class ConstBitOpTreeVisitor final : public VNVisitor { // bool indicating if the term is clean (0/1 value, or if the top bits might be dirty) using ResultTerm = std::tuple; - struct LeafInfo final { // Leaf node (either AstConst or AstVarRef) + class LeafInfo final { // Leaf node (either AstConst or AstVarRef) bool m_polarity = true; int m_lsb = 0; int m_wordIdx = -1; // -1 means AstWordSel is not used. 
AstVarRef* m_refp = nullptr; const AstConst* m_constp = nullptr; - int width() const { + public: + void setLeaf(AstVarRef* refp) { + UASSERT(!m_refp && !m_constp, "Must be called just once"); + m_refp = refp; + } + void setLeaf(const AstConst* constp) { + UASSERT(!m_refp && !m_constp, "Must be called just once"); + m_constp = constp; + } + AstVarRef* refp() const { return m_refp; } + const AstConst* constp() const { return m_constp; } + int wordIdx() const { return m_wordIdx; } + bool polarity() const { return m_polarity; } + int lsb() const { return m_lsb; } + + void wordIdx(int i) { m_wordIdx = i; } + void lsb(int l) { m_lsb = l; } + void polarity(bool p) { m_polarity = p; } + int varWidth() const { UASSERT(m_refp, "m_refp should be set"); const int width = m_refp->varp()->widthMin(); if (!m_refp->isWide()) { @@ -339,25 +357,25 @@ class ConstBitOpTreeVisitor final : public VNVisitor { UINFO(9, "Increment to " << m_ops << " " << nodep << " called from line " << line << "\n"); } VarInfo& getVarInfo(const LeafInfo& ref) { - UASSERT_OBJ(ref.m_refp, m_rootp, "null varref in And/Or/Xor optimization"); - AstNode* nodep = ref.m_refp->varScopep(); - if (!nodep) nodep = ref.m_refp->varp(); // Not scoped + UASSERT_OBJ(ref.refp(), m_rootp, "null varref in And/Or/Xor optimization"); + AstNode* nodep = ref.refp()->varScopep(); + if (!nodep) nodep = ref.refp()->varp(); // Not scoped int baseIdx = nodep->user4(); if (baseIdx == 0) { // Not set yet baseIdx = m_varInfos.size(); const int numWords - = ref.m_refp->dtypep()->isWide() ? ref.m_refp->dtypep()->widthWords() : 1; + = ref.refp()->dtypep()->isWide() ? 
ref.refp()->dtypep()->widthWords() : 1; m_varInfos.resize(m_varInfos.size() + numWords); nodep->user4(baseIdx); } - const size_t idx = baseIdx + std::max(0, ref.m_wordIdx); + const size_t idx = baseIdx + std::max(0, ref.wordIdx()); VarInfo* varInfop = m_varInfos[idx].get(); if (!varInfop) { - varInfop = new VarInfo{this, ref.m_refp, ref.width()}; + varInfop = new VarInfo{this, ref.refp(), ref.varWidth()}; m_varInfos[idx].reset(varInfop); } else { - if (!varInfop->sameVarAs(ref.m_refp)) - CONST_BITOP_SET_FAILED("different var (scope?)", ref.m_refp); + if (!varInfop->sameVarAs(ref.refp())) + CONST_BITOP_SET_FAILED("different var (scope?)", ref.refp()); } return *varInfop; } @@ -373,9 +391,9 @@ class ConstBitOpTreeVisitor final : public VNVisitor { bool ok = !m_failed; if (expectConst) { - ok &= !info.m_refp && info.m_constp; + ok &= !info.refp() && info.constp(); } else { - ok &= info.m_refp && !info.m_constp; + ok &= info.refp() && !info.constp(); } return ok ? info : LeafInfo{}; } @@ -411,22 +429,20 @@ class ConstBitOpTreeVisitor final : public VNVisitor { CONST_BITOP_RETURN_IF(!m_leafp, nodep); AstConst* const constp = VN_CAST(nodep->bitp(), Const); CONST_BITOP_RETURN_IF(!constp, nodep->rhsp()); - UASSERT_OBJ(m_leafp->m_wordIdx == -1, nodep, "Unexpected nested WordSel"); - m_leafp->m_wordIdx = constp->toSInt(); + UASSERT_OBJ(m_leafp->wordIdx() == -1, nodep, "Unexpected nested WordSel"); + m_leafp->wordIdx(constp->toSInt()); iterate(nodep->fromp()); } virtual void visit(AstVarRef* nodep) override { CONST_BITOP_RETURN_IF(!m_leafp, nodep); - UASSERT_OBJ(!m_leafp->m_refp, nodep, m_leafp->m_refp << " is already set"); - m_leafp->m_refp = nodep; - m_leafp->m_polarity = m_polarity; - m_leafp->m_lsb = m_lsb; + m_leafp->setLeaf(nodep); + m_leafp->polarity(m_polarity); + m_leafp->lsb(m_lsb); } virtual void visit(AstConst* nodep) override { CONST_BITOP_RETURN_IF(!m_leafp, nodep); - UASSERT_OBJ(!m_leafp->m_constp, nodep, m_leafp->m_constp << " is already set"); - 
m_leafp->m_constp = nodep; - m_leafp->m_lsb = m_lsb; + m_leafp->setLeaf(nodep); + m_leafp->lsb(m_lsb); } virtual void visit(AstRedXor* nodep) override { @@ -438,36 +454,36 @@ class ConstBitOpTreeVisitor final : public VNVisitor { CONST_BITOP_RETURN_IF(!andp, lhsp); const LeafInfo& mask = findLeaf(andp->lhsp(), true); - CONST_BITOP_RETURN_IF(!mask.m_constp || mask.m_lsb != 0, andp->lhsp()); + CONST_BITOP_RETURN_IF(!mask.constp() || mask.lsb() != 0, andp->lhsp()); const LeafInfo& ref = findLeaf(andp->rhsp(), false); - CONST_BITOP_RETURN_IF(!ref.m_refp, andp->rhsp()); + CONST_BITOP_RETURN_IF(!ref.refp(), andp->rhsp()); restorer.disableRestore(); // Now all subtree succeeded - const V3Number& maskNum = mask.m_constp->num(); + const V3Number& maskNum = mask.constp()->num(); incrOps(nodep, __LINE__); incrOps(andp, __LINE__); // Mark all bits checked in this reduction - const int maxBitIdx = std::min(ref.m_lsb + maskNum.width(), ref.width()); - for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) { - const int maskIdx = bitIdx - ref.m_lsb; + const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.varWidth()); + for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { + const int maskIdx = bitIdx - ref.lsb(); if (maskNum.bitIs0(maskIdx)) continue; // Set true, m_polarity takes care of the entire parity m_bitPolarities.emplace_back(ref, true, bitIdx); } } else { // '^leaf' const LeafInfo& ref = findLeaf(lhsp, false); - CONST_BITOP_RETURN_IF(!ref.m_refp, lhsp); + CONST_BITOP_RETURN_IF(!ref.refp(), lhsp); restorer.disableRestore(); // Now all checks passed incrOps(nodep, __LINE__); // Mark all bits checked by this comparison - for (int bitIdx = ref.m_lsb; bitIdx < ref.width(); ++bitIdx) { + for (int bitIdx = ref.lsb(); bitIdx < ref.varWidth(); ++bitIdx) { m_bitPolarities.emplace_back(ref, true, bitIdx); } } @@ -492,7 +508,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { AstNode* opp = right ? 
nodep->rhsp() : nodep->lhsp(); const bool origFailed = m_failed; iterate(opp); - if (leafInfo.m_constp || m_failed) { + if (leafInfo.constp() || m_failed) { // Revert changes in leaf restorer.restoreNow(); // Reach past a cast then add to frozen nodes to be added to final reduction @@ -502,14 +518,14 @@ class ConstBitOpTreeVisitor final : public VNVisitor { continue; } restorer.disableRestore(); // Now all checks passed - if (leafInfo.m_refp) { + if (leafInfo.refp()) { // The conditional on the lsb being in range is necessary for some degenerate // case, e.g.: (IData)((QData)wide[0] >> 32), or <1-bit-var> >> 1, which is // just zero - if (leafInfo.m_lsb < leafInfo.width()) { - m_bitPolarities.emplace_back(leafInfo, isXorTree() || leafInfo.m_polarity, - leafInfo.m_lsb); - } else if (isAndTree() && leafInfo.m_polarity) { + if (leafInfo.lsb() < leafInfo.varWidth()) { + m_bitPolarities.emplace_back(leafInfo, isXorTree() || leafInfo.polarity(), + leafInfo.lsb()); + } else if (isAndTree() && leafInfo.polarity()) { // If there is a constant 0 term in an And tree, we must include it. 
Fudge // this by adding a bit with both polarities, which will simplify to zero m_bitPolarities.emplace_back(leafInfo, true, 0); @@ -530,38 +546,38 @@ class ConstBitOpTreeVisitor final : public VNVisitor { if (const AstAnd* const andp = VN_CAST(nodep->rhsp(), And)) { // comp == (mask & v) const LeafInfo& mask = findLeaf(andp->lhsp(), true); - CONST_BITOP_RETURN_IF(!mask.m_constp || mask.m_lsb != 0, andp->lhsp()); + CONST_BITOP_RETURN_IF(!mask.constp() || mask.lsb() != 0, andp->lhsp()); const LeafInfo& ref = findLeaf(andp->rhsp(), false); - CONST_BITOP_RETURN_IF(!ref.m_refp, andp->rhsp()); + CONST_BITOP_RETURN_IF(!ref.refp(), andp->rhsp()); restorer.disableRestore(); // Now all checks passed - const V3Number& maskNum = mask.m_constp->num(); + const V3Number& maskNum = mask.constp()->num(); incrOps(nodep, __LINE__); incrOps(andp, __LINE__); // Mark all bits checked by this comparison - const int maxBitIdx = std::min(ref.m_lsb + maskNum.width(), ref.width()); - for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) { - const int maskIdx = bitIdx - ref.m_lsb; + const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.varWidth()); + for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { + const int maskIdx = bitIdx - ref.lsb(); if (maskNum.bitIs0(maskIdx)) continue; const bool polarity = compNum.bitIs1(maskIdx) != maskFlip; m_bitPolarities.emplace_back(ref, polarity, bitIdx); } } else { // comp == v const LeafInfo& ref = findLeaf(nodep->rhsp(), false); - CONST_BITOP_RETURN_IF(!ref.m_refp, nodep->rhsp()); + CONST_BITOP_RETURN_IF(!ref.refp(), nodep->rhsp()); restorer.disableRestore(); // Now all checks passed incrOps(nodep, __LINE__); // Mark all bits checked by this comparison - const int maxBitIdx = std::min(ref.m_lsb + compNum.width(), ref.width()); - for (int bitIdx = ref.m_lsb; bitIdx < maxBitIdx; ++bitIdx) { - const int maskIdx = bitIdx - ref.m_lsb; + const int maxBitIdx = std::min(ref.lsb() + compNum.width(), ref.varWidth()); + for (int bitIdx = 
ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { + const int maskIdx = bitIdx - ref.lsb(); const bool polarity = compNum.bitIs1(maskIdx) != maskFlip; m_bitPolarities.emplace_back(ref, polarity, bitIdx); } From 1bb6433649d205ad19ce277452ddfed99787fe0d Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 27 Jun 2022 14:16:20 +0100 Subject: [PATCH 007/119] Improve worker thread shutdown. Always ensure worker thread task queue is drained before shutting down. --- include/verilated_threads.cpp | 21 ++++++++------------- include/verilated_threads.h | 6 ++++-- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index dbdd3330a..a78ea9ae6 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -50,31 +50,26 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount) VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) : m_ready_size{0} - , m_exiting{false} , m_cthread{startWorker, this, threadId, profilerp, startCb} , m_contextp{contextp} {} VlWorkerThread::~VlWorkerThread() { - m_exiting.store(true, std::memory_order_release); - wakeUp(); + shutdown(); // The thread should exit; join it. m_cthread.join(); } +void VlWorkerThread::shutdownTask(void*, bool) { + // Deliberately empty, we use the address of this function as a magic number +} + void VlWorkerThread::workerLoop() { ExecRec work; - work.m_fnp = nullptr; while (true) { - if (VL_LIKELY(!work.m_fnp)) dequeWork(&work); - - // Do this here, not above, to avoid a race with the destructor. 
- if (VL_UNLIKELY(m_exiting.load(std::memory_order_acquire))) break; - - if (VL_LIKELY(work.m_fnp)) { - work.m_fnp(work.m_selfp, work.m_evenCycle); - work.m_fnp = nullptr; - } + dequeWork(&work); + if (VL_UNLIKELY(work.m_fnp == shutdownTask)) break; + work.m_fnp(work.m_selfp, work.m_evenCycle); } } diff --git a/include/verilated_threads.h b/include/verilated_threads.h index 805a73d2d..eeb8f9342 100644 --- a/include/verilated_threads.h +++ b/include/verilated_threads.h @@ -165,7 +165,6 @@ private: // Store the size atomically, so we can spin wait std::atomic<size_t> m_ready_size; - std::atomic<bool> m_exiting; // Worker thread should exit std::thread m_cthread; // Underlying C++ thread record VerilatedContext* const m_contextp; // Context for spawned thread @@ -198,7 +197,6 @@ public: m_ready.erase(m_ready.begin()); m_ready_size.fetch_sub(1, std::memory_order_relaxed); } - inline void wakeUp() { addTask(nullptr, nullptr, false); } inline void addTask(VlExecFnp fnp, VlSelfP selfp, bool evenCycle) VL_MT_SAFE_EXCLUDES(m_mutex) { bool notify; @@ -210,6 +208,10 @@ public: } if (notify) m_cv.notify_one(); } + + inline void shutdown() { addTask(shutdownTask, nullptr, false); } + static void shutdownTask(void*, bool); + void workerLoop(); static void startWorker(VlWorkerThread* workerp, uint32_t threadId, VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); From 383e384739dbca4243ce0003ecd46048a03b5921 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 27 Jun 2022 15:23:32 +0100 Subject: [PATCH 008/119] Remove always true cfg_with_threaded from test driver --- test_regress/driver.pl | 9 --------- test_regress/t/t_verilated_all.pl | 8 +++----- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/test_regress/driver.pl b/test_regress/driver.pl index 541fb296f..968b89f81 100755 --- a/test_regress/driver.pl +++ b/test_regress/driver.pl @@ -1110,11 +1110,6 @@ sub compile { return 1; } - if ($self->{vltmt} && !$self->cfg_with_threaded) { - $self->skip("Test requires Verilator 
configured with threads\n"); - return 1; - } - if ($param{verilator_make_cmake} && !$self->have_cmake) { $self->skip("Test requires CMake; ignore error since not available or version too old\n"); return 1; @@ -2340,10 +2335,6 @@ sub cxx_version { return $_Cxx_Version; } -sub cfg_with_threaded { - return 1; # C++11 now always required -} - our $_Cfg_with_ccache; sub cfg_with_ccache { diff --git a/test_regress/t/t_verilated_all.pl b/test_regress/t/t_verilated_all.pl index 1a8bc4469..450b5bd9c 100755 --- a/test_regress/t/t_verilated_all.pl +++ b/test_regress/t/t_verilated_all.pl @@ -17,10 +17,8 @@ compile( verilator_flags2 => ["--cc", "--coverage-toggle --coverage-line --coverage-user", "--trace --vpi ", - ($Self->cfg_with_threaded - ? "--threads 2 $root/include/verilated_threads.cpp" : ""), - ($Self->cfg_with_threaded - ? "--trace-threads 1" : ""), + "--threads 2", + "--trace-threads 1", "--prof-exec", "--prof-pgo", "$root/include/verilated_save.cpp"], ); @@ -58,7 +56,7 @@ foreach my $file (sort keys %hit) { && $file !~ /_sc/ && $file !~ /_fst/ && $file !~ /_heavy/ - && ($file !~ /_thread/ || $Self->cfg_with_threaded)) { + && ($file !~ /_thread/)) { error("Include file not covered by t_verilated_all test: ", $file); } } From 31a83cb0d86d08adebb8ea5b0782ea5d46500108 Mon Sep 17 00:00:00 2001 From: Felix Yan Date: Wed, 29 Jun 2022 18:24:30 +0300 Subject: [PATCH 009/119] Fix use of C++17 on Arch Linux (#3479) --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index b86bb48f5..b3d2e47e5 100644 --- a/configure.ac +++ b/configure.ac @@ -355,7 +355,7 @@ AC_SUBST(CFG_CXXFLAGS_PROFILE) #_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++20) #_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++20) case "$(which lsb_release 2>&1 > /dev/null && lsb_release -d)" in -*Ubuntu*22.04*) +*Arch*Linux* | *Ubuntu*22.04*) _MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++17) _MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++17) 
;; From 2873dbe15472faca5e14d83b3b9d03221dcb5e31 Mon Sep 17 00:00:00 2001 From: Mariusz Glebocki Date: Mon, 4 Jul 2022 16:23:31 +0200 Subject: [PATCH 010/119] Optimize file writing by using a memory buffer. (#3461) --- docs/CONTRIBUTORS | 1 + src/V3EmitCSyms.cpp | 3 ++- src/V3File.cpp | 5 ++++- src/V3File.h | 43 +++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS index d598cebd5..efd6749cc 100644 --- a/docs/CONTRIBUTORS +++ b/docs/CONTRIBUTORS @@ -68,6 +68,7 @@ Lukasz Dalek Maarten De Braekeleer Maciej Sobkowski Marco Widmer +Mariusz Glebocki Markus Krause Marlon James Marshal Qiao diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index 28d455fd9..c66f346b0 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -951,7 +951,8 @@ void EmitCSyms::emitSymImp() { } closeSplit(); - VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr); + m_ofp = nullptr; + VL_DO_CLEAR(delete m_ofpBase, m_ofpBase = nullptr); } //###################################################################### diff --git a/src/V3File.cpp b/src/V3File.cpp index f6b9cf11d..305d19c08 100644 --- a/src/V3File.cpp +++ b/src/V3File.cpp @@ -920,13 +920,16 @@ void V3OutFormatter::printf(const char* fmt...) { // V3OutFormatter: A class for printing to a file, with automatic indentation of C++ code. 
V3OutFile::V3OutFile(const string& filename, V3OutFormatter::Language lang) - : V3OutFormatter{filename, lang} { + : V3OutFormatter{filename, lang} + , m_bufferp{new std::array<char, WRITE_BUFFER_SIZE_BYTES>{}} { if ((m_fp = V3File::new_fopen_w(filename)) == nullptr) { v3fatal("Cannot write " << filename); } } V3OutFile::~V3OutFile() { + writeBlock(); + if (m_fp) fclose(m_fp); m_fp = nullptr; } diff --git a/src/V3File.h b/src/V3File.h index dd337b165..6c45a0456 100644 --- a/src/V3File.h +++ b/src/V3File.h @@ -22,6 +22,7 @@ #include "V3Error.h" +#include <array> #include #include #include @@ -183,18 +184,56 @@ public: // V3OutFile: A class for printing to a file, with automatic indentation of C++ code. class V3OutFile VL_NOT_FINAL : public V3OutFormatter { + // Size of m_bufferp. + // 128kB has been experimentally determined to be in the zone of buffer sizes that work best. + // It is also considered to be the smallest I/O buffer size in GNU coreutils (io_blksize) that + // allows to best minimize syscall overhead. + // The hard boundaries are CPU L2/L3 cache size on the top and filesystem block size + // on the bottom. 
+ static constexpr std::size_t WRITE_BUFFER_SIZE_BYTES = 128 * 1024; + // MEMBERS + std::unique_ptr<std::array<char, WRITE_BUFFER_SIZE_BYTES>> m_bufferp; // Write buffer + std::size_t m_usedBytes = 0; // Number of bytes stored in m_bufferp FILE* m_fp = nullptr; public: V3OutFile(const string& filename, V3OutFormatter::Language lang); + V3OutFile(const V3OutFile&) = delete; + V3OutFile& operator=(const V3OutFile&) = delete; + V3OutFile(V3OutFile&&) = delete; + V3OutFile& operator=(V3OutFile&&) = delete; + virtual ~V3OutFile() override; void putsForceIncs(); private: + void writeBlock() { + if (VL_LIKELY(m_usedBytes > 0)) fwrite(m_bufferp->data(), m_usedBytes, 1, m_fp); + m_usedBytes = 0; + } + // CALLBACKS - virtual void putcOutput(char chr) override { fputc(chr, m_fp); } - virtual void putsOutput(const char* str) override { fputs(str, m_fp); } + virtual void putcOutput(char chr) override { + m_bufferp->at(m_usedBytes++) = chr; + if (VL_UNLIKELY(m_usedBytes >= WRITE_BUFFER_SIZE_BYTES)) writeBlock(); + } + virtual void putsOutput(const char* str) override { + std::size_t len = strlen(str); + std::size_t availableBytes = WRITE_BUFFER_SIZE_BYTES - m_usedBytes; + while (VL_UNLIKELY(len >= availableBytes)) { + memcpy(m_bufferp->data() + m_usedBytes, str, availableBytes); + m_usedBytes = WRITE_BUFFER_SIZE_BYTES; + writeBlock(); + str += availableBytes; + len -= availableBytes; + availableBytes = WRITE_BUFFER_SIZE_BYTES; + } + if (len > 0) { + memcpy(m_bufferp->data() + m_usedBytes, str, len); + m_usedBytes += len; + } + } }; class V3OutCFile VL_NOT_FINAL : public V3OutFile { From 42b711b862571af996f7eadb0f0f756c98215bd0 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 5 Jul 2022 12:17:04 +0100 Subject: [PATCH 011/119] Don't use 'assert' in profiler initialization --- include/verilated_profiler.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/verilated_profiler.cpp b/include/verilated_profiler.cpp index 9f37addf9..21246827a 
100644 --- a/include/verilated_profiler.cpp +++ 
b/include/verilated_profiler.cpp @@ -108,10 +108,13 @@ void VlExecutionProfiler::setupThread(uint32_t threadId) { // while profiling. t_trace.reserve(RESERVED_TRACE_CAPACITY); // Register thread-local buffer in list of all buffers + bool exists; { const VerilatedLockGuard lock{m_mutex}; - bool exists = !m_traceps.emplace(threadId, &t_trace).second; - assert(!exists); + exists = !m_traceps.emplace(threadId, &t_trace).second; + } + if (VL_UNLIKELY(exists)) { + VL_FATAL_MT(__FILE__, __LINE__, "", "multiple initialization of profiler on some thread"); } } From 3aa8624658613adddf1ad178f025ad0bd5816034 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 5 Jul 2022 10:57:16 +0100 Subject: [PATCH 012/119] Set 'threads' in tests via parameter to compile This is in preparation to #3454. --- test_regress/driver.pl | 8 ++++++-- test_regress/t/t_depth_flop.pl | 3 ++- test_regress/t/t_dotfiles.pl | 3 ++- test_regress/t/t_embed1.pl | 1 + test_regress/t/t_gantt.pl | 3 ++- test_regress/t/t_hier_block.pl | 3 ++- test_regress/t/t_hier_block_nohier.pl | 4 ++-- test_regress/t/t_hier_block_sc.pl | 2 +- test_regress/t/t_hier_block_sc_trace_fst.pl | 2 +- test_regress/t/t_hier_block_sc_trace_vcd.pl | 2 +- test_regress/t/t_hier_block_trace_fst.pl | 4 ++-- test_regress/t/t_hier_block_trace_vcd.pl | 4 ++-- test_regress/t/t_hier_block_vlt.pl | 4 ++-- test_regress/t/t_inst_tree_inl0_pub1.pl | 6 +++--- test_regress/t/t_lib_prot_shared.pl | 2 +- test_regress/t/t_pgo_profoutofdate_bad.pl | 2 +- test_regress/t/t_pgo_threads.pl | 7 ++++--- test_regress/t/t_split_var_0.pl | 4 ++-- test_regress/t/t_split_var_2_trace.pl | 4 ++-- test_regress/t/t_threads_counter_0.pl | 3 ++- test_regress/t/t_threads_counter_1.pl | 3 ++- test_regress/t/t_threads_counter_2.pl | 3 ++- test_regress/t/t_threads_counter_4.pl | 3 ++- test_regress/t/t_threads_crazy.pl | 3 ++- test_regress/t/t_threads_nondeterminism.pl | 3 ++- test_regress/t/t_trace_litendian.pl | 4 ++-- test_regress/t/t_trace_litendian_fst.pl | 4 ++-- 
test_regress/t/t_trace_litendian_fst_sc.pl | 4 ++-- test_regress/t/t_verilated_all.pl | 4 ++-- test_regress/t/t_verilated_threaded.pl | 3 ++- test_regress/t/t_wrapper_context.pl | 3 ++- test_regress/t/t_wrapper_context_fst.pl | 3 ++- test_regress/t/t_wrapper_context_seq.pl | 3 ++- 33 files changed, 67 insertions(+), 47 deletions(-) diff --git a/test_regress/driver.pl b/test_regress/driver.pl index 968b89f81..cbd9ba9ea 100755 --- a/test_regress/driver.pl +++ b/test_regress/driver.pl @@ -578,6 +578,7 @@ sub new { make_pli => 0, # need to compile pli sc_time_resolution => "SC_PS", # Keep - PS is SystemC default sim_time => 1100, + threads => -1, # --threads (negative means auto based on scenario) benchmark => $opt_benchmark, verbose => $opt_verbose, run_env => '', @@ -902,6 +903,7 @@ sub compile_vlt_flags { @{$param{verilator_flags}}, @{$param{verilator_flags2}}, @{$param{verilator_flags3}}); + die "%Error: specify threads via 'threads =>' argument, not as a command line option" unless ($checkflags !~ /(^|\s)-?-threads\s/ && $checkflags !~ /(^|\s)-?-no-threads($|\s)/); $self->{sc} = 1 if ($checkflags =~ /-sc\b/); $self->{trace} = ($opt_trace || $checkflags =~ /-trace\b/ || $checkflags =~ /-trace-fst\b/); @@ -920,8 +922,7 @@ sub compile_vlt_flags { unshift @verilator_flags, "--rr" if $opt_rr; unshift @verilator_flags, "--x-assign unique"; # More likely to be buggy unshift @verilator_flags, "--trace" if $opt_trace; - my $threads = ::calc_threads($Vltmt_threads); - unshift @verilator_flags, "--threads $threads" if $param{vltmt} && $checkflags !~ /-threads /; + unshift @verilator_flags, "--threads $param{threads}" if $param{threads} >= 0; unshift @verilator_flags, "--trace-threads 2" if $param{vltmt} && $checkflags =~ /-trace-fst /; unshift @verilator_flags, "--debug-partition" if $param{vltmt}; unshift @verilator_flags, "-CFLAGS -ggdb -LDFLAGS -ggdb" if $opt_gdbsim; @@ -972,6 +973,9 @@ sub compile { return 1 if $self->errors || $self->skips || $self->unsupporteds; 
$self->oprint("Compile\n") if $self->{verbose}; + die "%Error: 'threads =>' argument must be <= 1 for vlt scenario" if $param{vlt} && $param{threads} > 1; + $param{threads} = ::calc_threads($Vltmt_threads) if ($param{threads} < 0 && $param{vltmt}); + compile_vlt_cmd(%param); if (!$param{make_top_shell}) { diff --git a/test_regress/t/t_depth_flop.pl b/test_regress/t/t_depth_flop.pl index dabf3116e..8b4ad07ec 100755 --- a/test_regress/t/t_depth_flop.pl +++ b/test_regress/t/t_depth_flop.pl @@ -11,7 +11,8 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di scenarios(vltmt => 1); # Note issue shows up with --threads compile( - verilator_flags2 => ['--compiler clang --threads 2 -Wno-UNOPTTHREADS'], + verilator_flags2 => ['--compiler clang -Wno-UNOPTTHREADS'], + threads => 2 ); ok(1); diff --git a/test_regress/t/t_dotfiles.pl b/test_regress/t/t_dotfiles.pl index 31b8ac837..a9b52715e 100755 --- a/test_regress/t/t_dotfiles.pl +++ b/test_regress/t/t_dotfiles.pl @@ -14,7 +14,8 @@ scenarios(vltmt => 1); top_filename("t/t_gen_alw.v"); compile( - v_flags2 => ["--debug --debugi 5 --threads 2"] + v_flags2 => ["--debug --debugi 5"], + threads => 2 ); foreach my $dotname ("linkcells", "task_call", "gate_simp", "gate_opt", diff --git a/test_regress/t/t_embed1.pl b/test_regress/t/t_embed1.pl index a28adebe6..08e4c042c 100755 --- a/test_regress/t/t_embed1.pl +++ b/test_regress/t/t_embed1.pl @@ -22,6 +22,7 @@ mkdir $child_dir; (VM_PREFIX => "$Self->{VM_PREFIX}_child", top_filename => "$Self->{name}_child.v", verilator_flags => ["-cc", "-Mdir", "${child_dir}", "--debug-check"], + threads => $Self->{vltmt} ? 
$Self->get_default_vltmt_threads() : 0 ); run(logfile => "${child_dir}/vlt_compile.log", diff --git a/test_regress/t/t_gantt.pl b/test_regress/t/t_gantt.pl index c757d3fbe..1f92de577 100755 --- a/test_regress/t/t_gantt.pl +++ b/test_regress/t/t_gantt.pl @@ -18,8 +18,9 @@ scenarios(vlt_all => 1); top_filename("t/t_gen_alw.v"); compile( + v_flags2 => ["--prof-exec"], # Checks below care about thread count, so use 2 (minimum reasonable) - v_flags2 => ["--prof-exec", ($Self->{vltmt} ? "--threads 2" : "")] + threads => $Self->{vltmt} ? 2 : 0 ); execute( diff --git a/test_regress/t/t_hier_block.pl b/test_regress/t/t_hier_block.pl index cffc7ba04..e861c733e 100755 --- a/test_regress/t/t_hier_block.pl +++ b/test_regress/t/t_hier_block.pl @@ -18,11 +18,12 @@ scenarios(vlt_all => 1); compile( v_flags2 => ['t/t_hier_block.cpp'], - verilator_flags2 => ['--stats', ($Self->{vltmt} ? ' --threads 6' : ''), + verilator_flags2 => ['--stats', '--hierarchical', '--Wno-TIMESCALEMOD', '--CFLAGS', '"-pipe -DCPP_MACRO=cplusplus"' ], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_hier_block_nohier.pl b/test_regress/t/t_hier_block_nohier.pl index 006c981b3..8da19e163 100755 --- a/test_regress/t/t_hier_block_nohier.pl +++ b/test_regress/t/t_hier_block_nohier.pl @@ -23,8 +23,8 @@ compile( v_flags2 => ['t/t_hier_block.cpp'], verilator_flags2 => ['--stats', '+define+USE_VLT', 't/t_hier_block_vlt.vlt', - '--CFLAGS', '"-pipe -DCPP_MACRO=cplusplus"', - ($Self->{vltmt} ? ' --threads 6' : '')], + '--CFLAGS', '"-pipe -DCPP_MACRO=cplusplus"'], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_hier_block_sc.pl b/test_regress/t/t_hier_block_sc.pl index 139b280b9..8aff1358d 100755 --- a/test_regress/t/t_hier_block_sc.pl +++ b/test_regress/t/t_hier_block_sc.pl @@ -22,9 +22,9 @@ compile( verilator_flags2 => ['--sc', '--stats', '--hierarchical', - ($Self->{vltmt} ? 
' --threads 6' : ''), '--CFLAGS', '"-pipe -DCPP_MACRO=cplusplus"' ], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_hier_block_sc_trace_fst.pl b/test_regress/t/t_hier_block_sc_trace_fst.pl index 6c8dd260e..b2497ad14 100755 --- a/test_regress/t/t_hier_block_sc_trace_fst.pl +++ b/test_regress/t/t_hier_block_sc_trace_fst.pl @@ -22,11 +22,11 @@ compile( verilator_flags2 => ['--sc', '--stats', '--hierarchical', - ($Self->{vltmt} ? ' --threads 6' : ''), '--CFLAGS', '"-pipe -DCPP_MACRO=cplusplus"', "--CFLAGS", '"-O0 -ggdb"', "--trace-fst" ], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_hier_block_sc_trace_vcd.pl b/test_regress/t/t_hier_block_sc_trace_vcd.pl index 3ce4b1da3..7239369c7 100755 --- a/test_regress/t/t_hier_block_sc_trace_vcd.pl +++ b/test_regress/t/t_hier_block_sc_trace_vcd.pl @@ -22,11 +22,11 @@ compile( verilator_flags2 => ['--sc', '--stats', '--hierarchical', - ($Self->{vltmt} ? ' --threads 6' : ''), '--CFLAGS', '"-pipe -DCPP_MACRO=cplusplus"', "--CFLAGS", '"-O0 -ggdb"', "--trace" ], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_hier_block_trace_fst.pl b/test_regress/t/t_hier_block_trace_fst.pl index 29213f97b..753b9dda1 100755 --- a/test_regress/t/t_hier_block_trace_fst.pl +++ b/test_regress/t/t_hier_block_trace_fst.pl @@ -17,12 +17,12 @@ top_filename("t/t_hier_block.v"); compile( v_flags2 => ['t/t_hier_block.cpp'], - verilator_flags2 => [($Self->{vltmt} ? ' --threads 6' : ''), - '--hierarchical', + verilator_flags2 => ['--hierarchical', '--Wno-TIMESCALEMOD', '--trace-fst', '--no-trace-underscore', # To avoid handle mismatches ], + threads => $Self->{vltmt} ? 
6 : 0 ); execute( diff --git a/test_regress/t/t_hier_block_trace_vcd.pl b/test_regress/t/t_hier_block_trace_vcd.pl index ed212b921..4f3b80f09 100755 --- a/test_regress/t/t_hier_block_trace_vcd.pl +++ b/test_regress/t/t_hier_block_trace_vcd.pl @@ -17,12 +17,12 @@ top_filename("t/t_hier_block.v"); compile( v_flags2 => ['t/t_hier_block.cpp'], - verilator_flags2 => [($Self->{vltmt} ? ' --threads 6' : ''), - '--hierarchical', + verilator_flags2 => ['--hierarchical', '--Wno-TIMESCALEMOD', '--trace', '--no-trace-underscore', # To avoid handle mismatches ], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_hier_block_vlt.pl b/test_regress/t/t_hier_block_vlt.pl index 904182717..ef467ede9 100755 --- a/test_regress/t/t_hier_block_vlt.pl +++ b/test_regress/t/t_hier_block_vlt.pl @@ -22,8 +22,8 @@ compile( '--hierarchical', '+define+SHOW_TIMESCALE', '+define+USE_VLT', 't/t_hier_block_vlt.vlt', - '--CFLAGS', '"-pipe -DCPP_MACRO=cplusplus"', - ($Self->{vltmt} ? ' --threads 6' : '')], + '--CFLAGS', '"-pipe -DCPP_MACRO=cplusplus"'], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_inst_tree_inl0_pub1.pl b/test_regress/t/t_inst_tree_inl0_pub1.pl index b06051080..bad6139b0 100755 --- a/test_regress/t/t_inst_tree_inl0_pub1.pl +++ b/test_regress/t/t_inst_tree_inl0_pub1.pl @@ -14,9 +14,9 @@ top_filename("t/t_inst_tree.v"); my $default_vltmt_threads = $Self->get_default_vltmt_threads(); compile( - verilator_flags2 => ['--stats', "$Self->{t_dir}/$Self->{name}.vlt", - # Force 3 threads even if we have fewer cores - $Self->{vltmt} ? "--threads $default_vltmt_threads" : ""] + verilator_flags2 => ['--stats', "$Self->{t_dir}/$Self->{name}.vlt"], + # Force 3 threads even if we have fewer cores + threads => $Self->{vltmt} ? 
$default_vltmt_threads : 0 ); sub checkRelativeRefs { diff --git a/test_regress/t/t_lib_prot_shared.pl b/test_regress/t/t_lib_prot_shared.pl index a770be51b..1a3f8af5f 100755 --- a/test_regress/t/t_lib_prot_shared.pl +++ b/test_regress/t/t_lib_prot_shared.pl @@ -56,10 +56,10 @@ while (1) { compile( verilator_flags2 => ["$secret_dir/secret.sv", - ($Self->{vltmt} ? ' --threads 1' : ''), "-LDFLAGS", "'-Wl,-rpath,$abs_secret_dir -L$abs_secret_dir -l$secret_prefix'"], xsim_flags2 => ["$secret_dir/secret.sv"], + threads => $Self->{vltmt} ? 1 : 0 ); execute( diff --git a/test_regress/t/t_pgo_profoutofdate_bad.pl b/test_regress/t/t_pgo_profoutofdate_bad.pl index e2cfc96a1..3e78104a6 100755 --- a/test_regress/t/t_pgo_profoutofdate_bad.pl +++ b/test_regress/t/t_pgo_profoutofdate_bad.pl @@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di scenarios(vltmt => 1); compile( - v_flags2 => ["--threads 2"], + threads => 2, fails => 1, expect_filename => $Self->{golden_filename}, ); diff --git a/test_regress/t/t_pgo_threads.pl b/test_regress/t/t_pgo_threads.pl index 24e300882..974bbe3c4 100755 --- a/test_regress/t/t_pgo_threads.pl +++ b/test_regress/t/t_pgo_threads.pl @@ -14,7 +14,8 @@ scenarios(vltmt => 1); top_filename("t/t_gen_alw.v"); compile( - v_flags2 => ["--prof-pgo --threads 2"] + v_flags2 => ["--prof-pgo"], + threads => 2 ); execute( @@ -30,8 +31,8 @@ file_grep("$Self->{obj_dir}/profile.vlt", qr/profile_data/i); compile( # Intentinally no --prof-pgo here to make sure profile data can be read in # without it (that is: --prof-pgo has no effect on profile_data hash names) - v_flags2 => ["--threads 2", - " $Self->{obj_dir}/profile.vlt"], + v_flags2 => [" $Self->{obj_dir}/profile.vlt"], + threads => 2 ); execute( diff --git a/test_regress/t/t_split_var_0.pl b/test_regress/t/t_split_var_0.pl index 5f07afe64..0441871e4 100755 --- a/test_regress/t/t_split_var_0.pl +++ b/test_regress/t/t_split_var_0.pl @@ -14,8 +14,8 @@ 
scenarios(simulator => 1); # %Warning-UNOPTTHREADS: Thread scheduler is unable to provide requested parallelism; consider asking for fewer threads. # So use 6 threads here though it's not optimal in performace wise, but ok. compile( - verilator_flags2 => ['--stats' . ($Self->{vltmt} ? ' --threads 6' : ''), - "$Self->{t_dir}/t_split_var_0.vlt"], + verilator_flags2 => ['--stats', "$Self->{t_dir}/t_split_var_0.vlt"], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_split_var_2_trace.pl b/test_regress/t/t_split_var_2_trace.pl index c79ed778a..35d20007a 100755 --- a/test_regress/t/t_split_var_2_trace.pl +++ b/test_regress/t/t_split_var_2_trace.pl @@ -15,8 +15,8 @@ top_filename("t/t_split_var_0.v"); # %Warning-UNOPTTHREADS: Thread scheduler is unable to provide requested parallelism; consider asking for fewer threads. # So use 6 threads here though it's not optimal in performace wise, but ok. compile( - verilator_flags2 => ['--cc --trace --stats' . ($Self->{vltmt} ? ' --threads 6' : ''), - '+define+TEST_ATTRIBUTES'], + verilator_flags2 => ['--cc --trace --stats +define+TEST_ATTRIBUTES'], + threads => $Self->{vltmt} ? 
6 : 0 ); execute( diff --git a/test_regress/t/t_threads_counter_0.pl b/test_regress/t/t_threads_counter_0.pl index 8ba463b4e..b93572929 100755 --- a/test_regress/t/t_threads_counter_0.pl +++ b/test_regress/t/t_threads_counter_0.pl @@ -13,7 +13,8 @@ scenarios(simulator => 1); top_filename("t/t_threads_counter.v"); compile( - verilator_flags2 => ['--cc --no-threads'], + verilator_flags2 => ['--cc'], + threads => 0, ); execute( diff --git a/test_regress/t/t_threads_counter_1.pl b/test_regress/t/t_threads_counter_1.pl index f1b1cf7b0..95a607bfd 100755 --- a/test_regress/t/t_threads_counter_1.pl +++ b/test_regress/t/t_threads_counter_1.pl @@ -13,7 +13,8 @@ scenarios(vltmt => 1); top_filename("t/t_threads_counter.v"); compile( - verilator_flags2 => ['--cc --threads 1'], + verilator_flags2 => ['--cc'], + threads => 1 ); execute( diff --git a/test_regress/t/t_threads_counter_2.pl b/test_regress/t/t_threads_counter_2.pl index 460c79651..3625322fe 100755 --- a/test_regress/t/t_threads_counter_2.pl +++ b/test_regress/t/t_threads_counter_2.pl @@ -13,7 +13,8 @@ scenarios(vltmt => 1); top_filename("t/t_threads_counter.v"); compile( - verilator_flags2 => ['--cc --threads 2'], + verilator_flags2 => ['--cc'], + threads => 2 ); execute( diff --git a/test_regress/t/t_threads_counter_4.pl b/test_regress/t/t_threads_counter_4.pl index 97ac33b97..20ed3feb9 100755 --- a/test_regress/t/t_threads_counter_4.pl +++ b/test_regress/t/t_threads_counter_4.pl @@ -13,7 +13,8 @@ scenarios(vltmt => 1); top_filename("t/t_threads_counter.v"); compile( - verilator_flags2 => ['--cc --threads 4'], + verilator_flags2 => ['--cc'], + threads => 4 ); execute( diff --git a/test_regress/t/t_threads_crazy.pl b/test_regress/t/t_threads_crazy.pl index ee8031054..6bb21acb0 100755 --- a/test_regress/t/t_threads_crazy.pl +++ b/test_regress/t/t_threads_crazy.pl @@ -15,7 +15,8 @@ if ($Self->cfg_with_m32) { } compile( - verilator_flags2 => ['--cc --threads 1024'], + verilator_flags2 => ['--cc'], + threads => 1024 ); 
execute( diff --git a/test_regress/t/t_threads_nondeterminism.pl b/test_regress/t/t_threads_nondeterminism.pl index 418a03fc6..b584fa62d 100755 --- a/test_regress/t/t_threads_nondeterminism.pl +++ b/test_regress/t/t_threads_nondeterminism.pl @@ -13,7 +13,8 @@ scenarios(vltmt => 1); top_filename("t/t_threads_counter.v"); compile( - verilator_flags2 => ['--cc --threads 2 --debug-nondeterminism --no-skip-identical'], + verilator_flags2 => ['--cc --debug-nondeterminism --no-skip-identical'], + threads => 2 ); execute( diff --git a/test_regress/t/t_trace_litendian.pl b/test_regress/t/t_trace_litendian.pl index 91cc2e48f..5b7289295 100755 --- a/test_regress/t/t_trace_litendian.pl +++ b/test_regress/t/t_trace_litendian.pl @@ -14,8 +14,8 @@ scenarios(simulator => 1); # %Warning-UNOPTTHREADS: Thread scheduler is unable to provide requested parallelism; consider asking for fewer threads. # Strangely, asking for more threads makes it go away. compile( - verilator_flags2 => ['--cc --trace --trace-params -Wno-LITENDIAN', - ($Self->{vltmt} ? '--threads 6' : '')], + verilator_flags2 => ['--cc --trace --trace-params -Wno-LITENDIAN'], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_trace_litendian_fst.pl b/test_regress/t/t_trace_litendian_fst.pl index 4918c10bf..3e24bf901 100755 --- a/test_regress/t/t_trace_litendian_fst.pl +++ b/test_regress/t/t_trace_litendian_fst.pl @@ -16,8 +16,8 @@ top_filename("t/t_trace_litendian.v"); # %Warning-UNOPTTHREADS: Thread scheduler is unable to provide requested parallelism; consider asking for fewer threads. # Strangely, asking for more threads makes it go away. compile( - verilator_flags2 => ['--cc --trace-fst --trace-params -Wno-LITENDIAN', - ($Self->{vltmt} ? '--threads 6' : '')], + verilator_flags2 => ['--cc --trace-fst --trace-params -Wno-LITENDIAN'], + threads => $Self->{vltmt} ? 
6 : 0 ); execute( diff --git a/test_regress/t/t_trace_litendian_fst_sc.pl b/test_regress/t/t_trace_litendian_fst_sc.pl index 56754b9df..b79e1e78f 100755 --- a/test_regress/t/t_trace_litendian_fst_sc.pl +++ b/test_regress/t/t_trace_litendian_fst_sc.pl @@ -20,8 +20,8 @@ else { # %Warning-UNOPTTHREADS: Thread scheduler is unable to provide requested parallelism; consider asking for fewer threads. # Strangely, asking for more threads makes it go away. compile( - verilator_flags2 => ['--sc --trace-fst --trace-params -Wno-LITENDIAN', - ($Self->{vltmt} ? '--threads 6' : '')], + verilator_flags2 => ['--sc --trace-fst --trace-params -Wno-LITENDIAN'], + threads => $Self->{vltmt} ? 6 : 0 ); execute( diff --git a/test_regress/t/t_verilated_all.pl b/test_regress/t/t_verilated_all.pl index 450b5bd9c..402f834d4 100755 --- a/test_regress/t/t_verilated_all.pl +++ b/test_regress/t/t_verilated_all.pl @@ -8,7 +8,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di # Version 2.0. 
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 -scenarios(vlt => 1); +scenarios(vltmt => 1); my $root = ".."; @@ -17,10 +17,10 @@ compile( verilator_flags2 => ["--cc", "--coverage-toggle --coverage-line --coverage-user", "--trace --vpi ", - "--threads 2", "--trace-threads 1", "--prof-exec", "--prof-pgo", "$root/include/verilated_save.cpp"], + threads => 2 ); execute( diff --git a/test_regress/t/t_verilated_threaded.pl b/test_regress/t/t_verilated_threaded.pl index ec8edeb50..a11df1cfe 100755 --- a/test_regress/t/t_verilated_threaded.pl +++ b/test_regress/t/t_verilated_threaded.pl @@ -16,7 +16,8 @@ my $root = ".."; compile( # Can't use --coverage and --savable together, so cheat and compile inline - verilator_flags2 => ["--cc --coverage-toggle --coverage-line --coverage-user --trace --threads 1 --vpi $root/include/verilated_save.cpp"], + verilator_flags2 => ["--cc --coverage-toggle --coverage-line --coverage-user --trace --vpi $root/include/verilated_save.cpp"], + threads => 1 ); execute( diff --git a/test_regress/t/t_wrapper_context.pl b/test_regress/t/t_wrapper_context.pl index e95e77250..30c3a78a3 100755 --- a/test_regress/t/t_wrapper_context.pl +++ b/test_regress/t/t_wrapper_context.pl @@ -14,8 +14,9 @@ compile( make_top_shell => 0, make_main => 0, # link threads library, add custom .cpp code, add tracing & coverage support - verilator_flags2 => ["-threads 1 --exe $Self->{t_dir}/$Self->{name}.cpp", + verilator_flags2 => ["--exe $Self->{t_dir}/$Self->{name}.cpp", "--trace --coverage -cc"], + threads => 1, make_flags => 'CPPFLAGS_ADD=-DVL_NO_LEGACY', ); diff --git a/test_regress/t/t_wrapper_context_fst.pl b/test_regress/t/t_wrapper_context_fst.pl index 8b40a7b2d..98b3ceb5a 100755 --- a/test_regress/t/t_wrapper_context_fst.pl +++ b/test_regress/t/t_wrapper_context_fst.pl @@ -16,8 +16,9 @@ compile( make_top_shell => 0, make_main => 0, # link threads library, add custom .cpp code, add tracing & coverage support - verilator_flags2 => ["-threads 1 --exe 
$Self->{t_dir}/t_wrapper_context.cpp", + verilator_flags2 => ["--exe $Self->{t_dir}/t_wrapper_context.cpp", "--trace-fst --coverage -cc"], + threads => 1, make_flags => 'CPPFLAGS_ADD=-DVL_NO_LEGACY', ); diff --git a/test_regress/t/t_wrapper_context_seq.pl b/test_regress/t/t_wrapper_context_seq.pl index 8ddf958f8..162a0c596 100755 --- a/test_regress/t/t_wrapper_context_seq.pl +++ b/test_regress/t/t_wrapper_context_seq.pl @@ -16,8 +16,9 @@ compile( make_top_shell => 0, make_main => 0, # link threads library, add custom .cpp code, add tracing & coverage support - verilator_flags2 => ["-threads 1 --exe $Self->{t_dir}/t_wrapper_context.cpp", + verilator_flags2 => ["--exe $Self->{t_dir}/t_wrapper_context.cpp", "--trace --coverage -cc"], + threads => 1, make_flags => 'CPPFLAGS_ADD=-DVL_NO_LEGACY', ); From 0de1bbc85b281f811e3f539b2b297d39d2c31eac Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 5 Jul 2022 14:20:37 +0100 Subject: [PATCH 013/119] Add and use VL_CONSTEXPR_CXX17 --- include/verilatedos.h | 10 ++++++++++ src/V3Ast.h | 8 ++++---- src/V3AstUserAllocator.h | 27 ++++++++++++--------------- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/include/verilatedos.h b/include/verilatedos.h index 6bacfe27b..2b0cdd8ce 100644 --- a/include/verilatedos.h +++ b/include/verilatedos.h @@ -234,6 +234,16 @@ # error "Verilator requires a C++11 or newer compiler" #endif +//========================================================================= +// C++-2017 + +#if __cplusplus >= 201703L +# define VL_CONSTEXPR_CXX17 constexpr +#else +# define VL_CONSTEXPR_CXX17 +#endif + + //========================================================================= // Optimization diff --git a/src/V3Ast.h b/src/V3Ast.h index dc4cf6d8e..868fc73f8 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1948,7 +1948,7 @@ private: ASTNODE_PREFETCH(nodep->op2p()); ASTNODE_PREFETCH(nodep->op3p()); ASTNODE_PREFETCH(nodep->op4p()); - if /* TODO: 'constexpr' in C++17 */ (VisitNext) 
ASTNODE_PREFETCH(nodep->nextp()); + if VL_CONSTEXPR_CXX17 (VisitNext) ASTNODE_PREFETCH(nodep->nextp()); // Apply function in pre-order if (privateTypeTest::type>(nodep)) { @@ -1964,7 +1964,7 @@ private: } // Traverse 'nextp()' chain if requested - if /* TODO: 'constexpr' in C++17 */ (VisitNext) { + if VL_CONSTEXPR_CXX17 (VisitNext) { nodep = nodep->nextp(); } else { break; @@ -1987,7 +1987,7 @@ private: ASTNODE_PREFETCH(nodep->op2p()); ASTNODE_PREFETCH(nodep->op3p()); ASTNODE_PREFETCH(nodep->op4p()); - if /* TODO: 'constexpr' in C++17 */ (VisitNext) ASTNODE_PREFETCH(nodep->nextp()); + if VL_CONSTEXPR_CXX17 (VisitNext) ASTNODE_PREFETCH(nodep->nextp()); // Apply function in pre-order if (privateTypeTest::type>(nodep)) { @@ -2011,7 +2011,7 @@ private: } // Traverse 'nextp()' chain if requested - if /* TODO: 'constexpr' in C++17 */ (VisitNext) { + if VL_CONSTEXPR_CXX17 (VisitNext) { nodep = nodep->nextp(); } else { break; diff --git a/src/V3AstUserAllocator.h b/src/V3AstUserAllocator.h index 8d63ad5a9..f8982bf16 100644 --- a/src/V3AstUserAllocator.h +++ b/src/V3AstUserAllocator.h @@ -35,17 +35,16 @@ private: std::vector m_allocated; inline T_Data* getUserp(const T_Node* nodep) const { - // This simplifies statically as T_UserN is constant. In C++17, use 'if constexpr'. - if (T_UserN == 1) { + if VL_CONSTEXPR_CXX17 (T_UserN == 1) { const VNUser user = nodep->user1u(); return user.to(); - } else if (T_UserN == 2) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 2) { const VNUser user = nodep->user2u(); return user.to(); - } else if (T_UserN == 3) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 3) { const VNUser user = nodep->user3u(); return user.to(); - } else if (T_UserN == 4) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 4) { const VNUser user = nodep->user4u(); return user.to(); } else { @@ -55,14 +54,13 @@ private: } inline void setUserp(T_Node* nodep, T_Data* userp) const { - // This simplifies statically as T_UserN is constant. In C++17, use 'if constexpr'. 
- if (T_UserN == 1) { + if VL_CONSTEXPR_CXX17 (T_UserN == 1) { nodep->user1u(VNUser(userp)); - } else if (T_UserN == 2) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 2) { nodep->user2u(VNUser(userp)); - } else if (T_UserN == 3) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 3) { nodep->user3u(VNUser(userp)); - } else if (T_UserN == 4) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 4) { nodep->user4u(VNUser(userp)); } else { nodep->user5u(VNUser(userp)); @@ -71,14 +69,13 @@ private: protected: AstUserAllocatorBase() { - // This simplifies statically as T_UserN is constant. In C++17, use 'if constexpr'. - if (T_UserN == 1) { + if VL_CONSTEXPR_CXX17 (T_UserN == 1) { VNUser1InUse::check(); - } else if (T_UserN == 2) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 2) { VNUser2InUse::check(); - } else if (T_UserN == 3) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 3) { VNUser3InUse::check(); - } else if (T_UserN == 4) { + } else if VL_CONSTEXPR_CXX17 (T_UserN == 4) { VNUser4InUse::check(); } else { VNUser5InUse::check(); From 9f37cef1bb5d3f87b8873abbfec69b3773b15f96 Mon Sep 17 00:00:00 2001 From: Yutetsu TAKATSUKASA Date: Wed, 6 Jul 2022 08:33:37 +0900 Subject: [PATCH 014/119] Fix #3470 of incorrect bit op tree optimization (#3476) * Tests: Add a test to reproduce #3470 * Update LSB during return path of traversal. No functional change is intended. * Introduce LeafInfo::m_msb * Update LeafInfo::m_msb when visiting AstCCast * Internals: Add comment, reorder. No functional change is intended. * Delete explicit from copy constructor to fix build error. * Update Changes * Internals: Remove unused parameter. No functional change is intended. * Tests: Add explanation to t_const_opt. 
--- Changes | 2 +- src/V3Const.cpp | 61 ++++++++++++++++++++++++----------- test_regress/t/t_const_opt.pl | 2 +- test_regress/t/t_const_opt.v | 36 +++++++++++++++++++-- 4 files changed, 79 insertions(+), 22 deletions(-) diff --git a/Changes b/Changes index b597d565f..2e1183c7b 100644 --- a/Changes +++ b/Changes @@ -13,7 +13,7 @@ Verilator 4.225 devel **Minor:** - +* Fix incorrect bit op tree optimization (#3470). [algrobman] Verilator 4.224 2022-06-19 ========================== diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 7dae3f014..700a99e38 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -80,30 +80,48 @@ class ConstBitOpTreeVisitor final : public VNVisitor { using ResultTerm = std::tuple; class LeafInfo final { // Leaf node (either AstConst or AstVarRef) + // MEMBERS bool m_polarity = true; - int m_lsb = 0; + int m_lsb = 0; // LSB of actually used bit of m_refp->varp() + int m_msb = 0; // MSB of actually used bit of m_refp->varp() int m_wordIdx = -1; // -1 means AstWordSel is not used. 
AstVarRef* m_refp = nullptr; const AstConst* m_constp = nullptr; public: + // CONSTRUCTORS + LeafInfo() = default; + LeafInfo(const LeafInfo& other) = default; + explicit LeafInfo(int lsb) + : m_lsb{lsb} {} + + // METHODS void setLeaf(AstVarRef* refp) { UASSERT(!m_refp && !m_constp, "Must be called just once"); m_refp = refp; + m_msb = refp->varp()->widthMin() - 1; } void setLeaf(const AstConst* constp) { UASSERT(!m_refp && !m_constp, "Must be called just once"); m_constp = constp; + m_msb = constp->widthMin() - 1; } + void updateBitRange(const AstCCast* castp) { + m_msb = std::min(m_msb, m_lsb + castp->width() - 1); + } + void updateBitRange(const AstShiftR* shiftp) { + m_lsb += VN_AS(shiftp->rhsp(), Const)->toUInt(); + } + void wordIdx(int i) { m_wordIdx = i; } + void polarity(bool p) { m_polarity = p; } + AstVarRef* refp() const { return m_refp; } const AstConst* constp() const { return m_constp; } int wordIdx() const { return m_wordIdx; } bool polarity() const { return m_polarity; } int lsb() const { return m_lsb; } - void wordIdx(int i) { m_wordIdx = i; } - void lsb(int l) { m_lsb = l; } - void polarity(bool p) { m_polarity = p; } + int msb() const { return std::min(m_msb, varWidth() - 1); } int varWidth() const { UASSERT(m_refp, "m_refp should be set"); const int width = m_refp->varp()->widthMin(); @@ -382,7 +400,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { // Traverse down to see AstConst or AstVarRef LeafInfo findLeaf(AstNode* nodep, bool expectConst) { - LeafInfo info; + LeafInfo info{m_lsb}; { VL_RESTORER(m_leafp); m_leafp = &info; @@ -402,7 +420,10 @@ class ConstBitOpTreeVisitor final : public VNVisitor { virtual void visit(AstNode* nodep) override { CONST_BITOP_SET_FAILED("Hit unexpected op", nodep); } - virtual void visit(AstCCast* nodep) override { iterateChildren(nodep); } + virtual void visit(AstCCast* nodep) override { + iterateChildren(nodep); + if (m_leafp) m_leafp->updateBitRange(nodep); + } virtual void visit(AstShiftR* nodep) 
override { CONST_BITOP_RETURN_IF(!m_leafp, nodep); AstConst* const constp = VN_CAST(nodep->rhsp(), Const); @@ -410,12 +431,14 @@ class ConstBitOpTreeVisitor final : public VNVisitor { m_lsb += constp->toUInt(); incrOps(nodep, __LINE__); iterate(nodep->lhsp()); + m_leafp->updateBitRange(nodep); m_lsb -= constp->toUInt(); } virtual void visit(AstNot* nodep) override { CONST_BITOP_RETURN_IF(nodep->widthMin() != 1, nodep); AstNode* lhsp = nodep->lhsp(); - if (AstCCast* const castp = VN_CAST(lhsp, CCast)) lhsp = castp->lhsp(); + AstCCast* const castp = VN_CAST(lhsp, CCast); + if (castp) lhsp = castp->lhsp(); CONST_BITOP_RETURN_IF(!VN_IS(lhsp, VarRef) && !VN_IS(lhsp, Xor) && !VN_IS(lhsp, RedXor) && !VN_IS(lhsp, ShiftR), lhsp); @@ -424,6 +447,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { iterateChildren(nodep); // Don't restore m_polarity for Xor as it counts parity of the entire tree if (!isXorTree()) m_polarity = !m_polarity; + if (m_leafp && castp) m_leafp->updateBitRange(castp); } virtual void visit(AstWordSel* nodep) override { CONST_BITOP_RETURN_IF(!m_leafp, nodep); @@ -437,27 +461,27 @@ class ConstBitOpTreeVisitor final : public VNVisitor { CONST_BITOP_RETURN_IF(!m_leafp, nodep); m_leafp->setLeaf(nodep); m_leafp->polarity(m_polarity); - m_leafp->lsb(m_lsb); } virtual void visit(AstConst* nodep) override { CONST_BITOP_RETURN_IF(!m_leafp, nodep); m_leafp->setLeaf(nodep); - m_leafp->lsb(m_lsb); } virtual void visit(AstRedXor* nodep) override { Restorer restorer{*this}; CONST_BITOP_RETURN_IF(!VN_IS(m_rootp, Xor), nodep); AstNode* lhsp = nodep->lhsp(); - if (const AstCCast* const castp = VN_CAST(lhsp, CCast)) lhsp = castp->lhsp(); + const AstCCast* const castp = VN_CAST(lhsp, CCast); + if (castp) lhsp = castp->lhsp(); if (const AstAnd* const andp = VN_CAST(lhsp, And)) { // '^(mask & leaf)' CONST_BITOP_RETURN_IF(!andp, lhsp); const LeafInfo& mask = findLeaf(andp->lhsp(), true); CONST_BITOP_RETURN_IF(!mask.constp() || mask.lsb() != 0, andp->lhsp()); - const 
LeafInfo& ref = findLeaf(andp->rhsp(), false); + LeafInfo ref = findLeaf(andp->rhsp(), false); CONST_BITOP_RETURN_IF(!ref.refp(), andp->rhsp()); + if (castp) ref.updateBitRange(castp); restorer.disableRestore(); // Now all subtree succeeded @@ -467,7 +491,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { incrOps(andp, __LINE__); // Mark all bits checked in this reduction - const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.varWidth()); + const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.msb() + 1); for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { const int maskIdx = bitIdx - ref.lsb(); if (maskNum.bitIs0(maskIdx)) continue; @@ -475,15 +499,16 @@ class ConstBitOpTreeVisitor final : public VNVisitor { m_bitPolarities.emplace_back(ref, true, bitIdx); } } else { // '^leaf' - const LeafInfo& ref = findLeaf(lhsp, false); + LeafInfo ref = findLeaf(lhsp, false); CONST_BITOP_RETURN_IF(!ref.refp(), lhsp); + if (castp) ref.updateBitRange(castp); restorer.disableRestore(); // Now all checks passed incrOps(nodep, __LINE__); // Mark all bits checked by this comparison - for (int bitIdx = ref.lsb(); bitIdx < ref.varWidth(); ++bitIdx) { + for (int bitIdx = ref.lsb(); bitIdx <= ref.msb(); ++bitIdx) { m_bitPolarities.emplace_back(ref, true, bitIdx); } } @@ -503,7 +528,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { for (const bool right : {false, true}) { Restorer restorer{*this}; - LeafInfo leafInfo; + LeafInfo leafInfo{m_lsb}; m_leafp = &leafInfo; AstNode* opp = right ? 
nodep->rhsp() : nodep->lhsp(); const bool origFailed = m_failed; @@ -522,7 +547,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { // The conditional on the lsb being in range is necessary for some degenerate // case, e.g.: (IData)((QData)wide[0] >> 32), or <1-bit-var> >> 1, which is // just zero - if (leafInfo.lsb() < leafInfo.varWidth()) { + if (leafInfo.lsb() <= leafInfo.msb()) { m_bitPolarities.emplace_back(leafInfo, isXorTree() || leafInfo.polarity(), leafInfo.lsb()); } else if (isAndTree() && leafInfo.polarity()) { @@ -559,7 +584,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { incrOps(andp, __LINE__); // Mark all bits checked by this comparison - const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.varWidth()); + const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.msb() + 1); for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { const int maskIdx = bitIdx - ref.lsb(); if (maskNum.bitIs0(maskIdx)) continue; @@ -575,7 +600,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { incrOps(nodep, __LINE__); // Mark all bits checked by this comparison - const int maxBitIdx = std::min(ref.lsb() + compNum.width(), ref.varWidth()); + const int maxBitIdx = std::min(ref.lsb() + compNum.width(), ref.msb() + 1); for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { const int maskIdx = bitIdx - ref.lsb(); const bool polarity = compNum.bitIs1(maskIdx) != maskFlip; diff --git a/test_regress/t/t_const_opt.pl b/test_regress/t/t_const_opt.pl index 83e301744..837b5f74f 100755 --- a/test_regress/t/t_const_opt.pl +++ b/test_regress/t/t_const_opt.pl @@ -19,7 +19,7 @@ execute( ); if ($Self->{vlt}) { - file_grep($Self->{stats}, qr/Optimizations, Const bit op reduction\s+(\d+)/i, 11); + file_grep($Self->{stats}, qr/Optimizations, Const bit op reduction\s+(\d+)/i, 14); } ok(1); 1; diff --git a/test_regress/t/t_const_opt.v b/test_regress/t/t_const_opt.v index 407fef13c..e24d28bf8 100644 --- a/test_regress/t/t_const_opt.v +++ 
b/test_regress/t/t_const_opt.v @@ -62,7 +62,7 @@ module t(/*AUTOARG*/ $write("[%0t] cyc==%0d crc=%x sum=%x\n", $time, cyc, crc, sum); if (crc !== 64'hc77bb9b3784ea091) $stop; // What checksum will we end up with (above print should match) -`define EXPECTED_SUM 64'hdccb9e7b8b638233 +`define EXPECTED_SUM 64'hde21e019a3e12039 if (sum !== `EXPECTED_SUM) $stop; $write("*-* All Finished *-*\n"); @@ -86,10 +86,11 @@ module Test(/*AUTOARG*/ logic bug3182_out; logic bug3197_out; logic bug3445_out; + logic bug3470_out; output logic o; - logic [7:0] tmp; + logic [8:0] tmp; assign o = ^tmp; always_ff @(posedge clk) begin @@ -113,11 +114,13 @@ module Test(/*AUTOARG*/ tmp[5] <= bug3182_out; tmp[6] <= bug3197_out; tmp[7] <= bug3445_out; + tmp[8] <= bug3470_out; end bug3182 i_bug3182(.in(d[4:0]), .out(bug3182_out)); bug3197 i_bug3197(.clk(clk), .in(d), .out(bug3197_out)); bug3445 i_bug3445(.clk(clk), .in(d), .out(bug3445_out)); + bug3470 i_bug3470(.clk(clk), .in(d), .out(bug3470_out)); endmodule @@ -203,3 +206,32 @@ module bug3445(input wire clk, input wire [31:0] in, output wire out); assign out = result0 ^ result1 ^ (result2 | result3); endmodule + +// Bug3470 +// CCast had been ignored in bit op tree optimization +// Assume the following HDL input: +// (^d[38:32]) ^ (^d[31:0]) +// where d is logic [38:0] +// ^d[31:0] becomes REDXOR(CCast(uint32_t, d)), +// but CCast was ignored and interpreted as ^d[38:0]. 
+// Finally (^d[38:32]) ^ (^d[31:0]) was wrongly transformed to +// (^d[38:32]) ^ (^d[38:0]) +// -> (^d[38:32]) ^ ((^d[38:32]) ^ (^d[31:0])) +// -> ^d[31:0] +// Of course the correct result is ^d[38:0] = ^d +module bug3470(input wire clk, input wire [31:0] in, output wire out); + logic [38:0] d; + always_ff @(posedge clk) + d <= {d[6:0], in}; + + logic tmp, expected; + always_ff @(posedge clk) begin + tmp <= ^(d >> 32) ^ (^d[31:0]); + expected <= ^d; + end + + always @(posedge clk) + if (tmp != expected) $stop; + + assign out = tmp; +endmodule From 3d71716a8a6cc7a931abd78b9d7930a68e9f3b03 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 9 Jul 2022 07:28:28 -0400 Subject: [PATCH 015/119] Internals: Constructor style cleanup. No functional change. --- src/V3Simulate.h | 26 +++++++++++++------------- src/V3Table.cpp | 36 ++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/V3Simulate.h b/src/V3Simulate.h index d5a7453ac..28691d478 100644 --- a/src/V3Simulate.h +++ b/src/V3Simulate.h @@ -15,7 +15,7 @@ //************************************************************************* // // void example_usage() { -// SimulateVisitor simvis (false, false); +// SimulateVisitor simvis{false, false}; // simvis.clear(); // // Set all inputs to the constant // for (deque::iterator it = m_inVarps.begin(); it!=m_inVarps.end(); ++it) { @@ -130,7 +130,7 @@ private: const int width = itemp->width(); const int lsb = itemp->lsb(); const int msb = lsb + width - 1; - V3Number fieldNum(nump, width); + V3Number fieldNum{nump, width}; fieldNum.opSel(*nump, msb, lsb); out << itemp->name() << ": "; if (AstNodeDType* const childTypep = itemp->subDTypep()) { @@ -152,7 +152,7 @@ private: const int width = childTypep->width(); const int lsb = width * element; const int msb = lsb + width - 1; - V3Number fieldNum(nump, width); + V3Number fieldNum{nump, width}; fieldNum.opSel(*nump, msb, lsb); const int arrayElem = arrayp->lo() + element; out << 
arrayElem << " = " << prettyNumber(&fieldNum, childTypep); @@ -236,7 +236,7 @@ private: } if (allocNewConst) { // Need to allocate new constant - constp = new AstConst(nodep->fileline(), AstConst::DtypedValue(), nodep->dtypep(), 0); + constp = new AstConst{nodep->fileline(), AstConst::DtypedValue{}, nodep->dtypep(), 0}; // Mark as in use, add to free list for later reuse constp->user2(1); freeList.push_back(constp); @@ -683,15 +683,15 @@ private: initp = vscpnump; } else { // Assignment to unassigned variable, all bits are X // TODO generic initialization which builds X/arrays by recursion - AstConst* const outconstp = new AstConst( - nodep->fileline(), AstConst::WidthedValue(), basicp->widthMin(), 0); + AstConst* const outconstp = new AstConst{ + nodep->fileline(), AstConst::WidthedValue{}, basicp->widthMin(), 0}; if (basicp->isZeroInit()) { outconstp->num().setAllBits0(); } else { outconstp->num().setAllBitsX(); } - initp = new AstInitArray(nodep->fileline(), arrayp, outconstp); + initp = new AstInitArray{nodep->fileline(), arrayp, outconstp}; m_reclaimValuesp.push_back(initp); } const uint32_t index = fetchConst(selp->bitp())->toUInt(); @@ -706,7 +706,7 @@ private: } void handleAssignSel(AstNodeAssign* nodep, AstSel* selp) { AstVarRef* varrefp = nullptr; - V3Number lsb(nodep); + V3Number lsb{nodep}; iterateAndNextNull(nodep->rhsp()); // Value to assign handleAssignSelRecurse(nodep, selp, varrefp /*ref*/, lsb /*ref*/, 0); if (!m_checkOnly && optimizable()) { @@ -719,8 +719,8 @@ private: } else if (AstConst* const vscpnump = fetchConstNull(vscp)) { outconstp = vscpnump; } else { // Assignment to unassigned variable, all bits are X or 0 - outconstp = new AstConst(nodep->fileline(), AstConst::WidthedValue(), - varrefp->varp()->widthMin(), 0); + outconstp = new AstConst{nodep->fileline(), AstConst::WidthedValue{}, + varrefp->varp()->widthMin(), 0}; if (varrefp->varp()->basicp() && varrefp->varp()->basicp()->isZeroInit()) { outconstp->num().setAllBits0(); } else { @@ 
-742,7 +742,7 @@ private: lsbRef = fetchConst(selp->lsbp())->num(); return; // And presumably still optimizable() } else if (AstSel* const subselp = VN_CAST(selp->lhsp(), Sel)) { - V3Number sublsb(nodep); + V3Number sublsb{nodep}; handleAssignSelRecurse(nodep, subselp, outVarrefpRef, sublsb /*ref*/, depth + 1); if (optimizable()) { lsbRef = sublsb; @@ -829,7 +829,7 @@ private: if (hit) break; iterateAndNextNull(ep); if (optimizable()) { - V3Number match(nodep, 1); + V3Number match{nodep, 1}; match.opEq(fetchConst(nodep->exprp())->num(), fetchConst(ep)->num()); if (match.isNeqZero()) { iterateAndNextNull(itemp->bodysp()); @@ -1097,7 +1097,7 @@ private: } AstConst* const resultConstp - = new AstConst(nodep->fileline(), AstConst::String(), result); + = new AstConst{nodep->fileline(), AstConst::String{}, result}; setValue(nodep, resultConstp); m_reclaimValuesp.push_back(resultConstp); } diff --git a/src/V3Table.cpp b/src/V3Table.cpp index 822407b4c..0944bb589 100644 --- a/src/V3Table.cpp +++ b/src/V3Table.cpp @@ -92,8 +92,8 @@ public: = elemDType->isString() ? elemDType : v3Global.rootp()->findBitDType(width, width, VSigning::UNSIGNED); - AstUnpackArrayDType* const tableDTypep - = new AstUnpackArrayDType(m_fl, subDTypep, new AstRange(m_fl, size, 0)); + AstUnpackArrayDType* const tableDTypep = new AstUnpackArrayDType{ + m_fl, subDTypep, new AstRange{m_fl, static_cast(size), 0}}; v3Global.rootp()->typeTablep()->addTypesp(tableDTypep); // Create table initializer (with default value 0) AstConst* const defaultp = elemDType->isString() @@ -106,7 +106,7 @@ public: UASSERT_OBJ(!m_varScopep, m_fl, "Table variable already created"); // Default value is zero/empty string so don't add it if (value.isString() ? 
value.toString().empty() : value.isEqZero()) return; - m_initp->addIndexValuep(index, new AstConst(m_fl, value)); + m_initp->addIndexValuep(index, new AstConst{m_fl, value}); } AstVarScope* varScopep() { @@ -247,14 +247,14 @@ private: // We will need a table index variable, create it here. AstVar* const indexVarp - = new AstVar(fl, VVarType::BLOCKTEMP, "__Vtableidx" + cvtToStr(m_modTables), - VFlagBitPacked(), m_inWidthBits); + = new AstVar{fl, VVarType::BLOCKTEMP, "__Vtableidx" + cvtToStr(m_modTables), + VFlagBitPacked{}, static_cast(m_inWidthBits)}; m_modp->addStmtp(indexVarp); - AstVarScope* const indexVscp = new AstVarScope(indexVarp->fileline(), m_scopep, indexVarp); + AstVarScope* const indexVscp = new AstVarScope{indexVarp->fileline(), m_scopep, indexVarp}; m_scopep->addVarp(indexVscp); // The 'output assigned' table builder - TableBuilder outputAssignedTableBuilder(fl); + TableBuilder outputAssignedTableBuilder{fl}; outputAssignedTableBuilder.setTableSize( nodep->findBitDType(m_outVarps.size(), m_outVarps.size(), VSigning::UNSIGNED), VL_MASK_I(m_inWidthBits)); @@ -311,7 +311,7 @@ private: << simvis.whyNotMessage()); // Build output value tables and the assigned flags table - V3Number outputAssignedMask(nodep, m_outVarps.size(), 0); + V3Number outputAssignedMask{nodep, static_cast(m_outVarps.size()), 0}; for (TableOutputVar& tov : m_outVarps) { if (V3Number* const outnump = simvis.fetchOutNumberNull(tov.varScopep())) { UINFO(8, " Output " << tov.name() << " = " << *outnump << endl); @@ -333,21 +333,21 @@ private: // First var in inVars becomes the LSB of the concat AstNode* concatp = nullptr; for (AstVarScope* invscp : m_inVarps) { - AstVarRef* const refp = new AstVarRef(fl, invscp, VAccess::READ); + AstVarRef* const refp = new AstVarRef{fl, invscp, VAccess::READ}; if (concatp) { - concatp = new AstConcat(fl, refp, concatp); + concatp = new AstConcat{fl, refp, concatp}; } else { concatp = refp; } } - return new AstAssign(fl, new AstVarRef(fl, indexVscp, 
VAccess::WRITE), concatp); + return new AstAssign{fl, new AstVarRef{fl, indexVscp, VAccess::WRITE}, concatp}; } AstArraySel* select(FileLine* fl, AstVarScope* fromp, AstVarScope* indexp) { - AstVarRef* const fromRefp = new AstVarRef(fl, fromp, VAccess::READ); - AstVarRef* const indexRefp = new AstVarRef(fl, indexp, VAccess::READ); - return new AstArraySel(fl, fromRefp, indexRefp); + AstVarRef* const fromRefp = new AstVarRef{fl, fromp, VAccess::READ}; + AstVarRef* const indexRefp = new AstVarRef{fl, indexp, VAccess::READ}; + return new AstArraySel{fl, fromRefp, indexRefp}; } void createOutputAssigns(AstNode* nodep, AstNode* stmtsp, AstVarScope* indexVscp, @@ -362,12 +362,12 @@ private: // If this output is unassigned on some code paths, wrap the assignment in an If if (tov.mayBeUnassigned()) { - V3Number outputChgMask(nodep, m_outVarps.size(), 0); + V3Number outputChgMask{nodep, static_cast(m_outVarps.size()), 0}; outputChgMask.setBit(tov.ord(), 1); AstNode* const condp - = new AstAnd(fl, select(fl, outputAssignedTableVscp, indexVscp), - new AstConst(fl, outputChgMask)); - outsetp = new AstIf(fl, condp, outsetp); + = new AstAnd{fl, select(fl, outputAssignedTableVscp, indexVscp), + new AstConst{fl, outputChgMask}}; + outsetp = new AstIf{fl, condp, outsetp}; } stmtsp->addNext(outsetp); From a4fddb3fbee12417257c29b95b2b82258b6c5c21 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 9 Jul 2022 07:55:46 -0400 Subject: [PATCH 016/119] Fix table misoptimizing away display (#3488). 
--- Changes | 2 ++ src/V3Simulate.h | 14 ++++++-- src/V3Table.cpp | 3 ++ test_regress/t/t_opt_table_display.out | 12 +++++++ test_regress/t/t_opt_table_display.pl | 23 ++++++++++++++ test_regress/t/t_opt_table_display.v | 44 ++++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 test_regress/t/t_opt_table_display.out create mode 100755 test_regress/t/t_opt_table_display.pl create mode 100644 test_regress/t/t_opt_table_display.v diff --git a/Changes b/Changes index 2e1183c7b..5f68d4a40 100644 --- a/Changes +++ b/Changes @@ -14,6 +14,8 @@ Verilator 4.225 devel **Minor:** * Fix incorrect bit op tree optimization (#3470). [algrobman] +* Fix table misoptimizing away display (#3488). [Stefan Post] + Verilator 4.224 2022-06-19 ========================== diff --git a/src/V3Simulate.h b/src/V3Simulate.h index 28691d478..195139cd2 100644 --- a/src/V3Simulate.h +++ b/src/V3Simulate.h @@ -100,6 +100,7 @@ private: bool m_anyAssignDly; ///< True if found a delayed assignment bool m_anyAssignComb; ///< True if found a non-delayed assignment bool m_inDlyAssign; ///< Under delayed assignment + bool m_isOutputter; // Creates output int m_instrCount; ///< Number of nodes int m_dataCount; ///< Bytes of data AstJumpGo* m_jumpp; ///< Jump label we're branching from @@ -205,6 +206,7 @@ public: AstNode* whyNotNodep() const { return m_whyNotNodep; } bool isAssignDly() const { return m_anyAssignDly; } + bool isOutputter() const { return m_isOutputter; } int instrCount() const { return m_instrCount; } int dataCount() const { return m_dataCount; } @@ -342,15 +344,16 @@ private: nodep->user2p((void*)valuep); } - void checkNodeInfo(AstNode* nodep) { + void checkNodeInfo(AstNode* nodep, bool ignorePredict = false) { if (m_checkOnly) { m_instrCount += nodep->instrCount(); m_dataCount += nodep->width(); } - if (!nodep->isPredictOptimizable()) { + if (!ignorePredict && !nodep->isPredictOptimizable()) { // UINFO(9, " !predictopt " << nodep << endl); 
clearOptimizable(nodep, "Isn't predictable"); } + if (nodep->isOutputter()) m_isOutputter = true; } void badNodeType(AstNode* nodep) { @@ -756,6 +759,7 @@ private: virtual void visit(AstNodeAssign* nodep) override { if (jumpingOver(nodep)) return; if (!optimizable()) return; // Accelerate + checkNodeInfo(nodep); if (VN_IS(nodep, AssignForce)) { clearOptimizable(nodep, "Force"); } else if (VN_IS(nodep, AssignDly)) { @@ -970,6 +974,7 @@ private: if (jumpingOver(nodep)) return; if (!optimizable()) return; // Accelerate UINFO(5, " FUNCREF " << nodep << endl); + checkNodeInfo(nodep); if (!m_params) { badNodeType(nodep); return; @@ -1053,6 +1058,7 @@ private: virtual void visit(AstSFormatF* nodep) override { if (jumpingOver(nodep)) return; if (!optimizable()) return; // Accelerate + checkNodeInfo(nodep); iterateChildren(nodep); if (m_params) { AstNode* nextArgp = nodep->exprsp(); @@ -1106,6 +1112,9 @@ private: virtual void visit(AstDisplay* nodep) override { if (jumpingOver(nodep)) return; if (!optimizable()) return; // Accelerate + // We ignore isPredictOptimizable as $display is often in constant + // functions and we want them to work if used with parameters + checkNodeInfo(nodep, /*display:*/ true); iterateChildren(nodep); if (m_params) { AstConst* const textp = fetchConst(nodep->fmtp()); @@ -1155,6 +1164,7 @@ public: m_anyAssignComb = false; m_anyAssignDly = false; m_inDlyAssign = false; + m_isOutputter = false; m_instrCount = 0; m_dataCount = 0; m_jumpp = nullptr; diff --git a/src/V3Table.cpp b/src/V3Table.cpp index 0944bb589..30a9cb587 100644 --- a/src/V3Table.cpp +++ b/src/V3Table.cpp @@ -225,6 +225,9 @@ private: if (!m_outWidthBytes || !m_inWidthBits) { chkvis.clearOptimizable(nodep, "Table has no outputs"); } + if (chkvis.isOutputter()) { + chkvis.clearOptimizable(nodep, "Table creates display output"); + } UINFO(4, " Test: Opt=" << (chkvis.optimizable() ? 
"OK" : "NO") << ", Instrs=" << chkvis.instrCount() << " Data=" << chkvis.dataCount() << " in width (bits)=" << m_inWidthBits << " out width (bytes)=" diff --git a/test_regress/t/t_opt_table_display.out b/test_regress/t/t_opt_table_display.out new file mode 100644 index 000000000..3bbd99b6b --- /dev/null +++ b/test_regress/t/t_opt_table_display.out @@ -0,0 +1,12 @@ +Clocked +Clocked +Clocked +Clocked +Clocked +Clocked +Clocked +Clocked +Clocked +Clocked +Clocked +*-* All Finished *-* diff --git a/test_regress/t/t_opt_table_display.pl b/test_regress/t/t_opt_table_display.pl new file mode 100755 index 000000000..e08f4a744 --- /dev/null +++ b/test_regress/t/t_opt_table_display.pl @@ -0,0 +1,23 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + verilator_flags2 => ["--stats"], + ); + +execute( + check_finished => 1, + expect_filename => $Self->{golden_filename}, + ); + +ok(1); +1; diff --git a/test_regress/t/t_opt_table_display.v b/test_regress/t/t_opt_table_display.v new file mode 100644 index 000000000..19fbe03ad --- /dev/null +++ b/test_regress/t/t_opt_table_display.v @@ -0,0 +1,44 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Wilson Snyder. 
+// SPDX-License-Identifier: CC0-1.0 + +module t (/*AUTOARG*/ + // Outputs + test, + // Inputs + clk + ); + input clk; + + output reg [5:0] test; + parameter STATE1 = 6'b000001; + parameter STATE2 = 6'b000010; + parameter STATE3 = 6'b000100; + parameter STATE4 = 6'b001000; + parameter STATE5 = 6'b010000; + parameter STATE6 = 6'b100000; + + always @(posedge clk) begin + $display("Clocked"); + case (test) + STATE1: test <= STATE2; + STATE2: test <= STATE3; + STATE3: test <= STATE4; + STATE4: test <= STATE5; + STATE5: test <= STATE6; + default: test <= STATE1; + endcase + end + + int cyc; + always @(posedge clk) begin + cyc <= cyc + 1; + if (cyc == 10) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end + +endmodule From 5f3316d3dc68099adf1c512ae0658f99699cbab9 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 9 Jul 2022 08:30:57 -0400 Subject: [PATCH 017/119] * Fix empty string arguments to display (#3484). --- Changes | 1 + src/V3LinkResolve.cpp | 58 +++++++++++++++--------------- test_regress/t/t_display_merge.out | 2 ++ test_regress/t/t_display_merge.pl | 2 +- test_regress/t/t_display_merge.v | 4 +++ 5 files changed, 38 insertions(+), 29 deletions(-) diff --git a/Changes b/Changes index 5f68d4a40..910eceb2f 100644 --- a/Changes +++ b/Changes @@ -14,6 +14,7 @@ Verilator 4.225 devel **Minor:** * Fix incorrect bit op tree optimization (#3470). [algrobman] +* Fix empty string arguments to display (#3484). [Grulfen] * Fix table misoptimizing away display (#3488). [Stefan Post] diff --git a/src/V3LinkResolve.cpp b/src/V3LinkResolve.cpp index a0d198bc7..45ce55922 100644 --- a/src/V3LinkResolve.cpp +++ b/src/V3LinkResolve.cpp @@ -352,43 +352,45 @@ private: while (argp) { if (skipCount) { argp = argp->nextp(); - skipCount--; + --skipCount; continue; } const AstConst* const constp = VN_CAST(argp, Const); const bool isFromString = (constp) ? 
constp->num().isFromString() : false; if (isFromString) { const int numchars = argp->dtypep()->width() / 8; - string str(numchars, ' '); - // now scan for % operators - bool inpercent = false; - for (int i = 0; i < numchars; i++) { - const int ii = numchars - i - 1; - const char c = constp->num().dataByte(ii); - str[i] = c; - if (!inpercent && c == '%') { - inpercent = true; - } else if (inpercent) { - inpercent = false; - switch (c) { - case '0': // FALLTHRU - case '1': // FALLTHRU - case '2': // FALLTHRU - case '3': // FALLTHRU - case '4': // FALLTHRU - case '5': // FALLTHRU - case '6': // FALLTHRU - case '7': // FALLTHRU - case '8': // FALLTHRU - case '9': // FALLTHRU - case '.': inpercent = true; break; - case '%': break; - default: - if (V3Number::displayedFmtLegal(c, isScan)) ++skipCount; + if (!constp->num().toString().empty()) { + string str(numchars, ' '); + // now scan for % operators + bool inpercent = false; + for (int i = 0; i < numchars; i++) { + const int ii = numchars - i - 1; + const char c = constp->num().dataByte(ii); + str[i] = c; + if (!inpercent && c == '%') { + inpercent = true; + } else if (inpercent) { + inpercent = false; + switch (c) { + case '0': // FALLTHRU + case '1': // FALLTHRU + case '2': // FALLTHRU + case '3': // FALLTHRU + case '4': // FALLTHRU + case '5': // FALLTHRU + case '6': // FALLTHRU + case '7': // FALLTHRU + case '8': // FALLTHRU + case '9': // FALLTHRU + case '.': inpercent = true; break; + case '%': break; + default: + if (V3Number::displayedFmtLegal(c, isScan)) ++skipCount; + } } } + newFormat.append(str); } - newFormat.append(str); AstNode* const nextp = argp->nextp(); argp->unlinkFrBack(); VL_DO_DANGLING(pushDeletep(argp), argp); diff --git a/test_regress/t/t_display_merge.out b/test_regress/t/t_display_merge.out index 358455d76..0dcb8b10e 100644 --- a/test_regress/t/t_display_merge.out +++ b/test_regress/t/t_display_merge.out @@ -1,5 +1,7 @@ Merge: This should merge +Merge: +This should also merge f 1=1 a=top.t 1=1 
1=1 b=top.t 1=1 pre diff --git a/test_regress/t/t_display_merge.pl b/test_regress/t/t_display_merge.pl index 75570aba2..27cc0ef25 100755 --- a/test_regress/t/t_display_merge.pl +++ b/test_regress/t/t_display_merge.pl @@ -20,7 +20,7 @@ execute( ); file_grep("$Self->{obj_dir}/$Self->{VM_PREFIX}__stats.txt", - qr/Node count, DISPLAY \s+ 41 \s+ 27 \s+ 27 \s+ 6/); + qr/Node count, DISPLAY \s+ 44 \s+ 27 \s+ 27 \s+ 6/); ok(1); 1; diff --git a/test_regress/t/t_display_merge.v b/test_regress/t/t_display_merge.v index 0704e7728..ede4c15eb 100644 --- a/test_regress/t/t_display_merge.v +++ b/test_regress/t/t_display_merge.v @@ -19,6 +19,10 @@ module t (/*AUTOARG*/); $write("should "); $display("merge"); + $display("Merge:"); + $write("This ", "", "should ", "", "also "); + $display("merge"); + $display("f"); $write(" 1=%0d a=%m 1=%0d", one, one); $display(" 1=%0d b=%m 1=%0d", one, one); From d8ea989edab83ba948db717b98c394c14c2d8deb Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 9 Jul 2022 09:50:50 -0400 Subject: [PATCH 018/119] Tests/examples: Remove some legacy Verilator:: calls. 
--- docs/guide/connecting.rst | 69 ++++--------------- docs/guide/exe_sim.rst | 4 +- docs/guide/languages.rst | 2 +- examples/make_hello_c/sim_main.cpp | 7 +- test_regress/t/t_dpi_accessors.cpp | 79 +++++++++++----------- test_regress/t/t_dpi_var.cpp | 26 +++---- test_regress/t/t_hier_block_cmake/main.cpp | 9 +-- test_regress/t/t_leak.cpp | 3 +- test_regress/t/t_trace_two_cc.cpp | 28 ++++---- test_regress/t/t_tri_gate.cpp | 2 - test_regress/t/t_var_pinsizes.cpp | 2 - test_regress/t/t_vpi_cb_iter.cpp | 17 +++-- test_regress/t/t_vpi_cbs_called.cpp | 35 +++++----- test_regress/t/t_vpi_get.cpp | 30 ++++---- test_regress/t/t_vpi_memory.cpp | 33 ++++----- test_regress/t/t_vpi_module.cpp | 44 ++++++------ test_regress/t/t_vpi_param.cpp | 34 +++++----- test_regress/t/t_vpi_time_cb.cpp | 35 +++++----- test_regress/t/t_vpi_unimpl.cpp | 33 ++++----- test_regress/t/t_vpi_var.cpp | 21 +++--- test_regress/t/t_vpi_zero_time_cb.cpp | 30 ++++---- 21 files changed, 258 insertions(+), 285 deletions(-) diff --git a/docs/guide/connecting.rst b/docs/guide/connecting.rst index edfc3d67b..443702c7d 100644 --- a/docs/guide/connecting.rst +++ b/docs/guide/connecting.rst @@ -86,61 +86,17 @@ Connecting to C++ In C++ output mode (:vlopt:`--cc`), the Verilator generated model class is a simple C++ class. The user must write a C++ wrapper and main loop for the simulation, which instantiates the model class, and link with the Verilated -model. Here is a simple example: +model. -.. code-block:: C++ +Refer to ``examples/make_tracing_c`` in the distribution for a detailed +commented example. - #include // Defines common routines - #include // Need std::cout - #include "Vtop.h" // From Verilating "top.v" +Top level IO signals are read and written as members of the model. You +call the model's :code:`eval()` method to evaluate the model. When the +simulation is complete call the model's :code:`final()` method to execute +any SystemVerilog final blocks, and complete any assertions. 
See +:ref:`Evaluation Loop`. - Vtop *top; // Instantiation of model - - uint64_t main_time = 0; // Current simulation time - // This is a 64-bit integer to reduce wrap over issues and - // allow modulus. This is in units of the timeprecision - // used in Verilog (or from --timescale-override) - - double sc_time_stamp() { // Called by $time in Verilog - return main_time; // converts to double, to match - // what SystemC does - } - - int main(int argc, char** argv) { - Verilated::commandArgs(argc, argv); // Remember args - - top = new Vtop; // Create model - // Do not instead make Vtop as a file-scope static - // variable, as the "C++ static initialization order fiasco" - // may cause a crash - - top->reset_l = 0; // Set some inputs - - while (!Verilated::gotFinish()) { - if (main_time > 10) { - top->reset_l = 1; // Deassert reset - } - if ((main_time % 10) == 1) { - top->clk = 1; // Toggle clock - } - if ((main_time % 10) == 6) { - top->clk = 0; - } - top->eval(); // Evaluate model - cout << top->out << endl; // Read a output - main_time++; // Time passes... - } - - top->final(); // Done simulating - // // (Though this example doesn't get here) - delete top; - } - - -Note top level IO signals are read and written as members of the model. You -call the :code:`eval()` method to evaluate the model. When the simulation is -complete call the :code:`final()` method to execute any SystemVerilog final -blocks, and complete any assertions. See :ref:`Evaluation Loop`. 
Connecting to SystemC @@ -449,14 +405,15 @@ accesses the above signal "readme" would be: int main(int argc, char** argv, char** env) { Verilated::commandArgs(argc, argv); - Vour* top = new Vour; - Verilated::internalsDump(); // See scopes to help debug - while (!Verilated::gotFinish()) { + const std::unique_ptr contextp{new VerilatedContext}; + const std::unique_ptr top{new Vour{contextp.get()}}; + + contextp->internalsDump(); // See scopes to help debug + while (!contextp->gotFinish()) { top->eval(); VerilatedVpi::callValueCbs(); // For signal callbacks read_and_check(); } - delete top; return 0; } EOF diff --git a/docs/guide/exe_sim.rst b/docs/guide/exe_sim.rst index 016340cc8..364ac5fba 100644 --- a/docs/guide/exe_sim.rst +++ b/docs/guide/exe_sim.rst @@ -8,7 +8,7 @@ Simulation Runtime Arguments The following are the arguments that may be passed to a Verilated executable, provided that executable calls -:code:`Verilated::commandArgs()`. +:code:`VerilatedContext*->commandArgs(argc, argv)`. All simulation runtime arguments begin with "+verilator", so that the user's executable may skip over all "+verilator" arguments when parsing its @@ -96,7 +96,7 @@ Summary: .. option:: +verilator+noassert Disable assert checking per runtime argument. This is the same as - calling :code:`Verilated::assertOn(false)` in the model. + calling :code:`VerilatedContext*->assertOn(false)` in the model. .. option:: +verilator+V diff --git a/docs/guide/languages.rst b/docs/guide/languages.rst index 074f91d31..9b8adc596 100644 --- a/docs/guide/languages.rst +++ b/docs/guide/languages.rst @@ -489,7 +489,7 @@ $test$plusargs, $value$plusargs .. code-block:: C++ - Verilated::commandArgs(argc, argv); + {VerilatedContext*} ->commandArgs(argc, argv); to register the command line before calling $test$plusargs or $value$plusargs. 
diff --git a/examples/make_hello_c/sim_main.cpp b/examples/make_hello_c/sim_main.cpp index b2c76c84a..2e605d512 100644 --- a/examples/make_hello_c/sim_main.cpp +++ b/examples/make_hello_c/sim_main.cpp @@ -21,11 +21,14 @@ int main(int argc, char** argv, char** env) { // Prevent unused variable warnings if (false && argc && argv && env) {} + // Construct a VerilatedContext to hold simulation time, etc. + VerilatedContext* contextp = new VerilatedContext; + // Construct the Verilated model, from Vtop.h generated from Verilating "top.v" - Vtop* top = new Vtop; + Vtop* top = new Vtop{contextp}; // Simulate until $finish - while (!Verilated::gotFinish()) { + while (!contextp->gotFinish()) { // Evaluate model top->eval(); diff --git a/test_regress/t/t_dpi_accessors.cpp b/test_regress/t/t_dpi_accessors.cpp index d0de0d9a2..f160a888a 100644 --- a/test_regress/t/t_dpi_accessors.cpp +++ b/test_regress/t/t_dpi_accessors.cpp @@ -26,11 +26,9 @@ using std::hex; using std::setfill; using std::setw; -double sc_time_stamp() { return 0; } - // Convenience function to check we didn't finish unexpectedly -static void checkFinish(const char* msg) { - if (Verilated::gotFinish()) { +static void checkFinish(VerilatedContext* contextp, const char* msg) { + if (contextp->gotFinish()) { vl_fatal(__FILE__, __LINE__, "dut", msg); exit(1); } @@ -61,7 +59,9 @@ static void checkResult(bool p, const char* msg_fail) { // Main function instantiates the model and steps through the test. 
int main() { - Vt_dpi_accessors* dut = new Vt_dpi_accessors("dut"); + const std::unique_ptr contextp{new VerilatedContext}; + const std::unique_ptr dut{new VM_PREFIX{contextp.get(), "dut"}}; + svScope scope = svGetScopeFromName("dut.t"); if (!scope) vl_fatal(__FILE__, __LINE__, "dut", "No svGetScopeFromName result"); svSetScope(scope); @@ -112,7 +112,7 @@ int main() { cout << "===============================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; a = (int)a_read(); logReg(dut->clk, "read a", a, " (before clk)"); @@ -130,7 +130,7 @@ int main() { "Test of scalar register reading failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Check we can read a vector register. #ifdef TEST_VERBOSE @@ -138,7 +138,7 @@ int main() { cout << "===============================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; b = (int)b_read(); logRegHex(dut->clk, "read b", 8, b, " (before clk)"); @@ -153,7 +153,7 @@ int main() { "Test of vector register reading failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Test we can read an array element #ifdef TEST_VERBOSE @@ -162,7 +162,7 @@ int main() { cout << "=============================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; mem32 = (int)mem32_read(); logRegHex(dut->clk, "read mem32", 8, mem32, " (before clk)"); @@ -177,7 +177,7 @@ int main() { checkResult(mem32 == 0x20, "Test of array element reading failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // 
Check we can read a scalar wire #ifdef TEST_VERBOSE @@ -186,7 +186,7 @@ int main() { cout << "===========================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; a = (int)a_read(); c = (int)c_read(); @@ -206,7 +206,7 @@ int main() { checkResult(c == (1 - a), "Test of scalar wire reading failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Check we can read a vector wire #ifdef TEST_VERBOSE @@ -215,7 +215,7 @@ int main() { cout << "===========================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; b = (int)b_read(); d = (int)d_read(); @@ -236,7 +236,7 @@ int main() { checkResult(d == ((~b) & 0xff), "Test of vector wire reading failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Check we can write a scalar register #ifdef TEST_VERBOSE @@ -245,7 +245,7 @@ int main() { cout << "===============================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; a = 1 - (int)a_read(); a_write(reinterpret_cast(&a)); @@ -265,7 +265,7 @@ int main() { "Test of scalar register writing failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Check we can write a vector register #ifdef TEST_VERBOSE @@ -274,7 +274,7 @@ int main() { cout << "===============================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; b = (int)b_read() - 1; b_write(reinterpret_cast(&b)); @@ -294,7 +294,7 @@ int 
main() { "Test of vector register writing failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Test we can write an array element #ifdef TEST_VERBOSE @@ -303,7 +303,7 @@ int main() { cout << "=============================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; mem32 = (int)mem32_read() - 1; mem32_write(reinterpret_cast(&mem32)); @@ -323,7 +323,7 @@ int main() { checkResult(mem32_after == mem32, "Test of array element writing failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Check we can read a vector register slice #ifdef TEST_VERBOSE @@ -332,7 +332,7 @@ int main() { cout << "=====================================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; b = (int)b_read(); int b_slice = (int)b_slice_read(); @@ -350,7 +350,7 @@ int main() { checkResult(b_slice == (b & 0x0f), "Test of vector register slice reading failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Test we can read an array element slice #ifdef TEST_VERBOSE @@ -359,7 +359,7 @@ int main() { cout << "===================================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; mem32 = (int)mem32_read(); int mem32_slice = (int)mem32_slice_read(); @@ -379,7 +379,7 @@ int main() { "Test of array element slice reading failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Check we can read a vector wire slice #ifdef TEST_VERBOSE @@ 
-388,7 +388,7 @@ int main() { cout << "=================================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; b = (int)b_read(); d = (int)d_read(); @@ -410,7 +410,7 @@ int main() { checkResult(d_slice == ((d & 0x7e) >> 1), "Test of vector wire slice reading failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Check we can write a vector register slice #ifdef TEST_VERBOSE @@ -419,7 +419,7 @@ int main() { cout << "=====================================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; b = (int)b_read(); @@ -449,7 +449,7 @@ int main() { logRegHex(dut->clk, "read b [3:0]", 4, b_slice, " (after clk)"); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Test we can write an array element slice #ifdef TEST_VERBOSE @@ -458,7 +458,7 @@ int main() { cout << "===================================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; mem32 = (int)mem32_read(); @@ -494,7 +494,7 @@ int main() { "Test of array element slice writing failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Check we can read complex registers #ifdef TEST_VERBOSE @@ -503,7 +503,7 @@ int main() { cout << "================================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; b = (int)b_read(); @@ -540,9 +540,9 @@ int main() { cout << endl; #endif - checkFinish("t_dpi_accessors unexpected finish"); + 
checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; e = 0x05 | (i << 4); @@ -574,7 +574,7 @@ int main() { "Test of complex register reading l2 failed."); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Test we can write a complex register #ifdef TEST_VERBOSE @@ -583,7 +583,7 @@ int main() { cout << "================================\n"; #endif - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; b = (int)b_read(); @@ -632,9 +632,9 @@ int main() { cout << endl; #endif - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); - for (int i = 0; !Verilated::gotFinish() && (i < 4); i++) { + for (int i = 0; !contextp->gotFinish() && (i < 4); i++) { dut->clk = 1 - dut->clk; e = (int)e_read(); @@ -671,11 +671,10 @@ int main() { logRegHex(dut->clk, "read l2", 8, l2, " (before clk)"); } - checkFinish("t_dpi_accessors unexpected finish"); + checkFinish(contextp.get(), "t_dpi_accessors unexpected finish"); // Tidy up dut->final(); - VL_DO_DANGLING(delete dut, dut); cout << "*-* All Finished *-*\n"; } diff --git a/test_regress/t/t_dpi_var.cpp b/test_regress/t/t_dpi_var.cpp index 0bd8aa1ac..190044f69 100644 --- a/test_regress/t/t_dpi_var.cpp +++ b/test_regress/t/t_dpi_var.cpp @@ -110,39 +110,39 @@ void mon_eval() { //====================================================================== -unsigned int main_time = 0; - -double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); + const std::unique_ptr contextp{new VerilatedContext}; - VM_PREFIX* topp = new VM_PREFIX(""); // Note 
null name - we're flattening it out + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; // clang-format off #ifdef VERILATOR # ifdef TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); # endif #endif // clang-format on topp->eval(); topp->clk = 0; - main_time += 10; + contextp->timeInc(10); - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { - main_time += 1; + while (contextp->time() < sim_time && !contextp->gotFinish()) { + contextp->timeInc(1); topp->eval(); topp->clk = !topp->clk; // mon_do(); } - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); - VL_DO_DANGLING(delete topp, topp); return 0; } diff --git a/test_regress/t/t_hier_block_cmake/main.cpp b/test_regress/t/t_hier_block_cmake/main.cpp index b26551f4d..e49101162 100644 --- a/test_regress/t/t_hier_block_cmake/main.cpp +++ b/test_regress/t/t_hier_block_cmake/main.cpp @@ -13,13 +13,14 @@ #include "Vt_hier_block.h" int main(int argc, char *argv[]) { - std::unique_ptr top{new Vt_hier_block("top")}; - Verilated::commandArgs(argc, argv); - for (int i = 0; i < 100 && !Verilated::gotFinish(); ++i) { + const std::unique_ptr contextp{new VerilatedContext}; + std::unique_ptr top{new Vt_hier_block{contextp.get(), "top"}}; + contextp->commandArgs(argc, argv); + for (int i = 0; i < 100 && !contextp->gotFinish(); ++i) { top->eval(); top->clk ^= 1; } - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); } top->final(); diff --git a/test_regress/t/t_leak.cpp b/test_regress/t/t_leak.cpp index a0ad9259b..247dce41a 100644 --- a/test_regress/t/t_leak.cpp +++ b/test_regress/t/t_leak.cpp @@ -48,11 +48,12 @@ void make_and_destroy() { #ifdef VL_NO_LEGACY 
VerilatedContext* contextp = new VerilatedContext; VM_PREFIX* topp = new VM_PREFIX{contextp}; + contextp->debug(0); #else VM_PREFIX* topp = new VM_PREFIX; + Verilated::debug(0); #endif - Verilated::debug(0); topp->eval(); topp->clk = true; while (! diff --git a/test_regress/t/t_trace_two_cc.cpp b/test_regress/t/t_trace_two_cc.cpp index 0074d4ecb..31c314f86 100644 --- a/test_regress/t/t_trace_two_cc.cpp +++ b/test_regress/t/t_trace_two_cc.cpp @@ -25,21 +25,21 @@ VM_PREFIX* ap; Vt_trace_two_b* bp; -uint64_t main_time = 0; -double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { + const std::unique_ptr contextp{new VerilatedContext}; + uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); - Verilated::traceEverOn(true); + contextp->commandArgs(argc, argv); + contextp->debug(0); + contextp->traceEverOn(true); srand48(5); - ap = new VM_PREFIX("topa"); - bp = new Vt_trace_two_b("topb"); + ap = new VM_PREFIX{contextp.get(), "topa"}; + bp = new Vt_trace_two_b{contextp.get(), "topb"}; // clang-format off #ifdef TEST_HDR_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); # ifdef TEST_FST VerilatedFstC* tfp = new VerilatedFstC; ap->trace(tfp, 99); @@ -59,14 +59,14 @@ int main(int argc, char** argv, char** env) { bp->eval_step(); ap->eval_end_step(); bp->eval_end_step(); - if (tfp) tfp->dump(main_time); + if (tfp) tfp->dump(contextp->time()); #endif { ap->clk = false; - main_time += 10; + contextp->timeInc(10); } - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { + while (contextp->time() < sim_time && !contextp->gotFinish()) { ap->clk = !ap->clk; bp->clk = ap->clk; ap->eval_step(); @@ -74,11 +74,11 @@ int main(int argc, char** argv, char** env) { ap->eval_end_step(); bp->eval_end_step(); #ifdef TEST_HDR_TRACE - if (tfp) tfp->dump(main_time); + if (tfp) tfp->dump(contextp->time()); #endif - main_time += 5; + contextp->timeInc(5); } - if (!Verilated::gotFinish()) { + if 
(!contextp->gotFinish()) { vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); } ap->final(); diff --git a/test_regress/t/t_tri_gate.cpp b/test_regress/t/t_tri_gate.cpp index f16ae0a45..66159175b 100644 --- a/test_regress/t/t_tri_gate.cpp +++ b/test_regress/t/t_tri_gate.cpp @@ -8,8 +8,6 @@ VM_PREFIX* tb = nullptr; -double sc_time_stamp() { return 0; } - bool check() { bool pass; int c = (tb->A >> tb->SEL) & 0x1; diff --git a/test_regress/t/t_var_pinsizes.cpp b/test_regress/t/t_var_pinsizes.cpp index a5c75e363..5dc53e9e2 100644 --- a/test_regress/t/t_var_pinsizes.cpp +++ b/test_regress/t/t_var_pinsizes.cpp @@ -7,8 +7,6 @@ VM_PREFIX* tb = nullptr; -double sc_time_stamp() { return 0; } - int main() { Verilated::debug(0); tb = new VM_PREFIX("tb"); diff --git a/test_regress/t/t_vpi_cb_iter.cpp b/test_regress/t/t_vpi_cb_iter.cpp index 3afbe07e9..8258385e3 100644 --- a/test_regress/t/t_vpi_cb_iter.cpp +++ b/test_regress/t/t_vpi_cb_iter.cpp @@ -143,11 +143,15 @@ static void register_filler_cb() { double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { - uint64_t sim_time = 100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); + const std::unique_ptr contextp{new VerilatedContext}; - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out + uint64_t sim_time = 100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; reregister_value_cb(); TEST_CHECK_NZ(vh_value_cb); @@ -158,7 +162,7 @@ int main(int argc, char** argv, char** env) { topp->eval(); topp->clk = 0; - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { + while (main_time < sim_time && !contextp->gotFinish()) { main_time += 1; if (verbose) VL_PRINTF("Sim Time %d got_error %d\n", main_time, errors); topp->clk = !topp->clk; @@ -168,11 +172,10 @@ int main(int argc, char** argv, char** 
env) { if (errors) vl_stop(__FILE__, __LINE__, "TOP-cpp"); } - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); - VL_DO_DANGLING(delete topp, topp); return errors ? 10 : 0; } diff --git a/test_regress/t/t_vpi_cbs_called.cpp b/test_regress/t/t_vpi_cbs_called.cpp index c015b38af..c3e39db12 100644 --- a/test_regress/t/t_vpi_cbs_called.cpp +++ b/test_regress/t/t_vpi_cbs_called.cpp @@ -43,7 +43,6 @@ bool callbacks_expected_called[CB_COUNT] = {false}; std::vector::const_iterator cb_iter; std::vector::const_iterator state_iter; -unsigned int main_time = 0; bool got_error = false; #ifdef TEST_VERBOSE @@ -245,27 +244,29 @@ static int register_test_callback() { return 0; } -double sc_time_stamp() { return main_time; } - int main(int argc, char** argv, char** env) { + const std::unique_ptr contextp{new VerilatedContext}; + uint64_t sim_time = 100; bool cbs_called; - Verilated::commandArgs(argc, argv); + contextp->commandArgs(argc, argv); - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; - if (verbose) VL_PRINTF("-- { Sim Time %d } --\n", main_time); + if (verbose) VL_PRINTF("-- { Sim Time %" PRId64 " } --\n", contextp->time()); register_test_callback(); topp->eval(); topp->clk = 0; - main_time += 1; + contextp->timeInc(1); - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { + while (contextp->time() < sim_time && !contextp->gotFinish()) { if (verbose) { - VL_PRINTF("-- { Sim Time %d , Callback %s (%d) , Testcase State %d } --\n", main_time, - cb_reason_to_string(*cb_iter), *cb_iter, *state_iter); + VL_PRINTF("-- { Sim Time %" PRId64 " , Callback %s (%d) , Testcase State %d } --\n", + contextp->time(), cb_reason_to_string(*cb_iter), *cb_iter, *state_iter); } topp->eval(); @@ -285,14 +286,17 @@ int main(int 
argc, char** argv, char** env) { VerilatedVpi::callTimedCbs(); - main_time = VerilatedVpi::cbNextDeadline(); - if (main_time == -1 && !Verilated::gotFinish()) { - if (verbose) VL_PRINTF("-- { Sim Time %d , No more testcases } --\n", main_time); + int64_t next_time = VerilatedVpi::cbNextDeadline(); + contextp->time(next_time); + if (next_time == -1 && !contextp->gotFinish()) { + if (verbose) + VL_PRINTF("-- { Sim Time %" PRId64 " , No more testcases } --\n", + contextp->time()); if (got_error) { vl_stop(__FILE__, __LINE__, "TOP-cpp"); } else { VL_PRINTF("*-* All Finished *-*\n"); - Verilated::gotFinish(true); + contextp->gotFinish(true); } } @@ -302,11 +306,10 @@ int main(int argc, char** argv, char** env) { topp->clk = !topp->clk; } - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); - VL_DO_DANGLING(delete topp, topp); return 0; } diff --git a/test_regress/t/t_vpi_get.cpp b/test_regress/t/t_vpi_get.cpp index ab8a9bce5..20d5d0fd7 100644 --- a/test_regress/t/t_vpi_get.cpp +++ b/test_regress/t/t_vpi_get.cpp @@ -40,8 +40,6 @@ #define TEST_MSG \ if (0) printf -unsigned int main_time = 0; - //====================================================================== #define CHECK_RESULT_VH(got, exp) \ @@ -240,22 +238,25 @@ void vpi_compat_bootstrap(void) { void (*vlog_startup_routines[])() = {vpi_compat_bootstrap, 0}; #else -double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); + const std::unique_ptr contextp{new VerilatedContext}; - Vt_vpi_get* topp = new Vt_vpi_get(""); // Note null name - we're flattening it out + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; #ifdef VERILATOR #ifdef 
TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); #endif #endif #if VM_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); VL_PRINTF("Enabling waves...\n"); VerilatedVcdC* tfp = new VerilatedVcdC; topp->trace(tfp, 99); @@ -264,19 +265,19 @@ int main(int argc, char** argv, char** env) { topp->eval(); topp->clk = 0; - main_time += 10; + contextp->timeInc(10); - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { - main_time += 1; + while (contextp->time() < sim_time && !contextp->gotFinish()) { + contextp->timeInc(1); topp->eval(); VerilatedVpi::callValueCbs(); topp->clk = !topp->clk; // mon_do(); #if VM_TRACE - if (tfp) tfp->dump(main_time); + if (tfp) tfp->dump(contextp->time()); #endif } - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(FILENM, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); @@ -285,7 +286,6 @@ int main(int argc, char** argv, char** env) { if (tfp) tfp->close(); #endif - VL_DO_DANGLING(delete topp, topp); return 0; } diff --git a/test_regress/t/t_vpi_memory.cpp b/test_regress/t/t_vpi_memory.cpp index 490d871e0..e7532b9ad 100644 --- a/test_regress/t/t_vpi_memory.cpp +++ b/test_regress/t/t_vpi_memory.cpp @@ -41,7 +41,6 @@ #define DEBUG \ if (0) printf -unsigned int main_time = 0; int errors = 0; //====================================================================== @@ -250,24 +249,27 @@ void (*vlog_startup_routines[])() = {vpi_compat_bootstrap, 0}; #else -double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); - // we're going to be checking for these errors do don't crash out - Verilated::fatalOnVpiError(0); + const std::unique_ptr contextp{new VerilatedContext}; - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + 
contextp->debug(0); + // we're going to be checking for these errors do don't crash out + contextp->fatalOnVpiError(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; #ifdef VERILATOR #ifdef TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); #endif #endif #if VM_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); VL_PRINTF("Enabling waves...\n"); VerilatedVcdC* tfp = new VerilatedVcdC; topp->trace(tfp, 99); @@ -276,19 +278,19 @@ int main(int argc, char** argv, char** env) { topp->eval(); topp->clk = 0; - main_time += 10; + contextp->timeInc(10); - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { - main_time += 1; + while (contextp->time() < sim_time && !contextp->gotFinish()) { + contextp->timeInc(1); topp->eval(); VerilatedVpi::callValueCbs(); topp->clk = !topp->clk; // mon_do(); #if VM_TRACE - if (tfp) tfp->dump(main_time); + if (tfp) tfp->dump(contextp->time()); #endif } - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(FILENM, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); @@ -297,7 +299,6 @@ int main(int argc, char** argv, char** env) { if (tfp) tfp->close(); #endif - VL_DO_DANGLING(delete topp, topp); return 0; } diff --git a/test_regress/t/t_vpi_module.cpp b/test_regress/t/t_vpi_module.cpp index cc3325eca..b09fe6ea7 100644 --- a/test_regress/t/t_vpi_module.cpp +++ b/test_regress/t/t_vpi_module.cpp @@ -40,8 +40,6 @@ #define DEBUG \ if (0) printf -unsigned int main_time = 0; - #define CHECK_RESULT_NZ(got) \ if (!(got)) { \ printf("%%Error: %s:%d: GOT = NULL EXP = !NULL\n", FILENM, __LINE__); \ @@ -172,27 +170,36 @@ void (*vlog_startup_routines[])() = {vpi_compat_bootstrap, 0}; #else -double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); - // we're going to be checking 
for these errors do don't crash out - Verilated::fatalOnVpiError(0); + const std::unique_ptr contextp{new VerilatedContext}; + + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + // we're going to be checking for these errors do don't crash out + contextp->fatalOnVpiError(0); + + { + // Construct and destroy + const std::unique_ptr topp{ + new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; + } - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out // Test second construction - delete topp; - topp = new VM_PREFIX(""); + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; #ifdef VERILATOR #ifdef TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); #endif #endif #if VM_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); VL_PRINTF("Enabling waves...\n"); VerilatedVcdC* tfp = new VerilatedVcdC; topp->trace(tfp, 99); @@ -201,19 +208,19 @@ int main(int argc, char** argv, char** env) { topp->eval(); topp->clk = 0; - main_time += 10; + contextp->timeInc(10); - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { - main_time += 1; + while (contextp->time() < sim_time && !contextp->gotFinish()) { + contextp->timeInc(1); topp->eval(); VerilatedVpi::callValueCbs(); topp->clk = !topp->clk; // mon_do(); #if VM_TRACE - if (tfp) tfp->dump(main_time); + if (tfp) tfp->dump(contextp->time()); #endif } - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(FILENM, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); @@ -222,7 +229,6 @@ int main(int argc, char** argv, char** env) { if (tfp) tfp->close(); #endif - VL_DO_DANGLING(delete topp, topp); return 0; } diff --git a/test_regress/t/t_vpi_param.cpp b/test_regress/t/t_vpi_param.cpp index 639bb9196..8ac956531 100644 --- a/test_regress/t/t_vpi_param.cpp +++ b/test_regress/t/t_vpi_param.cpp @@ -40,8 
+40,6 @@ #define DEBUG \ if (0) printf -unsigned int main_time = 0; - //====================================================================== #define CHECK_RESULT_VH(got, exp) \ @@ -240,24 +238,27 @@ void (*vlog_startup_routines[])() = {vpi_compat_bootstrap, 0}; #else -double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); - // we're going to be checking for these errors do don't crash out - Verilated::fatalOnVpiError(0); + const std::unique_ptr contextp{new VerilatedContext}; - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + // we're going to be checking for these errors do don't crash out + contextp->fatalOnVpiError(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; #ifdef VERILATOR #ifdef TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); #endif #endif #if VM_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); VL_PRINTF("Enabling waves...\n"); VerilatedVcdC* tfp = new VerilatedVcdC; topp->trace(tfp, 99); @@ -266,19 +267,19 @@ int main(int argc, char** argv, char** env) { topp->eval(); topp->clk = 0; - main_time += 10; + contextp->timeInc(10); - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { - main_time += 1; + while (contextp->time() < sim_time && !contextp->gotFinish()) { + contextp->timeInc(1); topp->eval(); VerilatedVpi::callValueCbs(); topp->clk = !topp->clk; // mon_do(); #if VM_TRACE - if (tfp) tfp->dump(main_time); + if (tfp) tfp->dump(contextp->time()); #endif } - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(FILENM, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); @@ -287,7 +288,6 @@ int main(int argc, char** argv, char** env) { if (tfp) 
tfp->close(); #endif - VL_DO_DANGLING(delete topp, topp); return 0; } diff --git a/test_regress/t/t_vpi_time_cb.cpp b/test_regress/t/t_vpi_time_cb.cpp index 70c65cdeb..1e59a384d 100644 --- a/test_regress/t/t_vpi_time_cb.cpp +++ b/test_regress/t/t_vpi_time_cb.cpp @@ -22,27 +22,27 @@ #include -unsigned int main_time = 0; - //====================================================================== -double sc_time_stamp() { return main_time; } - int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); + const std::unique_ptr contextp{new VerilatedContext}; - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; // clang-format off #ifdef TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); #endif // clang-format on #if VM_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); VL_PRINTF("Enabling waves...\n"); VerilatedVcdC* tfp = new VerilatedVcdC; topp->trace(tfp, 99); @@ -54,24 +54,24 @@ int main(int argc, char** argv, char** env) { topp->eval(); topp->clk = 0; - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { - main_time += 1; + while (vl_time_stamp64() < sim_time && !contextp->gotFinish()) { + contextp->timeInc(1); topp->eval(); VerilatedVpi::callValueCbs(); VerilatedVpi::callTimedCbs(); - if (main_time > 20) { // Else haven't registered callbacks - TEST_CHECK_EQ(VerilatedVpi::cbNextDeadline(), main_time + 1); + if (contextp->time() > 20) { // Else haven't registered callbacks + TEST_CHECK_EQ(VerilatedVpi::cbNextDeadline(), contextp->time() + 1); } - if ((main_time % 5) == 0) topp->clk = !topp->clk; + if ((contextp->time() % 5) == 0) topp->clk = !topp->clk; // mon_do(); #if VM_TRACE - if (tfp) tfp->dump(main_time); + if 
(tfp) tfp->dump(contextp->time()); #endif } VerilatedVpi::callCbs(cbEndOfSimulation); - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); @@ -80,6 +80,5 @@ int main(int argc, char** argv, char** env) { if (tfp) tfp->close(); #endif - VL_DO_DANGLING(delete topp, topp); return errors ? 10 : 0; } diff --git a/test_regress/t/t_vpi_unimpl.cpp b/test_regress/t/t_vpi_unimpl.cpp index e3763b504..e8359ba45 100644 --- a/test_regress/t/t_vpi_unimpl.cpp +++ b/test_regress/t/t_vpi_unimpl.cpp @@ -28,7 +28,6 @@ #define DEBUG \ if (0) printf -unsigned int main_time = 0; unsigned int callback_count = 0; //====================================================================== @@ -184,24 +183,27 @@ extern "C" int mon_check() { //====================================================================== -double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); - // we're going to be checking for these errors do don't crash out - Verilated::fatalOnVpiError(0); + const std::unique_ptr contextp{new VerilatedContext}; - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + // we're going to be checking for these errors do don't crash out + contextp->fatalOnVpiError(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; #ifdef VERILATOR #ifdef TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); #endif #endif #if VM_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); VL_PRINTF("Enabling waves...\n"); VerilatedVcdC* tfp = new VerilatedVcdC; topp->trace(tfp, 99); @@ -210,20 +212,20 @@ int main(int argc, char** argv, char** env) { topp->eval(); topp->clk = 0; - 
main_time += 10; + contextp->timeInc(10); - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { - main_time += 1; + while (contextp->time() < sim_time && !contextp->gotFinish()) { + contextp->timeInc(1); topp->eval(); // VerilatedVpi::callValueCbs(); // Make sure can link without verilated_vpi.h included topp->clk = !topp->clk; // mon_do(); #if VM_TRACE - if (tfp) tfp->dump(main_time); + if (tfp) tfp->dump(contextp->time()); #endif } if (!callback_count) vl_fatal(FILENM, __LINE__, "main", "%Error: never got callbacks"); - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(FILENM, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); @@ -232,6 +234,5 @@ int main(int argc, char** argv, char** env) { if (tfp) tfp->close(); #endif - VL_DO_DANGLING(delete topp, topp); return 0; } diff --git a/test_regress/t/t_vpi_var.cpp b/test_regress/t/t_vpi_var.cpp index 648557234..f2c4dfa8f 100644 --- a/test_regress/t/t_vpi_var.cpp +++ b/test_regress/t/t_vpi_var.cpp @@ -693,20 +693,24 @@ void (*vlog_startup_routines[])() = {vpi_compat_bootstrap, 0}; double sc_time_stamp() { return main_time; } int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); + const std::unique_ptr contextp{new VerilatedContext}; - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; #ifdef VERILATOR #ifdef TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); #endif #endif #if VM_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); VL_PRINTF("Enabling waves...\n"); VerilatedVcdC* tfp = new VerilatedVcdC; topp->trace(tfp, 99); @@ -717,7 +721,7 @@ int main(int argc, char** argv, char** env) { topp->clk = 0; main_time += 10; 
- while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { + while (vl_time_stamp64() < sim_time && !contextp->gotFinish()) { main_time += 1; topp->eval(); VerilatedVpi::callValueCbs(); @@ -731,7 +735,7 @@ int main(int argc, char** argv, char** env) { CHECK_RESULT(callback_count_half, 250); CHECK_RESULT(callback_count_quad, 2); CHECK_RESULT(callback_count_strs, callback_count_strs_max); - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(FILENM, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); @@ -740,7 +744,6 @@ int main(int argc, char** argv, char** env) { if (tfp) tfp->close(); #endif - VL_DO_DANGLING(delete topp, topp); return 0; } diff --git a/test_regress/t/t_vpi_zero_time_cb.cpp b/test_regress/t/t_vpi_zero_time_cb.cpp index 243e97746..0427ac226 100644 --- a/test_regress/t/t_vpi_zero_time_cb.cpp +++ b/test_regress/t/t_vpi_zero_time_cb.cpp @@ -37,7 +37,6 @@ #include "TestVpi.h" int errors = 0; -unsigned int main_time = 0; unsigned int callback_count_zero_time = 0; unsigned int callback_count_start_of_sim = 0; @@ -105,25 +104,27 @@ void (*vlog_startup_routines[])() = {vpi_compat_bootstrap, 0}; #else -double sc_time_stamp() { return main_time; } - int main(int argc, char** argv, char** env) { - uint64_t sim_time = 1100; - Verilated::commandArgs(argc, argv); - Verilated::debug(0); + const std::unique_ptr contextp{new VerilatedContext}; - VM_PREFIX* topp = new VM_PREFIX(""); // Note null name - we're flattening it out + uint64_t sim_time = 1100; + contextp->commandArgs(argc, argv); + contextp->debug(0); + + const std::unique_ptr topp{new VM_PREFIX{contextp.get(), + // Note null name - we're flattening it out + ""}}; // clang-format off #ifdef VERILATOR # ifdef TEST_VERBOSE - Verilated::scopesDump(); + contextp->scopesDump(); # endif #endif // clang-format on #if VM_TRACE - Verilated::traceEverOn(true); + contextp->traceEverOn(true); VL_PRINTF("Enabling waves...\n"); VerilatedVcdC* tfp = new VerilatedVcdC; 
topp->trace(tfp, 99); @@ -146,23 +147,23 @@ int main(int argc, char** argv, char** env) { topp->eval(); topp->clk = 0; - main_time += 1; + contextp->timeInc(1); - while (vl_time_stamp64() < sim_time && !Verilated::gotFinish()) { - main_time += 1; + while (contextp->time() < sim_time && !contextp->gotFinish()) { + contextp->timeInc(1); topp->eval(); VerilatedVpi::callValueCbs(); VerilatedVpi::callTimedCbs(); topp->clk = !topp->clk; // mon_do(); #if VM_TRACE - if (tfp) tfp->dump(main_time); + if (tfp) tfp->dump(contextp->time()); #endif } VerilatedVpi::callCbs(cbEndOfSimulation); - if (!Verilated::gotFinish()) { + if (!contextp->gotFinish()) { vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); } topp->final(); @@ -171,7 +172,6 @@ int main(int argc, char** argv, char** env) { if (tfp) tfp->close(); #endif - VL_DO_DANGLING(delete topp, topp); return 0; } From 8377514127d4e5b2885d3f32da4cf928a4185764 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kozdra Date: Mon, 11 Jul 2022 12:21:35 +0200 Subject: [PATCH 019/119] Add support for $test$plusargs(expr) (#3489) --- docs/CONTRIBUTORS | 1 + include/verilated.cpp | 4 ++-- include/verilated_funcs.h | 2 +- src/V3AstNodes.h | 19 +++++++------------ src/V3EmitCFunc.h | 2 +- src/V3Hasher.cpp | 5 ----- src/V3LinkLValue.cpp | 7 +++++++ src/V3Width.cpp | 7 ++++++- src/verilog.y | 2 +- test_regress/t/t_sys_plusargs.v | 6 ++++++ 10 files changed, 32 insertions(+), 23 deletions(-) diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS index efd6749cc..6089a5eaa 100644 --- a/docs/CONTRIBUTORS +++ b/docs/CONTRIBUTORS @@ -10,6 +10,7 @@ Alex Chadwick Aliaksei Chapyzhenka Ameya Vikram Singh Andreas Kuster +Arkadiusz Kozdra Chris Randall Chuxuan Wang Conor McCullough diff --git a/include/verilated.cpp b/include/verilated.cpp index 1f6fe3b4c..da8599a42 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -1606,8 +1606,8 @@ IData VL_SYSTEM_IW(int lhswords, const WDataInP lhsp) VL_MT_SAFE { return code >> 8; // 
Want exit status } -IData VL_TESTPLUSARGS_I(const char* formatp) VL_MT_SAFE { - const std::string& match = Verilated::threadContextp()->impp()->argPlusMatch(formatp); +IData VL_TESTPLUSARGS_I(const std::string& format) VL_MT_SAFE { + const std::string& match = Verilated::threadContextp()->impp()->argPlusMatch(format.c_str()); return match.empty() ? 0 : 1; } diff --git a/include/verilated_funcs.h b/include/verilated_funcs.h index ab71839b5..c8fb91d3e 100644 --- a/include/verilated_funcs.h +++ b/include/verilated_funcs.h @@ -146,7 +146,7 @@ extern IData VL_SYSTEM_IW(int lhswords, WDataInP const lhsp); extern IData VL_SYSTEM_IQ(QData lhs); inline IData VL_SYSTEM_II(IData lhs) VL_MT_SAFE { return VL_SYSTEM_IQ(lhs); } -extern IData VL_TESTPLUSARGS_I(const char* formatp); +extern IData VL_TESTPLUSARGS_I(const std::string& format); extern const char* vl_mc_scan_plusargs(const char* prefixp); // PLIish //========================================================================= diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index 118666c0b..d9e4b81db 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -4551,26 +4551,21 @@ public: class AstTestPlusArgs final : public AstNodeMath { // Parents: expr // Child: variable to set. 
If nullptr then this is a $test$plusargs instead of $value$plusargs -private: - string m_text; - public: - AstTestPlusArgs(FileLine* fl, const string& text) - : ASTGEN_SUPER_TestPlusArgs(fl) - , m_text{text} {} + AstTestPlusArgs(FileLine* fl, AstNode* searchp) + : ASTGEN_SUPER_TestPlusArgs(fl) { + setOp1p(searchp); + } ASTNODE_NODE_FUNCS(TestPlusArgs) - virtual string name() const override { return m_text; } virtual string verilogKwd() const override { return "$test$plusargs"; } virtual string emitVerilog() override { return verilogKwd(); } virtual string emitC() override { return "VL_VALUEPLUSARGS_%nq(%lw, %P, nullptr)"; } virtual bool isGateOptimizable() const override { return false; } virtual bool isPredictOptimizable() const override { return false; } virtual bool cleanOut() const override { return true; } - virtual bool same(const AstNode* samep) const override { - return text() == static_cast(samep)->text(); - } - string text() const { return m_text; } // * = Text to display - void text(const string& text) { m_text = text; } + virtual bool same(const AstNode* samep) const override { return true; } + AstNode* searchp() const { return op1p(); } // op1 = Search expression + void searchp(AstNode* nodep) { setOp1p(nodep); } }; class AstGenFor final : public AstNodeFor { diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index 4bdfb1c57..305fa47a7 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -581,7 +581,7 @@ public: } virtual void visit(AstTestPlusArgs* nodep) override { puts("VL_TESTPLUSARGS_I("); - putsQuoted(nodep->text()); + emitCvtPackStr(nodep->searchp()); puts(")"); } virtual void visit(AstFError* nodep) override { diff --git a/src/V3Hasher.cpp b/src/V3Hasher.cpp index 7cf3672b6..4b0cbbbba 100644 --- a/src/V3Hasher.cpp +++ b/src/V3Hasher.cpp @@ -225,11 +225,6 @@ private: m_hash += nodep->text(); }); } - virtual void visit(AstTestPlusArgs* nodep) override { - m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, [=]() { // - m_hash += 
nodep->text(); - }); - } virtual void visit(AstAddrOfCFunc* nodep) override { m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, [=]() { // iterateNull(nodep->funcp()); diff --git a/src/V3LinkLValue.cpp b/src/V3LinkLValue.cpp index 6ab7eb8e7..1f955e361 100644 --- a/src/V3LinkLValue.cpp +++ b/src/V3LinkLValue.cpp @@ -210,6 +210,13 @@ private: iterateAndNextNull(nodep->msbp()); } } + virtual void visit(AstTestPlusArgs* nodep) override { + VL_RESTORER(m_setRefLvalue); + { + m_setRefLvalue = VAccess::NOCHANGE; + iterateAndNextNull(nodep->searchp()); + } + } virtual void visit(AstValuePlusArgs* nodep) override { VL_RESTORER(m_setRefLvalue); { diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 15bdf6f33..8a833d9f2 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -448,7 +448,6 @@ private: // Widths: Constant, terminal virtual void visit(AstTime* nodep) override { nodep->dtypeSetUInt64(); } virtual void visit(AstTimeD* nodep) override { nodep->dtypeSetDouble(); } - virtual void visit(AstTestPlusArgs* nodep) override { nodep->dtypeSetSigned32(); } virtual void visit(AstScopeName* nodep) override { nodep->dtypeSetUInt64(); // A pointer, but not that it matters } @@ -4350,6 +4349,12 @@ private: userIterateAndNext(nodep->lsbp(), WidthVP(SELF, BOTH).p()); userIterateAndNext(nodep->msbp(), WidthVP(SELF, BOTH).p()); } + virtual void visit(AstTestPlusArgs* nodep) override { + if (m_vup->prelim()) { + userIterateAndNext(nodep->searchp(), WidthVP{SELF, BOTH}.p()); + nodep->dtypeChgWidthSigned(32, 1, VSigning::SIGNED); // Spec says integer return + } + } virtual void visit(AstValuePlusArgs* nodep) override { if (m_vup->prelim()) { userIterateAndNext(nodep->searchp(), WidthVP(SELF, BOTH).p()); diff --git a/src/verilog.y b/src/verilog.y index ea4161500..34fbe86b0 100644 --- a/src/verilog.y +++ b/src/verilog.y @@ -3917,7 +3917,7 @@ system_f_call_or_t: // IEEE: part of system_tf_call (can be task or | yD_STABLE '(' expr ',' expr ')' { $$ = $3; BBUNSUP($1, "Unsupported: 
$stable and clock arguments"); } | yD_TAN '(' expr ')' { $$ = new AstTanD($1,$3); } | yD_TANH '(' expr ')' { $$ = new AstTanhD($1,$3); } - | yD_TESTPLUSARGS '(' str ')' { $$ = new AstTestPlusArgs($1,*$3); } + | yD_TESTPLUSARGS '(' expr ')' { $$ = new AstTestPlusArgs($1, $3); } | yD_TIME parenE { $$ = new AstTime($1, VTimescale(VTimescale::NONE)); } | yD_TYPENAME '(' exprOrDataType ')' { $$ = new AstAttrOf($1, VAttrType::TYPENAME, $3); } | yD_UNGETC '(' expr ',' expr ')' { $$ = new AstFUngetC($1, $5, $3); } // Arg swap to file first diff --git a/test_regress/t/t_sys_plusargs.v b/test_regress/t/t_sys_plusargs.v index 55daf5085..4aef6e33c 100644 --- a/test_regress/t/t_sys_plusargs.v +++ b/test_regress/t/t_sys_plusargs.v @@ -22,6 +22,12 @@ module t; //if ($test$plusargs("")!==1) $stop; // Simulators differ in this answer if ($test$plusargs("NOTTHERE")!==0) $stop; + sv_in = "PLUS"; +`ifdef VERILATOR + if ($c1(0)) sv_in = "NEVER"; // Prevent constant propagation +`endif + if ($test$plusargs(sv_in)!==1) $stop; + p_i = 10; if ($value$plusargs("NOTTHERE%d", p_i) !== 0) $stop; if ($value$plusargs("NOTTHERE%0d", p_i) !== 0) $stop; From f4038e36743bfb5d5860293520df781deedfe598 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 12 Jul 2022 11:41:15 +0100 Subject: [PATCH 020/119] Move thread pool and execution profiler into the context. 
(#3477) Fixes #3454 --- include/verilated.cpp | 58 +++++++++ include/verilated.h | 48 +++++++- include/verilated_profiler.cpp | 43 +++++-- include/verilated_profiler.h | 17 +-- include/verilated_threads.cpp | 54 ++++----- include/verilated_threads.h | 32 ++--- include/verilated_trace.h | 22 ++-- include/verilated_trace_imp.h | 133 ++++++++++----------- src/V3EmitCHeaders.cpp | 6 +- src/V3EmitCMake.cpp | 2 +- src/V3EmitCModel.cpp | 31 +++-- src/V3EmitCSyms.cpp | 24 ++-- src/V3EmitMk.cpp | 2 +- src/V3Trace.cpp | 9 +- test_regress/driver.pl | 8 +- test_regress/t/t_embed1.pl | 3 +- test_regress/t/t_gantt_two.cpp | 43 +++++++ test_regress/t/t_gantt_two.pl | 61 ++++++++++ test_regress/t/t_hier_block_cmake/main.cpp | 5 +- test_regress/t/t_lib_prot_shared.pl | 3 +- test_regress/t/t_threads_crazy.pl | 12 +- test_regress/t/t_threads_crazy_context.pl | 36 ++++++ test_regress/t/t_wrapper_context.cpp | 2 + 23 files changed, 470 insertions(+), 184 deletions(-) create mode 100644 test_regress/t/t_gantt_two.cpp create mode 100755 test_regress/t/t_gantt_two.pl create mode 100755 test_regress/t/t_threads_crazy_context.pl diff --git a/include/verilated.cpp b/include/verilated.cpp index da8599a42..cf1d76d8f 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -66,6 +66,10 @@ #if defined(_WIN32) || defined(__MINGW32__) # include // mkdir #endif + +#ifdef VL_THREADED +# include "verilated_threads.h" +#endif // clang-format on // Max characters in static char string for VL_VALUE_STRING @@ -2428,6 +2432,33 @@ const char* VerilatedContext::timeprecisionString() const VL_MT_SAFE { return vl_time_str(timeprecision()); } +void VerilatedContext::threads(unsigned n) { + if (n == 0) VL_FATAL_MT(__FILE__, __LINE__, "", "%Error: Simulation threads must be >= 1"); + + if (m_threadPool) { + VL_FATAL_MT( + __FILE__, __LINE__, "", + "%Error: Cannot set simulation threads after the thread pool has been created."); + } + +#if VL_THREADED + if (m_threads == n) return; // To avoid unnecessary 
warnings + m_threads = n; + const unsigned hardwareThreadsAvailable = std::thread::hardware_concurrency(); + if (m_threads > hardwareThreadsAvailable) { + VL_PRINTF_MT("%%Warning: System has %u hardware threads but simulation thread count set " + "to %u. This will likely cause significant slowdown.\n", + hardwareThreadsAvailable, m_threads); + } +#else + if (n > 1) { + VL_PRINTF_MT("%%Warning: Verilator run-time library built without VL_THREADS. Ignoring " + "call to 'VerilatedContext::threads' with argument %u.\n", + n); + } +#endif +} + void VerilatedContext::commandArgs(int argc, const char** argv) VL_MT_SAFE_EXCLUDES(m_argMutex) { const VerilatedLockGuard lock{m_argMutex}; m_args.m_argVec.clear(); // Empty first, then add @@ -2458,6 +2489,33 @@ void VerilatedContext::internalsDump() const VL_MT_SAFE { VerilatedImp::userDump(); } +void VerilatedContext::addModel(VerilatedModel* modelp) { + threadPoolp(); // Ensure thread pool is created, so m_threads cannot change any more + + if (modelp->threads() > m_threads) { + std::ostringstream msg; + msg << "VerilatedContext has " << m_threads << " threads but model '" + << modelp->modelName() << "' (instantiated as '" << modelp->hierName() + << "') was Verilated with --threads " << modelp->threads() << ".\n"; + const std::string str = msg.str(); + VL_FATAL_MT(__FILE__, __LINE__, modelp->hierName(), str.c_str()); + } +} + +VerilatedVirtualBase* VerilatedContext::threadPoolp() { + if (m_threads == 1) return nullptr; +#if VL_THREADED + if (!m_threadPool) m_threadPool.reset(new VlThreadPool{this, m_threads - 1}); +#endif + return m_threadPool.get(); +} + +VerilatedVirtualBase* +VerilatedContext::enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)) { + if (!m_executionProfiler) m_executionProfiler.reset(construct(*this)); + return m_executionProfiler.get(); +} + //====================================================================== // VerilatedContextImp:: Methods - command line diff --git 
a/include/verilated.h b/include/verilated.h index f9cf79601..bc1d5a3f2 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -252,6 +252,28 @@ public: #endif }; +//========================================================================= +/// Base class of a Verilator generated (Verilated) model. +/// +/// VerilatedModel is a base class of the user facing primary class generated +/// by Verilator. + +class VerilatedModel VL_NOT_FINAL { + VL_UNCOPYABLE(VerilatedModel); + +protected: + explicit VerilatedModel() = default; + virtual ~VerilatedModel() = default; + +public: + /// Returns the hierarchical name of this module instance. + virtual const char* hierName() = 0; + /// Returns the name of this model (the name of the generated model class). + virtual const char* modelName() = 0; + /// Returns the thread level parallelism, this model was Verilated with. Always 1 or higher. + virtual unsigned threads() = 0; +}; + //========================================================================= /// Base class for all Verilated module classes. @@ -266,10 +288,6 @@ public: const char* name() const { return m_namep; } ///< Return name of module }; -/// Declare a module, ala SC_MODULE -#define VL_MODULE(modname) class modname VL_NOT_FINAL : public VerilatedModule -// Not class final in VL_MODULE, as users might be abstracting our models (--hierarchical) - //========================================================================= // Functions overridable by user defines // (Internals however must use VL_PRINTF_MT, which calls these.) 
@@ -362,6 +380,16 @@ protected: // Implementation details const std::unique_ptr m_impdatap; + // Number of threads to use for simulation (size of m_threadPool + 1 for main thread) +#ifdef VL_THREADED + unsigned m_threads = std::thread::hardware_concurrency(); +#else + const unsigned m_threads = 1; +#endif + // The thread pool shared by all models added to this context + std::unique_ptr m_threadPool; + // The execution profiler shared by all models added to this context + std::unique_ptr m_executionProfiler; // Coverage access std::unique_ptr m_coveragep; // Pointer for coveragep() @@ -495,6 +523,12 @@ public: /// Get time precision as IEEE-standard text const char* timeprecisionString() const VL_MT_SAFE; + /// Get number of threads used for simulation (including the main thread) + unsigned threads() const { return m_threads; } + /// Set number of threads used for simulation (including the main thread) + /// Can only be called before the thread pool is created (before first model is added). 
+ void threads(unsigned n); + /// Allow traces to at some point be enabled (disables some optimizations) void traceEverOn(bool flag) VL_MT_SAFE { if (flag) calcUnusedSigs(true); @@ -517,6 +551,12 @@ public: // But for internal use only return reinterpret_cast(this); } + void addModel(VerilatedModel*); + + VerilatedVirtualBase* threadPoolp(); + VerilatedVirtualBase* + enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)); + // Internal: $dumpfile void dumpfile(const std::string& flag) VL_MT_SAFE_EXCLUDES(m_timeDumpMutex); std::string dumpfile() const VL_MT_SAFE_EXCLUDES(m_timeDumpMutex); diff --git a/include/verilated_profiler.cpp b/include/verilated_profiler.cpp index 21246827a..d65442f44 100644 --- a/include/verilated_profiler.cpp +++ b/include/verilated_profiler.cpp @@ -66,41 +66,66 @@ template static size_t roundUptoMultipleOf(size_t value) { return (value + mask) & ~mask; } -VlExecutionProfiler::VlExecutionProfiler() { +VlExecutionProfiler::VlExecutionProfiler(VerilatedContext& context) + : m_context{context} { // Setup profiling on main thread setupThread(0); } -void VlExecutionProfiler::configure(const VerilatedContext& context) { +void VlExecutionProfiler::configure() { + if (VL_UNLIKELY(m_enabled)) { --m_windowCount; - if (VL_UNLIKELY(m_windowCount == context.profExecWindow())) { + if (VL_UNLIKELY(m_windowCount == m_context.profExecWindow())) { VL_DEBUG_IF(VL_DBG_MSGF("+ profile start collection\n");); clear(); // Clear the profile after the cache warm-up cycles. 
m_tickBegin = VL_CPU_TICK(); } else if (VL_UNLIKELY(m_windowCount == 0)) { const uint64_t tickEnd = VL_CPU_TICK(); VL_DEBUG_IF(VL_DBG_MSGF("+ profile end\n");); - const std::string& fileName = context.profExecFilename(); + const std::string& fileName = m_context.profExecFilename(); dump(fileName.c_str(), tickEnd); m_enabled = false; } return; } - const uint64_t startReq = context.profExecStart() + 1; // + 1, so we can start at time 0 + const uint64_t startReq = m_context.profExecStart() + 1; // + 1, so we can start at time 0 - if (VL_UNLIKELY(m_lastStartReq < startReq && VL_TIME_Q() >= context.profExecStart())) { + if (VL_UNLIKELY(m_lastStartReq < startReq && VL_TIME_Q() >= m_context.profExecStart())) { VL_DEBUG_IF(VL_DBG_MSGF("+ profile start warmup\n");); VL_DEBUG_IF(assert(m_windowCount == 0);); m_enabled = true; - m_windowCount = context.profExecWindow() * 2; + m_windowCount = m_context.profExecWindow() * 2; m_lastStartReq = startReq; } } -void VlExecutionProfiler::startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId) { - profilep->setupThread(threadId); +VerilatedVirtualBase* VlExecutionProfiler::construct(VerilatedContext& context) { + VlExecutionProfiler* const selfp = new VlExecutionProfiler{context}; +#if VL_THREADED + if (VlThreadPool* const threadPoolp = static_cast(context.threadPoolp())) { + for (int i = 0; i < threadPoolp->numThreads(); ++i) { + // Data to pass to worker thread initialization + struct Data { + VlExecutionProfiler* const selfp; + const uint32_t threadId; + } data{selfp, static_cast(i + 1)}; + + // Initialize worker thread + threadPoolp->workerp(i)->addTask( + [](void* userp, bool) { + Data* const datap = static_cast(userp); + datap->selfp->setupThread(datap->threadId); + }, + &data); + + // Wait until initializationis complete + threadPoolp->workerp(i)->wait(); + } + } +#endif + return selfp; } void VlExecutionProfiler::setupThread(uint32_t threadId) { diff --git a/include/verilated_profiler.h 
b/include/verilated_profiler.h index f85c95528..61f2813d3 100644 --- a/include/verilated_profiler.h +++ b/include/verilated_profiler.h @@ -33,13 +33,14 @@ #include class VlExecutionProfiler; +class VlThreadPool; //============================================================================= // Macros to simplify generated code #define VL_EXEC_TRACE_ADD_RECORD(vlSymsp) \ - if (VL_UNLIKELY((vlSymsp)->__Vm_executionProfiler.enabled())) \ - (vlSymsp)->__Vm_executionProfiler.addRecord() + if (VL_UNLIKELY((vlSymsp)->__Vm_executionProfilerp->enabled())) \ + (vlSymsp)->__Vm_executionProfilerp->addRecord() //============================================================================= // Return high-precision counter for profiling, or 0x0 if not available @@ -131,7 +132,7 @@ static_assert(std::is_trivially_destructible::value, //============================================================================= // VlExecutionProfiler is for collecting profiling data about model execution -class VlExecutionProfiler final { +class VlExecutionProfiler final : public VerilatedVirtualBase { // CONSTANTS // In order to try to avoid dynamic memory allocations during the actual profiling phase, @@ -149,6 +150,7 @@ class VlExecutionProfiler final { using ExecutionTrace = std::vector; // STATE + VerilatedContext& m_context; // The context this profiler is under static VL_THREAD_LOCAL ExecutionTrace t_trace; // thread-local trace buffers mutable VerilatedMutex m_mutex; // Map from thread id to &t_trace of given thread @@ -162,7 +164,8 @@ class VlExecutionProfiler final { public: // CONSTRUCTOR - VlExecutionProfiler(); + explicit VlExecutionProfiler(VerilatedContext& context); + virtual ~VlExecutionProfiler() = default; // METHODS @@ -174,7 +177,7 @@ public: return t_trace.back(); } // Configure profiler (called in beginning of 'eval') - void configure(const VerilatedContext&); + void configure(); // Setup profiling on a particular thread; void setupThread(uint32_t threadId); // Clear all 
profiling data @@ -182,8 +185,8 @@ public: // Write profiling data into file void dump(const char* filenamep, uint64_t tickEnd) VL_MT_SAFE_EXCLUDES(m_mutex); - // Called via VlStartWorkerCb in VlWorkerThread::startWorker - static void startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId); + // Passed to VerilatedContext to create the VlExecutionProfiler profiler instance + static VerilatedVirtualBase* construct(VerilatedContext& context); }; //============================================================================= diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index a78ea9ae6..6696d738d 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -47,11 +47,9 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount) //============================================================================= // VlWorkerThread -VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) +VlWorkerThread::VlWorkerThread(VerilatedContext* contextp) : m_ready_size{0} - , m_cthread{startWorker, this, threadId, profilerp, startCb} - , m_contextp{contextp} {} + , m_cthread{startWorker, this, contextp} {} VlWorkerThread::~VlWorkerThread() { shutdown(); @@ -59,47 +57,49 @@ VlWorkerThread::~VlWorkerThread() { m_cthread.join(); } -void VlWorkerThread::shutdownTask(void*, bool) { +static void shutdownTask(void*, bool) { // Deliberately empty, we use the address of this function as a magic number } +void VlWorkerThread::shutdown() { addTask(shutdownTask, nullptr); } + +void VlWorkerThread::wait() { + // Enqueue a task that sets this flag. Execution is in-order so this ensures completion. 
+ std::atomic flag{false}; + addTask([](void* flagp, bool) { static_cast*>(flagp)->store(true); }, &flag); + // Spin wait + for (unsigned i = 0; i < VL_LOCK_SPINS; ++i) { + if (flag.load()) return; + VL_CPU_RELAX(); + } + // Yield wait + while (!flag.load()) std::this_thread::yield(); +} + void VlWorkerThread::workerLoop() { ExecRec work; + // Wait for the first task without spinning, in case the thread is never actually used. + dequeWork(&work); + while (true) { - dequeWork(&work); if (VL_UNLIKELY(work.m_fnp == shutdownTask)) break; work.m_fnp(work.m_selfp, work.m_evenCycle); + // Wait for next task with spinning. + dequeWork(&work); } } -void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { - Verilated::threadContextp(workerp->m_contextp); - if (VL_UNLIKELY(startCb)) startCb(profilerp, threadId); +void VlWorkerThread::startWorker(VlWorkerThread* workerp, VerilatedContext* contextp) { + Verilated::threadContextp(contextp); workerp->workerLoop(); } //============================================================================= // VlThreadPool -VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { - // --threads N passes nThreads=N-1, as the "main" threads counts as 1 - ++nThreads; - const unsigned cpus = std::thread::hardware_concurrency(); - if (cpus < nThreads) { - static int warnedOnce = 0; - if (!warnedOnce++) { - VL_PRINTF_MT("%%Warning: System has %u CPUs but model Verilated with" - " --threads %d; may run slow.\n", - cpus, nThreads); - } - } - // Create worker threads - for (uint32_t threadId = 1; threadId < nThreads; ++threadId) { - m_workers.push_back(new VlWorkerThread{threadId, contextp, profilerp, startCb}); - } +VlThreadPool::VlThreadPool(VerilatedContext* contextp, unsigned nThreads) { + for (unsigned i = 0; i < nThreads; ++i) m_workers.push_back(new VlWorkerThread{contextp}); } 
VlThreadPool::~VlThreadPool() { diff --git a/include/verilated_threads.h b/include/verilated_threads.h index eeb8f9342..fdb45580e 100644 --- a/include/verilated_threads.h +++ b/include/verilated_threads.h @@ -60,9 +60,6 @@ using VlSelfP = void*; using VlExecFnp = void (*)(VlSelfP, bool); -// VlWorkerThread::startWorker callback, used to hook in VlExecutionProfiler -using VlStartWorkerCb = void (*)(VlExecutionProfiler*, uint32_t threadId); - // Track dependencies for a single MTask. class VlMTaskVertex final { // MEMBERS @@ -166,24 +163,23 @@ private: std::atomic m_ready_size; std::thread m_cthread; // Underlying C++ thread record - VerilatedContext* const m_contextp; // Context for spawned thread VL_UNCOPYABLE(VlWorkerThread); public: // CONSTRUCTORS - explicit VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); + explicit VlWorkerThread(VerilatedContext* contextp); ~VlWorkerThread(); // METHODS + template // inline void dequeWork(ExecRec* workp) VL_MT_SAFE_EXCLUDES(m_mutex) { // Spin for a while, waiting for new data - for (int i = 0; i < VL_LOCK_SPINS; ++i) { - if (VL_LIKELY(m_ready_size.load(std::memory_order_relaxed))) { // - break; + if VL_CONSTEXPR_CXX17 (SpinWait) { + for (unsigned i = 0; i < VL_LOCK_SPINS; ++i) { + if (VL_LIKELY(m_ready_size.load(std::memory_order_relaxed))) break; + VL_CPU_RELAX(); } - VL_CPU_RELAX(); } VerilatedLockGuard lock{m_mutex}; while (m_ready.empty()) { @@ -197,7 +193,7 @@ public: m_ready.erase(m_ready.begin()); m_ready_size.fetch_sub(1, std::memory_order_relaxed); } - inline void addTask(VlExecFnp fnp, VlSelfP selfp, bool evenCycle) + inline void addTask(VlExecFnp fnp, VlSelfP selfp, bool evenCycle = false) VL_MT_SAFE_EXCLUDES(m_mutex) { bool notify; { @@ -209,15 +205,14 @@ public: if (notify) m_cv.notify_one(); } - inline void shutdown() { addTask(shutdownTask, nullptr, false); } - static void shutdownTask(void*, bool); + void shutdown(); // Finish current 
tasks, then terminate thread + void wait(); // Blocks calling thread until all tasks complete in this thread void workerLoop(); - static void startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); + static void startWorker(VlWorkerThread* workerp, VerilatedContext* contextp); }; -class VlThreadPool final { +class VlThreadPool final : public VerilatedVirtualBase { // MEMBERS std::vector m_workers; // our workers @@ -226,9 +221,8 @@ public: // Construct a thread pool with 'nThreads' dedicated threads. The thread // pool will create these threads and make them available to execute tasks // via this->workerp(index)->addTask(...) - VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp, - VlStartWorkerCb startCb); - ~VlThreadPool(); + VlThreadPool(VerilatedContext* contextp, unsigned nThreads); + virtual ~VlThreadPool(); // METHODS inline int numThreads() const { return m_workers.size(); } diff --git a/include/verilated_trace.h b/include/verilated_trace.h index 7915c3645..0d0f7c0f6 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -194,8 +194,6 @@ private: static void parallelWorkerTask(void*, bool); #endif - using ParallelCallbackMap = std::unordered_map>; - protected: uint32_t* m_sigs_oldvalp = nullptr; // Previous value store EData* m_sigs_enabledp = nullptr; // Bit vector of enabled codes (nullptr = all on) @@ -203,10 +201,10 @@ private: uint64_t m_timeLastDump = 0; // Last time we did a dump std::vector m_sigs_enabledVec; // Staging for m_sigs_enabledp std::vector m_initCbs; // Routines to initialize tracing - ParallelCallbackMap m_fullCbs; // Routines to perform full dump - ParallelCallbackMap m_chgCbs; // Routines to perform incremental dump + std::vector m_fullCbs; // Routines to perform full dump + std::vector m_chgCbs; // Routines to perform incremental dump std::vector m_cleanupCbs; // Routines to call at the end of dump - std::vector 
m_threadPoolps; // All thread pools, in insertion order + VerilatedContext* m_contextp = nullptr; // The context used by the traced models bool m_fullDump = true; // Whether a full dump is required on the next call to 'dump' uint32_t m_nextCode = 0; // Next code number to assign uint32_t m_numSignals = 0; // Number of distinct signals @@ -217,16 +215,16 @@ private: double m_timeRes = 1e-9; // Time resolution (ns/ms etc) double m_timeUnit = 1e-0; // Time units (ns/ms etc) - void addThreadPool(VlThreadPool* threadPoolp) VL_MT_SAFE_EXCLUDES(m_mutex); + void addContext(VerilatedContext*) VL_MT_SAFE_EXCLUDES(m_mutex); - void addCallbackRecord(std::vector& cbVec, CallbackRecord& cbRec) + void addCallbackRecord(std::vector& cbVec, CallbackRecord&& cbRec) VL_MT_SAFE_EXCLUDES(m_mutex); // Equivalent to 'this' but is of the sub-type 'T_Trace*'. Use 'self()->' // to access duck-typed functions to avoid a virtual function call. T_Trace* self() { return static_cast(this); } - void runParallelCallbacks(const ParallelCallbackMap& cbMap); + void runCallbacks(const std::vector& cbVec); // Flush any remaining data for this file static void onFlush(void* selfp) VL_MT_UNSAFE_ONE; @@ -341,10 +339,10 @@ public: //========================================================================= // Non-hot path internal interface to Verilator generated code - void addInitCb(initCb_t cb, void* userp) VL_MT_SAFE; - void addFullCb(dumpCb_t cb, void* userp, VlThreadPool* = nullptr) VL_MT_SAFE; - void addChgCb(dumpCb_t cb, void* userp, VlThreadPool* = nullptr) VL_MT_SAFE; - void addCleanupCb(cleanupCb_t cb, void* userp) VL_MT_SAFE; + void addInitCb(initCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; + void addFullCb(dumpCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; + void addChgCb(dumpCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; + void addCleanupCb(cleanupCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; void scopeEscape(char flag) { m_scopeEscape = flag; } diff --git 
a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index d2ffa965c..a09ac0f43 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -478,55 +478,52 @@ template <> VL_ATTR_NOINLINE void VerilatedTrace::ParallelWo #endif template <> -void VerilatedTrace::runParallelCallbacks(const ParallelCallbackMap& cbMap) { - for (VlThreadPool* threadPoolp : m_threadPoolps) { +void VerilatedTrace::runCallbacks(const std::vector& cbVec) { #ifdef VL_TRACE_PARALLEL - // If tracing in parallel, dispatch to the thread pool (if exists) - if (threadPoolp && threadPoolp->numThreads()) { - // List of work items for thread (std::list, as ParallelWorkerData is not movable) - std::list workerData; - // We use the whole pool + the main thread - const unsigned threads = threadPoolp->numThreads() + 1; - // Main thread executes all jobs with index % threads == 0 - std::vector mainThreadWorkerData; - // The tracing callbacks to execute on this thread-pool - const auto& cbVec = cbMap.at(threadPoolp); - // Enuque all the jobs - for (unsigned i = 0; i < cbVec.size(); ++i) { - const CallbackRecord& cbr = cbVec[i]; - // Always get the trace buffer on the main thread - Buffer* const bufp = getTraceBuffer(); - // Create new work item - workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp); - // Grab the new work item - ParallelWorkerData* const itemp = &workerData.back(); - // Enqueue task to thread pool, or main thread - if (unsigned rem = i % threads) { - threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp, false); - } else { - mainThreadWorkerData.push_back(itemp); - } - } - // Execute main thead jobs - for (ParallelWorkerData* const itemp : mainThreadWorkerData) { - parallelWorkerTask(itemp, false); - } - // Commit all trace buffers in order - for (ParallelWorkerData& item : workerData) { - // Wait until ready - item.wait(); - // Commit the buffer - commitTraceBuffer(item.m_bufp); - } - continue; + // If tracing in parallel, dispatch to the 
thread pool + VlThreadPool* threadPoolp = static_cast(m_contextp->threadPoolp()); + // List of work items for thread (std::list, as ParallelWorkerData is not movable) + std::list workerData; + // We use the whole pool + the main thread + const unsigned threads = threadPoolp->numThreads() + 1; + // Main thread executes all jobs with index % threads == 0 + std::vector mainThreadWorkerData; + // Enuque all the jobs + for (unsigned i = 0; i < cbVec.size(); ++i) { + const CallbackRecord& cbr = cbVec[i]; + // Always get the trace buffer on the main thread + Buffer* const bufp = getTraceBuffer(); + // Create new work item + workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp); + // Grab the new work item + ParallelWorkerData* const itemp = &workerData.back(); + // Enqueue task to thread pool, or main thread + if (unsigned rem = i % threads) { + threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp); + } else { + mainThreadWorkerData.push_back(itemp); } + } + // Execute main thead jobs + for (ParallelWorkerData* const itemp : mainThreadWorkerData) { + parallelWorkerTask(itemp, false); + } + // Commit all trace buffers in order + for (ParallelWorkerData& item : workerData) { + // Wait until ready + item.wait(); + // Commit the buffer + commitTraceBuffer(item.m_bufp); + } + + // Done + return; #endif - // Fall back on sequential execution - for (const CallbackRecord& cbr : cbMap.at(threadPoolp)) { - Buffer* const traceBufferp = getTraceBuffer(); - cbr.m_dumpCb(cbr.m_userp, traceBufferp); - commitTraceBuffer(traceBufferp); - } + // Fall back on sequential execution + for (const CallbackRecord& cbr : cbVec) { + Buffer* const traceBufferp = getTraceBuffer(); + cbr.m_dumpCb(cbr.m_userp, traceBufferp); + commitTraceBuffer(traceBufferp); } } @@ -579,9 +576,9 @@ void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUD // Run the callbacks if (VL_UNLIKELY(m_fullDump)) { m_fullDump = false; // No more need for next dump to be full - runParallelCallbacks(m_fullCbs); 
+ runCallbacks(m_fullCbs); } else { - runParallelCallbacks(m_chgCbs); + runCallbacks(m_chgCbs); } for (uint32_t i = 0; i < m_cleanupCbs.size(); ++i) { @@ -607,18 +604,20 @@ void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUD // Non-hot path internal interface to Verilator generated code template <> -void VerilatedTrace::addThreadPool(VlThreadPool* threadPoolp) +void VerilatedTrace::addContext(VerilatedContext* contextp) VL_MT_SAFE_EXCLUDES(m_mutex) { const VerilatedLockGuard lock{m_mutex}; - for (VlThreadPool* const poolp : m_threadPoolps) { - if (poolp == threadPoolp) return; + if (m_contextp && contextp != m_contextp) { + VL_FATAL_MT( + __FILE__, __LINE__, "", + "A trace file instance can only handle models from the same simulation context"); } - m_threadPoolps.push_back(threadPoolp); + m_contextp = contextp; } template <> void VerilatedTrace::addCallbackRecord(std::vector& cbVec, - CallbackRecord& cbRec) + CallbackRecord&& cbRec) VL_MT_SAFE_EXCLUDES(m_mutex) { const VerilatedLockGuard lock{m_mutex}; if (VL_UNCOVERABLE(timeLastDump() != 0)) { // LCOV_EXCL_START @@ -630,28 +629,28 @@ void VerilatedTrace::addCallbackRecord(std::vector -void VerilatedTrace::addInitCb(initCb_t cb, void* userp) VL_MT_SAFE { - CallbackRecord cbr{cb, userp}; - addCallbackRecord(m_initCbs, cbr); +void VerilatedTrace::addInitCb(initCb_t cb, void* userp, + VerilatedContext* contextp) VL_MT_SAFE { + addContext(contextp); + addCallbackRecord(m_initCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addFullCb(dumpCb_t cb, void* userp, - VlThreadPool* threadPoolp) VL_MT_SAFE { - CallbackRecord cbr{cb, userp}; - addThreadPool(threadPoolp); - addCallbackRecord(m_fullCbs[threadPoolp], cbr); + VerilatedContext* contextp) VL_MT_SAFE { + addContext(contextp); + addCallbackRecord(m_fullCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addChgCb(dumpCb_t cb, void* userp, - VlThreadPool* threadPoolp) VL_MT_SAFE { - CallbackRecord cbr{cb, userp}; - 
addThreadPool(threadPoolp); - addCallbackRecord(m_chgCbs[threadPoolp], cbr); + VerilatedContext* contextp) VL_MT_SAFE { + addContext(contextp); + addCallbackRecord(m_chgCbs, CallbackRecord{cb, userp}); } template <> -void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* userp) VL_MT_SAFE { - CallbackRecord cbr{cb, userp}; - addCallbackRecord(m_cleanupCbs, cbr); +void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* userp, + VerilatedContext* contextp) VL_MT_SAFE { + addContext(contextp); + addCallbackRecord(m_cleanupCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::pushNamePrefix(const std::string& prefix) { diff --git a/src/V3EmitCHeaders.cpp b/src/V3EmitCHeaders.cpp index ef53dfa16..77835212b 100644 --- a/src/V3EmitCHeaders.cpp +++ b/src/V3EmitCHeaders.cpp @@ -251,15 +251,15 @@ class EmitCHeader final : public EmitCConstInit { emitTextSection(modp, VNType::atScHdr); // Open class body {{{ + puts("\nclass "); + puts(prefixNameProtect(modp)); if (const AstClass* const classp = VN_CAST(modp, Class)) { - puts("class "); - puts(prefixNameProtect(modp)); if (classp->extendsp()) { puts(" : public "); puts(prefixNameProtect(classp->extendsp()->classp())); } } else { - puts("VL_MODULE(" + prefixNameProtect(modp) + ")"); + puts(" final : public VerilatedModule"); } puts(" {\n"); ofp()->resetPrivate(); diff --git a/src/V3EmitCMake.cpp b/src/V3EmitCMake.cpp index 7df71dfeb..710829eaf 100644 --- a/src/V3EmitCMake.cpp +++ b/src/V3EmitCMake.cpp @@ -173,7 +173,7 @@ class CMakeEmitter final { + ".cpp"); } } - if (v3Global.opt.mtasks()) { + if (v3Global.opt.threads()) { global.emplace_back("${VERILATOR_ROOT}/include/verilated_threads.cpp"); } if (v3Global.opt.usesProfiler()) { diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index e04c79f7e..203582609 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -89,11 +89,12 @@ class EmitCModel final : public EmitCFunc { puts("\n"); puts("// This class is the main interface to the Verilated 
model\n"); + puts("class " + topClassName() + " VL_NOT_FINAL : "); if (optSystemC()) { - puts("SC_MODULE(" + topClassName() + ") {\n"); - } else { - puts("class " + topClassName() + " VL_NOT_FINAL {\n"); + // SC_MODULE, but with multiple-inheritance of VerilatedModel + puts("public ::sc_core::sc_module, "); } + puts("public VerilatedModel {\n"); ofp()->resetPrivate(); ofp()->putsPrivate(true); // private: @@ -221,6 +222,11 @@ class EmitCModel final : public EmitCFunc { + topClassName() + "& rhs);\n"); } + puts("\n// Abstract methods from VerilatedModel\n"); + puts("const char* hierName() override;\n"); + puts("const char* modelName() override;\n"); + puts("unsigned threads() override;\n"); + puts("} VL_ATTR_ALIGNED(VL_CACHE_LINE_BYTES);\n"); ofp()->putsEndGuard(); @@ -235,7 +241,8 @@ class EmitCModel final : public EmitCFunc { puts(topClassName() + "::" + topClassName()); if (optSystemC()) { puts("(sc_module_name /* unused */)\n"); - puts(" : vlSymsp{new " + symClassName() + "(nullptr, name(), this)}\n"); + puts(" : vlSymsp{new " + symClassName() + + "(Verilated::threadContextp(), name(), this)}\n"); } else { puts(+"(VerilatedContext* _vcontextp__, const char* _vcname__)\n"); puts(" : vlSymsp{new " + symClassName() + "(_vcontextp__, _vcname__, this)}\n"); @@ -263,6 +270,8 @@ class EmitCModel final : public EmitCFunc { puts(" , rootp{&(vlSymsp->TOP)}\n"); puts("{\n"); + puts("// Register model with the context\n"); + puts("vlSymsp->_vm_contextp__->addModel(this);\n"); if (optSystemC()) { // Create sensitivity list for when to evaluate the model. 
@@ -301,7 +310,7 @@ class EmitCModel final : public EmitCFunc { if (!optSystemC()) { puts("\n"); puts(topClassName() + "::" + topClassName() + "(const char* _vcname__)\n"); - puts(" : " + topClassName() + "(nullptr, _vcname__)\n{\n}\n"); + puts(" : " + topClassName() + "(Verilated::threadContextp(), _vcname__)\n{\n}\n"); } } @@ -428,7 +437,7 @@ class EmitCModel final : public EmitCFunc { } if (v3Global.opt.profExec()) { - puts("vlSymsp->__Vm_executionProfiler.configure(*(vlSymsp->_vm_contextp__));\n"); + puts("vlSymsp->__Vm_executionProfilerp->configure();\n"); puts("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).evalBegin();\n"); } @@ -477,6 +486,13 @@ class EmitCModel final : public EmitCFunc { puts("\nVL_ATTR_COLD void " + topClassName() + "::final() {\n"); puts(/**/ topModNameProtected + "__" + protect("_final") + "(&(vlSymsp->TOP));\n"); puts("}\n"); + + putSectionDelimiter("Implementations of abstract methods from VerilatedModel\n"); + puts("const char* " + topClassName() + "::hierName() { return vlSymsp->name(); }\n"); + puts("const char* " + topClassName() + "::modelName() { return \"" + topClassName() + + "\"; }\n"); + puts("unsigned " + topClassName() + "::threads() { return " + + cvtToStr(std::max(1, v3Global.opt.threads())) + "; }\n"); } void emitTraceMethods(AstNodeModule* modp) { @@ -529,7 +545,8 @@ class EmitCModel final : public EmitCFunc { puts(/**/ "}"); } puts(/**/ "if (false && levels && options) {} // Prevent unused\n"); - puts(/**/ "tfp->spTrace()->addInitCb(&" + protect("trace_init") + ", &(vlSymsp->TOP));\n"); + puts(/**/ "tfp->spTrace()->addInitCb(&" + protect("trace_init") + + ", &(vlSymsp->TOP), contextp());\n"); puts(/**/ topModNameProtected + "__" + protect("trace_register") + "(&(vlSymsp->TOP), tfp->spTrace());\n"); diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index c66f346b0..e325aa79e 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -445,17 +445,17 @@ void EmitCSyms::emitSymHdr() { } puts("bool __Vm_didInit = false;\n"); - 
if (v3Global.opt.profExec()) { - puts("\n// EXECUTION PROFILING\n"); - puts("VlExecutionProfiler __Vm_executionProfiler;\n"); - } - if (v3Global.opt.mtasks()) { puts("\n// MULTI-THREADING\n"); puts("VlThreadPool* const __Vm_threadPoolp;\n"); puts("bool __Vm_even_cycle = false;\n"); } + if (v3Global.opt.profExec()) { + puts("\n// EXECUTION PROFILING\n"); + puts("VlExecutionProfiler* const __Vm_executionProfilerp;\n"); + } + puts("\n// MODULE INSTANCE STATE\n"); for (const auto& i : m_scopes) { const AstScope* const scopep = i.first; @@ -673,7 +673,6 @@ void EmitCSyms::emitSymImp() { puts("_vm_pgoProfiler.write(\"" + topClassName() + "\", _vm_contextp__->profVltFilename());\n"); } - if (v3Global.opt.mtasks()) puts("delete __Vm_threadPoolp;\n"); puts("}\n\n"); // Constructor @@ -705,12 +704,13 @@ void EmitCSyms::emitSymImp() { // Note we create N-1 threads in the thread pool. The thread // that calls eval() becomes the final Nth thread for the // duration of the eval call. - puts(" , __Vm_threadPoolp{new VlThreadPool{_vm_contextp__, " - + cvtToStr(v3Global.opt.threads() - 1) + ", " - + (v3Global.opt.profExec() - ? 
"&__Vm_executionProfiler, &VlExecutionProfiler::startWorkerSetup" - : "nullptr, nullptr") - + "}}\n"); + puts(" , __Vm_threadPoolp{static_cast(contextp->threadPoolp())}\n"); + } + + if (v3Global.opt.profExec()) { + puts(" , " + "__Vm_executionProfilerp{static_cast(contextp->" + "enableExecutionProfiler(&VlExecutionProfiler::construct))}\n"); } puts(" // Setup module instances\n"); diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index 3fb3907be..800cf589c 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -116,7 +116,7 @@ public: putMakeClassEntry(of, v3Global.opt.traceSourceLang() + ".cpp"); } } - if (v3Global.opt.mtasks()) putMakeClassEntry(of, "verilated_threads.cpp"); + if (v3Global.opt.threads()) putMakeClassEntry(of, "verilated_threads.cpp"); if (v3Global.opt.usesProfiler()) { putMakeClassEntry(of, "verilated_profiler.cpp"); } diff --git a/src/V3Trace.cpp b/src/V3Trace.cpp index 9fa1b099a..7113c5e0e 100644 --- a/src/V3Trace.cpp +++ b/src/V3Trace.cpp @@ -512,8 +512,10 @@ private: m_regFuncp->addStmtsp(new AstText(flp, "tracep->addChgCb(", true)); } m_regFuncp->addStmtsp(new AstAddrOfCFunc(flp, funcp)); - const string threadPool{m_parallelism > 1 ? 
"vlSymsp->__Vm_threadPoolp" : "nullptr"}; - m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf, " + threadPool + ");\n", true)); + m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf", true)); + m_regFuncp->addStmtsp( + new AstText(flp, ", vlSelf->vlSymsp->__Vm_modelp->contextp()", true)); + m_regFuncp->addStmtsp(new AstText(flp, ");\n", true)); } else { // Sub functions funcp->argTypes(v3Global.opt.traceClassBase() + "::Buffer* bufp"); @@ -700,7 +702,8 @@ private: // Register it m_regFuncp->addStmtsp(new AstText(fl, "tracep->addCleanupCb(", true)); m_regFuncp->addStmtsp(new AstAddrOfCFunc(fl, cleanupFuncp)); - m_regFuncp->addStmtsp(new AstText(fl, ", vlSelf);\n", true)); + m_regFuncp->addStmtsp( + new AstText(fl, ", vlSelf, vlSelf->vlSymsp->__Vm_modelp->contextp());\n", true)); // Clear global activity flag cleanupFuncp->addStmtsp( diff --git a/test_regress/driver.pl b/test_regress/driver.pl index cbd9ba9ea..ae0ed4f36 100755 --- a/test_regress/driver.pl +++ b/test_regress/driver.pl @@ -579,6 +579,7 @@ sub new { sc_time_resolution => "SC_PS", # Keep - PS is SystemC default sim_time => 1100, threads => -1, # --threads (negative means auto based on scenario) + context_threads => 0, # Number of threads to allocate in the context benchmark => $opt_benchmark, verbose => $opt_verbose, run_env => '', @@ -974,7 +975,11 @@ sub compile { $self->oprint("Compile\n") if $self->{verbose}; die "%Error: 'threads =>' argument must be <= 1 for vlt scenario" if $param{vlt} && $param{threads} > 1; - $param{threads} = ::calc_threads($Vltmt_threads) if ($param{threads} < 0 && $param{vltmt}); + # Compute automatic parameter values + $param{threads} = ::calc_threads($Vltmt_threads) if $param{threads} < 0 && $param{vltmt}; + $param{context_threads} = $param{threads} >= 1 ? 
$param{threads} : 1 if !$param{context_threads}; + $self->{threads} = $param{threads}; + $self->{context_threads} = $param{context_threads}; compile_vlt_cmd(%param); @@ -1795,6 +1800,7 @@ sub _make_main { } print $fh " const std::unique_ptr contextp{new VerilatedContext};\n"; + print $fh " contextp->threads($self->{context_threads});\n"; print $fh " contextp->commandArgs(argc, argv);\n"; print $fh " contextp->debug(" . ($self->{verilated_debug} ? 1 : 0) . ");\n"; print $fh " srand48(5);\n"; # Ensure determinism diff --git a/test_regress/t/t_embed1.pl b/test_regress/t/t_embed1.pl index 08e4c042c..2c5b8a918 100755 --- a/test_regress/t/t_embed1.pl +++ b/test_regress/t/t_embed1.pl @@ -22,7 +22,8 @@ mkdir $child_dir; (VM_PREFIX => "$Self->{VM_PREFIX}_child", top_filename => "$Self->{name}_child.v", verilator_flags => ["-cc", "-Mdir", "${child_dir}", "--debug-check"], - threads => $Self->{vltmt} ? $Self->get_default_vltmt_threads() : 0 + # Can't use multi threading (like hier blocks), but needs to be thread safe + threads => $Self->{vltmt} ? 1 : 0, ); run(logfile => "${child_dir}/vlt_compile.log", diff --git a/test_regress/t/t_gantt_two.cpp b/test_regress/t/t_gantt_two.cpp new file mode 100644 index 000000000..da253fab7 --- /dev/null +++ b/test_regress/t/t_gantt_two.cpp @@ -0,0 +1,43 @@ +// +// DESCRIPTION: Verilator: Verilog Multiple Model Test Module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Geza Lore. 
+// SPDX-License-Identifier: CC0-1.0 +// + +#include +#include "verilated.h" +#include "Vt_gantt_two.h" + +int main(int argc, char** argv, char** env) { + srand48(5); + + const std::unique_ptr contextp{new VerilatedContext}; +#ifdef VL_THREADED + contextp->threads(2); +#endif + contextp->commandArgs(argc, argv); + contextp->debug(0); + + std::unique_ptr topap{new Vt_gantt_two{contextp.get(), "topa"}}; + std::unique_ptr topbp{new Vt_gantt_two{contextp.get(), "topb"}}; + + topap->clk = false; + topap->eval(); + topbp->clk = false; + topbp->eval(); + + contextp->timeInc(10); + while ((contextp->time() < 1100) && !contextp->gotFinish()) { + topap->clk = !topap->clk; + topap->eval(); + topbp->clk = !topbp->clk; + topbp->eval(); + contextp->timeInc(5); + } + if (!contextp->gotFinish()) { + vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); + } + return 0; +} diff --git a/test_regress/t/t_gantt_two.pl b/test_regress/t/t_gantt_two.pl new file mode 100755 index 000000000..768f55440 --- /dev/null +++ b/test_regress/t/t_gantt_two.pl @@ -0,0 +1,61 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +# Test for bin/verilator_gantt, + +scenarios(vlt_all => 1); + +# It doesn't really matter what test +# we use, so long as it runs several cycles, +# enough for the profiling to happen: +top_filename("t/t_gen_alw.v"); + +compile( + make_top_shell => 0, + make_main => 0, + v_flags2 => ["--prof-exec --exe $Self->{t_dir}/$Self->{name}.cpp"], + # Checks below care about thread count, so use 2 (minimum reasonable) + threads => $Self->{vltmt} ? 
2 : 0, + make_flags => 'CPPFLAGS_ADD=-DVL_NO_LEGACY', + ); + +execute( + all_run_flags => ["+verilator+prof+exec+start+4", + " +verilator+prof+exec+window+4", + " +verilator+prof+exec+file+$Self->{obj_dir}/profile_exec.dat", + " +verilator+prof+vlt+file+$Self->{obj_dir}/profile.vlt", + ], + check_finished => 1, + ); + +# For now, verilator_gantt still reads from STDIN +# (probably it should take a file, gantt.dat like verilator_profcfunc) +# The profiling data still goes direct to the runtime's STDOUT +# (maybe that should go to a separate file - gantt.dat?) +run(cmd => ["$ENV{VERILATOR_ROOT}/bin/verilator_gantt", + "$Self->{obj_dir}/profile_exec.dat", + "--vcd $Self->{obj_dir}/profile_exec.vcd", + "| tee $Self->{obj_dir}/gantt.log"], + ); + +if ($Self->{vltmt}) { + file_grep("$Self->{obj_dir}/gantt.log", qr/Total threads += 2/i); + file_grep("$Self->{obj_dir}/gantt.log", qr/Total mtasks += 7/i); +} else { + file_grep("$Self->{obj_dir}/gantt.log", qr/Total threads += 1/i); + file_grep("$Self->{obj_dir}/gantt.log", qr/Total mtasks += 0/i); +} +file_grep("$Self->{obj_dir}/gantt.log", qr/Total evals += 4/i); + +# Diff to itself, just to check parsing +vcd_identical("$Self->{obj_dir}/profile_exec.vcd", "$Self->{obj_dir}/profile_exec.vcd"); + +ok(1); +1; diff --git a/test_regress/t/t_hier_block_cmake/main.cpp b/test_regress/t/t_hier_block_cmake/main.cpp index e49101162..58bc8d5ad 100644 --- a/test_regress/t/t_hier_block_cmake/main.cpp +++ b/test_regress/t/t_hier_block_cmake/main.cpp @@ -14,8 +14,11 @@ int main(int argc, char *argv[]) { const std::unique_ptr contextp{new VerilatedContext}; - std::unique_ptr top{new Vt_hier_block{contextp.get(), "top"}}; +#if VL_THREADED + contextp->threads(6); +#endif contextp->commandArgs(argc, argv); + std::unique_ptr top{new Vt_hier_block{contextp.get(), "top"}}; for (int i = 0; i < 100 && !contextp->gotFinish(); ++i) { top->eval(); top->clk ^= 1; diff --git a/test_regress/t/t_lib_prot_shared.pl b/test_regress/t/t_lib_prot_shared.pl 
index 1a3f8af5f..cc0c2f977 100755 --- a/test_regress/t/t_lib_prot_shared.pl +++ b/test_regress/t/t_lib_prot_shared.pl @@ -59,7 +59,8 @@ while (1) { "-LDFLAGS", "'-Wl,-rpath,$abs_secret_dir -L$abs_secret_dir -l$secret_prefix'"], xsim_flags2 => ["$secret_dir/secret.sv"], - threads => $Self->{vltmt} ? 1 : 0 + threads => $Self->{vltmt} ? 1 : 0, + context_threads => $Self->{vltmt} ? 6 : 1 ); execute( diff --git a/test_regress/t/t_threads_crazy.pl b/test_regress/t/t_threads_crazy.pl index 6bb21acb0..c72858f2c 100755 --- a/test_regress/t/t_threads_crazy.pl +++ b/test_regress/t/t_threads_crazy.pl @@ -10,20 +10,16 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di scenarios(vltmt => 1); -if ($Self->cfg_with_m32) { - skip("Does not work with -m32 (resource unavailable)"); -} - compile( verilator_flags2 => ['--cc'], - threads => 1024 + threads => 4, + context_threads => 2 ); execute( - check_finished => 1, + fails => 1 ); -file_grep($Self->{run_log_filename}, qr/System has .* CPUs but.*--threads 1024/); - +file_grep($Self->{run_log_filename}, qr/%Error: .*\/verilated\.cpp:\d+: VerilatedContext has 2 threads but model 'Vt_threads_crazy' \(instantiated as 'top'\) was Verilated with --threads 4\./); ok(1); 1; diff --git a/test_regress/t/t_threads_crazy_context.pl b/test_regress/t/t_threads_crazy_context.pl new file mode 100755 index 000000000..8e28bb87a --- /dev/null +++ b/test_regress/t/t_threads_crazy_context.pl @@ -0,0 +1,36 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003-2009 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(vlt_all => 1); + +if ($Self->cfg_with_m32) { + skip("Does not work with -m32 (resource unavailable)"); +} + +top_filename("t/t_threads_crazy.v"); + +compile( + verilator_flags2 => ['--cc'], + threads => $Self->{vltmt} ? 2 : 0, + context_threads => 1024 + ); + +execute( + check_finished => 1, + ); + +if ($Self->{vltmt}) { + file_grep($Self->{run_log_filename}, qr/System has \d+ hardware threads but simulation thread count set to 1024\. This will likely cause significant slowdown\./); +} else { + file_grep($Self->{run_log_filename}, qr/Verilator run-time library built without VL_THREADS\. Ignoring call to 'VerilatedContext::threads' with argument 1024\./); +} + +ok(1); +1; diff --git a/test_regress/t/t_wrapper_context.cpp b/test_regress/t/t_wrapper_context.cpp index 87332eb8c..31a9334f0 100644 --- a/test_regress/t/t_wrapper_context.cpp +++ b/test_regress/t/t_wrapper_context.cpp @@ -92,6 +92,8 @@ int main(int argc, char** argv, char** env) { std::unique_ptr context1p{new VerilatedContext}; // configuration + context0p->threads(1); + context1p->threads(1); context0p->fatalOnError(false); context1p->fatalOnError(false); context0p->traceEverOn(true); From b61d819fcb03388a51ed589320e39adddc3b454d Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 12 Jul 2022 15:38:24 +0100 Subject: [PATCH 021/119] Move contextp() under VerilatedModel --- include/verilated.cpp | 6 ++++++ include/verilated.h | 8 +++++++- src/V3EmitCModel.cpp | 16 +++++----------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/verilated.cpp b/include/verilated.cpp index cf1d76d8f..d5821b8f7 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -2908,6 +2908,12 @@ void VerilatedImp::versionDump() VL_MT_SAFE { VL_PRINTF_MT(" Version: %s %s\n", Verilated::productName(), Verilated::productVersion()); } +//=========================================================================== +// VerilatedModel:: 
Methods + +VerilatedModel::VerilatedModel(VerilatedContext& context) + : m_context{context} {} + //=========================================================================== // VerilatedModule:: Methods diff --git a/include/verilated.h b/include/verilated.h index bc1d5a3f2..3a4fab003 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -81,6 +81,7 @@ #endif // clang-format on +class VerilatedContext; class VerilatedContextImp; class VerilatedContextImpData; class VerilatedCovContext; @@ -261,11 +262,16 @@ public: class VerilatedModel VL_NOT_FINAL { VL_UNCOPYABLE(VerilatedModel); + VerilatedContext& m_context; // The VerilatedContext this model is instantiated under + protected: - explicit VerilatedModel() = default; + explicit VerilatedModel(VerilatedContext& context); virtual ~VerilatedModel() = default; public: + /// Returns the VerilatedContext this model is instantiated under + /// Used to get to e.g. simulation time via contextp()->time() + inline VerilatedContext* contextp() const { return &m_context; } /// Returns the hierarchical name of this module instance. virtual const char* hierName() = 0; /// Returns the name of this model (the name of the generated model class). diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index 203582609..9522b214f 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -195,9 +195,6 @@ class EmitCModel final : public EmitCFunc { } } - puts("/// Return current simulation context for this model.\n"); - puts("/// Used to get to e.g. 
simulation time via contextp()->time()\n"); - puts("VerilatedContext* contextp() const;\n"); if (!optSystemC()) { puts("/// Retrieve name of this model instance (as passed to constructor).\n"); puts("const char* name() const;\n"); @@ -241,11 +238,12 @@ class EmitCModel final : public EmitCFunc { puts(topClassName() + "::" + topClassName()); if (optSystemC()) { puts("(sc_module_name /* unused */)\n"); - puts(" : vlSymsp{new " + symClassName() - + "(Verilated::threadContextp(), name(), this)}\n"); + puts(" : VerilatedModel{*Verilated::threadContextp()}\n"); + puts(" , vlSymsp{new " + symClassName() + "(contextp(), name(), this)}\n"); } else { puts(+"(VerilatedContext* _vcontextp__, const char* _vcname__)\n"); - puts(" : vlSymsp{new " + symClassName() + "(_vcontextp__, _vcname__, this)}\n"); + puts(" : VerilatedModel{*_vcontextp__}\n"); + puts(" , vlSymsp{new " + symClassName() + "(contextp(), _vcname__, this)}\n"); } // Set up IO references @@ -271,7 +269,7 @@ class EmitCModel final : public EmitCFunc { puts("{\n"); puts("// Register model with the context\n"); - puts("vlSymsp->_vm_contextp__->addModel(this);\n"); + puts("contextp()->addModel(this);\n"); if (optSystemC()) { // Create sensitivity list for when to evaluate the model. @@ -469,10 +467,6 @@ class EmitCModel final : public EmitCFunc { } putSectionDelimiter("Utilities"); - // ::contextp - puts("\nVerilatedContext* " + topClassName() + "::contextp() const {\n"); - puts(/**/ "return vlSymsp->_vm_contextp__;\n"); - puts("}\n"); if (!optSystemC()) { // ::name From 79c901c220d931a27826b7a19382c0790d2241f5 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 12 Jul 2022 15:50:11 +0100 Subject: [PATCH 022/119] Tighten signatures/implementaion of VerilatedModel abstract methods. 
--- include/verilated.h | 6 +++--- src/V3EmitCModel.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/verilated.h b/include/verilated.h index 3a4fab003..ebb1990e8 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -273,11 +273,11 @@ public: /// Used to get to e.g. simulation time via contextp()->time() inline VerilatedContext* contextp() const { return &m_context; } /// Returns the hierarchical name of this module instance. - virtual const char* hierName() = 0; + virtual const char* hierName() const = 0; /// Returns the name of this model (the name of the generated model class). - virtual const char* modelName() = 0; + virtual const char* modelName() const = 0; /// Returns the thread level parallelism, this model was Verilated with. Always 1 or higher. - virtual unsigned threads() = 0; + virtual unsigned threads() const = 0; }; //========================================================================= diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index 9522b214f..04b422a97 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -220,9 +220,9 @@ class EmitCModel final : public EmitCFunc { } puts("\n// Abstract methods from VerilatedModel\n"); - puts("const char* hierName() override;\n"); - puts("const char* modelName() override;\n"); - puts("unsigned threads() override;\n"); + puts("const char* hierName() const override final;\n"); + puts("const char* modelName() const override final;\n"); + puts("unsigned threads() const override final;\n"); puts("} VL_ATTR_ALIGNED(VL_CACHE_LINE_BYTES);\n"); @@ -482,10 +482,10 @@ class EmitCModel final : public EmitCFunc { puts("}\n"); putSectionDelimiter("Implementations of abstract methods from VerilatedModel\n"); - puts("const char* " + topClassName() + "::hierName() { return vlSymsp->name(); }\n"); - puts("const char* " + topClassName() + "::modelName() { return \"" + topClassName() + puts("const char* " + topClassName() + "::hierName() const { return 
vlSymsp->name(); }\n"); + puts("const char* " + topClassName() + "::modelName() const { return \"" + topClassName() + "\"; }\n"); - puts("unsigned " + topClassName() + "::threads() { return " + puts("unsigned " + topClassName() + "::threads() const { return " + cvtToStr(std::max(1, v3Global.opt.threads())) + "; }\n"); } From 457ad07ade56c36cb0ed901fac4c6f34e59c57ef Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 12 Jul 2022 17:51:17 +0100 Subject: [PATCH 023/119] Remove unnecessary static state from V3EmitCFunc --- src/V3EmitCFunc.cpp | 64 +++++++++++++++------------------------------ src/V3EmitCFunc.h | 22 ++++++++++++++++ 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/V3EmitCFunc.cpp b/src/V3EmitCFunc.cpp index 2aa4f2342..ae3f8ce03 100644 --- a/src/V3EmitCFunc.cpp +++ b/src/V3EmitCFunc.cpp @@ -169,30 +169,8 @@ void EmitCFunc::emitOpName(AstNode* nodep, const string& format, AstNode* lhsp, } } -// We only do one display at once, so can just use static state -static struct EmitDispState { - string m_format; // "%s" and text from user - std::vector m_argsChar; // Format of each argument to be printed - std::vector m_argsp; // Each argument to be printed - std::vector m_argsFunc; // Function before each argument to be printed - EmitDispState() { clear(); } - void clear() { - m_format = ""; - m_argsChar.clear(); - m_argsp.clear(); - m_argsFunc.clear(); - } - void pushFormat(const string& fmt) { m_format += fmt; } - void pushFormat(char fmt) { m_format += fmt; } - void pushArg(char fmtChar, AstNode* nodep, const string& func) { - m_argsChar.push_back(fmtChar); - m_argsp.push_back(nodep); - m_argsFunc.push_back(func); - } -} emitDispState; - void EmitCFunc::displayEmit(AstNode* nodep, bool isScan) { - if (emitDispState.m_format == "" + if (m_emitDispState.m_format == "" && VN_IS(nodep, Display)) { // not fscanf etc, as they need to return value // NOP } else { @@ -235,12 +213,12 @@ void EmitCFunc::displayEmit(AstNode* nodep, bool isScan) { } 
else { nodep->v3fatalSrc("Unknown displayEmit node type"); } - ofp()->putsQuoted(emitDispState.m_format); + ofp()->putsQuoted(m_emitDispState.m_format); // Arguments - for (unsigned i = 0; i < emitDispState.m_argsp.size(); i++) { - const char fmt = emitDispState.m_argsChar[i]; - AstNode* const argp = emitDispState.m_argsp[i]; - const string func = emitDispState.m_argsFunc[i]; + for (unsigned i = 0; i < m_emitDispState.m_argsp.size(); i++) { + const char fmt = m_emitDispState.m_argsChar[i]; + AstNode* const argp = m_emitDispState.m_argsp[i]; + const string func = m_emitDispState.m_argsFunc[i]; if (func != "" || argp) { puts(","); ofp()->indentInc(); @@ -265,7 +243,7 @@ void EmitCFunc::displayEmit(AstNode* nodep, bool isScan) { puts(" "); } // Prep for next - emitDispState.clear(); + m_emitDispState.clear(); } } @@ -306,16 +284,16 @@ void EmitCFunc::displayArg(AstNode* dispp, AstNode** elistp, bool isScan, const } else { pfmt = string("%") + vfmt + fmtLetter; } - emitDispState.pushFormat(pfmt); + m_emitDispState.pushFormat(pfmt); if (!ignore) { if (argp->dtypep()->basicp() && argp->dtypep()->basicp()->keyword() == VBasicDTypeKwd::STRING) { // string in SystemVerilog is std::string in C++ which is not POD - emitDispState.pushArg(' ', nullptr, "-1"); + m_emitDispState.pushArg(' ', nullptr, "-1"); } else { - emitDispState.pushArg(' ', nullptr, cvtToStr(argp->widthMin())); + m_emitDispState.pushArg(' ', nullptr, cvtToStr(argp->widthMin())); } - emitDispState.pushArg(fmtLetter, argp, ""); + m_emitDispState.pushArg(fmtLetter, argp, ""); if (fmtLetter == 't' || fmtLetter == '^') { const AstSFormatF* fmtp = nullptr; if (const AstDisplay* const nodep = VN_CAST(dispp, Display)) { @@ -328,10 +306,10 @@ void EmitCFunc::displayArg(AstNode* dispp, AstNode** elistp, bool isScan, const UASSERT_OBJ(fmtp, dispp, "Use of %t must be under AstDisplay, AstSFormat, or AstSFormatF"); UASSERT_OBJ(!fmtp->timeunit().isNone(), fmtp, "timenunit must be set"); - emitDispState.pushArg(' ', 
nullptr, cvtToStr((int)fmtp->timeunit().powerOfTen())); + m_emitDispState.pushArg(' ', nullptr, cvtToStr((int)fmtp->timeunit().powerOfTen())); } } else { - emitDispState.pushArg(fmtLetter, nullptr, ""); + m_emitDispState.pushArg(fmtLetter, nullptr, ""); } } @@ -341,7 +319,7 @@ void EmitCFunc::displayNode(AstNode* nodep, AstScopeName* scopenamep, const stri // Convert Verilog display to C printf formats // "%0t" becomes "%d" - emitDispState.clear(); + m_emitDispState.clear(); string vfmt; string::const_iterator pos = vformat.begin(); bool inPct = false; @@ -353,7 +331,7 @@ void EmitCFunc::displayNode(AstNode* nodep, AstScopeName* scopenamep, const stri ignore = false; vfmt = ""; } else if (!inPct) { // Normal text - emitDispState.pushFormat(*pos); + m_emitDispState.pushFormat(*pos); } else { // Format character inPct = false; switch (tolower(pos[0])) { @@ -374,7 +352,7 @@ void EmitCFunc::displayNode(AstNode* nodep, AstScopeName* scopenamep, const stri inPct = true; // Get more digits break; case '%': - emitDispState.pushFormat("%%"); // We're printf'ing it, so need to quote the % + m_emitDispState.pushFormat("%%"); // We're printf'ing it, so need to quote the % break; case '*': vfmt += pos[0]; @@ -410,17 +388,17 @@ void EmitCFunc::displayNode(AstNode* nodep, AstScopeName* scopenamep, const stri UASSERT_OBJ(scopenamep, nodep, "Display with %m but no AstScopeName"); const string suffix = scopenamep->scopePrettySymName(); if (suffix == "") { - emitDispState.pushFormat("%S"); + m_emitDispState.pushFormat("%S"); } else { - emitDispState.pushFormat("%N"); // Add a . when needed + m_emitDispState.pushFormat("%N"); // Add a . 
when needed } - emitDispState.pushArg(' ', nullptr, "vlSymsp->name()"); - emitDispState.pushFormat(suffix); + m_emitDispState.pushArg(' ', nullptr, "vlSymsp->name()"); + m_emitDispState.pushFormat(suffix); break; } case 'l': { // Better than not compiling - emitDispState.pushFormat("----"); + m_emitDispState.pushFormat("----"); break; } default: diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index 305fa47a7..629b90397 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -122,6 +122,28 @@ private: std::vector m_blkChangeDetVec; // All encountered changes in block bool m_emitConstInit = false; // Emitting constant initializer + // State associated with processing $display style string formatting + struct EmitDispState { + string m_format; // "%s" and text from user + std::vector m_argsChar; // Format of each argument to be printed + std::vector m_argsp; // Each argument to be printed + std::vector m_argsFunc; // Function before each argument to be printed + EmitDispState() { clear(); } + void clear() { + m_format = ""; + m_argsChar.clear(); + m_argsp.clear(); + m_argsFunc.clear(); + } + void pushFormat(const string& fmt) { m_format += fmt; } + void pushFormat(char fmt) { m_format += fmt; } + void pushArg(char fmtChar, AstNode* nodep, const string& func) { + m_argsChar.push_back(fmtChar); + m_argsp.push_back(nodep); + m_argsFunc.push_back(func); + } + } m_emitDispState; + protected: EmitCLazyDecls m_lazyDecls; // Visitor for emitting lazy declarations bool m_useSelfForThis = false; // Replace "this" with "vlSelf" From 7e8bafd21726eaf84802863d3d9300236b8a37b5 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 12 Jul 2022 18:37:26 +0100 Subject: [PATCH 024/119] Remove static data use from PartContraction::siblingPairFromRelatives Use std::sort with lambda rather than qsort with static function and static data. Verilation performance neutral. 
--- src/V3Partition.cpp | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index a4177c214..53f101ae3 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -30,6 +30,7 @@ #include "V3Stats.h" #include "V3UniqueNames.h" +#include #include #include #include @@ -1529,20 +1530,6 @@ private: } } - static const GraphWay* s_shortestWaywardCpInclusiveWay; - static int shortestWaywardCpInclusive(const void* vap, const void* vbp) { - const GraphWay* const wp = s_shortestWaywardCpInclusiveWay; - const LogicMTask* const ap = *reinterpret_cast(vap); - const LogicMTask* const bp = *reinterpret_cast(vbp); - const uint32_t aCp = ap->critPathCost(*wp) + ap->stepCost(); - const uint32_t bCp = bp->critPathCost(*wp) + bp->stepCost(); - if (aCp < bCp) return -1; - if (aCp > bCp) return 1; - if (ap->id() < bp->id()) return -1; - if (ap->id() > bp->id()) return 1; - return 0; - } - void siblingPairFromRelatives(GraphWay way, V3GraphVertex* mtaskp, bool exhaustive) { std::vector shortestPrereqs; @@ -1556,10 +1543,13 @@ private: if (shortestPrereqs.empty()) return; - // qsort_r would be nice here, but it isn't portable - s_shortestWaywardCpInclusiveWay = &way; - qsort(&shortestPrereqs[0], shortestPrereqs.size(), sizeof(LogicMTask*), - &shortestWaywardCpInclusive); + std::sort(shortestPrereqs.begin(), shortestPrereqs.end(), + [way](const LogicMTask* ap, const LogicMTask* bp) { + const uint32_t aCp = ap->critPathCost(way) + ap->stepCost(); + const uint32_t bCp = bp->critPathCost(way) + bp->stepCost(); + if (aCp != bCp) return aCp < bCp; + return ap->id() < bp->id(); + }); // Don't make all NxN/2 possible pairs of prereqs, that's a lot // to cart around. Just make a few pairs. 
@@ -1691,8 +1681,6 @@ private: VL_UNCOPYABLE(PartContraction); }; -const GraphWay* PartContraction::s_shortestWaywardCpInclusiveWay = nullptr; - //###################################################################### // DpiImportCallVisitor From 87f1e06c4187cce7afbbee2e2db1fe12389e5076 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 12 Jul 2022 18:54:17 +0100 Subject: [PATCH 025/119] Small algorithmic improvement of PartContraction::siblingPairFromRelatives Use std::partial_sort for the non-exhaustive case. This is O(n) instead of O(n*log(n)) in the size of the candidate list being sorted. (It actually is O(n*log(k)), but k is constant 6 in the non-exhaustive case). --- src/V3Partition.cpp | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 53f101ae3..6b6aea6d7 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -1541,25 +1541,32 @@ private: if (shortestPrereqs.size() > PART_SIBLING_EDGE_LIMIT) break; } - if (shortestPrereqs.empty()) return; + if (shortestPrereqs.size() <= 1) return; - std::sort(shortestPrereqs.begin(), shortestPrereqs.end(), - [way](const LogicMTask* ap, const LogicMTask* bp) { - const uint32_t aCp = ap->critPathCost(way) + ap->stepCost(); - const uint32_t bCp = bp->critPathCost(way) + bp->stepCost(); - if (aCp != bCp) return aCp < bCp; - return ap->id() < bp->id(); - }); + const auto cmp = [way](const LogicMTask* ap, const LogicMTask* bp) { + const uint32_t aCp = ap->critPathCost(way) + ap->stepCost(); + const uint32_t bCp = bp->critPathCost(way) + bp->stepCost(); + if (aCp != bCp) return aCp < bCp; + return ap->id() < bp->id(); + }; - // Don't make all NxN/2 possible pairs of prereqs, that's a lot - // to cart around. Just make a few pairs. 
- auto it = shortestPrereqs.cbegin(); - for (unsigned i = 0; exhaustive || (i < 3); ++i) { - if (it == shortestPrereqs.cend()) break; - LogicMTask* const ap = *(it++); - if (it == shortestPrereqs.cend()) break; - LogicMTask* const bp = *(it++); - makeSiblingMC(ap, bp); + // Don't make all possible pairs of prereqs when not requested (non-exhaustive). + // Just make a few pairs. + constexpr size_t MAX_NONEXHAUSTIVE_PAIRS = 3; + + size_t end; // End index of pairs to add to candidates (exclusive) + + if (exhaustive || (shortestPrereqs.size() <= 2 * MAX_NONEXHAUSTIVE_PAIRS)) { + end = shortestPrereqs.size() & ~static_cast(1); // Round down to even + std::sort(shortestPrereqs.begin(), shortestPrereqs.end(), cmp); + } else { + end = 2 * MAX_NONEXHAUSTIVE_PAIRS; + std::partial_sort(shortestPrereqs.begin(), shortestPrereqs.begin() + end, + shortestPrereqs.end(), cmp); + } + + for (size_t i = 0; i < end; i += 2) { + makeSiblingMC(shortestPrereqs[i], shortestPrereqs[i + 1]); } } From 63507e8e29c0c0e1f659d7f7d0cb12205feb8158 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Tue, 12 Jul 2022 18:02:45 -0400 Subject: [PATCH 026/119] Internals: Favor UASSERT_OBJ when have object. 
--- src/V3Ast.cpp | 10 +++++----- src/V3Descope.cpp | 4 ++-- src/V3Inst.cpp | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/V3Ast.cpp b/src/V3Ast.cpp index a48f701e6..7826214d0 100644 --- a/src/V3Ast.cpp +++ b/src/V3Ast.cpp @@ -281,13 +281,13 @@ void AstNode::addNextHere(AstNode* newp) { // This could be at head, tail, or both (single) // New could be head of single node, or list UASSERT(newp, "Null item passed to addNext"); - UASSERT(!newp->backp(), "New node (back) already assigned?"); + UASSERT_OBJ(!newp->backp(), newp, "New node (back) already assigned?"); debugTreeChange(this, "-addHereThs: ", __LINE__, false); debugTreeChange(newp, "-addHereNew: ", __LINE__, true); newp->editCountInc(); AstNode* const addlastp = newp->m_headtailp; // Last node in list to be added - UASSERT(!addlastp->m_nextp, "Headtailp tail isn't at the tail"); + UASSERT_OBJ(!addlastp->m_nextp, addlastp, "Headtailp tail isn't at the tail"); // Forward links AstNode* const oldnextp = this->m_nextp; @@ -437,7 +437,7 @@ void VNRelinker::dump(std::ostream& str) const { AstNode* AstNode::unlinkFrBackWithNext(VNRelinker* linkerp) { debugTreeChange(this, "-unlinkWNextThs: ", __LINE__, true); AstNode* const oldp = this; - UASSERT(oldp->m_backp, "Node has no back, already unlinked?"); + UASSERT_OBJ(oldp->m_backp, oldp, "Node has no back, already unlinked?"); oldp->editCountInc(); AstNode* const backp = oldp->m_backp; if (linkerp) { @@ -497,7 +497,7 @@ AstNode* AstNode::unlinkFrBackWithNext(VNRelinker* linkerp) { AstNode* AstNode::unlinkFrBack(VNRelinker* linkerp) { debugTreeChange(this, "-unlinkFrBkThs: ", __LINE__, true); AstNode* const oldp = this; - UASSERT(oldp->m_backp, "Node has no back, already unlinked?"); + UASSERT_OBJ(oldp->m_backp, oldp, "Node has no back, already unlinked?"); oldp->editCountInc(); AstNode* const backp = oldp->m_backp; if (linkerp) { @@ -565,7 +565,7 @@ void AstNode::relink(VNRelinker* linkerp) { } AstNode* const newp = this; UASSERT(linkerp && 
linkerp->m_backp, "Need non-empty linker"); - UASSERT(!newp->backp(), "New node already linked?"); + UASSERT_OBJ(!newp->m_backp, newp, "New node already linked?"); newp->editCountInc(); if (debug() > 8) { diff --git a/src/V3Descope.cpp b/src/V3Descope.cpp index 9e6aea78f..c43c0352f 100644 --- a/src/V3Descope.cpp +++ b/src/V3Descope.cpp @@ -248,8 +248,8 @@ private: VL_RESTORER(m_funcp); if (!nodep->user1()) { // Static functions should have been moved under the corresponding AstClassPackage - UASSERT(!(nodep->isStatic() && VN_IS(m_modp, Class)), - "Static function under AstClass"); + UASSERT_OBJ(!(nodep->isStatic() && VN_IS(m_modp, Class)), nodep, + "Static function under AstClass"); m_funcp = nodep; iterateChildren(nodep); nodep->user1(true); diff --git a/src/V3Inst.cpp b/src/V3Inst.cpp index cf3906851..339a5e0a5 100644 --- a/src/V3Inst.cpp +++ b/src/V3Inst.cpp @@ -442,7 +442,7 @@ private: int expr_i = i; if (const AstSliceSel* const slicep = VN_CAST(newp->exprp(), SliceSel)) { varrefp = VN_AS(slicep->fromp(), VarRef); - UASSERT(VN_IS(slicep->rhsp(), Const), "Slices should be constant"); + UASSERT_OBJ(VN_IS(slicep->rhsp(), Const), slicep, "Slices should be constant"); const int slice_index = slicep->declRange().left() + in * slicep->declRange().leftToRightInc(); const auto* const exprArrp = VN_AS(varrefp->dtypep(), UnpackArrayDType); From 108c90038776724e94d8c372554c253b0a4d2341 Mon Sep 17 00:00:00 2001 From: "William D. Jones" Date: Wed, 13 Jul 2022 06:38:03 -0400 Subject: [PATCH 027/119] Fix unique_ptr memory header for MinGW64 (#3493). --- docs/CONTRIBUTORS | 1 + src/V3File.h | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS index 6089a5eaa..a80d950b9 100644 --- a/docs/CONTRIBUTORS +++ b/docs/CONTRIBUTORS @@ -116,6 +116,7 @@ Unai Martinez-Corral Vassilis Papaefstathiou Veripool API Bot Victor Besyakov +William D. 
Jones Wilson Snyder Xi Zhang Yoda Lee diff --git a/src/V3File.h b/src/V3File.h index 6c45a0456..6a4ded0de 100644 --- a/src/V3File.h +++ b/src/V3File.h @@ -28,6 +28,7 @@ #include #include #include +#include //============================================================================ // V3File: Create streams, recording dependency information From 178e1789b5169715a4adb9cdd0839cfe6f814859 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 12 Jul 2022 13:34:41 +0100 Subject: [PATCH 028/119] Make AstNode::addHereThisAsNext always O(1) Using unlinkFrBackWithNext is O(n) in the size of the list if unlinking from the middle, so addHereThisAsNext also had this complexity. This patch implements addHereThisAsNext directly, which is always O(1). --- src/V3Ast.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/src/V3Ast.cpp b/src/V3Ast.cpp index 7826214d0..5518a98a8 100644 --- a/src/V3Ast.cpp +++ b/src/V3Ast.cpp @@ -631,11 +631,56 @@ void AstNode::relinkOneLink(AstNode*& pointpr, // Ref to pointer that gets set } void AstNode::addHereThisAsNext(AstNode* newp) { - // {old}->this->{next} becomes {old}->new->this->{next} - VNRelinker handle; - this->unlinkFrBackWithNext(&handle); - newp->addNext(this); - handle.relink(newp); + // {back}->this->{next} becomes {back}->new->this->{next} + UASSERT_OBJ(!newp->backp(), newp, "New node already linked?"); + UASSERT_OBJ(this->m_backp, this, "'this' node has no back, already unlinked?"); + UASSERT_OBJ(newp->m_headtailp, newp, "m_headtailp not set on new node"); + // + AstNode* const backp = this->m_backp; + AstNode* const newLastp = newp->m_headtailp; + // + this->editCountInc(); + // Common linkage + newLastp->m_nextp = this; + this->m_backp = newLastp; + newp->m_backp = backp; + // newLastp will not be the last node in the list as 'this' will follow it. 
+ // If newLastp == newp, then below handles newp becoming head + newLastp->m_headtailp = nullptr; + // Linkage dependent on position + if (backp && backp->m_nextp == this) { + // If 'this' is not at the head of a list, then the new node will also not be at the head + // of a list, so we can just link in the new node in the middle. + backp->m_nextp = newp; + newp->m_headtailp = nullptr; + } else { + // If 'this' is at the head of a list, then the new node becomes the head of that list. + if (backp) { + if (backp->m_op1p == this) { + backp->m_op1p = newp; + } else if (backp->m_op2p == this) { + backp->m_op2p = newp; + } else if (backp->m_op3p == this) { + backp->m_op3p = newp; + } else { + UASSERT_OBJ(backp->m_op4p == this, this, "Don't know where newp should go"); + backp->m_op4p = newp; + } + } + // We also need to update m_headtailp. + AstNode* const tailp = this->m_headtailp; + this->m_headtailp = nullptr; + newp->m_headtailp = tailp; + tailp->m_headtailp = newp; + } + // Iterator fixup + if (newLastp->m_iterpp) { + *(newLastp->m_iterpp) = this; + } else if (this->m_iterpp) { + *(this->m_iterpp) = newp; + } + // + debugTreeChange(this, "-addHereThisAsNext: ", __LINE__, true); } void AstNode::swapWith(AstNode* bp) { From e0a38ce2c2b52b1b4ad540d4e535ba7ef36728a4 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 13 Jul 2022 12:20:30 +0100 Subject: [PATCH 029/119] Remove unnecessary AstNode::clearIter() --- src/V3Ast.h | 1 - src/V3LinkLevel.cpp | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/V3Ast.h b/src/V3Ast.h index 868fc73f8..3b9c88a39 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1787,7 +1787,6 @@ public: void deleteTree(); // Always deletes the next link void checkTree(); // User Interface version void checkIter() const; - void clearIter() { m_iterpp = nullptr; } void dumpPtrs(std::ostream& os = std::cout) const; void dumpTree(std::ostream& os = std::cout, const string& indent = " ", int maxDepth = 0) const; diff --git 
a/src/V3LinkLevel.cpp b/src/V3LinkLevel.cpp index be8706477..d96892762 100644 --- a/src/V3LinkLevel.cpp +++ b/src/V3LinkLevel.cpp @@ -73,11 +73,7 @@ void V3LinkLevel::modSortByLevel() { // Reorder the netlist's modules to have modules in level sorted order stable_sort(mods.begin(), mods.end(), CmpLevel()); // Sort the vector UINFO(9, "modSortByLevel() sorted\n"); // Comment required for gcc4.6.3 / bug666 - for (AstNodeModule* nodep : mods) { - nodep->clearIter(); // Because we didn't iterate to find the node - // pointers, may have a stale m_iterp() needing cleanup - nodep->unlinkFrBack(); - } + for (AstNodeModule* nodep : mods) nodep->unlinkFrBack(); UASSERT_OBJ(!v3Global.rootp()->modulesp(), v3Global.rootp(), "Unlink didn't work"); for (AstNodeModule* nodep : mods) v3Global.rootp()->addModulep(nodep); UINFO(9, "modSortByLevel() done\n"); // Comment required for gcc4.6.3 / bug666 From 3fc8249429a3cde8c925f1ef6ff87bc82865121e Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 13 Jul 2022 13:55:40 +0100 Subject: [PATCH 030/119] Use AstNode::addHereThisAsNext in a few places --- src/V3Depth.cpp | 5 +---- src/V3Expand.cpp | 5 +---- src/V3Premit.cpp | 5 +---- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/V3Depth.cpp b/src/V3Depth.cpp index d83d69d66..0c3b791e3 100644 --- a/src/V3Depth.cpp +++ b/src/V3Depth.cpp @@ -68,10 +68,7 @@ private: // Put assignment before the referencing statement AstAssign* const assp = new AstAssign{ nodep->fileline(), new AstVarRef{nodep->fileline(), varp, VAccess::WRITE}, nodep}; - VNRelinker linker2; - m_stmtp->unlinkFrBack(&linker2); - assp->addNext(m_stmtp); - linker2.relink(assp); + m_stmtp->addHereThisAsNext(assp); } // VISITORS diff --git a/src/V3Expand.cpp b/src/V3Expand.cpp index a89d893b6..5c341db26 100644 --- a/src/V3Expand.cpp +++ b/src/V3Expand.cpp @@ -82,10 +82,7 @@ private: static void insertBefore(AstNode* placep, AstNode* newp) { newp->user1(1); // Already processed, don't need to re-iterate - VNRelinker 
linker; - placep->unlinkFrBack(&linker); - newp->addNext(placep); - linker.relink(newp); + placep->addHereThisAsNext(newp); } static void replaceWithDelete(AstNode* nodep, AstNode* newp) { newp->user1(1); // Already processed, don't need to re-iterate diff --git a/src/V3Premit.cpp b/src/V3Premit.cpp index 836b7c814..50254b1ca 100644 --- a/src/V3Premit.cpp +++ b/src/V3Premit.cpp @@ -109,10 +109,7 @@ private: } else if (m_inTracep) { m_inTracep->addPrecondsp(newp); } else if (m_stmtp) { - VNRelinker linker; - m_stmtp->unlinkFrBack(&linker); - newp->addNext(m_stmtp); - linker.relink(newp); + m_stmtp->addHereThisAsNext(newp); } else { newp->v3fatalSrc("No statement insertion point."); } From 658819bb71553ed82425cf75aa72a92355f4518c Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 13 Jul 2022 16:01:03 +0100 Subject: [PATCH 031/119] Trivial static const -> constexpr --- src/V3File.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/V3File.cpp b/src/V3File.cpp index 305d19c08..75b2cb5d6 100644 --- a/src/V3File.cpp +++ b/src/V3File.cpp @@ -489,8 +489,8 @@ private: #ifdef INFILTER_PIPE int fd_stdin[2]; int fd_stdout[2]; - static const int P_RD = 0; - static const int P_WR = 1; + constexpr int P_RD = 0; + constexpr int P_WR = 1; if (pipe(fd_stdin) != 0 || pipe(fd_stdout) != 0) { v3fatal("--pipe-filter: Can't pipe: " << strerror(errno)); From f4efcbde5c1a5f700a7db4cfb4e8ffe5e19bf192 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 13 Jul 2022 16:15:21 +0100 Subject: [PATCH 032/119] Remove simple use of static data from V3OutFormatter::indentSpaces --- src/V3File.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/V3File.cpp b/src/V3File.cpp index 75b2cb5d6..b19d1f868 100644 --- a/src/V3File.cpp +++ b/src/V3File.cpp @@ -623,17 +623,9 @@ V3OutFormatter::V3OutFormatter(const string& filename, V3OutFormatter::Language //---------------------------------------------------------------------- string 
V3OutFormatter::indentSpaces(int num) { - // Indent the specified number of spaces. Use spaces. - static char str[MAXSPACE + 20]; - char* cp = str; - if (num > MAXSPACE) num = MAXSPACE; - while (num > 0) { - *cp++ = ' '; - --num; - } - *cp++ = '\0'; - string st{str}; // No const, move optimization - return st; + // Indent the specified number of spaces. + if (num <= 0) return std::string{}; + return std::string(std::min(num, MAXSPACE), ' '); } bool V3OutFormatter::tokenMatch(const char* cp, const char* cmp) { From 3bd830eacf72546382ffbc254ff9d6e0fb38e76f Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 13 Jul 2022 18:24:48 +0100 Subject: [PATCH 033/119] Minor clean up of initialization --- src/V3Global.cpp | 7 +++++-- src/V3Global.h | 6 +----- src/V3PreShell.cpp | 5 ++--- src/V3PreShell.h | 2 +- src/Verilator.cpp | 2 +- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/V3Global.cpp b/src/V3Global.cpp index 98129ae78..cb42337e4 100644 --- a/src/V3Global.cpp +++ b/src/V3Global.cpp @@ -27,9 +27,12 @@ #include "V3Stats.h" //###################################################################### -// V3 Class -- top level +// V3Global -AstNetlist* V3Global::makeNetlist() { return new AstNetlist(); } +void V3Global::boot() { + UASSERT(!m_rootp, "call once"); + m_rootp = new AstNetlist(); +} void V3Global::clear() { #ifdef VL_LEAK_CHECK diff --git a/src/V3Global.h b/src/V3Global.h index 84a75d7e2..a7b54e866 100644 --- a/src/V3Global.h +++ b/src/V3Global.h @@ -119,11 +119,7 @@ public: // CONSTRUCTORS V3Global() {} - AstNetlist* makeNetlist(); - void boot() { - UASSERT(!m_rootp, "call once"); - m_rootp = makeNetlist(); - } + void boot(); void clear(); void shutdown(); // Release allocated resorces // ACCESSORS (general) diff --git a/src/V3PreShell.cpp b/src/V3PreShell.cpp index 6d64c4b7f..4b817a53b 100644 --- a/src/V3PreShell.cpp +++ b/src/V3PreShell.cpp @@ -49,9 +49,8 @@ protected: return level; } - void boot(char** env) { + void boot() { // Create the 
implementation pointer - if (env) {} if (!s_preprocp) { FileLine* const cmdfl = new FileLine(FileLine::commandLineFilename()); s_preprocp = V3PreProc::createPreProc(cmdfl); @@ -162,7 +161,7 @@ VInFilter* V3PreShellImp::s_filterp = nullptr; //###################################################################### // V3PreShell -void V3PreShell::boot(char** env) { V3PreShellImp::s_preImp.boot(env); } +void V3PreShell::boot() { V3PreShellImp::s_preImp.boot(); } bool V3PreShell::preproc(FileLine* fl, const string& modname, VInFilter* filterp, V3ParseImp* parsep, const string& errmsg) { return V3PreShellImp::s_preImp.preproc(fl, modname, filterp, parsep, errmsg); diff --git a/src/V3PreShell.h b/src/V3PreShell.h index 7e26f940f..9c7b73499 100644 --- a/src/V3PreShell.h +++ b/src/V3PreShell.h @@ -32,7 +32,7 @@ class VSpellCheck; class V3PreShell final { // Static class for calling preprocessor public: - static void boot(char** env); + static void boot(); static bool preproc(FileLine* fl, const string& modname, VInFilter* filterp, V3ParseImp* parsep, const string& errmsg); static void preprocInclude(FileLine* fl, const string& modname); diff --git a/src/Verilator.cpp b/src/Verilator.cpp index 97e3393c4..f503ee6b9 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -716,7 +716,7 @@ int main(int argc, char** argv, char** env) { // Preprocessor // Before command parsing so we can handle -Ds on command line. 
- V3PreShell::boot(env); + V3PreShell::boot(); // Command option parsing v3Global.opt.bin(argv[0]); From b0e796ca83cd05a6f72674c89e9af1eaa9c71366 Mon Sep 17 00:00:00 2001 From: Todd Strader Date: Fri, 15 Jul 2022 11:44:32 -0400 Subject: [PATCH 034/119] Public combo propagation issues (#2905) --- src/V3Order.cpp | 8 ++++++ test_regress/t/t_public_clk.cpp | 41 ++++++++++++++++++++++++++++++ test_regress/t/t_public_clk.pl | 27 ++++++++++++++++++++ test_regress/t/t_public_clk.v | 28 ++++++++++++++++++++ test_regress/t/t_public_seq.cpp | 45 +++++++++++++++++++++++++++++++++ test_regress/t/t_public_seq.pl | 27 ++++++++++++++++++++ test_regress/t/t_public_seq.v | 42 ++++++++++++++++++++++++++++++ 7 files changed, 218 insertions(+) create mode 100644 test_regress/t/t_public_clk.cpp create mode 100755 test_regress/t/t_public_clk.pl create mode 100644 test_regress/t/t_public_clk.v create mode 100644 test_regress/t/t_public_seq.cpp create mode 100755 test_regress/t/t_public_seq.pl create mode 100644 test_regress/t/t_public_seq.v diff --git a/src/V3Order.cpp b/src/V3Order.cpp index 4f2890be6..032f8ef3f 100644 --- a/src/V3Order.cpp +++ b/src/V3Order.cpp @@ -1601,11 +1601,19 @@ void OrderProcess::processDomainsIterate(OrderEitherVertex* vertexp) { // It should have already been copied into the settle domain. Presumably it has // inputs which we never trigger, or nothing it's sensitive to, so we can rip it out. if (!domainp && vertexp->scopep()) domainp = m_deleteDomainp; + // However, anything that is public RW must be added to the combo domain since the + // user may change it at any time + if (domainp && vvertexp && vvertexp->varScp()->varp()->isSigUserRWPublic()) + domainp = m_comboDomainp; } // vertexp->domainp(domainp); if (vertexp->domainp()) { UINFO(5, " done d=" << cvtToHex(vertexp->domainp()) + << (vertexp->domainp() == m_deleteDomainp ? " [DEL]" : "") + << (vertexp->domainp()->hasClocked() ? " [CLKD]" : "") + << (vertexp->domainp()->hasSettle() ? 
" [SETL]" : "") + << (vertexp->domainp()->hasInitial() ? " [INIT]" : "") << (vertexp->domainp()->hasCombo() ? " [COMB]" : "") << (vertexp->domainp()->isMulti() ? " [MULT]" : "") << " " << vertexp << endl); diff --git a/test_regress/t/t_public_clk.cpp b/test_regress/t/t_public_clk.cpp new file mode 100644 index 000000000..15ff2985b --- /dev/null +++ b/test_regress/t/t_public_clk.cpp @@ -0,0 +1,41 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +// +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Todd Strader. +// SPDX-License-Identifier: CC0-1.0 + +// Generated header +#include "Vt_public_clk.h" +#include "Vt_public_clk___024root.h" +// General headers +#include "verilated.h" + +std::unique_ptr topp; +int main(int argc, char** argv, char** env) { + vluint64_t sim_time = 1100; + const std::unique_ptr contextp{new VerilatedContext}; + contextp->commandArgs(argc, argv); + contextp->debug(0); + srand48(5); + topp.reset(new Vt_public_clk("top")); + + topp->rootp->t__DOT__clk = 0; + topp->eval(); + { contextp->timeInc(10); } + + while ((contextp->time() < sim_time) && !contextp->gotFinish()) { + topp->rootp->t__DOT__clk = !topp->rootp->t__DOT__clk; + topp->eval(); + contextp->timeInc(5); + } + + if (!contextp->gotFinish()) { + vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); + } + topp->final(); + + topp.reset(); + return 0; +} diff --git a/test_regress/t/t_public_clk.pl b/test_regress/t/t_public_clk.pl new file mode 100755 index 000000000..89f3e05aa --- /dev/null +++ b/test_regress/t/t_public_clk.pl @@ -0,0 +1,27 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. 
This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + make_top_shell => 0, + make_main => 0, + verilator_flags2 => [ + "--exe", + "$Self->{t_dir}/$Self->{name}.cpp" + ], + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_public_clk.v b/test_regress/t/t_public_clk.v new file mode 100644 index 000000000..b5f123f35 --- /dev/null +++ b/test_regress/t/t_public_clk.v @@ -0,0 +1,28 @@ +// DESCRIPTION: Verilator: public clock signal +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty, 2022 by Todd Strader +// SPDX-License-Identifier: CC0-1.0 + +`ifdef VERILATOR +// The '$c1(1)' is there to prevent inlining of the signal by V3Gate +`define IMPURE_ONE ($c(1)) +`else +// Use standard $random (chaces of getting 2 consecutive zeroes is zero). +`define IMPURE_ONE (|($random | $random)) +`endif + +module t (); + + logic clk /* verilator public_flat_rw */; + int count; + wire other_clk = `IMPURE_ONE & clk; + + always_ff @(posedge other_clk) begin + count <= count + 1; + if (count == 10) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule diff --git a/test_regress/t/t_public_seq.cpp b/test_regress/t/t_public_seq.cpp new file mode 100644 index 000000000..26906bd88 --- /dev/null +++ b/test_regress/t/t_public_seq.cpp @@ -0,0 +1,45 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +// +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Todd Strader. 
+// SPDX-License-Identifier: CC0-1.0 + +// Generated header +#include "Vt_public_seq.h" +#include "Vt_public_seq___024root.h" +// General headers +#include "verilated.h" + +std::unique_ptr topp; +int main(int argc, char** argv, char** env) { + vluint64_t sim_time = 1100; + const std::unique_ptr contextp{new VerilatedContext}; + contextp->commandArgs(argc, argv); + contextp->debug(0); + srand48(5); + topp.reset(new Vt_public_seq("top")); + + topp->clk = 0; + topp->eval(); + { contextp->timeInc(10); } + + int cyc = 0; + + while ((contextp->time() < sim_time) && !contextp->gotFinish()) { + if (cyc >= 5) ++topp->rootp->t__DOT__pub_byte; + topp->eval(); + topp->clk = !topp->clk; + topp->eval(); + contextp->timeInc(5); + if (topp->clk) cyc++; + } + if (!contextp->gotFinish()) { + vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish"); + } + topp->final(); + + topp.reset(); + return 0; +} diff --git a/test_regress/t/t_public_seq.pl b/test_regress/t/t_public_seq.pl new file mode 100755 index 000000000..89f3e05aa --- /dev/null +++ b/test_regress/t/t_public_seq.pl @@ -0,0 +1,27 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + make_top_shell => 0, + make_main => 0, + verilator_flags2 => [ + "--exe", + "$Self->{t_dir}/$Self->{name}.cpp" + ], + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_public_seq.v b/test_regress/t/t_public_seq.v new file mode 100644 index 000000000..007e51ffe --- /dev/null +++ b/test_regress/t/t_public_seq.v @@ -0,0 +1,42 @@ +// DESCRIPTION: Verilator: public clock signal +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty, 2022 by Todd Strader +// SPDX-License-Identifier: CC0-1.0 + +`ifdef VERILATOR +// The '$c1(1)' is there to prevent inlining of the signal by V3Gate +`define IMPURE_ONE ($c(1)) +`else +// Use standard $random (chaces of getting 2 consecutive zeroes is zero). +`define IMPURE_ONE (|($random | $random)) +`endif + +module t ( + input clk, + input dummy_clk // Never toggled from C++ +); + + int count; + + logic [7:0] pub_byte /* verilator public_flat_rw */ = 123; + logic [7:0] comb_byte; + + always_comb comb_byte = `IMPURE_ONE ? pub_byte : '0; + + always_ff @(posedge clk) begin + count <= count + 1; + if (comb_byte != pub_byte) begin + $display("%%Error: comb_byte (%0d) != pub_byte (%0d)", comb_byte, pub_byte); + $stop; + end + if (count == 10) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end + + always_ff @(posedge dummy_clk) begin + comb_byte = ~pub_byte; + end +endmodule From 0dfa7d3af5a56c24e67f6590d640344da3ec4d87 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kozdra Date: Mon, 18 Jul 2022 18:58:55 +0200 Subject: [PATCH 035/119] Internals: const-qualify findDType function. No functional change. 
(#3502) --- src/V3Ast.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/V3Ast.h b/src/V3Ast.h index 3b9c88a39..58e940480 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1717,13 +1717,13 @@ public: void dtypeSetVoid() { dtypep(findVoidDType()); } // Data type locators - AstNodeDType* findBitDType() { return findBasicDType(VBasicDTypeKwd::LOGIC); } - AstNodeDType* findDoubleDType() { return findBasicDType(VBasicDTypeKwd::DOUBLE); } - AstNodeDType* findStringDType() { return findBasicDType(VBasicDTypeKwd::STRING); } - AstNodeDType* findSigned32DType() { return findBasicDType(VBasicDTypeKwd::INTEGER); } - AstNodeDType* findUInt32DType() { return findBasicDType(VBasicDTypeKwd::UINT32); } - AstNodeDType* findUInt64DType() { return findBasicDType(VBasicDTypeKwd::UINT64); } - AstNodeDType* findCHandleDType() { return findBasicDType(VBasicDTypeKwd::CHANDLE); } + AstNodeDType* findBitDType() const { return findBasicDType(VBasicDTypeKwd::LOGIC); } + AstNodeDType* findDoubleDType() const { return findBasicDType(VBasicDTypeKwd::DOUBLE); } + AstNodeDType* findStringDType() const { return findBasicDType(VBasicDTypeKwd::STRING); } + AstNodeDType* findSigned32DType() const { return findBasicDType(VBasicDTypeKwd::INTEGER); } + AstNodeDType* findUInt32DType() const { return findBasicDType(VBasicDTypeKwd::UINT32); } + AstNodeDType* findUInt64DType() const { return findBasicDType(VBasicDTypeKwd::UINT64); } + AstNodeDType* findCHandleDType() const { return findBasicDType(VBasicDTypeKwd::CHANDLE); } AstNodeDType* findEmptyQueueDType() const; AstNodeDType* findVoidDType() const; AstNodeDType* findQueueIndexDType() const; From 9085e34d7096afa6c9354c9701322cbad946b6de Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 18 Jul 2022 13:14:24 +0100 Subject: [PATCH 036/119] Pass VerilatedModel at trace registration time --- include/verilated_trace.h | 10 +++++----- include/verilated_trace_imp.h | 19 ++++++++++--------- src/V3EmitCModel.cpp | 2 +- 
src/V3Trace.cpp | 6 ++---- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/include/verilated_trace.h b/include/verilated_trace.h index 0d0f7c0f6..5e7fc85c2 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -215,7 +215,7 @@ private: double m_timeRes = 1e-9; // Time resolution (ns/ms etc) double m_timeUnit = 1e-0; // Time units (ns/ms etc) - void addContext(VerilatedContext*) VL_MT_SAFE_EXCLUDES(m_mutex); + void addModel(VerilatedModel*) VL_MT_SAFE_EXCLUDES(m_mutex); void addCallbackRecord(std::vector& cbVec, CallbackRecord&& cbRec) VL_MT_SAFE_EXCLUDES(m_mutex); @@ -339,10 +339,10 @@ public: //========================================================================= // Non-hot path internal interface to Verilator generated code - void addInitCb(initCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; - void addFullCb(dumpCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; - void addChgCb(dumpCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; - void addCleanupCb(cleanupCb_t cb, void* userp, VerilatedContext*) VL_MT_SAFE; + void addInitCb(initCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; + void addFullCb(dumpCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; + void addChgCb(dumpCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; + void addCleanupCb(cleanupCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; void scopeEscape(char flag) { m_scopeEscape = flag; } diff --git a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index a09ac0f43..d20c09166 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -604,9 +604,10 @@ void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUD // Non-hot path internal interface to Verilator generated code template <> -void VerilatedTrace::addContext(VerilatedContext* contextp) +void VerilatedTrace::addModel(VerilatedModel* modelp) VL_MT_SAFE_EXCLUDES(m_mutex) { const VerilatedLockGuard lock{m_mutex}; + VerilatedContext* const contextp = 
modelp->contextp(); if (m_contextp && contextp != m_contextp) { VL_FATAL_MT( __FILE__, __LINE__, "", @@ -630,26 +631,26 @@ void VerilatedTrace::addCallbackRecord(std::vector void VerilatedTrace::addInitCb(initCb_t cb, void* userp, - VerilatedContext* contextp) VL_MT_SAFE { - addContext(contextp); + VerilatedModel* modelp) VL_MT_SAFE { + addModel(modelp); addCallbackRecord(m_initCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addFullCb(dumpCb_t cb, void* userp, - VerilatedContext* contextp) VL_MT_SAFE { - addContext(contextp); + VerilatedModel* modelp) VL_MT_SAFE { + addModel(modelp); addCallbackRecord(m_fullCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addChgCb(dumpCb_t cb, void* userp, - VerilatedContext* contextp) VL_MT_SAFE { - addContext(contextp); + VerilatedModel* modelp) VL_MT_SAFE { + addModel(modelp); addCallbackRecord(m_chgCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* userp, - VerilatedContext* contextp) VL_MT_SAFE { - addContext(contextp); + VerilatedModel* modelp) VL_MT_SAFE { + addModel(modelp); addCallbackRecord(m_cleanupCbs, CallbackRecord{cb, userp}); } diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index 04b422a97..67c02e332 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -540,7 +540,7 @@ class EmitCModel final : public EmitCFunc { } puts(/**/ "if (false && levels && options) {} // Prevent unused\n"); puts(/**/ "tfp->spTrace()->addInitCb(&" + protect("trace_init") - + ", &(vlSymsp->TOP), contextp());\n"); + + ", &(vlSymsp->TOP), this);\n"); puts(/**/ topModNameProtected + "__" + protect("trace_register") + "(&(vlSymsp->TOP), tfp->spTrace());\n"); diff --git a/src/V3Trace.cpp b/src/V3Trace.cpp index 7113c5e0e..e7a445c1a 100644 --- a/src/V3Trace.cpp +++ b/src/V3Trace.cpp @@ -513,8 +513,7 @@ private: } m_regFuncp->addStmtsp(new AstAddrOfCFunc(flp, funcp)); m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf", true)); - 
m_regFuncp->addStmtsp( - new AstText(flp, ", vlSelf->vlSymsp->__Vm_modelp->contextp()", true)); + m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf->vlSymsp->__Vm_modelp", true)); m_regFuncp->addStmtsp(new AstText(flp, ");\n", true)); } else { // Sub functions @@ -702,8 +701,7 @@ private: // Register it m_regFuncp->addStmtsp(new AstText(fl, "tracep->addCleanupCb(", true)); m_regFuncp->addStmtsp(new AstAddrOfCFunc(fl, cleanupFuncp)); - m_regFuncp->addStmtsp( - new AstText(fl, ", vlSelf, vlSelf->vlSymsp->__Vm_modelp->contextp());\n", true)); + m_regFuncp->addStmtsp(new AstText(fl, ", vlSelf, vlSelf->vlSymsp->__Vm_modelp);\n", true)); // Clear global activity flag cleanupFuncp->addStmtsp( From db59c07f27da450153787f5abfaa3fce476545a3 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 18 Jul 2022 15:32:14 +0100 Subject: [PATCH 037/119] Implement trace offloading with fewer ifdefs Step towards a proper run-time library. Reduce the amount of ifdefs in the implementation of offloaded tracing. There are still a very small number of ifdefs left, which will need more careful changes in order to keep user API compatibility. 
--- include/verilated_fst_c.cpp | 35 +++-- include/verilated_fst_c.h | 18 ++- include/verilated_trace.h | 196 ++++++++++++----------- include/verilated_trace_imp.h | 240 ++++++++++++++++++----------- include/verilated_vcd_c.cpp | 31 ++-- include/verilated_vcd_c.h | 33 ++-- src/V3HierBlock.cpp | 2 +- src/V3LinkCells.cpp | 2 +- src/V3Options.cpp | 2 +- src/V3Options.h | 8 +- src/V3Trace.cpp | 8 +- test_regress/t/t_flag_hier1_bad.pl | 2 +- 12 files changed, 337 insertions(+), 240 deletions(-) diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp index 0bc1048cf..e6ea9f99f 100644 --- a/include/verilated_fst_c.cpp +++ b/include/verilated_fst_c.cpp @@ -93,7 +93,17 @@ static_assert(static_cast(FST_ST_VCD_PROGRAM) == static_cast(VLT_TRACE // VerilatedFst VerilatedFst::VerilatedFst(void* fst) - : m_fst{fst} {} + : +#ifdef VL_TRACE_OFFLOAD + VerilatedTrace { + true +} +#else + VerilatedTrace { + false +} +#endif +, m_fst{fst} {} VerilatedFst::~VerilatedFst() { if (m_fst) fstWriterClose(m_fst); @@ -250,13 +260,21 @@ void VerilatedFst::declDouble(uint32_t code, const char* name, int dtypenum, fst //============================================================================= // Get/commit trace buffer -VerilatedFstBuffer* VerilatedFst::getTraceBuffer() { return new VerilatedFstBuffer{*this}; } +VerilatedFst::Buffer* VerilatedFst::getTraceBuffer() { +#ifdef VL_THREADED + if (offload()) return new OffloadBuffer{*this}; +#endif + return new Buffer{*this}; +} -void VerilatedFst::commitTraceBuffer(VerilatedFstBuffer* bufp) { -#ifdef VL_TRACE_OFFLOAD - if (bufp->m_offloadBufferWritep) { - m_offloadBufferWritep = bufp->m_offloadBufferWritep; - return; // Buffer will be deleted by the offload thread +void VerilatedFst::commitTraceBuffer(VerilatedFst::Buffer* bufp) { +#ifdef VL_THREADED + if (offload()) { + OffloadBuffer* const offloadBufferp = static_cast(bufp); + if (offloadBufferp->m_offloadBufferWritep) { + m_offloadBufferWritep = 
offloadBufferp->m_offloadBufferWritep; + return; // Buffer will be deleted by the offload thread + } } #endif delete bufp; @@ -265,9 +283,6 @@ void VerilatedFst::commitTraceBuffer(VerilatedFstBuffer* bufp) { //============================================================================= // VerilatedFstBuffer implementation -VerilatedFstBuffer::VerilatedFstBuffer(VerilatedFst& owner) - : VerilatedTraceBuffer{owner} {} - //============================================================================= // Trace rendering primitives diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h index 5131cc8cc..0587527f6 100644 --- a/include/verilated_fst_c.h +++ b/include/verilated_fst_c.h @@ -43,7 +43,7 @@ public: using Super = VerilatedTrace; private: - friend Buffer; // Give the buffer access to the private bits + friend VerilatedFstBuffer; // Give the buffer access to the private bits //========================================================================= // FST specific internals @@ -72,8 +72,8 @@ protected: virtual bool preChangeDump() override { return isOpen(); } // Trace buffer management - virtual VerilatedFstBuffer* getTraceBuffer() override; - virtual void commitTraceBuffer(VerilatedFstBuffer*) override; + virtual Buffer* getTraceBuffer() override; + virtual void commitTraceBuffer(Buffer*) override; public: //========================================================================= @@ -124,10 +124,14 @@ template <> void VerilatedFst::Super::dumpvars(int level, const std::string& hie //============================================================================= // VerilatedFstBuffer -class VerilatedFstBuffer final : public VerilatedTraceBuffer { +class VerilatedFstBuffer VL_NOT_FINAL { // Give the trace file access to the private bits friend VerilatedFst; friend VerilatedFst::Super; + friend VerilatedFst::Buffer; + friend VerilatedFst::OffloadBuffer; + + VerilatedFst& m_owner; // Trace file owning this buffer. Required by subclasses. 
// The FST file handle void* const m_fst = m_owner.m_fst; @@ -136,10 +140,10 @@ class VerilatedFstBuffer final : public VerilatedTraceBuffer #include -#ifdef VL_TRACE_OFFLOAD +#ifdef VL_THREADED # include # include #endif @@ -57,9 +57,10 @@ // clang-format on class VlThreadPool; -template class VerilatedTraceBuffer; +template class VerilatedTraceBuffer; +template class VerilatedTraceOffloadBuffer; -#ifdef VL_TRACE_OFFLOAD +#ifdef VL_THREADED //============================================================================= // Offloaded tracing @@ -133,23 +134,26 @@ public: // VerilatedTrace // T_Trace is the format specific subclass of VerilatedTrace. -// T_Buffer is the format specific subclass of VerilatedTraceBuffer. +// T_Buffer is the format specific base class of VerilatedTraceBuffer. template class VerilatedTrace VL_NOT_FINAL { - // Give the buffer (both base and derived) access to the private bits - friend VerilatedTraceBuffer; - friend T_Buffer; - public: - using Buffer = T_Buffer; + using Buffer = VerilatedTraceBuffer; + using OffloadBuffer = VerilatedTraceOffloadBuffer; //========================================================================= // Generic tracing internals using initCb_t = void (*)(void*, T_Trace*, uint32_t); // Type of init callbacks using dumpCb_t = void (*)(void*, Buffer*); // Type of dump callbacks + using dumpOffloadCb_t = void (*)(void*, OffloadBuffer*); // Type of offload dump callbacks using cleanupCb_t = void (*)(void*, T_Trace*); // Type of cleanup callbacks private: + // Give the buffer (both base and derived) access to the private bits + friend T_Buffer; + friend Buffer; + friend OffloadBuffer; + struct CallbackRecord { // Note: would make these fields const, but some old STL implementations // (the one in Ubuntu 14.04 with GCC 4.8.4 in particular) use the @@ -158,6 +162,7 @@ private: union { // The callback initCb_t m_initCb; dumpCb_t m_dumpCb; + dumpOffloadCb_t m_dumpOffloadCb; cleanupCb_t m_cleanupCb; }; void* m_userp; // The 
user pointer to pass to the callback (the symbol table) @@ -167,11 +172,16 @@ private: CallbackRecord(dumpCb_t cb, void* userp) : m_dumpCb{cb} , m_userp{userp} {} + CallbackRecord(dumpOffloadCb_t cb, void* userp) + : m_dumpOffloadCb{cb} + , m_userp{userp} {} CallbackRecord(cleanupCb_t cb, void* userp) : m_cleanupCb{cb} , m_userp{userp} {} }; + const bool m_offload; // Whether to use the offload thread (ignored if !VL_THREADED) + #ifdef VL_TRACE_PARALLEL struct ParallelWorkerData { const dumpCb_t m_cb; // The callback @@ -202,7 +212,9 @@ private: std::vector m_sigs_enabledVec; // Staging for m_sigs_enabledp std::vector m_initCbs; // Routines to initialize tracing std::vector m_fullCbs; // Routines to perform full dump + std::vector m_fullOffloadCbs; // Routines to perform offloaded full dump std::vector m_chgCbs; // Routines to perform incremental dump + std::vector m_chgOffloadCbs; // Routines to perform offloaded incremental dump std::vector m_cleanupCbs; // Routines to call at the end of dump VerilatedContext* m_contextp = nullptr; // The context used by the traced models bool m_fullDump = true; // Whether a full dump is required on the next call to 'dump' @@ -225,13 +237,14 @@ private: T_Trace* self() { return static_cast(this); } void runCallbacks(const std::vector& cbVec); + void runOffloadedCallbacks(const std::vector& cbVec); // Flush any remaining data for this file static void onFlush(void* selfp) VL_MT_UNSAFE_ONE; // Close the file on termination static void onExit(void* selfp) VL_MT_UNSAFE_ONE; -#ifdef VL_TRACE_OFFLOAD +#ifdef VL_THREADED // Number of total offload buffers that have been allocated uint32_t m_numOffloadBuffers = 0; // Size of offload buffers @@ -298,6 +311,12 @@ protected: void closeBase(); void flushBase(); +#ifdef VL_THREADED + inline bool offload() const { return m_offload; } +#else + static constexpr bool offload() { return false; } +#endif + //========================================================================= // Virtual 
functions to be provided by the format specific implementation @@ -317,7 +336,7 @@ public: //========================================================================= // External interface to client code - explicit VerilatedTrace(); + explicit VerilatedTrace(bool offload); ~VerilatedTrace(); // Set time units (s/ms, defaults to ns) @@ -341,7 +360,9 @@ public: void addInitCb(initCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; void addFullCb(dumpCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; + void addFullCb(dumpOffloadCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; void addChgCb(dumpCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; + void addChgCb(dumpOffloadCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; void addCleanupCb(cleanupCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; void scopeEscape(char flag) { m_scopeEscape = flag; } @@ -353,32 +374,25 @@ public: //============================================================================= // VerilatedTraceBuffer -// T_Trace is the format specific subclass of VerilatedTrace. -// T_Buffer is the format specific subclass of VerilatedTraceBuffer. +// T_Buffer is the format specific base class of VerilatedTraceBuffer. // The format-specific hot-path methods use duck-typing via T_Buffer for performance. 
-template class VerilatedTraceBuffer VL_NOT_FINAL { - friend T_Trace; // Give the trace file access to the private bits - +template // +class VerilatedTraceBuffer VL_NOT_FINAL : public T_Buffer { protected: - T_Trace& m_owner; // The VerilatedTrace subclass that owns this buffer + // Type of the owner trace file + using Trace = typename std::remove_cv< + typename std::remove_reference::type>::type; - // Previous value store - uint32_t* const m_sigs_oldvalp = m_owner.m_sigs_oldvalp; - // Bit vector of enabled codes (nullptr = all on) - EData* const m_sigs_enabledp = m_owner.m_sigs_enabledp; + static_assert(std::has_virtual_destructor::value, ""); + static_assert(std::is_base_of, Trace>::value, ""); -#ifdef VL_TRACE_OFFLOAD - // Write pointer into current buffer - uint32_t* m_offloadBufferWritep = m_owner.m_offloadBufferWritep; - // End of offload buffer - uint32_t* const m_offloadBufferEndp = m_owner.m_offloadBufferEndp; -#endif + friend Trace; // Give the trace file access to the private bits + friend std::default_delete>; - // Equivalent to 'this' but is of the sub-type 'T_Derived*'. Use 'self()->' - // to access duck-typed functions to avoid a virtual function call. - inline T_Buffer* self() { return static_cast(this); } + uint32_t* const m_sigs_oldvalp; // Previous value store + EData* const m_sigs_enabledp; // Bit vector of enabled codes (nullptr = all on) - explicit VerilatedTraceBuffer(T_Trace& owner); + explicit VerilatedTraceBuffer(Trace& owner); virtual ~VerilatedTraceBuffer() = default; public: @@ -410,7 +424,67 @@ public: void fullWData(uint32_t* oldp, const WData* newvalp, int bits); void fullDouble(uint32_t* oldp, double newval); -#ifdef VL_TRACE_OFFLOAD + // In non-offload mode, these are called directly by the trace callbacks, + // and are called chg*. In offload mode, they are called by the worker + // thread and are called chg*Impl + + // Check previous dumped value of signal. 
If changed, then emit trace entry + VL_ATTR_ALWINLINE inline void chgBit(uint32_t* oldp, CData newval) { + const uint32_t diff = *oldp ^ newval; + if (VL_UNLIKELY(diff)) fullBit(oldp, newval); + } + VL_ATTR_ALWINLINE inline void chgCData(uint32_t* oldp, CData newval, int bits) { + const uint32_t diff = *oldp ^ newval; + if (VL_UNLIKELY(diff)) fullCData(oldp, newval, bits); + } + VL_ATTR_ALWINLINE inline void chgSData(uint32_t* oldp, SData newval, int bits) { + const uint32_t diff = *oldp ^ newval; + if (VL_UNLIKELY(diff)) fullSData(oldp, newval, bits); + } + VL_ATTR_ALWINLINE inline void chgIData(uint32_t* oldp, IData newval, int bits) { + const uint32_t diff = *oldp ^ newval; + if (VL_UNLIKELY(diff)) fullIData(oldp, newval, bits); + } + VL_ATTR_ALWINLINE inline void chgQData(uint32_t* oldp, QData newval, int bits) { + const uint64_t diff = *reinterpret_cast(oldp) ^ newval; + if (VL_UNLIKELY(diff)) fullQData(oldp, newval, bits); + } + VL_ATTR_ALWINLINE inline void chgWData(uint32_t* oldp, const WData* newvalp, int bits) { + for (int i = 0; i < (bits + 31) / 32; ++i) { + if (VL_UNLIKELY(oldp[i] ^ newvalp[i])) { + fullWData(oldp, newvalp, bits); + return; + } + } + } + VL_ATTR_ALWINLINE inline void chgDouble(uint32_t* oldp, double newval) { + // cppcheck-suppress invalidPointerCast + if (VL_UNLIKELY(*reinterpret_cast(oldp) != newval)) fullDouble(oldp, newval); + } +}; + +#ifdef VL_THREADED +//============================================================================= +// VerilatedTraceOffloadBuffer + +// T_Buffer is the format specific base class of VerilatedTraceBuffer. +// The format-specific hot-path methods use duck-typing via T_Buffer for performance. 
+template // +class VerilatedTraceOffloadBuffer final : public VerilatedTraceBuffer { + using typename VerilatedTraceBuffer::Trace; + + friend Trace; // Give the trace file access to the private bits + + uint32_t* m_offloadBufferWritep; // Write pointer into current buffer + uint32_t* const m_offloadBufferEndp; // End of offload buffer + + explicit VerilatedTraceOffloadBuffer(Trace& owner); + virtual ~VerilatedTraceOffloadBuffer() = default; + +public: + //========================================================================= + // Hot path internal interface to Verilator generated code + // Offloaded tracing. Just dump everything in the offload buffer inline void chgBit(uint32_t code, CData newval) { m_offloadBufferWritep[0] = VerilatedTraceOffloadCommand::CHG_BIT_0 | newval; @@ -461,63 +535,7 @@ public: m_offloadBufferWritep += 4; VL_DEBUG_IF(assert(m_offloadBufferWritep <= m_offloadBufferEndp);); } - -#define chgBit chgBitImpl -#define chgCData chgCDataImpl -#define chgSData chgSDataImpl -#define chgIData chgIDataImpl -#define chgQData chgQDataImpl -#define chgWData chgWDataImpl -#define chgDouble chgDoubleImpl -#endif - - // In non-offload mode, these are called directly by the trace callbacks, - // and are called chg*. In offload mode, they are called by the worker - // thread and are called chg*Impl - - // Check previous dumped value of signal. 
If changed, then emit trace entry - VL_ATTR_ALWINLINE inline void chgBit(uint32_t* oldp, CData newval) { - const uint32_t diff = *oldp ^ newval; - if (VL_UNLIKELY(diff)) fullBit(oldp, newval); - } - VL_ATTR_ALWINLINE inline void chgCData(uint32_t* oldp, CData newval, int bits) { - const uint32_t diff = *oldp ^ newval; - if (VL_UNLIKELY(diff)) fullCData(oldp, newval, bits); - } - VL_ATTR_ALWINLINE inline void chgSData(uint32_t* oldp, SData newval, int bits) { - const uint32_t diff = *oldp ^ newval; - if (VL_UNLIKELY(diff)) fullSData(oldp, newval, bits); - } - VL_ATTR_ALWINLINE inline void chgIData(uint32_t* oldp, IData newval, int bits) { - const uint32_t diff = *oldp ^ newval; - if (VL_UNLIKELY(diff)) fullIData(oldp, newval, bits); - } - VL_ATTR_ALWINLINE inline void chgQData(uint32_t* oldp, QData newval, int bits) { - const uint64_t diff = *reinterpret_cast(oldp) ^ newval; - if (VL_UNLIKELY(diff)) fullQData(oldp, newval, bits); - } - VL_ATTR_ALWINLINE inline void chgWData(uint32_t* oldp, const WData* newvalp, int bits) { - for (int i = 0; i < (bits + 31) / 32; ++i) { - if (VL_UNLIKELY(oldp[i] ^ newvalp[i])) { - fullWData(oldp, newvalp, bits); - return; - } - } - } - VL_ATTR_ALWINLINE inline void chgDouble(uint32_t* oldp, double newval) { - // cppcheck-suppress invalidPointerCast - if (VL_UNLIKELY(*reinterpret_cast(oldp) != newval)) fullDouble(oldp, newval); - } - -#ifdef VL_TRACE_OFFLOAD -#undef chgBit -#undef chgCData -#undef chgSData -#undef chgIData -#undef chgQData -#undef chgWData -#undef chgDouble -#endif }; +#endif #endif // guard diff --git a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index d20c09166..9fd3f2421 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -78,7 +78,7 @@ static std::string doubleToTimescale(double value) { return valuestr; // Gets converted to string, so no ref to stack } -#ifdef VL_TRACE_OFFLOAD +#ifdef VL_THREADED 
//========================================================================= // Buffer management @@ -127,7 +127,7 @@ template <> void VerilatedTrace::offloadWorkerThreadMain() { const uint32_t* readp = bufferp; - std::unique_ptr traceBufp; // We own the passed tracebuffer + std::unique_ptr traceBufp; // We own the passed tracebuffer while (true) { const uint32_t cmd = readp[0]; @@ -143,44 +143,44 @@ template <> void VerilatedTrace::offloadWorkerThreadMain() { // CHG_* commands case VerilatedTraceOffloadCommand::CHG_BIT_0: VL_TRACE_OFFLOAD_DEBUG("Command CHG_BIT_0 " << top); - traceBufp->chgBitImpl(oldp, 0); + traceBufp->chgBit(oldp, 0); continue; case VerilatedTraceOffloadCommand::CHG_BIT_1: VL_TRACE_OFFLOAD_DEBUG("Command CHG_BIT_1 " << top); - traceBufp->chgBitImpl(oldp, 1); + traceBufp->chgBit(oldp, 1); continue; case VerilatedTraceOffloadCommand::CHG_CDATA: VL_TRACE_OFFLOAD_DEBUG("Command CHG_CDATA " << top); // Bits stored in bottom byte of command - traceBufp->chgCDataImpl(oldp, *readp, top); + traceBufp->chgCData(oldp, *readp, top); readp += 1; continue; case VerilatedTraceOffloadCommand::CHG_SDATA: VL_TRACE_OFFLOAD_DEBUG("Command CHG_SDATA " << top); // Bits stored in bottom byte of command - traceBufp->chgSDataImpl(oldp, *readp, top); + traceBufp->chgSData(oldp, *readp, top); readp += 1; continue; case VerilatedTraceOffloadCommand::CHG_IDATA: VL_TRACE_OFFLOAD_DEBUG("Command CHG_IDATA " << top); // Bits stored in bottom byte of command - traceBufp->chgIDataImpl(oldp, *readp, top); + traceBufp->chgIData(oldp, *readp, top); readp += 1; continue; case VerilatedTraceOffloadCommand::CHG_QDATA: VL_TRACE_OFFLOAD_DEBUG("Command CHG_QDATA " << top); // Bits stored in bottom byte of command - traceBufp->chgQDataImpl(oldp, *reinterpret_cast(readp), top); + traceBufp->chgQData(oldp, *reinterpret_cast(readp), top); readp += 2; continue; case VerilatedTraceOffloadCommand::CHG_WDATA: VL_TRACE_OFFLOAD_DEBUG("Command CHG_WDATA " << top); - traceBufp->chgWDataImpl(oldp, 
readp, top); + traceBufp->chgWData(oldp, readp, top); readp += VL_WORDS_I(top); continue; case VerilatedTraceOffloadCommand::CHG_DOUBLE: VL_TRACE_OFFLOAD_DEBUG("Command CHG_DOUBLE " << top); - traceBufp->chgDoubleImpl(oldp, *reinterpret_cast(readp)); + traceBufp->chgDouble(oldp, *reinterpret_cast(readp)); readp += 2; continue; @@ -196,7 +196,7 @@ template <> void VerilatedTrace::offloadWorkerThreadMain() { case VerilatedTraceOffloadCommand::TRACE_BUFFER: VL_TRACE_OFFLOAD_DEBUG("Command TRACE_BUFFER " << top); readp -= 1; // No code in this command, undo increment - traceBufp.reset(*reinterpret_cast(readp)); + traceBufp.reset(*reinterpret_cast(readp)); readp += 2; continue; @@ -252,24 +252,28 @@ template <> void VerilatedTrace::shutdownOffloadWorker() { // Life cycle template <> void VerilatedTrace::closeBase() { -#ifdef VL_TRACE_OFFLOAD - shutdownOffloadWorker(); - while (m_numOffloadBuffers) { - delete[] m_offloadBuffersFromWorker.get(); - --m_numOffloadBuffers; +#ifdef VL_THREADED + if (offload()) { + shutdownOffloadWorker(); + while (m_numOffloadBuffers) { + delete[] m_offloadBuffersFromWorker.get(); + --m_numOffloadBuffers; + } } #endif } template <> void VerilatedTrace::flushBase() { -#ifdef VL_TRACE_OFFLOAD - // Hand an empty buffer to the worker thread - uint32_t* const bufferp = getOffloadBuffer(); - *bufferp = VerilatedTraceOffloadCommand::END; - m_offloadBuffersToWorker.put(bufferp); - // Wait for it to be returned. As the processing is in-order, - // this ensures all previous buffers have been processed. - waitForOffloadBuffer(bufferp); +#ifdef VL_THREADED + if (offload()) { + // Hand an empty buffer to the worker thread + uint32_t* const bufferp = getOffloadBuffer(); + *bufferp = VerilatedTraceOffloadCommand::END; + m_offloadBuffersToWorker.put(bufferp); + // Wait for it to be returned. As the processing is in-order, + // this ensures all previous buffers have been processed. 
+ waitForOffloadBuffer(bufferp); + } #endif } @@ -289,7 +293,14 @@ template <> void VerilatedTrace::onExit(void* selfp) { //============================================================================= // VerilatedTrace -template <> VerilatedTrace::VerilatedTrace() { +template <> +VerilatedTrace::VerilatedTrace(bool offload) + : m_offload{offload} { +#ifndef VL_THREADED + if (m_offload) { + VL_FATAL_MT(__FILE__, __LINE__, "", "Cannot use trace offloading without VL_THREADED"); + } +#endif set_time_unit(Verilated::threadContextp()->timeunitString()); set_time_resolution(Verilated::threadContextp()->timeprecisionString()); } @@ -299,9 +310,7 @@ template <> VerilatedTrace::~VerilatedTrace() { if (m_sigs_enabledp) VL_DO_CLEAR(delete[] m_sigs_enabledp, m_sigs_enabledp = nullptr); Verilated::removeFlushCb(VerilatedTrace::onFlush, this); Verilated::removeExitCb(VerilatedTrace::onExit, this); -#ifdef VL_TRACE_OFFLOAD - closeBase(); -#endif + if (offload()) closeBase(); } //========================================================================= @@ -355,17 +364,19 @@ template <> void VerilatedTrace::traceInit() VL_MT_UNSAFE { Verilated::addFlushCb(VerilatedTrace::onFlush, this); Verilated::addExitCb(VerilatedTrace::onExit, this); -#ifdef VL_TRACE_OFFLOAD - // Compute offload buffer size. we need to be able to store a new value for - // each signal, which is 'nextCode()' entries after the init callbacks - // above have been run, plus up to 2 more words of metadata per signal, - // plus fixed overhead of 1 for a termination flag and 3 for a time stamp - // update. - m_offloadBufferSize = nextCode() + numSignals() * 2 + 4; +#ifdef VL_THREADED + if (offload()) { + // Compute offload buffer size. we need to be able to store a new value for + // each signal, which is 'nextCode()' entries after the init callbacks + // above have been run, plus up to 2 more words of metadata per signal, + // plus fixed overhead of 1 for a termination flag and 3 for a time stamp + // update. 
+ m_offloadBufferSize = nextCode() + numSignals() * 2 + 4; - // Start the worker thread - m_workerThread.reset( - new std::thread{&VerilatedTrace::offloadWorkerThreadMain, this}); + // Start the worker thread + m_workerThread.reset( + new std::thread{&VerilatedTrace::offloadWorkerThreadMain, this}); + } #endif } @@ -527,6 +538,21 @@ void VerilatedTrace::runCallbacks(const std::vector +void VerilatedTrace::runOffloadedCallbacks( + const std::vector& cbVec) { + // Fall back on sequential execution + for (const CallbackRecord& cbr : cbVec) { +#ifdef VL_THREADED + Buffer* traceBufferp = getTraceBuffer(); + cbr.m_dumpOffloadCb(cbr.m_userp, static_cast(traceBufferp)); + commitTraceBuffer(traceBufferp); +#else + VL_FATAL_MT(__FILE__, __LINE__, "", "Unreachable"); +#endif + } +} + template <> void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUDES(m_mutex) { // Not really VL_MT_SAFE but more VL_MT_UNSAFE_ONE. @@ -550,35 +576,47 @@ void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUD if (!preChangeDump()) return; } -#ifdef VL_TRACE_OFFLOAD - // Currently only incremental dumps run on the worker thread uint32_t* bufferp = nullptr; - if (VL_LIKELY(!m_fullDump)) { - // Get the offload buffer we are about to fill - bufferp = getOffloadBuffer(); - m_offloadBufferWritep = bufferp; - m_offloadBufferEndp = bufferp + m_offloadBufferSize; + if (offload()) { +#ifdef VL_THREADED + // Currently only incremental dumps run on the worker thread + if (VL_LIKELY(!m_fullDump)) { + // Get the offload buffer we are about to fill + bufferp = getOffloadBuffer(); + m_offloadBufferWritep = bufferp; + m_offloadBufferEndp = bufferp + m_offloadBufferSize; - // Tell worker to update time point - m_offloadBufferWritep[0] = VerilatedTraceOffloadCommand::TIME_CHANGE; - *reinterpret_cast(m_offloadBufferWritep + 1) = timeui; - m_offloadBufferWritep += 3; + // Tell worker to update time point + m_offloadBufferWritep[0] = VerilatedTraceOffloadCommand::TIME_CHANGE; + 
*reinterpret_cast(m_offloadBufferWritep + 1) = timeui; + m_offloadBufferWritep += 3; + } else { + // Update time point + flushBase(); + emitTimeChange(timeui); + } +#else + VL_FATAL_MT(__FILE__, __LINE__, "", "Unreachable"); +#endif } else { // Update time point - flushBase(); emitTimeChange(timeui); } -#else - // Update time point - emitTimeChange(timeui); -#endif // Run the callbacks if (VL_UNLIKELY(m_fullDump)) { m_fullDump = false; // No more need for next dump to be full - runCallbacks(m_fullCbs); + if (offload()) { + runOffloadedCallbacks(m_fullOffloadCbs); + } else { + runCallbacks(m_fullCbs); + } } else { - runCallbacks(m_chgCbs); + if (offload()) { + runOffloadedCallbacks(m_chgOffloadCbs); + } else { + runCallbacks(m_chgCbs); + } } for (uint32_t i = 0; i < m_cleanupCbs.size(); ++i) { @@ -586,8 +624,8 @@ void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUD cbr.m_cleanupCb(cbr.m_userp, self()); } -#ifdef VL_TRACE_OFFLOAD - if (VL_LIKELY(bufferp)) { +#ifdef VL_THREADED + if (offload() && VL_LIKELY(bufferp)) { // Mark end of the offload buffer we just filled *m_offloadBufferWritep++ = VerilatedTraceOffloadCommand::END; @@ -638,16 +676,32 @@ void VerilatedTrace::addInitCb(initCb_t cb, void* userp, template <> void VerilatedTrace::addFullCb(dumpCb_t cb, void* userp, VerilatedModel* modelp) VL_MT_SAFE { + assert(!offload()); addModel(modelp); addCallbackRecord(m_fullCbs, CallbackRecord{cb, userp}); } template <> +void VerilatedTrace::addFullCb(dumpOffloadCb_t cb, void* userp, + VerilatedModel* modelp) VL_MT_SAFE { + assert(offload()); + addModel(modelp); + addCallbackRecord(m_fullOffloadCbs, CallbackRecord{cb, userp}); +} +template <> void VerilatedTrace::addChgCb(dumpCb_t cb, void* userp, VerilatedModel* modelp) VL_MT_SAFE { + assert(!offload()); addModel(modelp); addCallbackRecord(m_chgCbs, CallbackRecord{cb, userp}); } template <> +void VerilatedTrace::addChgCb(dumpOffloadCb_t cb, void* userp, + VerilatedModel* modelp) VL_MT_SAFE { + assert(offload()); 
+ addModel(modelp); + addCallbackRecord(m_chgOffloadCbs, CallbackRecord{cb, userp}); +} +template <> void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* userp, VerilatedModel* modelp) VL_MT_SAFE { addModel(modelp); @@ -757,20 +811,10 @@ static inline void cvtQDataToStr(char* dstp, QData value) { // VerilatedTraceBuffer template <> // -VerilatedTraceBuffer::VerilatedTraceBuffer(VL_SUB_T& owner) - : m_owner{owner} { -#ifdef VL_TRACE_OFFLOAD - if (m_offloadBufferWritep) { - using This = VerilatedTraceBuffer*; - // Tack on the buffer address - static_assert(2 * sizeof(uint32_t) >= sizeof(This), - "This should be enough on all plafrorms"); - *m_offloadBufferWritep++ = VerilatedTraceOffloadCommand::TRACE_BUFFER; - *reinterpret_cast(m_offloadBufferWritep) = this; - m_offloadBufferWritep += 2; - } -#endif -} +VerilatedTraceBuffer::VerilatedTraceBuffer(Trace& owner) + : VL_BUF_T{owner} + , m_sigs_oldvalp{owner.m_sigs_oldvalp} + , m_sigs_enabledp{owner.m_sigs_enabledp} {} // These functions must write the new value back into the old value store, // and subsequently call the format specific emit* implementations. Note @@ -778,61 +822,81 @@ VerilatedTraceBuffer::VerilatedTraceBuffer(VL_SUB_T& owner) // the emit* functions can be inlined for performance. 
template <> // -void VerilatedTraceBuffer::fullBit(uint32_t* oldp, CData newval) { +void VerilatedTraceBuffer::fullBit(uint32_t* oldp, CData newval) { const uint32_t code = oldp - m_sigs_oldvalp; *oldp = newval; // Still copy even if not tracing so chg doesn't call full if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return; - self()->emitBit(code, newval); + emitBit(code, newval); } template <> -void VerilatedTraceBuffer::fullCData(uint32_t* oldp, CData newval, int bits) { +void VerilatedTraceBuffer::fullCData(uint32_t* oldp, CData newval, int bits) { const uint32_t code = oldp - m_sigs_oldvalp; *oldp = newval; // Still copy even if not tracing so chg doesn't call full if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return; - self()->emitCData(code, newval, bits); + emitCData(code, newval, bits); } template <> -void VerilatedTraceBuffer::fullSData(uint32_t* oldp, SData newval, int bits) { +void VerilatedTraceBuffer::fullSData(uint32_t* oldp, SData newval, int bits) { const uint32_t code = oldp - m_sigs_oldvalp; *oldp = newval; // Still copy even if not tracing so chg doesn't call full if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return; - self()->emitSData(code, newval, bits); + emitSData(code, newval, bits); } template <> -void VerilatedTraceBuffer::fullIData(uint32_t* oldp, IData newval, int bits) { +void VerilatedTraceBuffer::fullIData(uint32_t* oldp, IData newval, int bits) { const uint32_t code = oldp - m_sigs_oldvalp; *oldp = newval; // Still copy even if not tracing so chg doesn't call full if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return; - self()->emitIData(code, newval, bits); + emitIData(code, newval, bits); } template <> -void VerilatedTraceBuffer::fullQData(uint32_t* oldp, QData newval, int bits) { +void VerilatedTraceBuffer::fullQData(uint32_t* oldp, QData newval, int bits) { const uint32_t code = oldp - m_sigs_oldvalp; 
*reinterpret_cast(oldp) = newval; if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return; - self()->emitQData(code, newval, bits); + emitQData(code, newval, bits); } template <> -void VerilatedTraceBuffer::fullWData(uint32_t* oldp, const WData* newvalp, - int bits) { +void VerilatedTraceBuffer::fullWData(uint32_t* oldp, const WData* newvalp, int bits) { const uint32_t code = oldp - m_sigs_oldvalp; for (int i = 0; i < VL_WORDS_I(bits); ++i) oldp[i] = newvalp[i]; if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return; - self()->emitWData(code, newvalp, bits); + emitWData(code, newvalp, bits); } -template <> -void VerilatedTraceBuffer::fullDouble(uint32_t* oldp, double newval) { +template <> // +void VerilatedTraceBuffer::fullDouble(uint32_t* oldp, double newval) { const uint32_t code = oldp - m_sigs_oldvalp; *reinterpret_cast(oldp) = newval; if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return; // cppcheck-suppress invalidPointerCast - self()->emitDouble(code, newval); + emitDouble(code, newval); } +#ifdef VL_THREADED +//========================================================================= +// VerilatedTraceOffloadBuffer + +template <> // +VerilatedTraceOffloadBuffer::VerilatedTraceOffloadBuffer(VL_SUB_T& owner) + : VerilatedTraceBuffer{owner} + , m_offloadBufferWritep{owner.m_offloadBufferWritep} + , m_offloadBufferEndp{owner.m_offloadBufferEndp} { + if (m_offloadBufferWritep) { + using This = VerilatedTraceBuffer*; + // Tack on the buffer address + static_assert(2 * sizeof(uint32_t) >= sizeof(This), + "This should be enough on all plafrorms"); + *m_offloadBufferWritep++ = VerilatedTraceOffloadCommand::TRACE_BUFFER; + *reinterpret_cast(m_offloadBufferWritep) = static_cast(this); + m_offloadBufferWritep += 2; + } +} +#endif + #endif // VL_CPPCHECK diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index 9b01ea4ce..1f5595ed2 100644 --- 
a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -102,7 +102,8 @@ ssize_t VerilatedVcdFile::write(const char* bufp, ssize_t len) VL_MT_UNSAFE { //============================================================================= // Opening/Closing -VerilatedVcd::VerilatedVcd(VerilatedVcdFile* filep) { +VerilatedVcd::VerilatedVcd(VerilatedVcdFile* filep) + : VerilatedTrace{false} { // Not in header to avoid link issue if header is included without this .cpp file m_fileNewed = (filep == nullptr); m_filep = m_fileNewed ? new VerilatedVcdFile : filep; @@ -583,7 +584,8 @@ void VerilatedVcd::declDouble(uint32_t code, const char* name, bool array, int a //============================================================================= // Get/commit trace buffer -VerilatedVcdBuffer* VerilatedVcd::getTraceBuffer() { +VerilatedVcd::Buffer* VerilatedVcd::getTraceBuffer() { + VerilatedVcd::Buffer* const bufp = new Buffer{*this}; #ifdef VL_TRACE_PARALLEL // Note: This is called from VeriltedVcd::dump, which already holds the lock // If no buffer available, allocate a new one @@ -597,14 +599,16 @@ VerilatedVcdBuffer* VerilatedVcd::getTraceBuffer() { // Grab a buffer const auto pair = m_freeBuffers.back(); m_freeBuffers.pop_back(); - // Return the buffer - return new VerilatedVcdBuffer{*this, pair.first, pair.second}; -#else - return new VerilatedVcdBuffer{*this}; + // Initialize + bufp->m_writep = bufp->m_bufp = pair.first; + bufp->m_size = pair.second; + bufp->adjustGrowp(); #endif + // Return the buffer + return bufp; } -void VerilatedVcd::commitTraceBuffer(VerilatedVcdBuffer* bufp) { +void VerilatedVcd::commitTraceBuffer(VerilatedVcd::Buffer* bufp) { #ifdef VL_TRACE_PARALLEL // Note: This is called from VeriltedVcd::dump, which already holds the lock // Resize output buffer. 
Note, we use the full size of the trace buffer, as @@ -631,19 +635,6 @@ void VerilatedVcd::commitTraceBuffer(VerilatedVcdBuffer* bufp) { //============================================================================= // VerilatedVcdBuffer implementation -#ifdef VL_TRACE_PARALLEL -VerilatedVcdBuffer::VerilatedVcdBuffer(VerilatedVcd& owner, char* bufp, size_t size) - : VerilatedTraceBuffer{owner} - , m_writep{bufp} - , m_bufp{bufp} - , m_size{size} { - adjustGrowp(); -} -#else -VerilatedVcdBuffer::VerilatedVcdBuffer(VerilatedVcd& owner) - : VerilatedTraceBuffer{owner} {} -#endif - //============================================================================= // Trace rendering primitives diff --git a/include/verilated_vcd_c.h b/include/verilated_vcd_c.h index 0d83eb25d..2a6316d10 100644 --- a/include/verilated_vcd_c.h +++ b/include/verilated_vcd_c.h @@ -41,7 +41,7 @@ public: using Super = VerilatedTrace; private: - friend Buffer; // Give the buffer access to the private bits + friend VerilatedVcdBuffer; // Give the buffer access to the private bits //========================================================================= // VCD specific internals @@ -110,8 +110,8 @@ protected: virtual bool preChangeDump() override; // Trace buffer management - virtual VerilatedVcdBuffer* getTraceBuffer() override; - virtual void commitTraceBuffer(VerilatedVcdBuffer*) override; + virtual Buffer* getTraceBuffer() override; + virtual void commitTraceBuffer(Buffer*) override; public: //========================================================================= @@ -160,16 +160,20 @@ template <> void VerilatedVcd::Super::dumpvars(int level, const std::string& hie //============================================================================= // VerilatedVcdBuffer -class VerilatedVcdBuffer final : public VerilatedTraceBuffer { - // Give the trace file access to the private bits +class VerilatedVcdBuffer VL_NOT_FINAL { + // Give the trace file and sub-classes access to the private bits 
friend VerilatedVcd; friend VerilatedVcd::Super; + friend VerilatedVcd::Buffer; + friend VerilatedVcd::OffloadBuffer; + + VerilatedVcd& m_owner; // Trace file owning this buffer. Required by subclasses. #ifdef VL_TRACE_PARALLEL - char* m_writep; // Write pointer into m_bufp - char* m_bufp; // The beginning of the trace buffer - size_t m_size; // The size of the buffer at m_bufp - char* m_growp; // Resize limit pointer + char* m_writep = nullptr; // Write pointer into m_bufp + char* m_bufp = nullptr; // The beginning of the trace buffer + size_t m_size = 0; // The size of the buffer at m_bufp + char* m_growp = nullptr; // Resize limit pointer #else char* m_writep = m_owner.m_writep; // Write pointer into output buffer char* const m_wrFlushp = m_owner.m_wrFlushp; // Output buffer flush trigger location @@ -189,18 +193,13 @@ class VerilatedVcdBuffer final : public VerilatedTraceBuffername()); // possibly mangled name if (v3Global.opt.protectKeyProvided()) opts.push_back(" --protect-key " + v3Global.opt.protectKeyDefaulted()); - opts.push_back(" --hierarchical-child"); + opts.push_back(" --hierarchical-child " + cvtToStr(std::max(1, v3Global.opt.threads()))); const StrGParams gparamsStr = stringifyParams(gparams(), true); for (StrGParams::const_iterator paramIt = gparamsStr.begin(); paramIt != gparamsStr.end(); diff --git a/src/V3LinkCells.cpp b/src/V3LinkCells.cpp index 2645b521e..da4dabaa4 100644 --- a/src/V3LinkCells.cpp +++ b/src/V3LinkCells.cpp @@ -480,7 +480,7 @@ private: // mangled_name, BlockOptions const V3HierBlockOptSet& hierBlocks = v3Global.opt.hierBlocks(); const auto hierIt = vlstd::as_const(hierBlocks).find(v3Global.opt.topModule()); - UASSERT((hierIt != hierBlocks.end()) == v3Global.opt.hierChild(), + UASSERT((hierIt != hierBlocks.end()) == !!v3Global.opt.hierChild(), "information of the top module must exist if --hierarchical-child is set"); // Look at all modules, and store pointers to all module names for (AstNodeModule *nextp, *nodep = 
v3Global.rootp()->modulesp(); nodep; nodep = nextp) { diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 6daff79bc..e163278f8 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1124,7 +1124,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char const V3HierarchicalBlockOption opt(valp); m_hierBlocks.emplace(opt.mangledName(), opt); }); - DECL_OPTION("-hierarchical-child", OnOff, &m_hierChild); + DECL_OPTION("-hierarchical-child", Set, &m_hierChild); DECL_OPTION("-I", CbPartialMatch, [this, &optdir](const char* optp) { addIncDirUser(parseFileArg(optdir, optp)); }); diff --git a/src/V3Options.h b/src/V3Options.h index 2c795c844..41e508553 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -241,7 +241,6 @@ private: bool m_exe = false; // main switch: --exe bool m_flatten = false; // main switch: --flatten bool m_hierarchical = false; // main switch: --hierarchical - bool m_hierChild = false; // main switch: --hierarchical-child bool m_ignc = false; // main switch: --ignc bool m_lintOnly = false; // main switch: --lint-only bool m_gmake = false; // main switch: --make gmake @@ -288,6 +287,7 @@ private: int m_dumpTree = 0; // main switch: --dump-tree int m_expandLimit = 64; // main switch: --expand-limit int m_gateStmts = 100; // main switch: --gate-stmts + int m_hierChild = 0; // main switch: --hierarchical-child int m_ifDepth = 0; // main switch: --if-depth int m_inlineMult = 2000; // main switch: --inline-mult int m_instrCountDpi = 200; // main switch: --instr-count-dpi @@ -518,7 +518,9 @@ public: int traceMaxWidth() const { return m_traceMaxWidth; } int traceThreads() const { return m_traceThreads; } bool useTraceOffload() const { return trace() && traceFormat().fst() && traceThreads() > 1; } - bool useTraceParallel() const { return trace() && traceFormat().vcd() && threads() > 1; } + bool useTraceParallel() const { + return trace() && traceFormat().vcd() && threads() && (threads() > 1 || hierChild() > 1); + } unsigned 
vmTraceThreads() const { return useTraceParallel() ? threads() : useTraceOffload() ? 1 : 0; } @@ -605,7 +607,7 @@ public: } bool hierarchical() const { return m_hierarchical; } - bool hierChild() const { return m_hierChild; } + int hierChild() const { return m_hierChild; } bool hierTop() const { return !m_hierChild && !m_hierBlocks.empty(); } const V3HierBlockOptSet& hierBlocks() const { return m_hierBlocks; } // Directory to save .tree, .dot, .dat, .vpp for hierarchical block top diff --git a/src/V3Trace.cpp b/src/V3Trace.cpp index e7a445c1a..2d5551cfe 100644 --- a/src/V3Trace.cpp +++ b/src/V3Trace.cpp @@ -498,7 +498,9 @@ private: }; if (isTopFunc) { // Top functions - funcp->argTypes("void* voidSelf, " + v3Global.opt.traceClassBase() + "::Buffer* bufp"); + funcp->argTypes("void* voidSelf, " + v3Global.opt.traceClassBase() + + "::" + (v3Global.opt.useTraceOffload() ? "OffloadBuffer" : "Buffer") + + "* bufp"); addInitStr(voidSelfAssign(m_topModp)); addInitStr(symClassAssign()); // Add global activity check to change dump functions @@ -517,7 +519,9 @@ private: m_regFuncp->addStmtsp(new AstText(flp, ");\n", true)); } else { // Sub functions - funcp->argTypes(v3Global.opt.traceClassBase() + "::Buffer* bufp"); + funcp->argTypes(v3Global.opt.traceClassBase() + + "::" + +(v3Global.opt.useTraceOffload() ? "OffloadBuffer" : "Buffer") + + "* bufp"); // Setup base references. Note in rare occasions we can end up with an empty trace // sub function, hence the VL_ATTR_UNUSED attributes. 
if (full) { diff --git a/test_regress/t/t_flag_hier1_bad.pl b/test_regress/t/t_flag_hier1_bad.pl index f41278e68..8a3475533 100755 --- a/test_regress/t/t_flag_hier1_bad.pl +++ b/test_regress/t/t_flag_hier1_bad.pl @@ -14,7 +14,7 @@ top_filename("t/t_hier_block.v"); lint( fails => 1, verilator_flags2 => ['--hierarchical', - '--hierarchical-child', + '--hierarchical-child 1', 'modName', ], expect_filename => $Self->{golden_filename}, From 7ef033f8765ef5c2c5b43c0b49cfa049424252fb Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 19 Jul 2022 11:23:27 +0100 Subject: [PATCH 038/119] Ensure generated Makefile for hierarchical build is stable. Avoid iterating unordered_map. Iterate sorted blocks instead. --- src/V3EmitMk.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index 800cf589c..188047c3c 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -369,13 +369,13 @@ class EmitMkHierVerilation final { // Rules to process hierarchical blocks of.puts("\n# Verilate hierarchical blocks\n"); - for (V3HierBlockPlan::const_iterator it = m_planp->begin(); it != m_planp->end(); ++it) { - const string prefix = it->second->hierPrefix(); - const string argsFile = it->second->commandArgsFileName(false); - of.puts(it->second->hierGenerated(true)); + for (const V3HierBlock* const blockp : m_planp->hierBlocksSorted()) { + const string prefix = blockp->hierPrefix(); + const string argsFile = blockp->commandArgsFileName(false); + of.puts(blockp->hierGenerated(true)); of.puts(": $(VM_HIER_INPUT_FILES) $(VM_HIER_VERILOG_LIBS) "); of.puts(V3Os::filenameNonDir(argsFile) + " "); - const V3HierBlock::HierBlockSet& children = it->second->children(); + const V3HierBlock::HierBlockSet& children = blockp->children(); for (V3HierBlock::HierBlockSet::const_iterator child = children.begin(); child != children.end(); ++child) { of.puts((*child)->hierWrapper(true) + " "); @@ -384,16 +384,16 @@ class EmitMkHierVerilation final { 
emitLaunchVerilator(of, argsFile); // Rule to build lib*.a - of.puts(it->second->hierLib(true)); + of.puts(blockp->hierLib(true)); of.puts(": "); - of.puts(it->second->hierMk(true)); + of.puts(blockp->hierMk(true)); of.puts(" "); for (V3HierBlock::HierBlockSet::const_iterator child = children.begin(); child != children.end(); ++child) { of.puts((*child)->hierLib(true)); of.puts(" "); } - of.puts("\n\t$(MAKE) -f " + it->second->hierMk(false) + " -C " + prefix); + of.puts("\n\t$(MAKE) -f " + blockp->hierMk(false) + " -C " + prefix); of.puts(" VM_PREFIX=" + prefix); of.puts("\n\n"); } From af70db88db4773f953e23f4aaab745b25e4ed7f0 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 19 Jul 2022 11:27:18 +0100 Subject: [PATCH 039/119] Remove unused method --- src/V3HierBlock.cpp | 4 ---- src/V3HierBlock.h | 1 - 2 files changed, 5 deletions(-) diff --git a/src/V3HierBlock.cpp b/src/V3HierBlock.cpp index aa80e2ee3..22006fa23 100644 --- a/src/V3HierBlock.cpp +++ b/src/V3HierBlock.cpp @@ -306,10 +306,6 @@ public: //###################################################################### -bool V3HierBlockPlan::isHierBlock(const AstNodeModule* modp) const { - return m_blocks.find(modp) != m_blocks.end(); -} - void V3HierBlockPlan::add(const AstNodeModule* modp, const std::vector& gparams) { const iterator it = m_blocks.find(modp); if (it == m_blocks.end()) { diff --git a/src/V3HierBlock.h b/src/V3HierBlock.h index 7a01dd1a2..c247dce52 100644 --- a/src/V3HierBlock.h +++ b/src/V3HierBlock.h @@ -109,7 +109,6 @@ public: using HierVector = std::vector; VL_DEBUG_FUNC; // Declare debug() - bool isHierBlock(const AstNodeModule* modp) const; void add(const AstNodeModule* modp, const std::vector& gparams); void registerUsage(const AstNodeModule* parentp, const AstNodeModule* childp); From b55ee79d8627b44256a9dab4ab4662c21371cedd Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 19 Jul 2022 12:36:21 +0100 Subject: [PATCH 040/119] Fix typo --- include/verilated_trace_imp.h | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index 9fd3f2421..2cd1038e3 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -264,7 +264,7 @@ template <> void VerilatedTrace::closeBase() { } template <> void VerilatedTrace::flushBase() { -#ifdef VL_THREDED +#ifdef VL_THREADED if (offload()) { // Hand an empty buffer to the worker thread uint32_t* const bufferp = getOffloadBuffer(); From f8b7981be4905accc3f9851ba01f9eed6c2fc852 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 19 Jul 2022 13:48:03 +0100 Subject: [PATCH 041/119] Make use of FST writer thread switchable at run-time. Always build the FST library with -DFST_WRITER_PARALLEL, iff VL_THREADED. This supports run-time enablement of the FST writer thread, and has no measurable performance impact on single threaded tracing but simplifies the library build. Note: the actual choice of using the fst writer thread is still compile time, but can now be made run-time easily.
--- include/verilated_fst_c.cpp | 6 ++---- include/verilated_fst_c.h | 8 ++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp index e6ea9f99f..8c9523de5 100644 --- a/include/verilated_fst_c.cpp +++ b/include/verilated_fst_c.cpp @@ -28,7 +28,7 @@ #include "verilated_fst_c.h" // GTKWave configuration -#ifdef VL_TRACE_FST_WRITER_THREAD +#ifdef VL_THREADED # define HAVE_LIBPTHREAD # define FST_WRITER_PARALLEL #endif @@ -116,9 +116,7 @@ void VerilatedFst::open(const char* filename) VL_MT_SAFE_EXCLUDES(m_mutex) { m_fst = fstWriterCreate(filename, 1); fstWriterSetPackType(m_fst, FST_WR_PT_LZ4); fstWriterSetTimescaleFromString(m_fst, timeResStr().c_str()); // lintok-begin-on-ref -#ifdef VL_TRACE_FST_WRITER_THREAD - fstWriterSetParallelMode(m_fst, 1); -#endif + if (useFstWriterThread()) fstWriterSetParallelMode(m_fst, 1); fullDump(true); // First dump must be full for fst m_curScope.clear(); diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h index 0587527f6..b391c8de6 100644 --- a/include/verilated_fst_c.h +++ b/include/verilated_fst_c.h @@ -60,6 +60,14 @@ private: void declare(uint32_t code, const char* name, int dtypenum, fstVarDir vardir, fstVarType vartype, bool array, int arraynum, bool bussed, int msb, int lsb); + static constexpr bool useFstWriterThread() { +#ifdef VL_TRACE_FST_WRITER_THREAD + return true; +#else + return false; +#endif + } + protected: //========================================================================= // Implementation of VerilatedTrace interface From 3a002b6cf25f27f97417d623ee0c55014fa6aae3 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 19 Jul 2022 13:58:18 +0100 Subject: [PATCH 042/119] Remove VerilatedVcd::m_evcd and related dead code. The legacy code that was using this was removed earlier, and m_evcd was constant false, so removed. 
--- include/verilated_vcd_c.cpp | 50 +++++++++++++------------------------ include/verilated_vcd_c.h | 1 - 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index 1f5595ed2..ee3a43583 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -261,11 +261,6 @@ void VerilatedVcd::close() VL_MT_SAFE_EXCLUDES(m_mutex) { // This function is on the flush() call path const VerilatedLockGuard lock{m_mutex}; if (!isOpen()) return; - if (m_evcd) { - printStr("$vcdclose "); - printQuad(timeLastDump()); - printStr(" $end\n"); - } closePrev(); // closePrev() called Super::flush(), so we just // need to shut down the tracing thread here. @@ -515,38 +510,29 @@ void VerilatedVcd::declare(uint32_t code, const char* name, const char* wirep, b // Print reference std::string decl = "$var "; - if (m_evcd) { - decl += "port"; - } else { - decl += wirep; // usually "wire" - } + decl += wirep; // usually "wire" constexpr size_t bufsize = 1000; char buf[bufsize]; VL_SNPRINTF(buf, bufsize, " %2d ", bits); decl += buf; - if (m_evcd) { - VL_SNPRINTF(buf, bufsize, "<%u", code); - decl += buf; - } else { - // Add string code to decl - char* const endp = writeCode(buf, code); - *endp = '\0'; - decl += buf; - // Build suffix array entry - char* const entryp = &m_suffixes[code * VL_TRACE_SUFFIX_ENTRY_SIZE]; - const size_t length = endp - buf; - assert(length <= VL_TRACE_MAX_VCD_CODE_SIZE); - // 1 bit values don't have a ' ' separator between value and string code - const bool isBit = bits == 1; - entryp[0] = ' '; // Separator - // Use memcpy as we checked size above, and strcpy is flagged unsafe - std::memcpy(entryp + !isBit, buf, - std::strlen(buf)); // Code (overwrite separator if isBit) - entryp[length + !isBit] = '\n'; // Replace '\0' with line termination '\n' - // Set length of suffix (used to increment write pointer) - entryp[VL_TRACE_SUFFIX_ENTRY_SIZE - 1] = !isBit + length + 1; - } + // Add string 
code to decl + char* const endp = writeCode(buf, code); + *endp = '\0'; + decl += buf; + // Build suffix array entry + char* const entryp = &m_suffixes[code * VL_TRACE_SUFFIX_ENTRY_SIZE]; + const size_t length = endp - buf; + assert(length <= VL_TRACE_MAX_VCD_CODE_SIZE); + // 1 bit values don't have a ' ' separator between value and string code + const bool isBit = bits == 1; + entryp[0] = ' '; // Separator + // Use memcpy as we checked size above, and strcpy is flagged unsafe + std::memcpy(entryp + !isBit, buf, + std::strlen(buf)); // Code (overwrite separator if isBit) + entryp[length + !isBit] = '\n'; // Replace '\0' with line termination '\n' + // Set length of suffix (used to increment write pointer) + entryp[VL_TRACE_SUFFIX_ENTRY_SIZE - 1] = !isBit + length + 1; decl += " "; decl += basename; if (array) { diff --git a/include/verilated_vcd_c.h b/include/verilated_vcd_c.h index 2a6316d10..bba0f4253 100644 --- a/include/verilated_vcd_c.h +++ b/include/verilated_vcd_c.h @@ -49,7 +49,6 @@ private: VerilatedVcdFile* m_filep; // File we're writing to bool m_fileNewed; // m_filep needs destruction bool m_isOpen = false; // True indicates open file - bool m_evcd = false; // True for evcd format std::string m_filename; // Filename we're writing to (if open) uint64_t m_rolloverMB = 0; // MB of file size to rollover at int m_modDepth = 0; // Depth of module hierarchy From efb5caad22a93326df556c5e821457155193bd64 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 19 Jul 2022 14:16:08 +0100 Subject: [PATCH 043/119] Improve robustness of trace configuration Always fail if adding a model to a trace file that has already executed a dump. We used to do this before as well, though in a less robust way. We will be relying on this property more in the future, so improve the check. 
--- include/verilated_trace.h | 4 ++-- include/verilated_trace_imp.h | 27 +++++++++++++-------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/verilated_trace.h b/include/verilated_trace.h index 1c83d3224..895cecd26 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -208,7 +208,6 @@ protected: uint32_t* m_sigs_oldvalp = nullptr; // Previous value store EData* m_sigs_enabledp = nullptr; // Bit vector of enabled codes (nullptr = all on) private: - uint64_t m_timeLastDump = 0; // Last time we did a dump std::vector m_sigs_enabledVec; // Staging for m_sigs_enabledp std::vector m_initCbs; // Routines to initialize tracing std::vector m_fullCbs; // Routines to perform full dump @@ -226,6 +225,8 @@ private: char m_scopeEscape = '.'; double m_timeRes = 1e-9; // Time resolution (ns/ms etc) double m_timeUnit = 1e-0; // Time units (ns/ms etc) + uint64_t m_timeLastDump = 0; // Last time we did a dump + bool m_didSomeDump = false; // Did at least one dump (i.e.: m_timeLastDump is valid) void addModel(VerilatedModel*) VL_MT_SAFE_EXCLUDES(m_mutex); @@ -290,7 +291,6 @@ protected: uint32_t numSignals() const { return m_numSignals; } uint32_t maxBits() const { return m_maxBits; } void fullDump(bool value) { m_fullDump = value; } - uint64_t timeLastDump() { return m_timeLastDump; } double timeRes() const { return m_timeRes; } double timeUnit() const { return m_timeUnit; } diff --git a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index 2cd1038e3..c3eebc2dd 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -559,13 +559,14 @@ void VerilatedTrace::dump(uint64_t timeui) VL_MT_SAFE_EXCLUD // This does get the mutex, but if multiple threads are trying to dump // chances are the data being dumped will have other problems const VerilatedLockGuard lock{m_mutex}; - if (VL_UNCOVERABLE(m_timeLastDump && timeui <= m_timeLastDump)) { // LCOV_EXCL_START + if (VL_UNCOVERABLE(m_didSomeDump && timeui 
<= m_timeLastDump)) { // LCOV_EXCL_START VL_PRINTF_MT("%%Warning: previous dump at t=%" PRIu64 ", requesting t=%" PRIu64 ", dump call ignored\n", m_timeLastDump, timeui); return; } // LCOV_EXCL_STOP m_timeLastDump = timeui; + m_didSomeDump = true; Verilated::quiesce(); @@ -646,11 +647,14 @@ void VerilatedTrace::addModel(VerilatedModel* modelp) VL_MT_SAFE_EXCLUDES(m_mutex) { const VerilatedLockGuard lock{m_mutex}; VerilatedContext* const contextp = modelp->contextp(); - if (m_contextp && contextp != m_contextp) { - VL_FATAL_MT( - __FILE__, __LINE__, "", - "A trace file instance can only handle models from the same simulation context"); + if (VL_UNCOVERABLE(m_contextp && contextp != m_contextp)) { // LCOV_EXCL_START + VL_FATAL_MT(__FILE__, __LINE__, "", + "A trace file instance can only handle models from the same context"); } + if (VL_UNCOVERABLE(m_didSomeDump)) { + VL_FATAL_MT(__FILE__, __LINE__, "", + "Cannot add models to a trace file if 'dump' has already been called"); + } // LCOV_EXCL_STOP m_contextp = contextp; } @@ -659,11 +663,6 @@ void VerilatedTrace::addCallbackRecord(std::vector::addInitCb(initCb_t cb, void* userp, template <> void VerilatedTrace::addFullCb(dumpCb_t cb, void* userp, VerilatedModel* modelp) VL_MT_SAFE { - assert(!offload()); addModel(modelp); + assert(!offload()); addCallbackRecord(m_fullCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addFullCb(dumpOffloadCb_t cb, void* userp, VerilatedModel* modelp) VL_MT_SAFE { - assert(offload()); addModel(modelp); + assert(offload()); addCallbackRecord(m_fullOffloadCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addChgCb(dumpCb_t cb, void* userp, VerilatedModel* modelp) VL_MT_SAFE { - assert(!offload()); addModel(modelp); + assert(!offload()); addCallbackRecord(m_chgCbs, CallbackRecord{cb, userp}); } template <> void VerilatedTrace::addChgCb(dumpOffloadCb_t cb, void* userp, VerilatedModel* modelp) VL_MT_SAFE { - assert(offload()); addModel(modelp); + 
assert(offload()); addCallbackRecord(m_chgOffloadCbs, CallbackRecord{cb, userp}); } template <> From a4ed3c20864b5161926239edd060a14ccd198c70 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 19 Jul 2022 17:06:26 +0100 Subject: [PATCH 044/119] Make parallel tracing switchable at run-time --- include/verilated_trace.h | 10 ++- include/verilated_trace_imp.h | 80 +++++++++---------- include/verilated_vcd_c.cpp | 140 ++++++++++++++++++---------------- include/verilated_vcd_c.h | 24 +++--- 4 files changed, 138 insertions(+), 116 deletions(-) diff --git a/include/verilated_trace.h b/include/verilated_trace.h index 895cecd26..e5e80904f 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -182,7 +182,7 @@ private: const bool m_offload; // Whether to use the offload thread (ignored if !VL_THREADED) -#ifdef VL_TRACE_PARALLEL +#ifdef VL_THREADED struct ParallelWorkerData { const dumpCb_t m_cb; // The callback void* const m_userp; // The use pointer to pass to the callback @@ -317,6 +317,14 @@ protected: static constexpr bool offload() { return false; } #endif + inline bool parallel() const { +#ifdef VL_TRACE_PARALLEL + return true; +#else + return false; +#endif + } + //========================================================================= // Virtual functions to be provided by the format specific implementation diff --git a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index c3eebc2dd..aed3a09a4 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -26,7 +26,7 @@ #include "verilated_intrinsics.h" #include "verilated_trace.h" -#ifdef VL_TRACE_PARALLEL +#ifdef VL_THREADED # include "verilated_threads.h" # include #endif @@ -462,7 +462,7 @@ void VerilatedTrace::dumpvars(int level, const std::string& } } -#ifdef VL_TRACE_PARALLEL +#ifdef VL_THREADED template <> // void VerilatedTrace::parallelWorkerTask(void* datap, bool) { ParallelWorkerData* const wdp = reinterpret_cast(datap); @@ -490,45 +490,47 @@ template 
<> VL_ATTR_NOINLINE void VerilatedTrace::ParallelWo template <> void VerilatedTrace::runCallbacks(const std::vector& cbVec) { -#ifdef VL_TRACE_PARALLEL - // If tracing in parallel, dispatch to the thread pool - VlThreadPool* threadPoolp = static_cast(m_contextp->threadPoolp()); - // List of work items for thread (std::list, as ParallelWorkerData is not movable) - std::list workerData; - // We use the whole pool + the main thread - const unsigned threads = threadPoolp->numThreads() + 1; - // Main thread executes all jobs with index % threads == 0 - std::vector mainThreadWorkerData; - // Enuque all the jobs - for (unsigned i = 0; i < cbVec.size(); ++i) { - const CallbackRecord& cbr = cbVec[i]; - // Always get the trace buffer on the main thread - Buffer* const bufp = getTraceBuffer(); - // Create new work item - workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp); - // Grab the new work item - ParallelWorkerData* const itemp = &workerData.back(); - // Enqueue task to thread pool, or main thread - if (unsigned rem = i % threads) { - threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp); - } else { - mainThreadWorkerData.push_back(itemp); +#ifdef VL_THREADED + if (parallel()) { + // If tracing in parallel, dispatch to the thread pool + VlThreadPool* threadPoolp = static_cast(m_contextp->threadPoolp()); + // List of work items for thread (std::list, as ParallelWorkerData is not movable) + std::list workerData; + // We use the whole pool + the main thread + const unsigned threads = threadPoolp->numThreads() + 1; + // Main thread executes all jobs with index % threads == 0 + std::vector mainThreadWorkerData; + // Enuque all the jobs + for (unsigned i = 0; i < cbVec.size(); ++i) { + const CallbackRecord& cbr = cbVec[i]; + // Always get the trace buffer on the main thread + Buffer* const bufp = getTraceBuffer(); + // Create new work item + workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp); + // Grab the new work item + ParallelWorkerData* const itemp 
= &workerData.back(); + // Enqueue task to thread pool, or main thread + if (unsigned rem = i % threads) { + threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp); + } else { + mainThreadWorkerData.push_back(itemp); + } + } + // Execute main thead jobs + for (ParallelWorkerData* const itemp : mainThreadWorkerData) { + parallelWorkerTask(itemp, false); + } + // Commit all trace buffers in order + for (ParallelWorkerData& item : workerData) { + // Wait until ready + item.wait(); + // Commit the buffer + commitTraceBuffer(item.m_bufp); } - } - // Execute main thead jobs - for (ParallelWorkerData* const itemp : mainThreadWorkerData) { - parallelWorkerTask(itemp, false); - } - // Commit all trace buffers in order - for (ParallelWorkerData& item : workerData) { - // Wait until ready - item.wait(); - // Commit the buffer - commitTraceBuffer(item.m_bufp); - } - // Done - return; + // Done + return; + } #endif // Fall back on sequential execution for (const CallbackRecord& cbr : cbVec) { diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index ee3a43583..e5ce780cf 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -230,9 +230,11 @@ VerilatedVcd::~VerilatedVcd() { if (m_wrBufp) VL_DO_CLEAR(delete[] m_wrBufp, m_wrBufp = nullptr); deleteNameMap(); if (m_filep && m_fileNewed) VL_DO_CLEAR(delete m_filep, m_filep = nullptr); -#ifdef VL_TRACE_PARALLEL - assert(m_numBuffers == m_freeBuffers.size()); - for (auto& pair : m_freeBuffers) VL_DO_CLEAR(delete[] pair.first, pair.first = nullptr); +#ifdef VL_THREADED + if (parallel()) { + assert(m_numBuffers == m_freeBuffers.size()); + for (auto& pair : m_freeBuffers) VL_DO_CLEAR(delete[] pair.first, pair.first = nullptr); + } #endif } @@ -572,49 +574,55 @@ void VerilatedVcd::declDouble(uint32_t code, const char* name, bool array, int a VerilatedVcd::Buffer* VerilatedVcd::getTraceBuffer() { VerilatedVcd::Buffer* const bufp = new Buffer{*this}; -#ifdef VL_TRACE_PARALLEL - // Note: 
This is called from VeriltedVcd::dump, which already holds the lock - // If no buffer available, allocate a new one - if (m_freeBuffers.empty()) { - constexpr size_t pageSize = 4096; - // 4 * m_maxSignalBytes, so we can reserve 2 * m_maxSignalBytes at the end for safety - size_t startingSize = roundUpToMultipleOf(4 * m_maxSignalBytes); - m_freeBuffers.emplace_back(new char[startingSize], startingSize); - ++m_numBuffers; +#ifdef VL_THREADED + if (parallel()) { + // Note: This is called from VeriltedVcd::dump, which already holds the lock + // If no buffer available, allocate a new one + if (m_freeBuffers.empty()) { + constexpr size_t pageSize = 4096; + // 4 * m_maxSignalBytes, so we can reserve 2 * m_maxSignalBytes at the end for safety + size_t startingSize = roundUpToMultipleOf(4 * m_maxSignalBytes); + m_freeBuffers.emplace_back(new char[startingSize], startingSize); + ++m_numBuffers; + } + // Grab a buffer + const auto pair = m_freeBuffers.back(); + m_freeBuffers.pop_back(); + // Initialize + bufp->m_writep = bufp->m_bufp = pair.first; + bufp->m_size = pair.second; + bufp->adjustGrowp(); } - // Grab a buffer - const auto pair = m_freeBuffers.back(); - m_freeBuffers.pop_back(); - // Initialize - bufp->m_writep = bufp->m_bufp = pair.first; - bufp->m_size = pair.second; - bufp->adjustGrowp(); #endif // Return the buffer return bufp; } void VerilatedVcd::commitTraceBuffer(VerilatedVcd::Buffer* bufp) { -#ifdef VL_TRACE_PARALLEL - // Note: This is called from VeriltedVcd::dump, which already holds the lock - // Resize output buffer. Note, we use the full size of the trace buffer, as - // this is a lot more stable than the actual occupancy of the trace buffer. - // This helps us to avoid re-allocations due to small size changes. 
- bufferResize(bufp->m_size); - // Compute occupancy of buffer - const size_t usedSize = bufp->m_writep - bufp->m_bufp; - // Copy to output buffer - std::memcpy(m_writep, bufp->m_bufp, usedSize); - // Adjust write pointer - m_writep += usedSize; - // Flush if necessary - bufferCheck(); - // Put buffer back on free list - m_freeBuffers.emplace_back(bufp->m_bufp, bufp->m_size); + if (parallel()) { +#if VL_THREADED + // Note: This is called from VeriltedVcd::dump, which already holds the lock + // Resize output buffer. Note, we use the full size of the trace buffer, as + // this is a lot more stable than the actual occupancy of the trace buffer. + // This helps us to avoid re-allocations due to small size changes. + bufferResize(bufp->m_size); + // Compute occupancy of buffer + const size_t usedSize = bufp->m_writep - bufp->m_bufp; + // Copy to output buffer + std::memcpy(m_writep, bufp->m_bufp, usedSize); + // Adjust write pointer + m_writep += usedSize; + // Flush if necessary + bufferCheck(); + // Put buffer back on free list + m_freeBuffers.emplace_back(bufp->m_bufp, bufp->m_size); #else - // Needs adjusting for emitTimeChange - m_writep = bufp->m_writep; + VL_FATAL_MT(__FILE__, __LINE__, "", "Unreachable"); #endif + } else { + // Needs adjusting for emitTimeChange + m_writep = bufp->m_writep; + } delete bufp; } @@ -656,35 +664,39 @@ void VerilatedVcdBuffer::finishLine(uint32_t code, char* writep) { // suffix, which was stored in the last byte of the suffix buffer entry. 
m_writep = writep + suffixp[VL_TRACE_SUFFIX_ENTRY_SIZE - 1]; -#ifdef VL_TRACE_PARALLEL - // Double the size of the buffer if necessary - if (VL_UNLIKELY(m_writep >= m_growp)) { - // Compute occupied size of current buffer - const size_t usedSize = m_writep - m_bufp; - // We are always doubling the size - m_size *= 2; - // Allocate the new buffer - char* const newBufp = new char[m_size]; - // Copy from current buffer to new buffer - std::memcpy(newBufp, m_bufp, usedSize); - // Delete current buffer - delete[] m_bufp; - // Make new buffer the current buffer - m_bufp = newBufp; - // Adjust write pointer - m_writep = m_bufp + usedSize; - // Adjust resize limit - adjustGrowp(); - } + if (m_owner.parallel()) { +#ifdef VL_THREADED + // Double the size of the buffer if necessary + if (VL_UNLIKELY(m_writep >= m_growp)) { + // Compute occupied size of current buffer + const size_t usedSize = m_writep - m_bufp; + // We are always doubling the size + m_size *= 2; + // Allocate the new buffer + char* const newBufp = new char[m_size]; + // Copy from current buffer to new buffer + std::memcpy(newBufp, m_bufp, usedSize); + // Delete current buffer + delete[] m_bufp; + // Make new buffer the current buffer + m_bufp = newBufp; + // Adjust write pointer + m_writep = m_bufp + usedSize; + // Adjust resize limit + adjustGrowp(); + } #else - // Flush the write buffer if there's not enough space left for new information - // We only call this once per vector, so we need enough slop for a very wide "b###" line - if (VL_UNLIKELY(m_writep > m_wrFlushp)) { - m_owner.m_writep = m_writep; - m_owner.bufferFlush(); - m_writep = m_owner.m_writep; - } + VL_FATAL_MT(__FILE__, __LINE__, "", "Unreachable"); #endif + } else { + // Flush the write buffer if there's not enough space left for new information + // We only call this once per vector, so we need enough slop for a very wide "b###" line + if (VL_UNLIKELY(m_writep > m_wrFlushp)) { + m_owner.m_writep = m_writep; + m_owner.bufferFlush(); + 
m_writep = m_owner.m_writep; + } + } } //============================================================================= diff --git a/include/verilated_vcd_c.h b/include/verilated_vcd_c.h index bba0f4253..853e976e7 100644 --- a/include/verilated_vcd_c.h +++ b/include/verilated_vcd_c.h @@ -65,7 +65,7 @@ private: using NameMap = std::map; NameMap* m_namemapp = nullptr; // List of names for the header -#ifdef VL_TRACE_PARALLEL +#ifdef VL_THREADED // Vector of free trace buffers as (pointer, size) pairs. std::vector> m_freeBuffers; size_t m_numBuffers = 0; // Number of trace buffers allocated @@ -168,30 +168,30 @@ class VerilatedVcdBuffer VL_NOT_FINAL { VerilatedVcd& m_owner; // Trace file owning this buffer. Required by subclasses. -#ifdef VL_TRACE_PARALLEL - char* m_writep = nullptr; // Write pointer into m_bufp - char* m_bufp = nullptr; // The beginning of the trace buffer - size_t m_size = 0; // The size of the buffer at m_bufp - char* m_growp = nullptr; // Resize limit pointer -#else - char* m_writep = m_owner.m_writep; // Write pointer into output buffer - char* const m_wrFlushp = m_owner.m_wrFlushp; // Output buffer flush trigger location -#endif + // Write pointer into output buffer (in parallel mode, this is set up in 'getTraceBuffer') + char* m_writep = m_owner.parallel() ? nullptr : m_owner.m_writep; + // Output buffer flush trigger location (only used when not parallel) + char* const m_wrFlushp = m_owner.parallel() ? 
nullptr : m_owner.m_wrFlushp; // VCD line end string codes + metadata const char* const m_suffixes = m_owner.m_suffixes.data(); // The maximum number of bytes a single signal can emit const size_t m_maxSignalBytes = m_owner.m_maxSignalBytes; - void finishLine(uint32_t code, char* writep); +#ifdef VL_THREADED + // Additional data for parallel tracing only + char* m_bufp = nullptr; // The beginning of the trace buffer + size_t m_size = 0; // The size of the buffer at m_bufp + char* m_growp = nullptr; // Resize limit pointer -#ifdef VL_TRACE_PARALLEL void adjustGrowp() { m_growp = (m_bufp + m_size) - (2 * m_maxSignalBytes); assert(m_growp >= m_bufp + m_maxSignalBytes); } #endif + void finishLine(uint32_t code, char* writep); + // CONSTRUCTOR explicit VerilatedVcdBuffer(VerilatedVcd& owner) : m_owner{owner} {} From 1d400dd98c21e9672fc17b43bdb49ef3b25721ad Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 20 Jul 2022 11:27:10 +0100 Subject: [PATCH 045/119] Configure tracing at run-time, instead of compile time (#3504) All remaining use of conditional compilation in the tracing implementation of the run-time library are replaced with the use of VerilatedModel::traceConfig, and is now done at run-time. 
--- include/verilated.cpp | 4 ++ include/verilated.h | 8 ++++ include/verilated.mk.in | 20 --------- include/verilated_fst_c.cpp | 22 +++++----- include/verilated_fst_c.h | 13 +++--- include/verilated_trace.h | 68 +++++++++++++++---------------- include/verilated_trace_imp.h | 76 +++++++++++++++++++++-------------- include/verilated_vcd_c.cpp | 3 +- include/verilated_vcd_c.h | 3 ++ src/V3EmitCMake.cpp | 6 --- src/V3EmitCModel.cpp | 18 ++++++++- src/V3EmitMk.cpp | 9 ----- src/V3Options.h | 1 + src/V3Trace.cpp | 3 +- verilator-config.cmake.in | 11 ----- 15 files changed, 128 insertions(+), 137 deletions(-) diff --git a/include/verilated.cpp b/include/verilated.cpp index d5821b8f7..74aaf3fbd 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -72,6 +72,8 @@ #endif // clang-format on +#include "verilated_trace.h" + // Max characters in static char string for VL_VALUE_STRING constexpr unsigned VL_VALUE_STRING_MAX_WIDTH = 8192; @@ -2914,6 +2916,8 @@ void VerilatedImp::versionDump() VL_MT_SAFE { VerilatedModel::VerilatedModel(VerilatedContext& context) : m_context{context} {} +std::unique_ptr VerilatedModel::traceConfig() const { return nullptr; } + //=========================================================================== // VerilatedModule:: Methods diff --git a/include/verilated.h b/include/verilated.h index ebb1990e8..8943fc523 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -91,6 +91,8 @@ class VerilatedFstC; class VerilatedFstSc; class VerilatedScope; class VerilatedScopeNameMap; +template class VerilatedTrace; +class VerilatedTraceConfig; class VerilatedVar; class VerilatedVarNameMap; class VerilatedVcd; @@ -278,6 +280,12 @@ public: virtual const char* modelName() const = 0; /// Returns the thread level parallelism, this model was Verilated with. Always 1 or higher. 
virtual unsigned threads() const = 0; + +private: + // The following are for use by Verilator internals only + template friend class VerilatedTrace; + // Run-time trace configuration requested by this model + virtual std::unique_ptr traceConfig() const; }; //========================================================================= diff --git a/include/verilated.mk.in b/include/verilated.mk.in index 34e975bcc..03a6ef35d 100644 --- a/include/verilated.mk.in +++ b/include/verilated.mk.in @@ -142,26 +142,6 @@ ifneq ($(VM_THREADS),0) endif endif -ifneq ($(VM_TRACE_THREADS),0) - ifneq ($(VM_TRACE_THREADS),) - ifeq ($(findstring -DVL_THREADED,$(CPPFLAGS)),) - $(error VM_TRACE_THREADS requires VM_THREADS) - endif - CPPFLAGS += -DVL_TRACE_THREADED - VK_C11=1 - VK_LIBS_THREADED=1 - endif -endif - - -ifneq ($(VM_TRACE_FST_WRITER_THREAD),0) - ifneq ($(VM_TRACE_FST_WRITER_THREAD),) - CPPFLAGS += -DVL_TRACE_FST_WRITER_THREAD - VK_C11=1 - VK_LIBS_THREADED=1 - endif -endif - ifneq ($(VK_C11),0) ifneq ($(VK_C11),) # Need C++11 at least, so always default to newest diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp index 8c9523de5..69a71c5d9 100644 --- a/include/verilated_fst_c.cpp +++ b/include/verilated_fst_c.cpp @@ -93,17 +93,7 @@ static_assert(static_cast(FST_ST_VCD_PROGRAM) == static_cast(VLT_TRACE // VerilatedFst VerilatedFst::VerilatedFst(void* fst) - : -#ifdef VL_TRACE_OFFLOAD - VerilatedTrace { - true -} -#else - VerilatedTrace { - false -} -#endif -, m_fst{fst} {} + : m_fst{fst} {} VerilatedFst::~VerilatedFst() { if (m_fst) fstWriterClose(m_fst); @@ -116,7 +106,7 @@ void VerilatedFst::open(const char* filename) VL_MT_SAFE_EXCLUDES(m_mutex) { m_fst = fstWriterCreate(filename, 1); fstWriterSetPackType(m_fst, FST_WR_PT_LZ4); fstWriterSetTimescaleFromString(m_fst, timeResStr().c_str()); // lintok-begin-on-ref - if (useFstWriterThread()) fstWriterSetParallelMode(m_fst, 1); + if (m_useFstWriterThread) fstWriterSetParallelMode(m_fst, 1); fullDump(true); // 
First dump must be full for fst m_curScope.clear(); @@ -278,6 +268,14 @@ void VerilatedFst::commitTraceBuffer(VerilatedFst::Buffer* bufp) { delete bufp; } +//============================================================================= +// Configure + +void VerilatedFst::configure(const VerilatedTraceConfig& config) { + // If at least one model requests the FST writer thread, then use it + m_useFstWriterThread |= config.m_useFstWriterThread; +} + //============================================================================= // VerilatedFstBuffer implementation diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h index b391c8de6..c87b9763b 100644 --- a/include/verilated_fst_c.h +++ b/include/verilated_fst_c.h @@ -55,19 +55,13 @@ private: fstHandle* m_symbolp = nullptr; // same as m_code2symbol, but as an array char* m_strbuf = nullptr; // String buffer long enough to hold maxBits() chars + bool m_useFstWriterThread = false; // Whether to use the separate FST writer thread + // CONSTRUCTORS VL_UNCOPYABLE(VerilatedFst); void declare(uint32_t code, const char* name, int dtypenum, fstVarDir vardir, fstVarType vartype, bool array, int arraynum, bool bussed, int msb, int lsb); - static constexpr bool useFstWriterThread() { -#ifdef VL_TRACE_FST_WRITER_THREAD - return true; -#else - return false; -#endif - } - protected: //========================================================================= // Implementation of VerilatedTrace interface @@ -83,6 +77,9 @@ protected: virtual Buffer* getTraceBuffer() override; virtual void commitTraceBuffer(Buffer*) override; + // Configure sub-class + virtual void configure(const VerilatedTraceConfig&) override; + public: //========================================================================= // External interface to client code diff --git a/include/verilated_trace.h b/include/verilated_trace.h index e5e80904f..66481602f 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -24,21 +24,6 @@ // 
clang-format off -// In FST mode, VL_TRACE_THREADED enables offloading, but only if we also have -// the FST writer thread. This means with --trace-threads 1, we get the FST -// writer thread only, and with --trace-threads 2 we get offloading as well -#if defined(VL_TRACE_FST_WRITER_THREAD) && defined(VL_TRACE_THREADED) -# define VL_TRACE_OFFLOAD -#endif -// VCD tracing can happen fully in parallel -#if defined(VM_TRACE_VCD) && VM_TRACE_VCD && defined(VL_TRACE_THREADED) -# define VL_TRACE_PARALLEL -#endif - -#if defined(VL_TRACE_PARALLEL) && defined(VL_TRACE_OFFLOAD) -# error "Cannot have VL_TRACE_PARALLEL and VL_TRACE_OFFLOAD together" -#endif - #include "verilated.h" #include "verilated_trace_defs.h" @@ -47,6 +32,7 @@ #include #include #include +#include #include #ifdef VL_THREADED @@ -130,6 +116,22 @@ public: }; #endif +//============================================================================= +// VerilatedTraceConfig + +// Simple data representing trace configuration required by generated models. 
+class VerilatedTraceConfig final { +public: + const bool m_useParallel; // Use parallel tracing + const bool m_useOffloading; // Offloading trace rendering + const bool m_useFstWriterThread; // Use the separate FST writer thread + + VerilatedTraceConfig(bool useParallel, bool useOffloading, bool useFstWriterThread) + : m_useParallel{useParallel} + , m_useOffloading{useOffloading} + , m_useFstWriterThread{useFstWriterThread} {} +}; + //============================================================================= // VerilatedTrace @@ -180,7 +182,8 @@ private: , m_userp{userp} {} }; - const bool m_offload; // Whether to use the offload thread (ignored if !VL_THREADED) + bool m_offload = false; // Use the offload thread (ignored if !VL_THREADED) + bool m_parallel = false; // Use parallel tracing (ignored if !VL_THREADED) #ifdef VL_THREADED struct ParallelWorkerData { @@ -215,7 +218,6 @@ private: std::vector m_chgCbs; // Routines to perform incremental dump std::vector m_chgOffloadCbs; // Routines to perform offloaded incremental dump std::vector m_cleanupCbs; // Routines to call at the end of dump - VerilatedContext* m_contextp = nullptr; // The context used by the traced models bool m_fullDump = true; // Whether a full dump is required on the next call to 'dump' uint32_t m_nextCode = 0; // Next code number to assign uint32_t m_numSignals = 0; // Number of distinct signals @@ -227,8 +229,8 @@ private: double m_timeUnit = 1e-0; // Time units (ns/ms etc) uint64_t m_timeLastDump = 0; // Last time we did a dump bool m_didSomeDump = false; // Did at least one dump (i.e.: m_timeLastDump is valid) - - void addModel(VerilatedModel*) VL_MT_SAFE_EXCLUDES(m_mutex); + VerilatedContext* m_contextp = nullptr; // The context used by the traced models + std::unordered_set m_models; // The collection of models being traced void addCallbackRecord(std::vector& cbVec, CallbackRecord&& cbRec) VL_MT_SAFE_EXCLUDES(m_mutex); @@ -313,18 +315,12 @@ protected: #ifdef VL_THREADED inline bool 
offload() const { return m_offload; } + inline bool parallel() const { return m_parallel; } #else static constexpr bool offload() { return false; } + static constexpr bool parallel() { return false; } #endif - inline bool parallel() const { -#ifdef VL_TRACE_PARALLEL - return true; -#else - return false; -#endif - } - //========================================================================= // Virtual functions to be provided by the format specific implementation @@ -340,11 +336,14 @@ protected: virtual Buffer* getTraceBuffer() = 0; virtual void commitTraceBuffer(Buffer*) = 0; + // Configure sub-class + virtual void configure(const VerilatedTraceConfig&) = 0; + public: //========================================================================= // External interface to client code - explicit VerilatedTrace(bool offload); + explicit VerilatedTrace(); ~VerilatedTrace(); // Set time units (s/ms, defaults to ns) @@ -366,12 +365,13 @@ public: //========================================================================= // Non-hot path internal interface to Verilator generated code - void addInitCb(initCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; - void addFullCb(dumpCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; - void addFullCb(dumpOffloadCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; - void addChgCb(dumpCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; - void addChgCb(dumpOffloadCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; - void addCleanupCb(cleanupCb_t cb, void* userp, VerilatedModel*) VL_MT_SAFE; + void addModel(VerilatedModel*) VL_MT_SAFE_EXCLUDES(m_mutex); + void addInitCb(initCb_t cb, void* userp) VL_MT_SAFE; + void addFullCb(dumpCb_t cb, void* userp) VL_MT_SAFE; + void addFullCb(dumpOffloadCb_t cb, void* userp) VL_MT_SAFE; + void addChgCb(dumpCb_t cb, void* userp) VL_MT_SAFE; + void addChgCb(dumpOffloadCb_t cb, void* userp) VL_MT_SAFE; + void addCleanupCb(cleanupCb_t cb, void* userp) VL_MT_SAFE; void scopeEscape(char flag) { m_scopeEscape = 
flag; } diff --git a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index aed3a09a4..1401f67eb 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -293,14 +293,7 @@ template <> void VerilatedTrace::onExit(void* selfp) { //============================================================================= // VerilatedTrace -template <> -VerilatedTrace::VerilatedTrace(bool offload) - : m_offload{offload} { -#ifndef VL_THREADED - if (m_offload) { - VL_FATAL_MT(__FILE__, __LINE__, "", "Cannot use trace offloading without VL_THREADED"); - } -#endif +template <> VerilatedTrace::VerilatedTrace() { set_time_unit(Verilated::threadContextp()->timeunitString()); set_time_resolution(Verilated::threadContextp()->timeprecisionString()); } @@ -648,8 +641,17 @@ template <> void VerilatedTrace::addModel(VerilatedModel* modelp) VL_MT_SAFE_EXCLUDES(m_mutex) { const VerilatedLockGuard lock{m_mutex}; + + const bool firstModel = m_models.empty(); + const bool newModel = m_models.insert(modelp).second; VerilatedContext* const contextp = modelp->contextp(); - if (VL_UNCOVERABLE(m_contextp && contextp != m_contextp)) { // LCOV_EXCL_START + + // Validate + if (!newModel) { // LCOV_EXCL_START + VL_FATAL_MT(__FILE__, __LINE__, "", + "The same model has already been added to this trace file"); + } + if (VL_UNCOVERABLE(m_contextp && contextp != m_contextp)) { VL_FATAL_MT(__FILE__, __LINE__, "", "A trace file instance can only handle models from the same context"); } @@ -657,7 +659,35 @@ void VerilatedTrace::addModel(VerilatedModel* modelp) VL_FATAL_MT(__FILE__, __LINE__, "", "Cannot add models to a trace file if 'dump' has already been called"); } // LCOV_EXCL_STOP + + // Keep hold of the context m_contextp = contextp; + + // Get the desired trace config from the model + const std::unique_ptr configp = modelp->traceConfig(); +#ifndef VL_THREADED + if (configp->m_useOffloading) { + VL_FATAL_MT(__FILE__, __LINE__, "", "Cannot use trace offloading without 
VL_THREADED"); + } +#endif + + // Configure trace base class + if (!firstModel) { + if (m_offload != configp->m_useOffloading) { + VL_FATAL_MT(__FILE__, __LINE__, "", + "Either all or no models using the same trace file must use offloading"); + } + } + m_offload = configp->m_useOffloading; + // If at least one model requests parallel tracing, then use it + m_parallel |= configp->m_useParallel; + + if (VL_UNCOVERABLE(m_parallel && m_offload)) { // LCOV_EXCL_START + VL_FATAL_MT(__FILE__, __LINE__, "", "Cannot use parallel tracing with offloading"); + } // LCOV_EXCL_STOP + + // Configure format specific sub class + configure(*(configp.get())); } template <> @@ -669,43 +699,27 @@ void VerilatedTrace::addCallbackRecord(std::vector -void VerilatedTrace::addInitCb(initCb_t cb, void* userp, - VerilatedModel* modelp) VL_MT_SAFE { - addModel(modelp); +void VerilatedTrace::addInitCb(initCb_t cb, void* userp) VL_MT_SAFE { addCallbackRecord(m_initCbs, CallbackRecord{cb, userp}); } template <> -void VerilatedTrace::addFullCb(dumpCb_t cb, void* userp, - VerilatedModel* modelp) VL_MT_SAFE { - addModel(modelp); - assert(!offload()); +void VerilatedTrace::addFullCb(dumpCb_t cb, void* userp) VL_MT_SAFE { addCallbackRecord(m_fullCbs, CallbackRecord{cb, userp}); } template <> -void VerilatedTrace::addFullCb(dumpOffloadCb_t cb, void* userp, - VerilatedModel* modelp) VL_MT_SAFE { - addModel(modelp); - assert(offload()); +void VerilatedTrace::addFullCb(dumpOffloadCb_t cb, void* userp) VL_MT_SAFE { addCallbackRecord(m_fullOffloadCbs, CallbackRecord{cb, userp}); } template <> -void VerilatedTrace::addChgCb(dumpCb_t cb, void* userp, - VerilatedModel* modelp) VL_MT_SAFE { - addModel(modelp); - assert(!offload()); +void VerilatedTrace::addChgCb(dumpCb_t cb, void* userp) VL_MT_SAFE { addCallbackRecord(m_chgCbs, CallbackRecord{cb, userp}); } template <> -void VerilatedTrace::addChgCb(dumpOffloadCb_t cb, void* userp, - VerilatedModel* modelp) VL_MT_SAFE { - addModel(modelp); - assert(offload()); 
+void VerilatedTrace::addChgCb(dumpOffloadCb_t cb, void* userp) VL_MT_SAFE { addCallbackRecord(m_chgOffloadCbs, CallbackRecord{cb, userp}); } template <> -void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* userp, - VerilatedModel* modelp) VL_MT_SAFE { - addModel(modelp); +void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* userp) VL_MT_SAFE { addCallbackRecord(m_cleanupCbs, CallbackRecord{cb, userp}); } diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index e5ce780cf..342ba7b2c 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -102,8 +102,7 @@ ssize_t VerilatedVcdFile::write(const char* bufp, ssize_t len) VL_MT_UNSAFE { //============================================================================= // Opening/Closing -VerilatedVcd::VerilatedVcd(VerilatedVcdFile* filep) - : VerilatedTrace{false} { +VerilatedVcd::VerilatedVcd(VerilatedVcdFile* filep) { // Not in header to avoid link issue if header is included without this .cpp file m_fileNewed = (filep == nullptr); m_filep = m_fileNewed ? new VerilatedVcdFile : filep; diff --git a/include/verilated_vcd_c.h b/include/verilated_vcd_c.h index 853e976e7..20c5440f5 100644 --- a/include/verilated_vcd_c.h +++ b/include/verilated_vcd_c.h @@ -112,6 +112,9 @@ protected: virtual Buffer* getTraceBuffer() override; virtual void commitTraceBuffer(Buffer*) override; + // Configure sub-class + virtual void configure(const VerilatedTraceConfig&) override { return; }; + public: //========================================================================= // External interface to client code diff --git a/src/V3EmitCMake.cpp b/src/V3EmitCMake.cpp index 710829eaf..1e8f1700e 100644 --- a/src/V3EmitCMake.cpp +++ b/src/V3EmitCMake.cpp @@ -113,12 +113,6 @@ class CMakeEmitter final { cmake_set_raw(*of, name + "_COVERAGE", v3Global.opt.coverage() ? "1" : "0"); *of << "# Threaded output mode? 
0/1/N threads (from --threads)\n"; cmake_set_raw(*of, name + "_THREADS", cvtToStr(v3Global.opt.threads())); - *of << "# Threaded tracing output mode? 0/1/N threads (from --threads/--trace-threads)\n"; - cmake_set_raw(*of, name + "_TRACE_THREADS", cvtToStr(v3Global.opt.vmTraceThreads())); - cmake_set_raw(*of, name + "_TRACE_FST_WRITER_THREAD", - v3Global.opt.traceThreads() && v3Global.opt.traceFormat().fst() ? "1" : "0"); - *of << "# Struct output mode? 0/1 (from --trace-structs)\n"; - cmake_set_raw(*of, name + "_TRACE_STRUCTS", cvtToStr(v3Global.opt.traceStructs())); *of << "# VCD Tracing output mode? 0/1 (from --trace)\n"; cmake_set_raw(*of, name + "_TRACE_VCD", (v3Global.opt.trace() && v3Global.opt.traceFormat().vcd()) ? "1" : "0"); diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index 67c02e332..c0a8e452c 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -223,6 +223,9 @@ class EmitCModel final : public EmitCFunc { puts("const char* hierName() const override final;\n"); puts("const char* modelName() const override final;\n"); puts("unsigned threads() const override final;\n"); + if (v3Global.opt.trace()) { + puts("std::unique_ptr traceConfig() const override final;\n"); + } puts("} VL_ATTR_ALIGNED(VL_CACHE_LINE_BYTES);\n"); @@ -487,6 +490,17 @@ class EmitCModel final : public EmitCFunc { + "\"; }\n"); puts("unsigned " + topClassName() + "::threads() const { return " + cvtToStr(std::max(1, v3Global.opt.threads())) + "; }\n"); + + if (v3Global.opt.trace()) { + puts("std::unique_ptr " + topClassName() + + "::traceConfig() const {\n"); + puts("return std::unique_ptr{new VerilatedTraceConfig{"); + puts(v3Global.opt.useTraceParallel() ? "true" : "false"); + puts(v3Global.opt.useTraceOffload() ? ", true" : ", false"); + puts(v3Global.opt.useFstWriterThread() ? 
", true" : ", false"); + puts("}};\n"); + puts("};\n"); + } } void emitTraceMethods(AstNodeModule* modp) { @@ -539,8 +553,8 @@ class EmitCModel final : public EmitCFunc { puts(/**/ "}"); } puts(/**/ "if (false && levels && options) {} // Prevent unused\n"); - puts(/**/ "tfp->spTrace()->addInitCb(&" + protect("trace_init") - + ", &(vlSymsp->TOP), this);\n"); + puts(/**/ "tfp->spTrace()->addModel(this);\n"); + puts(/**/ "tfp->spTrace()->addInitCb(&" + protect("trace_init") + ", &(vlSymsp->TOP));\n"); puts(/**/ topModNameProtected + "__" + protect("trace_register") + "(&(vlSymsp->TOP), tfp->spTrace());\n"); diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index 188047c3c..75dabd3ff 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -73,15 +73,6 @@ public: of.puts("VM_TRACE_FST = "); of.puts(v3Global.opt.trace() && v3Global.opt.traceFormat().fst() ? "1" : "0"); of.puts("\n"); - of.puts( - "# Tracing threaded output mode? 0/1/N threads (from --threads/--trace-thread)\n"); - of.puts("VM_TRACE_THREADS = "); - of.puts(cvtToStr(v3Global.opt.vmTraceThreads())); - of.puts("\n"); - of.puts("# Separate FST writer thread? 0/1 (from --trace-fst with --trace-thread > 0)\n"); - of.puts("VM_TRACE_FST_WRITER_THREAD = "); - of.puts(v3Global.opt.traceThreads() && v3Global.opt.traceFormat().fst() ? "1" : "0"); - of.puts("\n"); of.puts("\n### Object file lists...\n"); for (int support = 0; support < 3; ++support) { diff --git a/src/V3Options.h b/src/V3Options.h index 41e508553..78fa1b8bd 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -521,6 +521,7 @@ public: bool useTraceParallel() const { return trace() && traceFormat().vcd() && threads() && (threads() > 1 || hierChild() > 1); } + bool useFstWriterThread() const { return traceThreads() && traceFormat().fst(); } unsigned vmTraceThreads() const { return useTraceParallel() ? threads() : useTraceOffload() ? 
1 : 0; } diff --git a/src/V3Trace.cpp b/src/V3Trace.cpp index 2d5551cfe..c8a665f3e 100644 --- a/src/V3Trace.cpp +++ b/src/V3Trace.cpp @@ -515,7 +515,6 @@ private: } m_regFuncp->addStmtsp(new AstAddrOfCFunc(flp, funcp)); m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf", true)); - m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf->vlSymsp->__Vm_modelp", true)); m_regFuncp->addStmtsp(new AstText(flp, ");\n", true)); } else { // Sub functions @@ -705,7 +704,7 @@ private: // Register it m_regFuncp->addStmtsp(new AstText(fl, "tracep->addCleanupCb(", true)); m_regFuncp->addStmtsp(new AstAddrOfCFunc(fl, cleanupFuncp)); - m_regFuncp->addStmtsp(new AstText(fl, ", vlSelf, vlSelf->vlSymsp->__Vm_modelp);\n", true)); + m_regFuncp->addStmtsp(new AstText(fl, ", vlSelf);\n", true)); // Clear global activity flag cleanupFuncp->addStmtsp( diff --git a/verilator-config.cmake.in b/verilator-config.cmake.in index 7f9ece972..2125341b8 100644 --- a/verilator-config.cmake.in +++ b/verilator-config.cmake.in @@ -261,15 +261,6 @@ function(verilate TARGET) set_property(TARGET ${TARGET} PROPERTY VERILATOR_THREADED ON) endif() - if (${VERILATE_PREFIX}_TRACE_THREADS) - # If any verilate() call specifies TRACE_THREADS, define VL_TRACE_THREADED in the final build - set_property(TARGET ${TARGET} PROPERTY VERILATOR_TRACE_THREADED ON) - endif() - - if (${VERILATE_PREFIX}_TRACE_FST_WRITER_THREAD) - set_property(TARGET ${TARGET} PROPERTY VERILATOR_TRACE_FST_WRITER_TRHEAD ON) - endif() - if (${VERILATE_PREFIX}_COVERAGE) # If any verilate() call specifies COVERAGE, define VM_COVERAGE in the final build set_property(TARGET ${TARGET} PROPERTY VERILATOR_COVERAGE ON) @@ -330,8 +321,6 @@ function(verilate TARGET) VM_COVERAGE=$> VM_SC=$> $<$>:VL_THREADED> - $<$>:VL_TRACE_THREADED> - $<$>:VL_TRACE_FST_WRITER_THREAD> VM_TRACE=$> VM_TRACE_VCD=$> VM_TRACE_FST=$> From 1c5e5704f5310409a5e56f26af7a1cc4f06ccd31 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 20 Jul 2022 13:07:26 +0100 Subject: [PATCH 046/119] Fix 
iteration fixup in AstNode::addHereThisAsNext Previous version broke verilator_ext_tests due to iteration order mismatch after 3fc8249429a3cde8c925f1ef6ff87bc82865121e --- src/V3Ast.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/V3Ast.cpp b/src/V3Ast.cpp index 5518a98a8..9a6e7fca8 100644 --- a/src/V3Ast.cpp +++ b/src/V3Ast.cpp @@ -674,10 +674,10 @@ void AstNode::addHereThisAsNext(AstNode* newp) { tailp->m_headtailp = newp; } // Iterator fixup - if (newLastp->m_iterpp) { - *(newLastp->m_iterpp) = this; - } else if (this->m_iterpp) { + if (newLastp->m_iterpp) *(newLastp->m_iterpp) = this; + if (this->m_iterpp) { *(this->m_iterpp) = newp; + this->m_iterpp = nullptr; } // debugTreeChange(this, "-addHereThisAsNext: ", __LINE__, true); From 542e3248695af6f776457152a9c3b84904b65727 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kozdra Date: Wed, 20 Jul 2022 15:01:36 +0200 Subject: [PATCH 047/119] Wildcard index type support for associative arrays (#3501). Associative arrays that specify a wildcard index type may be indexed by integral expressions of any size, with leading zeros removed automatically. A natural representation for such expressions is a string, especially since the standard explicitly specifies automatic casts from string indices to bit vectors of equivalent size. The automatic cast part is done implicitly by the existing type system. A simpler way to just make this work would be to convert wildcard index type to a string type directly in the parser code, but several new AST classes are needed to make sure illegal method calls are detected. The verilated data structure implementation is reused, because there is no need for differentiating the behavior on C++ side.
--- src/V3AstNodes.cpp | 7 + src/V3AstNodes.h | 146 +++++++++++++++ src/V3Clean.cpp | 1 + src/V3EmitCFunc.cpp | 16 ++ src/V3EmitCFunc.h | 26 +++ src/V3Hasher.cpp | 5 + src/V3ParseGrammar.cpp | 2 + src/V3Width.cpp | 170 ++++++++++++++++++ src/V3WidthSel.cpp | 10 ++ src/verilog.y | 6 +- test_regress/t/t_assoc_wildcard.pl | 21 +++ ...oc_wildcard_unsup.v => t_assoc_wildcard.v} | 21 ++- test_regress/t/t_assoc_wildcard_bad.out | 73 ++++++++ ...dcard_unsup.pl => t_assoc_wildcard_bad.pl} | 0 test_regress/t/t_assoc_wildcard_bad.v | 45 +++++ test_regress/t/t_assoc_wildcard_method.pl | 21 +++ test_regress/t/t_assoc_wildcard_method.v | 127 +++++++++++++ test_regress/t/t_assoc_wildcard_unsup.out | 5 - 18 files changed, 682 insertions(+), 20 deletions(-) create mode 100755 test_regress/t/t_assoc_wildcard.pl rename test_regress/t/{t_assoc_wildcard_unsup.v => t_assoc_wildcard.v} (71%) create mode 100644 test_regress/t/t_assoc_wildcard_bad.out rename test_regress/t/{t_assoc_wildcard_unsup.pl => t_assoc_wildcard_bad.pl} (100%) create mode 100644 test_regress/t/t_assoc_wildcard_bad.v create mode 100755 test_regress/t/t_assoc_wildcard_method.pl create mode 100644 test_regress/t/t_assoc_wildcard_method.v delete mode 100644 test_regress/t/t_assoc_wildcard_unsup.out diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 33d275e4e..95ce8ba24 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -673,6 +673,9 @@ AstNodeDType::CTypeRecursed AstNodeDType::cTypeRecurse(bool compound) const { const CTypeRecursed key = adtypep->keyDTypep()->cTypeRecurse(true); const CTypeRecursed val = adtypep->subDTypep()->cTypeRecurse(true); info.m_type = "VlAssocArray<" + key.m_type + ", " + val.m_type + ">"; + } else if (const auto* const adtypep = VN_CAST(dtypep, WildcardArrayDType)) { + const CTypeRecursed sub = adtypep->subDTypep()->cTypeRecurse(true); + info.m_type = "VlAssocArray"; } else if (const auto* const adtypep = VN_CAST(dtypep, DynArrayDType)) { const CTypeRecursed sub = 
adtypep->subDTypep()->cTypeRecurse(true); info.m_type = "VlQueue<" + sub.m_type + ">"; @@ -1683,6 +1686,10 @@ string AstQueueDType::prettyDTypeName() const { if (boundConst()) str += ":" + cvtToStr(boundConst()); return str + "]"; } +void AstWildcardArrayDType::dumpSmall(std::ostream& str) const { + this->AstNodeDType::dumpSmall(str); + str << "[*]"; +} void AstUnsizedArrayDType::dumpSmall(std::ostream& str) const { this->AstNodeDType::dumpSmall(str); str << "[]"; diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index d9e4b81db..8b258db0a 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -278,6 +278,17 @@ public: virtual bool same(const AstNode* samep) const override { return true; } }; +class AstWildcardRange final : public AstNodeRange { + // Wildcard range specification, for wildcard index type associative arrays +public: + explicit AstWildcardRange(FileLine* fl) + : ASTGEN_SUPER_WildcardRange(fl) {} + ASTNODE_NODE_FUNCS(WildcardRange) + virtual string emitC() { V3ERROR_NA_RETURN(""); } + virtual string emitVerilog() { return "[*]"; } + virtual bool same(const AstNode* samep) const override { return true; } +}; + class AstGatePin final : public AstNodeMath { // Possibly expand a gate primitive input pin value to match the range of the gate primitive public: @@ -819,6 +830,62 @@ public: virtual bool isCompound() const override { return true; } }; +class AstWildcardArrayDType final : public AstNodeDType { + // Wildcard index type associative array data type, ie "some_dtype var_name [*]" + // Children: DTYPE (moved to refDTypep() in V3Width) +private: + AstNodeDType* m_refDTypep; // Elements of this type (after widthing) +public: + AstWildcardArrayDType(FileLine* fl, VFlagChildDType, AstNodeDType* dtp) + : ASTGEN_SUPER_WildcardArrayDType(fl) { + childDTypep(dtp); // Only for parser + refDTypep(nullptr); + dtypep(nullptr); // V3Width will resolve + } + ASTNODE_NODE_FUNCS(WildcardArrayDType) + virtual const char* broken() const override { + 
BROKEN_RTN(!((m_refDTypep && !childDTypep() && m_refDTypep->brokeExists()) + || (!m_refDTypep && childDTypep()))); + return nullptr; + } + virtual void cloneRelink() override { + if (m_refDTypep && m_refDTypep->clonep()) m_refDTypep = m_refDTypep->clonep(); + } + virtual bool same(const AstNode* samep) const override { + const AstNodeArrayDType* const asamep = static_cast(samep); + if (!asamep->subDTypep()) return false; + return (subDTypep() == asamep->subDTypep()); + } + virtual bool similarDType(AstNodeDType* samep) const override { + const AstNodeArrayDType* const asamep = static_cast(samep); + return type() == samep->type() && asamep->subDTypep() + && subDTypep()->skipRefp()->similarDType(asamep->subDTypep()->skipRefp()); + } + virtual void dumpSmall(std::ostream& str) const override; + virtual AstNodeDType* getChildDTypep() const override { return childDTypep(); } + // op1 = Range of variable + AstNodeDType* childDTypep() const { return VN_AS(op1p(), NodeDType); } + void childDTypep(AstNodeDType* nodep) { setOp1p(nodep); } + virtual AstNodeDType* subDTypep() const override { + return m_refDTypep ? 
m_refDTypep : childDTypep(); + } + void refDTypep(AstNodeDType* nodep) { m_refDTypep = nodep; } + virtual AstNodeDType* virtRefDTypep() const override { return m_refDTypep; } + virtual void virtRefDTypep(AstNodeDType* nodep) override { refDTypep(nodep); } + // METHODS + virtual AstBasicDType* basicp() const override { return subDTypep()->basicp(); } + virtual AstNodeDType* skipRefp() const override { return (AstNodeDType*)this; } + virtual AstNodeDType* skipRefToConstp() const override { return (AstNodeDType*)this; } + virtual AstNodeDType* skipRefToEnump() const override { return (AstNodeDType*)this; } + virtual int widthAlignBytes() const override { + return sizeof(std::map); + } + virtual int widthTotalBytes() const override { + return sizeof(std::map); + } + virtual bool isCompound() const override { return true; } +}; + class AstBasicDType final : public AstNodeDType { // Builtin atomic/vectored data type // Children: RANGE (converted to constant in V3Width) @@ -1686,6 +1753,44 @@ public: virtual int instrCount() const override { return widthInstrs(); } }; +class AstWildcardSel final : public AstNodeSel { + // Parents: math|stmt + // Children: varref|arraysel, math +private: + void init(AstNode* fromp) { + if (fromp && VN_IS(fromp->dtypep()->skipRefp(), WildcardArrayDType)) { + // Strip off array to find what array references + dtypeFrom(VN_AS(fromp->dtypep()->skipRefp(), WildcardArrayDType)->subDTypep()); + } + } + +public: + AstWildcardSel(FileLine* fl, AstNode* fromp, AstNode* bitp) + : ASTGEN_SUPER_WildcardSel(fl, fromp, bitp) { + init(fromp); + } + ASTNODE_NODE_FUNCS(WildcardSel) + virtual AstNode* cloneType(AstNode* lhsp, AstNode* rhsp) override { + return new AstWildcardSel{this->fileline(), lhsp, rhsp}; + } + virtual void numberOperate(V3Number& out, const V3Number& lhs, const V3Number& rhs) override { + V3ERROR_NA; + } + virtual string emitVerilog() override { return "%k(%l%f[%r])"; } + virtual string emitC() override { return "%li%k[%ri]"; } + 
virtual bool cleanOut() const override { return true; } + virtual bool cleanLhs() const override { return false; } + virtual bool cleanRhs() const override { return true; } + virtual bool sizeMattersLhs() const override { return false; } + virtual bool sizeMattersRhs() const override { return false; } + virtual bool isGateOptimizable() const override { + return true; + } // esp for V3Const::ifSameAssign + virtual bool isPredictOptimizable() const override { return false; } + virtual bool same(const AstNode* samep) const override { return true; } + virtual int instrCount() const override { return widthInstrs(); } +}; + class AstWordSel final : public AstNodeSel { // Select a single word from a multi-word wide value public: @@ -4887,6 +4992,47 @@ public: virtual bool same(const AstNode* samep) const override { return true; } }; +class AstConsWildcard final : public AstNodeMath { + // Construct a wildcard assoc array and return object, '{} + // Parents: math + // Children: expression (elements or other queues) +public: + AstConsWildcard(FileLine* fl, AstNode* defaultp) + : ASTGEN_SUPER_ConsWildcard(fl) { + setNOp1p(defaultp); + } + ASTNODE_NODE_FUNCS(ConsWildcard) + virtual string emitVerilog() override { return "'{}"; } + virtual string emitC() override { V3ERROR_NA_RETURN(""); } + virtual string emitSimpleOperator() override { V3ERROR_NA_RETURN(""); } + virtual bool cleanOut() const override { return true; } + virtual int instrCount() const override { return widthInstrs(); } + AstNode* defaultp() const { return op1p(); } + virtual bool same(const AstNode* samep) const override { return true; } +}; +class AstSetWildcard final : public AstNodeMath { + // Set a wildcard assoc array element and return object, '{} + // Parents: math + // Children: expression (elements or other queues) +public: + AstSetWildcard(FileLine* fl, AstNode* lhsp, AstNode* keyp, AstNode* valuep) + : ASTGEN_SUPER_SetWildcard(fl) { + setOp1p(lhsp); + setNOp2p(keyp); + setOp3p(valuep); + } + 
ASTNODE_NODE_FUNCS(SetWildcard) + virtual string emitVerilog() override { return "'{}"; } + virtual string emitC() override { V3ERROR_NA_RETURN(""); } + virtual string emitSimpleOperator() override { V3ERROR_NA_RETURN(""); } + virtual bool cleanOut() const override { return true; } + virtual int instrCount() const override { return widthInstrs(); } + AstNode* lhsp() const { return op1p(); } + AstNode* keyp() const { return op2p(); } + AstNode* valuep() const { return op3p(); } + virtual bool same(const AstNode* samep) const override { return true; } +}; + class AstConsDynArray final : public AstNodeMath { // Construct a queue and return object, '{}. '{lhs}, '{lhs. rhs} // Parents: math diff --git a/src/V3Clean.cpp b/src/V3Clean.cpp index 613b681d7..eaf7c1f39 100644 --- a/src/V3Clean.cpp +++ b/src/V3Clean.cpp @@ -88,6 +88,7 @@ private: if (VN_IS(nodep, Var) || VN_IS(nodep, NodeDType) // Don't want to change variable widths! || VN_IS(nodep->dtypep()->skipRefp(), AssocArrayDType) // Or arrays + || VN_IS(nodep->dtypep()->skipRefp(), WildcardArrayDType) || VN_IS(nodep->dtypep()->skipRefp(), DynArrayDType) || VN_IS(nodep->dtypep()->skipRefp(), ClassRefDType) || VN_IS(nodep->dtypep()->skipRefp(), QueueDType) diff --git a/src/V3EmitCFunc.cpp b/src/V3EmitCFunc.cpp index ae3f8ce03..7117fee7e 100644 --- a/src/V3EmitCFunc.cpp +++ b/src/V3EmitCFunc.cpp @@ -611,6 +611,17 @@ void EmitCFunc::emitVarReset(AstVar* varp) { emitSetVarConstant(varNameProtected + ".at(" + cvtToStr(itr.first) + ")", VN_AS(valuep, Const)); } + } else if (AstWildcardArrayDType* const adtypep = VN_CAST(dtypep, WildcardArrayDType)) { + if (initarp->defaultp()) { + emitSetVarConstant(varNameProtected + ".atDefault()", + VN_AS(initarp->defaultp(), Const)); + } + const auto& mapr = initarp->map(); + for (const auto& itr : mapr) { + AstNode* const valuep = itr.second->valuep(); + emitSetVarConstant(varNameProtected + ".at(" + cvtToStr(itr.first) + ")", + VN_AS(valuep, Const)); + } } else if (AstUnpackArrayDType* 
const adtypep = VN_CAST(dtypep, UnpackArrayDType)) { if (initarp->defaultp()) { puts("for (int __Vi=0; __Vi<" + cvtToStr(adtypep->elementsConst())); @@ -642,6 +653,11 @@ string EmitCFunc::emitVarResetRecurse(const AstVar* varp, const string& varNameP const string cvtarray = (adtypep->subDTypep()->isWide() ? ".data()" : ""); return emitVarResetRecurse(varp, varNameProtected, adtypep->subDTypep(), depth + 1, suffix + ".atDefault()" + cvtarray); + } else if (AstWildcardArrayDType* const adtypep = VN_CAST(dtypep, WildcardArrayDType)) { + // Access std::array as C array + const string cvtarray = (adtypep->subDTypep()->isWide() ? ".data()" : ""); + return emitVarResetRecurse(varp, varNameProtected, adtypep->subDTypep(), depth + 1, + suffix + ".atDefault()" + cvtarray); } else if (VN_IS(dtypep, ClassRefDType)) { return ""; // Constructor does it } else if (const AstDynArrayDType* const adtypep = VN_CAST(dtypep, DynArrayDType)) { diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index 629b90397..31d1f5a11 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -379,6 +379,14 @@ public: } puts(")"); } + virtual void visit(AstWildcardSel* nodep) override { + iterateAndNextNull(nodep->fromp()); + putbs(".at("); + AstWildcardArrayDType* const adtypep = VN_AS(nodep->fromp()->dtypep(), WildcardArrayDType); + UASSERT_OBJ(adtypep, nodep, "Wildcard select on non-wildcard-associative type"); + iterateAndNextNull(nodep->bitp()); + puts(")"); + } virtual void visit(AstCCall* nodep) override { const AstCFunc* const funcp = nodep->funcp(); const AstNodeModule* const funcModp = EmitCParentModule::get(funcp); @@ -1189,6 +1197,24 @@ public: iterateAndNextNull(nodep->valuep()); puts(")"); } + virtual void visit(AstConsWildcard* nodep) override { + putbs(nodep->dtypep()->cType("", false, false)); + puts("()"); + if (nodep->defaultp()) { + putbs(".setDefault("); + iterateAndNextNull(nodep->defaultp()); + puts(")"); + } + } + virtual void visit(AstSetWildcard* nodep) override { + 
iterateAndNextNull(nodep->lhsp()); + putbs(".set("); + iterateAndNextNull(nodep->keyp()); + puts(", "); + putbs(""); + iterateAndNextNull(nodep->valuep()); + puts(")"); + } virtual void visit(AstConsDynArray* nodep) override { putbs(nodep->dtypep()->cType("", false, false)); if (!nodep->lhsp()) { diff --git a/src/V3Hasher.cpp b/src/V3Hasher.cpp index 4b0cbbbba..ba59ea462 100644 --- a/src/V3Hasher.cpp +++ b/src/V3Hasher.cpp @@ -133,6 +133,11 @@ private: iterateNull(nodep->virtRefDTypep()); }); } + virtual void visit(AstWildcardArrayDType* nodep) override { + m_hash += hashNodeAndIterate(nodep, false, HASH_CHILDREN, [=]() { // + iterateNull(nodep->virtRefDTypep()); + }); + } virtual void visit(AstBasicDType* nodep) override { m_hash += hashNodeAndIterate(nodep, false, HASH_CHILDREN, [=]() { m_hash += nodep->keyword(); diff --git a/src/V3ParseGrammar.cpp b/src/V3ParseGrammar.cpp index 2cc65af6a..f2d57f289 100644 --- a/src/V3ParseGrammar.cpp +++ b/src/V3ParseGrammar.cpp @@ -138,6 +138,8 @@ AstNodeDType* V3ParseGrammar::createArray(AstNodeDType* basep, AstNodeRange* nra AstNode* const keyp = arangep->elementsp()->unlinkFrBack(); arrayp = new AstBracketArrayDType(nrangep->fileline(), VFlagChildDType(), arrayp, keyp); + } else if (VN_IS(nrangep, WildcardRange)) { + arrayp = new AstWildcardArrayDType{nrangep->fileline(), VFlagChildDType{}, arrayp}; } else { UASSERT_OBJ(0, nrangep, "Expected range or unsized range"); } diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 8a833d9f2..c98e856aa 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -981,6 +981,27 @@ private: } } + virtual void visit(AstWildcardSel* nodep) override { + // Signed/Real: Output type based on array-declared type; binary operator + if (m_vup->prelim()) { + const AstNodeDType* const fromDtp = nodep->fromp()->dtypep()->skipRefp(); + const AstWildcardArrayDType* const adtypep = VN_CAST(fromDtp, WildcardArrayDType); + if (!adtypep) { + UINFO(1, " Related dtype: " << fromDtp << endl); + 
nodep->v3fatalSrc("Wildcard array reference is not to wildcard array"); + } + const AstBasicDType* const basicp = nodep->bitp()->dtypep()->skipRefp()->basicp(); + if (!basicp + || (basicp->keyword() != VBasicDTypeKwd::STRING + && !basicp->keyword().isIntNumeric())) { + nodep->v3error("Wildcard index must be integral (IEEE 1800-2017 7.8.1)"); + } + iterateCheckTyped(nodep, "Wildcard associative select", nodep->bitp(), + adtypep->findStringDType(), BOTH); + nodep->dtypeFrom(adtypep->subDTypep()); + } + } + virtual void visit(AstSliceSel* nodep) override { // Always creates as output an unpacked array if (m_vup->prelim()) { @@ -1582,6 +1603,14 @@ private: nodep->dtypep(nodep); // The array itself, not subDtype UINFO(4, "dtWidthed " << nodep << endl); } + virtual void visit(AstWildcardArrayDType* nodep) override { + if (nodep->didWidthAndSet()) return; // This node is a dtype & not both PRELIMed+FINALed + // Iterate into subDTypep() to resolve that type and update pointer. + nodep->refDTypep(iterateEditMoveDTypep(nodep, nodep->subDTypep())); + // Cleanup array size + nodep->dtypep(nodep); // The array itself, not subDtype + UINFO(4, "dtWidthed " << nodep << endl); + } virtual void visit(AstBasicDType* nodep) override { if (nodep->didWidthAndSet()) return; // This node is a dtype & not both PRELIMed+FINALed if (nodep->generic()) return; // Already perfect @@ -2178,6 +2207,31 @@ private: EXTEND_EXP); } } + virtual void visit(AstConsWildcard* nodep) override { + // Type computed when constructed here + auto* const vdtypep = VN_AS(m_vup->dtypep()->skipRefp(), WildcardArrayDType); + UASSERT_OBJ(vdtypep, nodep, "ConsWildcard requires wildcard upper parent data type"); + if (m_vup->prelim()) { + nodep->dtypeFrom(vdtypep); + if (nodep->defaultp()) { + iterateCheck(nodep, "default", nodep->defaultp(), CONTEXT, FINAL, + vdtypep->subDTypep(), EXTEND_EXP); + } + } + } + virtual void visit(AstSetWildcard* nodep) override { + // Type computed when constructed here + auto* const 
vdtypep = VN_AS(m_vup->dtypep()->skipRefp(), WildcardArrayDType); + UASSERT_OBJ(vdtypep, nodep, "SetWildcard requires wildcard upper parent data type"); + if (m_vup->prelim()) { + nodep->dtypeFrom(vdtypep); + userIterateAndNext(nodep->lhsp(), WidthVP{vdtypep, BOTH}.p()); + iterateCheck(nodep, "key", nodep->keyp(), CONTEXT, FINAL, vdtypep->findStringDType(), + EXTEND_EXP); + iterateCheck(nodep, "value", nodep->valuep(), CONTEXT, FINAL, vdtypep->subDTypep(), + EXTEND_EXP); + } + } virtual void visit(AstConsDynArray* nodep) override { // Type computed when constructed here AstDynArrayDType* const vdtypep = VN_AS(m_vup->dtypep()->skipRefp(), DynArrayDType); @@ -2426,6 +2480,7 @@ private: } } else if (VN_IS(fromDtp, EnumDType) // || VN_IS(fromDtp, AssocArrayDType) // + || VN_IS(fromDtp, WildcardArrayDType) // || VN_IS(fromDtp, UnpackArrayDType) // || VN_IS(fromDtp, DynArrayDType) // || VN_IS(fromDtp, QueueDType) // @@ -2523,6 +2578,8 @@ private: methodCallEnum(nodep, adtypep); } else if (AstAssocArrayDType* const adtypep = VN_CAST(fromDtp, AssocArrayDType)) { methodCallAssoc(nodep, adtypep); + } else if (AstWildcardArrayDType* const adtypep = VN_CAST(fromDtp, WildcardArrayDType)) { + methodCallWildcard(nodep, adtypep); } else if (AstDynArrayDType* const adtypep = VN_CAST(fromDtp, DynArrayDType)) { methodCallDyn(nodep, adtypep); } else if (AstQueueDType* const adtypep = VN_CAST(fromDtp, QueueDType)) { @@ -2684,6 +2741,89 @@ private: nodep->v3error("Unknown built-in enum method " << nodep->prettyNameQ()); } } + void methodCallWildcard(AstMethodCall* nodep, AstWildcardArrayDType* adtypep) { + AstCMethodHard* newp = nullptr; + if (nodep->name() == "num" // function int num() + || nodep->name() == "size") { + methodOkArguments(nodep, 0, 0); + newp = new AstCMethodHard{nodep->fileline(), nodep->fromp()->unlinkFrBack(), + "size"}; // So don't need num() + newp->dtypeSetSigned32(); + } else if (nodep->name() == "first" // function int first(ref index) + || nodep->name() == 
"last" // + || nodep->name() == "next" // + || nodep->name() == "prev" // + || nodep->name() == "unique_index" // + || nodep->name() == "find_index" || nodep->name() == "find_first_index" + || nodep->name() == "find_last_index") { + nodep->v3error("Array method " << nodep->prettyNameQ() + << " not legal on wildcard associative arrays"); + } else if (nodep->name() == "exists") { // function int exists(input index) + // IEEE really should have made this a "bit" return + methodOkArguments(nodep, 1, 1); + AstNode* const index_exprp = methodCallWildcardIndexExpr(nodep, adtypep); + newp = new AstCMethodHard{nodep->fileline(), nodep->fromp()->unlinkFrBack(), "exists", + index_exprp->unlinkFrBack()}; + newp->dtypeSetSigned32(); + newp->pure(true); + } else if (nodep->name() == "delete") { // function void delete([input integer index]) + methodOkArguments(nodep, 0, 1); + methodCallLValueRecurse(nodep, nodep->fromp(), VAccess::WRITE); + if (!nodep->pinsp()) { + newp = new AstCMethodHard{nodep->fileline(), nodep->fromp()->unlinkFrBack(), + "clear"}; + newp->makeStatement(); + } else { + AstNode* const index_exprp = methodCallWildcardIndexExpr(nodep, adtypep); + newp = new AstCMethodHard{nodep->fileline(), nodep->fromp()->unlinkFrBack(), + "erase", index_exprp->unlinkFrBack()}; + newp->makeStatement(); + } + } else if (nodep->name() == "sort" || nodep->name() == "rsort" + || nodep->name() == "reverse" || nodep->name() == "shuffle") { + nodep->v3error("Array method " << nodep->prettyNameQ() + << " not legal on associative arrays"); + } else if (nodep->name() == "and" || nodep->name() == "or" || nodep->name() == "xor" + || nodep->name() == "sum" || nodep->name() == "product") { + // All value return + AstWith* const withp + = methodWithArgument(nodep, false, false, adtypep->subDTypep(), + adtypep->findStringDType(), adtypep->subDTypep()); + methodOkArguments(nodep, 0, 0); + methodCallLValueRecurse(nodep, nodep->fromp(), VAccess::READ); + newp = new 
AstCMethodHard{nodep->fileline(), nodep->fromp()->unlinkFrBack(), + "r_" + nodep->name(), withp}; + newp->dtypeFrom(withp ? withp->dtypep() : adtypep->subDTypep()); + if (!nodep->firstAbovep()) newp->makeStatement(); + } else if (nodep->name() == "min" || nodep->name() == "max" || nodep->name() == "unique") { + methodOkArguments(nodep, 0, 0); + methodCallLValueRecurse(nodep, nodep->fromp(), VAccess::READ); + newp = new AstCMethodHard{nodep->fileline(), nodep->fromp()->unlinkFrBack(), + nodep->name()}; + newp->dtypeFrom(adtypep); + if (!nodep->firstAbovep()) newp->makeStatement(); + } else if (nodep->name() == "find" || nodep->name() == "find_first" + || nodep->name() == "find_last") { + AstWith* const withp + = methodWithArgument(nodep, true, false, nodep->findBitDType(), + adtypep->findStringDType(), adtypep->subDTypep()); + methodOkArguments(nodep, 0, 0); + methodCallLValueRecurse(nodep, nodep->fromp(), VAccess::READ); + newp = new AstCMethodHard{nodep->fileline(), nodep->fromp()->unlinkFrBack(), + nodep->name(), withp}; + newp->dtypeFrom(adtypep); + if (!nodep->firstAbovep()) newp->makeStatement(); + } else { + nodep->v3error("Unknown wildcard associative array method " << nodep->prettyNameQ()); + nodep->dtypeFrom(adtypep->subDTypep()); // Best guess + } + if (newp) { + newp->protect(false); + newp->didWidth(true); + nodep->replaceWith(newp); + VL_DO_DANGLING(nodep->deleteTree(), nodep); + } + } void methodCallAssoc(AstMethodCall* nodep, AstAssocArrayDType* adtypep) { AstCMethodHard* newp = nullptr; if (nodep->name() == "num" // function int num() @@ -2789,6 +2929,13 @@ private: VL_DANGLING(index_exprp); // May have been edited return VN_AS(nodep->pinsp(), Arg)->exprp(); } + AstNode* methodCallWildcardIndexExpr(AstMethodCall* nodep, AstWildcardArrayDType* adtypep) { + AstNode* const index_exprp = VN_CAST(nodep->pinsp(), Arg)->exprp(); + iterateCheck(nodep, "index", index_exprp, CONTEXT, FINAL, adtypep->findStringDType(), + EXTEND_EXP); + 
VL_DANGLING(index_exprp); // May have been edited + return VN_AS(nodep->pinsp(), Arg)->exprp(); + } void methodCallLValueRecurse(AstMethodCall* nodep, AstNode* childp, const VAccess& access) { if (AstNodeVarRef* const varrefp = VN_CAST(childp, NodeVarRef)) { varrefp->access(access); @@ -3393,6 +3540,8 @@ private: VL_DO_DANGLING(patternArray(nodep, vdtypep, defaultp), nodep); } else if (auto* const vdtypep = VN_CAST(dtypep, AssocArrayDType)) { VL_DO_DANGLING(patternAssoc(nodep, vdtypep, defaultp), nodep); + } else if (auto* const vdtypep = VN_CAST(dtypep, WildcardArrayDType)) { + VL_DO_DANGLING(patternWildcard(nodep, vdtypep, defaultp), nodep); } else if (auto* const vdtypep = VN_CAST(dtypep, DynArrayDType)) { VL_DO_DANGLING(patternDynArray(nodep, vdtypep, defaultp), nodep); } else if (auto* const vdtypep = VN_CAST(dtypep, QueueDType)) { @@ -3576,6 +3725,26 @@ private: // if (debug() >= 9) newp->dumpTree("-apat-out: "); VL_DO_DANGLING(pushDeletep(nodep), nodep); // Deletes defaultp also, if present } + void patternWildcard(AstPattern* nodep, AstWildcardArrayDType* arrayDtp, + AstPatMember* defaultp) { + AstNode* defaultValuep = nullptr; + if (defaultp) defaultValuep = defaultp->lhssp()->unlinkFrBack(); + AstNode* newp = new AstConsWildcard{nodep->fileline(), defaultValuep}; + newp->dtypeFrom(arrayDtp); + for (AstPatMember* patp = VN_AS(nodep->itemsp(), PatMember); patp; + patp = VN_AS(patp->nextp(), PatMember)) { + patp->dtypep(arrayDtp->subDTypep()); + AstNode* const valuep = patternMemberValueIterate(patp); + AstNode* const keyp = patp->keyp(); + auto* const newap + = new AstSetWildcard{nodep->fileline(), newp, keyp->unlinkFrBack(), valuep}; + newap->dtypeFrom(arrayDtp); + newp = newap; + } + nodep->replaceWith(newp); + // if (debug() >= 9) newp->dumpTree("-apat-out: "); + VL_DO_DANGLING(pushDeletep(nodep), nodep); // Deletes defaultp also, if present + } void patternDynArray(AstPattern* nodep, AstDynArrayDType* arrayp, AstPatMember*) { AstNode* newp = new 
AstConsDynArray(nodep->fileline()); newp->dtypeFrom(arrayp); @@ -4092,6 +4261,7 @@ private: added = true; newFormat += "%g"; } else if (VN_IS(dtypep, AssocArrayDType) // + || VN_IS(dtypep, WildcardArrayDType) // || VN_IS(dtypep, ClassRefDType) // || VN_IS(dtypep, DynArrayDType) // || VN_IS(dtypep, QueueDType)) { diff --git a/src/V3WidthSel.cpp b/src/V3WidthSel.cpp index 793b98aec..4d6b7de01 100644 --- a/src/V3WidthSel.cpp +++ b/src/V3WidthSel.cpp @@ -88,6 +88,7 @@ private: if (const AstNodeArrayDType* const adtypep = VN_CAST(ddtypep, NodeArrayDType)) { fromRange = adtypep->declRange(); } else if (VN_IS(ddtypep, AssocArrayDType)) { + } else if (VN_IS(ddtypep, WildcardArrayDType)) { } else if (VN_IS(ddtypep, DynArrayDType)) { } else if (VN_IS(ddtypep, QueueDType)) { } else if (const AstNodeUOrStructDType* const adtypep @@ -257,6 +258,15 @@ private: if (debug() >= 9) newp->dumpTree(cout, "--SELBTn: "); nodep->replaceWith(newp); VL_DO_DANGLING(pushDeletep(nodep), nodep); + } else if (const AstWildcardArrayDType* const adtypep + = VN_CAST(ddtypep, WildcardArrayDType)) { + // SELBIT(array, index) -> WILDCARDSEL(array, index) + AstNode* const subp = rhsp; + AstWildcardSel* const newp = new AstWildcardSel{nodep->fileline(), fromp, subp}; + newp->dtypeFrom(adtypep->subDTypep()); // Need to strip off array reference + if (debug() >= 9) newp->dumpTree(cout, "--SELBTn: "); + nodep->replaceWith(newp); + VL_DO_DANGLING(pushDeletep(nodep), nodep); } else if (const AstDynArrayDType* const adtypep = VN_CAST(ddtypep, DynArrayDType)) { // SELBIT(array, index) -> CMETHODCALL(queue, "at", index) AstNode* const subp = rhsp; diff --git a/src/verilog.y b/src/verilog.y index 34fbe86b0..82faf4a7d 100644 --- a/src/verilog.y +++ b/src/verilog.y @@ -2074,10 +2074,8 @@ variable_dimension: // ==IEEE: variable_dimension // // IEEE: associative_dimension (if data_type) // // Can't tell which until see if expr is data type or not | '[' exprOrDataType ']' { $$ = new AstBracketRange($1, $2); } - | 
yP_BRASTAR ']' - { $$ = nullptr; BBUNSUP($1, "Unsupported: [*] wildcard associative arrays"); } - | '[' '*' ']' - { $$ = nullptr; BBUNSUP($2, "Unsupported: [*] wildcard associative arrays"); } + | yP_BRASTAR ']' { $$ = new AstWildcardRange{$1}; } + | '[' '*' ']' { $$ = new AstWildcardRange{$1}; } // // IEEE: queue_dimension // // '[' '$' ']' -- $ is part of expr, see '[' constExpr ']' // // '[' '$' ':' expr ']' -- anyrange:expr:$ diff --git a/test_regress/t/t_assoc_wildcard.pl b/test_regress/t/t_assoc_wildcard.pl new file mode 100755 index 000000000..9a15dd2cc --- /dev/null +++ b/test_regress/t/t_assoc_wildcard.pl @@ -0,0 +1,21 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2019 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_assoc_wildcard_unsup.v b/test_regress/t/t_assoc_wildcard.v similarity index 71% rename from test_regress/t/t_assoc_wildcard_unsup.v rename to test_regress/t/t_assoc_wildcard.v index d17eaf3c1..2d4e5c782 100644 --- a/test_regress/t/t_assoc_wildcard_unsup.v +++ b/test_regress/t/t_assoc_wildcard.v @@ -22,26 +22,25 @@ module t (/*AUTOARG*/ cyc <= cyc + 1; begin // Wildcard - string a [*]; + string a [*] = '{default: "nope", "BBBBB": "fooing", 23'h434343: "baring"}; int k; string v; + v = a["CCC"]; `checks(v, "baring"); + v = a["BBBBB"]; `checks(v, "fooing"); + a[32'd1234] = "fooed"; a[4'd3] = "bared"; - i = a.num(); `checkh(i, 2); - i = a.size(); `checkh(i, 2); - v = a[32'd1234]; `checks(v, "fooed"); + a[79'h4141] = "bazed"; + i = a.num(); `checkh(i, 5); + i = a.size(); `checkh(i, 5); + v = a[39'd1234]; `checks(v, "fooed"); + v = a["AA"]; `checks(v, "bazed"); v = a[4'd3]; `checks(v, "bared"); i = a.exists("baz"); `checkh(i, 0); i = a.exists(4'd3); `checkh(i, 1); - i = a.first(k); `checkh(i, 1); `checks(k, 4'd3); - i = a.next(k); `checkh(i, 1); `checks(k, 32'd1234); - i = a.next(k); `checkh(i, 0); - i = a.last(k); `checkh(i, 1); `checks(k, 32'd1234); - i = a.prev(k); `checkh(i, 1); `checks(k, 4'd3); - i = a.prev(k); `checkh(i, 0); a.delete(4'd3); - i = a.size(); `checkh(i, 1); + i = a.size(); `checkh(i, 4); end $write("*-* All Finished *-*\n"); diff --git a/test_regress/t/t_assoc_wildcard_bad.out b/test_regress/t/t_assoc_wildcard_bad.out new file mode 100644 index 000000000..2bd459ff6 --- /dev/null +++ b/test_regress/t/t_assoc_wildcard_bad.out @@ -0,0 +1,73 @@ +%Error: t/t_assoc_wildcard_bad.v:23:13: The 1 arguments passed to .num method does not match its requiring 0 arguments + : ... 
In instance t + 23 | v = a.num("badarg"); + | ^~~ +%Error: t/t_assoc_wildcard_bad.v:24:13: The 1 arguments passed to .size method does not match its requiring 0 arguments + : ... In instance t + 24 | v = a.size("badarg"); + | ^~~~ +%Error: t/t_assoc_wildcard_bad.v:25:13: The 0 arguments passed to .exists method does not match its requiring 1 arguments + : ... In instance t + 25 | v = a.exists(); + | ^~~~~~ +%Error: t/t_assoc_wildcard_bad.v:26:13: The 2 arguments passed to .exists method does not match its requiring 1 arguments + : ... In instance t + 26 | v = a.exists(k, "bad2"); + | ^~~~~~ +%Error: t/t_assoc_wildcard_bad.v:27:9: The 2 arguments passed to .delete method does not match its requiring 0 to 1 arguments + : ... In instance t + 27 | a.delete(k, "bad2"); + | ^~~~~~ +%Error: t/t_assoc_wildcard_bad.v:29:9: Array method 'sort' not legal on associative arrays + : ... In instance t + 29 | a.sort; + | ^~~~ +%Error: t/t_assoc_wildcard_bad.v:30:9: Array method 'rsort' not legal on associative arrays + : ... In instance t + 30 | a.rsort; + | ^~~~~ +%Error: t/t_assoc_wildcard_bad.v:31:9: Array method 'reverse' not legal on associative arrays + : ... In instance t + 31 | a.reverse; + | ^~~~~~~ +%Error: t/t_assoc_wildcard_bad.v:32:9: Array method 'shuffle' not legal on associative arrays + : ... In instance t + 32 | a.shuffle; + | ^~~~~~~ +%Error: t/t_assoc_wildcard_bad.v:34:9: Array method 'first' not legal on wildcard associative arrays + : ... In instance t + 34 | a.first; + | ^~~~~ +%Error: t/t_assoc_wildcard_bad.v:35:9: Array method 'last' not legal on wildcard associative arrays + : ... In instance t + 35 | a.last; + | ^~~~ +%Error: t/t_assoc_wildcard_bad.v:36:9: Array method 'next' not legal on wildcard associative arrays + : ... In instance t + 36 | a.next; + | ^~~~ +%Error: t/t_assoc_wildcard_bad.v:37:9: Array method 'prev' not legal on wildcard associative arrays + : ... 
In instance t + 37 | a.prev; + | ^~~~ +%Error: t/t_assoc_wildcard_bad.v:38:9: Array method 'unique_index' not legal on wildcard associative arrays + : ... In instance t + 38 | a.unique_index; + | ^~~~~~~~~~~~ +%Error: t/t_assoc_wildcard_bad.v:39:9: Array method 'find_index' not legal on wildcard associative arrays + : ... In instance t + 39 | a.find_index; + | ^~~~~~~~~~ +%Error: t/t_assoc_wildcard_bad.v:40:9: Array method 'find_first_index' not legal on wildcard associative arrays + : ... In instance t + 40 | a.find_first_index; + | ^~~~~~~~~~~~~~~~ +%Error: t/t_assoc_wildcard_bad.v:41:9: Array method 'find_last_index' not legal on wildcard associative arrays + : ... In instance t + 41 | a.find_last_index; + | ^~~~~~~~~~~~~~~ +%Error: t/t_assoc_wildcard_bad.v:43:8: Wildcard index must be integral (IEEE 1800-2017 7.8.1) + : ... In instance t + 43 | a[x] = "bad"; + | ^ +%Error: Exiting due to diff --git a/test_regress/t/t_assoc_wildcard_unsup.pl b/test_regress/t/t_assoc_wildcard_bad.pl similarity index 100% rename from test_regress/t/t_assoc_wildcard_unsup.pl rename to test_regress/t/t_assoc_wildcard_bad.pl diff --git a/test_regress/t/t_assoc_wildcard_bad.v b/test_regress/t/t_assoc_wildcard_bad.v new file mode 100644 index 000000000..85ebf83ba --- /dev/null +++ b/test_regress/t/t_assoc_wildcard_bad.v @@ -0,0 +1,45 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2019 by Wilson Snyder. 
+// SPDX-License-Identifier: CC0-1.0 + +typedef class Cls; + +class Cls; + integer imembera; + integer imemberb; +endclass : Cls + +module t (/*AUTOARG*/); + + initial begin + string a [*]; + string k; + string v; + + Cls x; + + v = a.num("badarg"); + v = a.size("badarg"); + v = a.exists(); // Bad + v = a.exists(k, "bad2"); + a.delete(k, "bad2"); + + a.sort; // Not legal on assoc + a.rsort; // Not legal on assoc + a.reverse; // Not legal on assoc + a.shuffle; // Not legal on assoc + + a.first; // Not legal on wildcard + a.last; // Not legal on wildcard + a.next; // Not legal on wildcard + a.prev; // Not legal on wildcard + a.unique_index; // Not legal on wildcard + a.find_index; // Not legal on wildcard + a.find_first_index; // Not legal on wildcard + a.find_last_index; // Not legal on wildcard + + a[x] = "bad"; + end +endmodule diff --git a/test_regress/t/t_assoc_wildcard_method.pl b/test_regress/t/t_assoc_wildcard_method.pl new file mode 100755 index 000000000..b46d46042 --- /dev/null +++ b/test_regress/t/t_assoc_wildcard_method.pl @@ -0,0 +1,21 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_assoc_wildcard_method.v b/test_regress/t/t_assoc_wildcard_method.v new file mode 100644 index 000000000..c61086e7b --- /dev/null +++ b/test_regress/t/t_assoc_wildcard_method.v @@ -0,0 +1,127 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2019 by Wilson Snyder. +// SPDX-License-Identifier: CC0-1.0 + +`define stop $stop +`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got='h%x exp='h%x\n", `__FILE__,`__LINE__, (gotv), (expv)); `stop; end while(0); +`define checks(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got='%s' exp='%s'\n", `__FILE__,`__LINE__, (gotv), (expv)); `stop; end while(0); + +module t (/*AUTOARG*/); + initial begin + int q[*]; + int qe[*]; // Empty + int qv[$]; // Value returns + int qi[$]; // Index returns + int i; + string v; + + q = '{"a":1, "b":2, "c":2, "d":4, "e":3}; + v = $sformatf("%p", q); `checks(v, "'{\"a\":'h1, \"b\":'h2, \"c\":'h2, \"d\":'h4, \"e\":'h3} "); + + // NOT tested: with ... 
selectors + + //q.sort; // Not legal on assoc - see t_assoc_meth_bad + //q.rsort; // Not legal on assoc - see t_assoc_meth_bad + //q.reverse; // Not legal on assoc - see t_assoc_meth_bad + //q.shuffle; // Not legal on assoc - see t_assoc_meth_bad + + v = $sformatf("%p", qe); `checks(v, "'{}"); + qv = q.unique; + v = $sformatf("%p", qv); `checks(v, "'{'h1, 'h2, 'h4, 'h3} "); + qv = qe.unique; + v = $sformatf("%p", qv); `checks(v, "'{}"); + + //q.unique_index; // Not legal on wildcard assoc - see t_assoc_wildcard_bad + + // These require an with clause or are illegal + qv = q.find with (item == 2); + v = $sformatf("%p", qv); `checks(v, "'{'h2, 'h2} "); + qv = q.find_first with (item == 2); + v = $sformatf("%p", qv); `checks(v, "'{'h2} "); + qv = q.find_last with (item == 2); + v = $sformatf("%p", qv); `checks(v, "'{'h2} "); + + qv = q.find with (item == 20); + v = $sformatf("%p", qv); `checks(v, "'{}"); + qv = q.find_first with (item == 20); + v = $sformatf("%p", qv); `checks(v, "'{}"); + qv = q.find_last with (item == 20); + v = $sformatf("%p", qv); `checks(v, "'{}"); + + //q.find_index; // Not legal on wildcard assoc - see t_assoc_wildcard_bad + //q.find_first_index; // Not legal on wildcard assoc - see t_assoc_wildcard_bad + //q.find_last_index; // Not legal on wildcard assoc - see t_assoc_wildcard_bad + + qv = q.min; + v = $sformatf("%p", qv); `checks(v, "'{'h1} "); + qv = q.max; + v = $sformatf("%p", qv); `checks(v, "'{'h4} "); + + qv = qe.min; + v = $sformatf("%p", qv); `checks(v, "'{}"); + qv = qe.max; + v = $sformatf("%p", qv); `checks(v, "'{}"); + + // Reduction methods + + i = q.sum; + `checkh(i, 32'hc); + i = q.sum with (item + 1); + `checkh(i, 32'h11); + i = q.product; + `checkh(i, 32'h30); + i = q.product with (item + 1); + `checkh(i, 32'h168); + + i = qe.sum; + `checkh(i, 32'h0); + i = qe.product; + `checkh(i, 32'h0); + + q = '{10:32'b1100, 11:32'b1010}; + i = q.and; + `checkh(i, 32'b1000); + i = q.and with (item + 1); + `checkh(i, 32'b1001); + i = 
q.or; + `checkh(i, 32'b1110); + i = q.or with (item + 1); + `checkh(i, 32'b1111); + i = q.xor; + `checkh(i, 32'b0110); + i = q.xor with (item + 1); + `checkh(i, 32'b0110); + + i = qe.and; + `checkh(i, 32'b0); + i = qe.or; + `checkh(i, 32'b0); + i = qe.xor; + `checkh(i, 32'b0); + + i = q.and(); + `checkh(i, 32'b1000); + i = q.and() with (item + 1); + `checkh(i, 32'b1001); + i = q.or(); + `checkh(i, 32'b1110); + i = q.or() with (item + 1); + `checkh(i, 32'b1111); + i = q.xor(); + `checkh(i, 32'b0110); + i = q.xor() with (item + 1); + `checkh(i, 32'b0110); + + i = qe.and(); + `checkh(i, 32'b0); + i = qe.or(); + `checkh(i, 32'b0); + i = qe.xor(); + `checkh(i, 32'b0); + + $write("*-* All Finished *-*\n"); + $finish; + end +endmodule diff --git a/test_regress/t/t_assoc_wildcard_unsup.out b/test_regress/t/t_assoc_wildcard_unsup.out deleted file mode 100644 index 209295f7f..000000000 --- a/test_regress/t/t_assoc_wildcard_unsup.out +++ /dev/null @@ -1,5 +0,0 @@ -%Error-UNSUPPORTED: t/t_assoc_wildcard_unsup.v:25:19: Unsupported: [*] wildcard associative arrays - 25 | string a [*]; - | ^~ - ... For error description see https://verilator.org/warn/UNSUPPORTED?v=latest -%Error: Exiting due to From 30e3edb81d5b2b280587e47af8416e45bed4b498 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 20 Jul 2022 13:15:19 +0100 Subject: [PATCH 048/119] Remove deprecated and unused timescale override defines These have been 'deprecated' for 2 years and are otherwise unused except for using a temporary placeholder value, which I have inlined with the default value. Also remove the now-unused VL_TIME_STR_CONVERT utility function (and corresponding unit tests), which have no references in any project on GitHub.
--- include/verilated.cpp | 28 ++---------------- include/verilated_funcs.h | 18 ------------ test_regress/t/t_timescale.cpp | 54 ---------------------------------- test_regress/t/t_timescale.pl | 24 --------------- test_regress/t/t_timescale.v | 3 -- 5 files changed, 3 insertions(+), 124 deletions(-) delete mode 100644 test_regress/t/t_timescale.cpp delete mode 100755 test_regress/t/t_timescale.pl delete mode 100644 test_regress/t/t_timescale.v diff --git a/include/verilated.cpp b/include/verilated.cpp index 74aaf3fbd..7b9200363 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -2164,29 +2164,6 @@ void VL_WRITEMEM_N(bool hex, // Hex format, else binary //=========================================================================== // Timescale conversion -// Helper function for conversion of timescale strings -// Converts (1|10|100)(s|ms|us|ns|ps|fs) to power of then -int VL_TIME_STR_CONVERT(const char* strp) VL_PURE { - int scale = 0; - if (!strp) return 0; - if (*strp++ != '1') return 0; - while (*strp == '0') { - ++scale; - ++strp; - } - switch (*strp++) { - case 's': break; - case 'm': scale -= 3; break; - case 'u': scale -= 6; break; - case 'n': scale -= 9; break; - case 'p': scale -= 12; break; - case 'f': scale -= 15; break; - default: return 0; - } - if ((scale < 0) && (*strp++ != 's')) return 0; - if (*strp) return 0; - return scale; -} static const char* vl_time_str(int scale) VL_PURE { static const char* const names[] = {"100s", "10s", "1s", "100ms", "10ms", "1ms", "100us", "10us", "1us", @@ -2308,8 +2285,9 @@ void VerilatedContext::checkMagic(const VerilatedContext* contextp) { } VerilatedContext::Serialized::Serialized() { - m_timeunit = VL_TIME_UNIT; // Initial value until overriden by _Vconfigure - m_timeprecision = VL_TIME_PRECISION; // Initial value until overriden by _Vconfigure + constexpr int8_t picosecond = -12; + m_timeunit = picosecond; // Initial value until overriden by _Vconfigure + m_timeprecision = picosecond; // Initial 
value until overriden by _Vconfigure } void VerilatedContext::assertOn(bool flag) VL_MT_SAFE { diff --git a/include/verilated_funcs.h b/include/verilated_funcs.h index c8fb91d3e..a7c3fcc73 100644 --- a/include/verilated_funcs.h +++ b/include/verilated_funcs.h @@ -256,25 +256,7 @@ extern void _vl_debug_print_w(int lbits, WDataInP const iwp); //========================================================================= // Pli macros -extern int VL_TIME_STR_CONVERT(const char* strp) VL_PURE; - -// These are deprecated and used only to establish the default precision/units. -// Use Verilator timescale-override for better control. // clang-format off -#ifndef VL_TIME_PRECISION -# ifdef VL_TIME_PRECISION_STR -# define VL_TIME_PRECISION VL_TIME_STR_CONVERT(VL_STRINGIFY(VL_TIME_PRECISION_STR)) -# else -# define VL_TIME_PRECISION (-12) ///< Timescale default units if not in Verilog - picoseconds -# endif -#endif -#ifndef VL_TIME_UNIT -# ifdef VL_TIME_UNIT_STR -# define VL_TIME_UNIT VL_TIME_STR_CONVERT(VL_STRINGIFY(VL_TIME_PRECISION_STR)) -# else -# define VL_TIME_UNIT (-12) ///< Timescale default units if not in Verilog - picoseconds -# endif -#endif #if defined(SYSTEMC_VERSION) /// Return current simulation time diff --git a/test_regress/t/t_timescale.cpp b/test_regress/t/t_timescale.cpp deleted file mode 100644 index 84c53c583..000000000 --- a/test_regress/t/t_timescale.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// -*- mode: C++; c-file-style: "cc-mode" -*- -// This file ONLY is placed under the Creative Commons Public Domain, for -// any use, without warranty, 2020 by Wilson Snyder. 
-// SPDX-License-Identifier: CC0-1.0 - -#include - -#include "TestCheck.h" - -#include VM_PREFIX_INCLUDE - -unsigned long long main_time = 0; -double sc_time_stamp() { return (double)main_time; } - -#include - -#define FILENM "t_timescale.cpp" - -int errors = 0; - -int main(int argc, char** argv, char** env) { - VM_PREFIX* top = new VM_PREFIX("top"); - - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("100s"), 2); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("10s"), 1); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1s"), 0); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("100ms"), -1); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("10ms"), -2); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1ms"), -3); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("100us"), -4); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("10us"), -5); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1us"), -6); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("100ns"), -7); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("10ns"), -8); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1ns"), -9); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("100ps"), -10); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("10ps"), -11); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1ps"), -12); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("100fs"), -13); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("10fs"), -14); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1fs"), -15); - - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1.5s"), 0); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1s "), 0); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("1ss"), 0); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT("s"), 0); - TEST_CHECK_EQ(VL_TIME_STR_CONVERT(0), 0); - - top->final(); - VL_DO_DANGLING(delete top, top); - printf("*-* All Finished *-*\n"); - - return errors ? 
10 : 0; -} diff --git a/test_regress/t/t_timescale.pl b/test_regress/t/t_timescale.pl deleted file mode 100755 index 1bab927e5..000000000 --- a/test_regress/t/t_timescale.pl +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env perl -if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } -# DESCRIPTION: Verilator: Verilog Test driver/expect definition -# -# Copyright 2003-2019 by Wilson Snyder. This program is free software; you -# can redistribute it and/or modify it under the terms of either the GNU -# Lesser General Public License Version 3 or the Perl Artistic License -# Version 2.0. -# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 - -scenarios(vlt_all => 1); - -compile( - make_top_shell => 0, - make_main => 0, - v_flags2 => ["--exe $Self->{t_dir}/t_timescale.cpp"], - ); - -execute( - check_finished => 1, - ); - -ok(1); -1; diff --git a/test_regress/t/t_timescale.v b/test_regress/t/t_timescale.v deleted file mode 100644 index 396bc9b35..000000000 --- a/test_regress/t/t_timescale.v +++ /dev/null @@ -1,3 +0,0 @@ -module t; - -endmodule From e0b61ceabd76d2fd3aae0a1ff1bd522e307de409 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Thu, 21 Jul 2022 14:58:48 +0100 Subject: [PATCH 049/119] Remove legacy #ifdef SYSTEMC_64BIT_PATCHES These days this is always false, see #3505 --- include/verilated_fst_sc.cpp | 3 --- include/verilated_fst_sc.h | 3 --- include/verilated_vcd_sc.cpp | 3 --- include/verilated_vcd_sc.h | 3 --- 4 files changed, 12 deletions(-) diff --git a/include/verilated_fst_sc.cpp b/include/verilated_fst_sc.cpp index fc6693a87..084546435 100644 --- a/include/verilated_fst_sc.cpp +++ b/include/verilated_fst_sc.cpp @@ -63,9 +63,6 @@ void VerilatedFstSc::trace(const unsigned int&, const std::string&, const char** DECL_TRACE_METHOD_B( unsigned short ) DECL_TRACE_METHOD_B( unsigned int ) DECL_TRACE_METHOD_B( unsigned long ) -#ifdef SYSTEMC_64BIT_PATCHES - DECL_TRACE_METHOD_B( unsigned long long) -#endif DECL_TRACE_METHOD_B( char ) 
DECL_TRACE_METHOD_B( short ) DECL_TRACE_METHOD_B( int ) diff --git a/include/verilated_fst_sc.h b/include/verilated_fst_sc.h index e0958db9e..5ae6b7631 100644 --- a/include/verilated_fst_sc.h +++ b/include/verilated_fst_sc.h @@ -95,9 +95,6 @@ private: DECL_TRACE_METHOD_B( unsigned short ) DECL_TRACE_METHOD_B( unsigned int ) DECL_TRACE_METHOD_B( unsigned long ) -#ifdef SYSTEMC_64BIT_PATCHES - DECL_TRACE_METHOD_B( unsigned long long) -#endif DECL_TRACE_METHOD_B( char ) DECL_TRACE_METHOD_B( short ) DECL_TRACE_METHOD_B( int ) diff --git a/include/verilated_vcd_sc.cpp b/include/verilated_vcd_sc.cpp index d03a85231..0ebff7554 100644 --- a/include/verilated_vcd_sc.cpp +++ b/include/verilated_vcd_sc.cpp @@ -63,9 +63,6 @@ void VerilatedVcdSc::trace(const unsigned int&, const std::string&, const char** DECL_TRACE_METHOD_B( unsigned short ) DECL_TRACE_METHOD_B( unsigned int ) DECL_TRACE_METHOD_B( unsigned long ) -#ifdef SYSTEMC_64BIT_PATCHES - DECL_TRACE_METHOD_B( unsigned long long) -#endif DECL_TRACE_METHOD_B( char ) DECL_TRACE_METHOD_B( short ) DECL_TRACE_METHOD_B( int ) diff --git a/include/verilated_vcd_sc.h b/include/verilated_vcd_sc.h index beb153e98..5e71811c0 100644 --- a/include/verilated_vcd_sc.h +++ b/include/verilated_vcd_sc.h @@ -98,9 +98,6 @@ private: DECL_TRACE_METHOD_B( unsigned short ) DECL_TRACE_METHOD_B( unsigned int ) DECL_TRACE_METHOD_B( unsigned long ) -#ifdef SYSTEMC_64BIT_PATCHES - DECL_TRACE_METHOD_B( unsigned long long) -#endif DECL_TRACE_METHOD_B( char ) DECL_TRACE_METHOD_B( short ) DECL_TRACE_METHOD_B( int ) From 60eab3eb8cb5862155137d25b0fa513bcd864d95 Mon Sep 17 00:00:00 2001 From: Yutetsu TAKATSUKASA Date: Sun, 24 Jul 2022 19:54:37 +0900 Subject: [PATCH 050/119] Fix wrong result of bit op tree optimization #3509 (#3516) * Tests: Add a test to reproduce #3509 * Tests: Compile without tautological-compare check because bit op tree optimization is disabled in the test. * Internals: Dedup code. No functional change is intended. * Fix #3509. 
"2'b10 == (2'b11 & {1'b0, val[0]})" and "2'b10 != (2'b11 & {1'b0, val[0]})" were wrongly optimized to "!val[0]" and "val[0]" respectively. Now properly optimize them to 1'b0 and 1'b1. * Commentary * Commentary: Update Changes --- Changes | 1 + src/V3Const.cpp | 45 ++++++++++++++++--------- test_regress/t/t_const_no_opt.pl | 9 ++++- test_regress/t/t_const_opt.v | 56 +++++++++++++++++++++++++++++++- 4 files changed, 93 insertions(+), 18 deletions(-) diff --git a/Changes b/Changes index 910eceb2f..9b7dd96e8 100644 --- a/Changes +++ b/Changes @@ -16,6 +16,7 @@ Verilator 4.225 devel * Fix incorrect bit op tree optimization (#3470). [algrobman] * Fix empty string arguments to display (#3484). [Grulfen] * Fix table misoptimizing away display (#3488). [Stefan Post] +* Fix wrong bit op tree optimization (#3509). [Nathan Graybeal] Verilator 4.224 2022-06-19 diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 700a99e38..6f543fdba 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -566,9 +566,35 @@ class ConstBitOpTreeVisitor final : public VNVisitor { const AstConst* const constp = VN_CAST(lhsp, Const); CONST_BITOP_RETURN_IF(!constp, nodep->lhsp()); - const bool maskFlip = isOrTree(); const V3Number& compNum = constp->num(); + auto setPolarities = [this, &compNum](const LeafInfo& ref, const V3Number* maskp) { + const bool maskFlip = isOrTree(); + int constantWidth = compNum.width(); + if (maskp) constantWidth = std::max(constantWidth, maskp->width()); + const int maxBitIdx = std::max(ref.lsb() + constantWidth, ref.msb() + 1); + // Mark all bits checked by this comparison + for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { + const int maskIdx = bitIdx - ref.lsb(); + const bool mask0 = maskp && maskp->bitIs0(maskIdx); + const bool outOfRange = bitIdx > ref.msb(); + if (mask0 || outOfRange) { // RHS is 0 + if (compNum.bitIs1(maskIdx)) { + // LHS is 1 + // And tree: 1 == 0 => always false, set v && !v + // Or tree : 1 != 0 => always true, set v || !v + 
m_bitPolarities.emplace_back(ref, true, 0); + m_bitPolarities.emplace_back(ref, false, 0); + break; + } else { // This bitIdx is irrelevant + continue; + } + } + const bool polarity = compNum.bitIs1(maskIdx) != maskFlip; + m_bitPolarities.emplace_back(ref, polarity, bitIdx); + } + }; + if (const AstAnd* const andp = VN_CAST(nodep->rhsp(), And)) { // comp == (mask & v) const LeafInfo& mask = findLeaf(andp->lhsp(), true); CONST_BITOP_RETURN_IF(!mask.constp() || mask.lsb() != 0, andp->lhsp()); @@ -583,14 +609,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { incrOps(nodep, __LINE__); incrOps(andp, __LINE__); - // Mark all bits checked by this comparison - const int maxBitIdx = std::min(ref.lsb() + maskNum.width(), ref.msb() + 1); - for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { - const int maskIdx = bitIdx - ref.lsb(); - if (maskNum.bitIs0(maskIdx)) continue; - const bool polarity = compNum.bitIs1(maskIdx) != maskFlip; - m_bitPolarities.emplace_back(ref, polarity, bitIdx); - } + setPolarities(ref, &maskNum); } else { // comp == v const LeafInfo& ref = findLeaf(nodep->rhsp(), false); CONST_BITOP_RETURN_IF(!ref.refp(), nodep->rhsp()); @@ -599,13 +618,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor { incrOps(nodep, __LINE__); - // Mark all bits checked by this comparison - const int maxBitIdx = std::min(ref.lsb() + compNum.width(), ref.msb() + 1); - for (int bitIdx = ref.lsb(); bitIdx < maxBitIdx; ++bitIdx) { - const int maskIdx = bitIdx - ref.lsb(); - const bool polarity = compNum.bitIs1(maskIdx) != maskFlip; - m_bitPolarities.emplace_back(ref, polarity, bitIdx); - } + setPolarities(ref, nullptr); } } else { CONST_BITOP_SET_FAILED("Mixture of different ops cannot be optimized", nodep); diff --git a/test_regress/t/t_const_no_opt.pl b/test_regress/t/t_const_no_opt.pl index 79bc15076..db1a41047 100755 --- a/test_regress/t/t_const_no_opt.pl +++ b/test_regress/t/t_const_no_opt.pl @@ -13,7 +13,14 @@ top_filename("t/t_const_opt.v"); # Run the 
same design as t_const_opt.pl without bitopt tree optimization to make sure that the result is same. compile( - verilator_flags2 => ["-Wno-UNOPTTHREADS", "--stats", "-fno-const-bit-op-tree", "$Self->{t_dir}/t_const_opt.cpp"], + verilator_flags2 => [ + "-Wno-UNOPTTHREADS", + "--stats", + "-fno-const-bit-op-tree", + "$Self->{t_dir}/t_const_opt.cpp", + "-CFLAGS", + "-Wno-tautological-compare" + ], ); execute( diff --git a/test_regress/t/t_const_opt.v b/test_regress/t/t_const_opt.v index e24d28bf8..559477475 100644 --- a/test_regress/t/t_const_opt.v +++ b/test_regress/t/t_const_opt.v @@ -87,10 +87,11 @@ module Test(/*AUTOARG*/ logic bug3197_out; logic bug3445_out; logic bug3470_out; + logic bug3509_out; output logic o; - logic [8:0] tmp; + logic [9:0] tmp; assign o = ^tmp; always_ff @(posedge clk) begin @@ -115,12 +116,14 @@ module Test(/*AUTOARG*/ tmp[6] <= bug3197_out; tmp[7] <= bug3445_out; tmp[8] <= bug3470_out; + tmp[9] <= bug3509_out; end bug3182 i_bug3182(.in(d[4:0]), .out(bug3182_out)); bug3197 i_bug3197(.clk(clk), .in(d), .out(bug3197_out)); bug3445 i_bug3445(.clk(clk), .in(d), .out(bug3445_out)); bug3470 i_bug3470(.clk(clk), .in(d), .out(bug3470_out)); + bug3509 i_bug3509(.clk(clk), .in(d), .out(bug3509_out)); endmodule @@ -235,3 +238,54 @@ module bug3470(input wire clk, input wire [31:0] in, output wire out); assign out = tmp; endmodule + +// Bug3509 +// Only bit range of "var" was considered in +// "comp == (mask & var)" +// and +// "comp != (mask & var)" +// +// It caused wrong result if "comp" has wider bit width because +// upper bit of "comp" was ignored. +// +// If "comp" has '1' in upper bit range than "var", +// the result is constant after optimization. 
+module bug3509(input wire clk, input wire [31:0] in, output reg out); + reg [2:0] r0; + always_ff @(posedge clk) + r0 <= in[2:0]; + + wire [3:0] w1_0 = {1'b0, in[2:0]}; + wire [3:0] w1_1 = {1'b0, r0}; + + wire tmp[4]; + + // tmp[0:1] is always 0 because w1[3] == 1'b0 + // tmp[2:3] is always 1 because w1[3] == 1'b0 + assign tmp[0] = w1_0[3:2] == 2'h2 && w1_0[1:0] != 2'd3; + assign tmp[1] = w1_1[3:2] == 2'h2 && w1_1[1:0] != 2'd3; + assign tmp[2] = w1_0[3:2] != 2'h2 || w1_0[1:0] == 2'd3; + assign tmp[3] = w1_1[3:2] != 2'h2 || w1_1[1:0] == 2'd3; + always_ff @(posedge clk) begin + out <= tmp[0] | tmp[1] | !tmp[2] | !tmp[3]; + end + + always @(posedge clk) begin + if(tmp[0]) begin + $display("tmp[0] != 0"); + $stop; + end + if(tmp[1]) begin + $display("tmp[1] != 0"); + $stop; + end + if(!tmp[2]) begin + $display("tmp[2] != 1"); + $stop; + end + if(!tmp[3]) begin + $display("tmp[3] != 1"); + $stop; + end + end +endmodule From 89924bda51d2a170c23ab18e1c8efa7bab57f162 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Fri, 12 Nov 2021 16:46:58 +0000 Subject: [PATCH 051/119] Always type '$clog2' as signed 32 --- src/V3AstNodes.h | 4 +++- src/V3Width.cpp | 5 +---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index 8b258db0a..f86f8e350 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -6139,7 +6139,9 @@ public: class AstCLog2 final : public AstNodeUniop { public: AstCLog2(FileLine* fl, AstNode* lhsp) - : ASTGEN_SUPER_CLog2(fl, lhsp) {} + : ASTGEN_SUPER_CLog2(fl, lhsp) { + dtypeSetSigned32(); + } ASTNODE_NODE_FUNCS(CLog2) virtual void numberOperate(V3Number& out, const V3Number& lhs) override { out.opCLog2(lhs); } virtual string emitVerilog() override { return "%f$clog2(%l)"; } diff --git a/src/V3Width.cpp b/src/V3Width.cpp index c98e856aa..446f20ed8 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -1248,10 +1248,7 @@ private: } } virtual void visit(AstCLog2* nodep) override { - if (m_vup->prelim()) { - 
iterateCheckSizedSelf(nodep, "LHS", nodep->lhsp(), SELF, BOTH); - nodep->dtypeSetSigned32(); - } + if (m_vup->prelim()) { iterateCheckSizedSelf(nodep, "LHS", nodep->lhsp(), SELF, BOTH); } } virtual void visit(AstPow* nodep) override { // Pow is special, output sign only depends on LHS sign, but From 290c2e03884284ccb984da1d50ffecf1dd08459b Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 25 Jul 2022 12:51:02 +0100 Subject: [PATCH 052/119] Mark FileLine::v3errorEndFatal as noreturn --- src/V3FileLine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/V3FileLine.h b/src/V3FileLine.h index 1ff0c06a4..328b682cd 100644 --- a/src/V3FileLine.h +++ b/src/V3FileLine.h @@ -247,7 +247,7 @@ public: // OPERATORS void v3errorEnd(std::ostringstream& str, const string& extra = ""); - void v3errorEndFatal(std::ostringstream& str); + void v3errorEndFatal(std::ostringstream& str) VL_ATTR_NORETURN; /// When building an error, prefix for printing continuation lines /// e.g. information referring to the same FileLine as before string warnMore() const; From ac4ec879426905b1dcbd75dd0ccdbab26514bb0d Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 25 Jul 2022 12:59:26 +0100 Subject: [PATCH 053/119] Respect clang's default -fbracket-depth by default Set default value of --comp-limit-parens to 240, to respect default maximum nesting of parentheses in clang (which is controlled by -fbracket-depth and defaults to 256). For code generation consistency, also use the same default with gcc. 
--- src/V3Options.cpp | 4 ++-- src/V3Options.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/V3Options.cpp b/src/V3Options.cpp index e163278f8..29c4a4ca3 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1010,11 +1010,11 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char if (!strcmp(valp, "clang")) { m_compLimitBlocks = 80; // limit unknown m_compLimitMembers = 64; // soft limit, has slowdown bug as of clang++ 3.8 - m_compLimitParens = 80; // limit unknown + m_compLimitParens = 240; // controlled by -fbracket-depth, which defaults to 256 } else if (!strcmp(valp, "gcc")) { m_compLimitBlocks = 0; // Bug free m_compLimitMembers = 64; // soft limit, has slowdown bug as of g++ 7.1 - m_compLimitParens = 0; // Bug free + m_compLimitParens = 240; // Unlimited, but generate same code as for clang } else if (!strcmp(valp, "msvc")) { m_compLimitBlocks = 80; // 128, but allow some room m_compLimitMembers = 0; // probably ok, and AFAIK doesn't support anon structs diff --git a/src/V3Options.h b/src/V3Options.h index 78fa1b8bd..4db4e9d05 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -316,7 +316,7 @@ private: int m_compLimitBlocks = 0; // compiler selection; number of nested blocks int m_compLimitMembers = 64; // compiler selection; number of members in struct before make anon array - int m_compLimitParens = 0; // compiler selection; number of nested parens + int m_compLimitParens = 240; // compiler selection; number of nested parens string m_bin; // main switch: --bin {binary} string m_exeName; // main switch: -o {name} From eeef5ab4de04810c3ad9d2f96c3ceb037957cb70 Mon Sep 17 00:00:00 2001 From: Gustav Svensk Date: Mon, 25 Jul 2022 17:36:34 +0200 Subject: [PATCH 054/119] Fix sformat string incorrectly cleared (#3515) (#3519). 
--- docs/CONTRIBUTORS | 1 + include/verilated.cpp | 5 +++-- test_regress/t/t_sys_sformat.v | 6 ++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS index a80d950b9..77e232945 100644 --- a/docs/CONTRIBUTORS +++ b/docs/CONTRIBUTORS @@ -33,6 +33,7 @@ Gianfranco Costamagna Glen Gibb Graham Rushton Guokai Chen +Gustav Svensk Harald Heckmann Howard Su Huang Rui diff --git a/include/verilated.cpp b/include/verilated.cpp index 7b9200363..49ef170dc 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -1459,11 +1459,12 @@ void VL_SFORMAT_X(int obits, void* destp, const char* formatp, ...) VL_MT_SAFE { void VL_SFORMAT_X(int obits_ignored, std::string& output, const char* formatp, ...) VL_MT_SAFE { if (obits_ignored) {} - output = ""; + std::string temp_output; va_list ap; va_start(ap, formatp); - _vl_vsformat(output, formatp, ap); + _vl_vsformat(temp_output, formatp, ap); va_end(ap); + output = temp_output; } std::string VL_SFORMATF_NX(const char* formatp, ...) VL_MT_SAFE { diff --git a/test_regress/t/t_sys_sformat.v b/test_regress/t/t_sys_sformat.v index 2e3443e5b..bf8bc8a5b 100644 --- a/test_regress/t/t_sys_sformat.v +++ b/test_regress/t/t_sys_sformat.v @@ -83,6 +83,12 @@ module t; $swriteo(str2, 4'd12); if (str2 != "14") $stop; + str3 = "foo"; + $sformat(str3, "%s", str3); // $sformat twice so verilator does not + $sformat(str3, "%s", str3); // optimize the call to $sformat(str3, "%s", "foo") +`ifdef TEST_VERBOSE $display("str3=%0s", str3); `endif + if (str3 != "foo") $stop; + $write("*-* All Finished *-*\n"); $finish; end From 7b431b37c7cb92c9138b53104e210c3d7ccec7ff Mon Sep 17 00:00:00 2001 From: Mostafa Gamal Date: Mon, 25 Jul 2022 23:46:22 +0200 Subject: [PATCH 055/119] Fix struct pattern assignment (#2328) (#3517). 
--- docs/CONTRIBUTORS | 1 + src/V3Width.cpp | 94 +++++++----- src/verilog.y | 1 + test_regress/t/t_array_list_bad.out | 2 +- .../t/t_structu_dataType_assignment.pl | 21 +++ .../t/t_structu_dataType_assignment.v | 137 ++++++++++++++++++ .../t/t_structu_dataType_assignment_bad.out | 5 + .../t/t_structu_dataType_assignment_bad.pl | 21 +++ .../t/t_structu_dataType_assignment_bad.v | 21 +++ 9 files changed, 269 insertions(+), 34 deletions(-) create mode 100644 test_regress/t/t_structu_dataType_assignment.pl create mode 100644 test_regress/t/t_structu_dataType_assignment.v create mode 100644 test_regress/t/t_structu_dataType_assignment_bad.out create mode 100755 test_regress/t/t_structu_dataType_assignment_bad.pl create mode 100644 test_regress/t/t_structu_dataType_assignment_bad.v diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS index 77e232945..0b003034b 100644 --- a/docs/CONTRIBUTORS +++ b/docs/CONTRIBUTORS @@ -82,6 +82,7 @@ Michaël Lefebvre Mike Popoloski Miodrag Milanović Morten Borup Petersen +Mostafa Gamal Nandu Raj Nathan Kohagen Nathan Myers diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 446f20ed8..9ce0b5b4c 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -3559,50 +3559,67 @@ private: // which member each AstPatMember corresponds to before we can // determine the dtypep for that PatMember's value, and then // width the initial value appropriately. 
- using PatMap = std::map; + using PatMap = std::map; // Store member: value + using DTypeMap = std::map; // Store data_type: default_value PatMap patmap; + DTypeMap dtypemap; { const AstMemberDType* memp = vdtypep->membersp(); AstPatMember* patp = VN_CAST(nodep->itemsp(), PatMember); - for (; memp || patp;) { + while (patp) { do { - if (patp) { - if (patp->keyp()) { - if (const AstText* textp = VN_CAST(patp->keyp(), Text)) { - memp = vdtypep->findMember(textp->text()); - if (!memp) { - patp->keyp()->v3error("Assignment pattern key '" - << textp->text() - << "' not found as member"); - break; - } + if (patp->keyp()) { + // '{member:value} or '{data_type: default_value} + if (const AstText* textp = VN_CAST(patp->keyp(), Text)) { + // member: value + memp = vdtypep->findMember(textp->text()); + if (!memp) { + patp->keyp()->v3error("Assignment pattern key '" + << textp->text() + << "' not found as member"); + break; } else { - patp->keyp()->v3error( - "Assignment pattern key not supported/understood: " - << patp->keyp()->prettyTypeName()); + const std::pair ret = patmap.emplace(memp, patp); + if (!ret.second) { + patp->v3error("Assignment pattern contains duplicate entry: " + << VN_AS(patp->keyp(), Text)->text()); + } + memp = VN_AS(memp->nextp(), MemberDType); } } + else if (const AstNodeDType* nodedtypep = VN_CAST(patp->keyp(), NodeDType)){ + // data_type: default_value + const string dtype = nodedtypep->dtypep()->prettyDTypeName(); + auto it = dtypemap.find(dtype); + if (it == dtypemap.end()) { + dtypemap.emplace(dtype, patp); + } + else { + // Override stored default_value + it->second = patp->cloneTree(false); + } + } + else { + // Undefined pattern + patp->keyp()->v3error( + "Assignment pattern key not supported/understood: " + << patp->keyp()->prettyTypeName()); + } } - if (memp && !patp) { - // Missing init elements, warn below - memp = nullptr; - patp = nullptr; - break; - } else if (!memp && patp) { - patp->v3error("Assignment pattern contains too many 
elements"); - memp = nullptr; - patp = nullptr; - break; - } else { - const std::pair ret = patmap.emplace(memp, patp); - if (!ret.second) { - patp->v3error("Assignment pattern contains duplicate entry: " - << VN_AS(patp->keyp(), Text)->text()); + else{ + // constant expr + if (memp) { + const std::pair ret = patmap.emplace(memp, patp); + if (!ret.second) { + patp->v3error("Assignment pattern contains duplicate entry: " + << VN_AS(patp->keyp(), Text)->text()); + } + memp = VN_AS(memp->nextp(), MemberDType); } } } while (false); + // Next - if (memp) memp = VN_AS(memp->nextp(), MemberDType); if (patp) patp = VN_AS(patp->nextp(), PatMember); } } @@ -3613,13 +3630,24 @@ private: AstPatMember* newpatp = nullptr; AstPatMember* patp = nullptr; if (it == patmap.end()) { - if (defaultp) { + const string memp_DType = memp->virtRefDTypep()->prettyDTypeName(); + const auto it2 = dtypemap.find(memp_DType); + if (it2 != dtypemap.end()) { + // default_value for data_type + patp = it2->second; + newpatp = patp->cloneTree(false); + patp = newpatp; + } + else if (defaultp) { + // default_value for any unassigned member yet newpatp = defaultp->cloneTree(false); patp = newpatp; } else { if (!VN_IS(vdtypep, UnionDType)) { nodep->v3error("Assignment pattern missed initializing elements: " - << memp->prettyTypeName()); + << memp->virtRefDTypep()->prettyDTypeName() + << " " + << memp->prettyName()); } } } else { diff --git a/src/verilog.y b/src/verilog.y index 82faf4a7d..79daad950 100644 --- a/src/verilog.y +++ b/src/verilog.y @@ -3544,6 +3544,7 @@ patternKey: // IEEE: merge structure_pattern_key, array_patt | yaFLOATNUM { $$ = new AstConst($1,AstConst::RealDouble(),$1); } | id { $$ = new AstText($1,*$1); } | strAsInt { $$ = $1; } + | simple_type { $$ = $1; } ; assignment_pattern: // ==IEEE: assignment_pattern diff --git a/test_regress/t/t_array_list_bad.out b/test_regress/t/t_array_list_bad.out index edf0db3e8..0eeb8a06f 100644 --- a/test_regress/t/t_array_list_bad.out +++ 
b/test_regress/t/t_array_list_bad.out @@ -1,4 +1,4 @@ -%Error: t/t_array_list_bad.v:38:25: Assignment pattern missed initializing elements: MEMBERDTYPE 't3' +%Error: t/t_array_list_bad.v:38:25: Assignment pattern missed initializing elements: logic t3 : ... In instance t 38 | test_out <= '{'0, '0}; | ^~ diff --git a/test_regress/t/t_structu_dataType_assignment.pl b/test_regress/t/t_structu_dataType_assignment.pl new file mode 100644 index 000000000..2cb5eeaff --- /dev/null +++ b/test_regress/t/t_structu_dataType_assignment.pl @@ -0,0 +1,21 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2021 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_structu_dataType_assignment.v b/test_regress/t/t_structu_dataType_assignment.v new file mode 100644 index 000000000..2962a6e78 --- /dev/null +++ b/test_regress/t/t_structu_dataType_assignment.v @@ -0,0 +1,137 @@ +// DESCRIPTION: Verilator: Verilog Test module for specialized type default values +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Mostafa Gamal. 
+// SPDX-License-Identifier: CC0-1.0 + +/* verilator lint_off UNPACKED */ + +module top(); + + typedef struct { // IEEE 1800-2017 SV CH:5.10 + int a; + shortint b; + } ab_struct; + + typedef struct { // IEEE 1800-2017 SV CH:10.9.2 + int x; + int y; + } st_struct; + + typedef struct { // IEEE 1800-2017 SV CH:10.9.2 + logic [7:0] a; + bit b; + bit signed [31:0] c; + int s; + } sa_struct; + + + typedef struct { // IEEE 1800-2017 SV CH:10.9.2 + int A; + struct { + int B, C; + } BC1, BC2; + } DEF_struct; + + + // struct ab + ab_struct ab; + ab_struct abkey[1:0]; + + // struct st + st_struct st; + int k = 1; + + // struct sa + sa_struct sa; + + // struct DEF + DEF_struct DEF; + + initial begin; + // struct ab + ab = '{0, 0}; //constant member by position + if (ab.a != 0) $stop; + if (ab.b != 0) $stop; + + + ab = '{default: 0}; //default value + if (ab.a != 0) $stop; + if (ab.b != 0) $stop; + + + ab = '{int: 1, shortint: 0}; //data type and default value + if (ab.a != 1) $stop; + if (ab.b != 0) $stop; + + + abkey[1:0] = '{'{a:1, b:2}, '{int:2, shortint:3}}; // member: value & data_type: value + if (abkey[1].a != 1) $stop; + if (abkey[1].b != 2) $stop; + if (abkey[0].a != 2) $stop; + if (abkey[0].b != 3) $stop; + + + // struct st + st = '{1, 2+k}; //constant member by position + if (st.x != 1) $stop; + if (st.y != 2+k) $stop; + + st = '{x:2, y:3+k}; //member: value + if (st.x != 2) $stop; + if (st.y != 3+k) $stop; + + st = '{int:2, int:3+k}; //data_type: value override + if (st.x != 3+k) $stop; + if (st.y != 3+k) $stop; + + + // struct sa + sa = '{default:'1}; + if (sa.a != '1) $stop; + if (sa.b != '1) $stop; + if (sa.c != '1) $stop; + if (sa.s != '1) $stop; + + sa = '{default:'1, int: 5}; + if (sa.a != '1) $stop; + if (sa.b != '1) $stop; + if (sa.c != '1) $stop; + if (sa.s != 5) $stop; + + + sa = '{default:'1, int: 5, b: 0}; + if (sa.a != '1) $stop; + if (sa.b != 0) $stop; + if (sa.c != '1) $stop; + if (sa.s != 5) $stop; + + + // struct DEF + DEF = '{A:1, BC1:'{B:2, C:3}, 
BC2:'{B:4,C:5}}; + if (DEF.A != 1) $stop; + if (DEF.BC1.B != 2) $stop; + if (DEF.BC1.C != 3) $stop; + if (DEF.BC2.B != 4) $stop; + if (DEF.BC2.C != 5) $stop; + + + DEF = '{int:0, BC1:'{int:10}, BC2:'{default:5}}; + if (DEF.A != 0) $stop; + if (DEF.BC1.B != 10) $stop; + if (DEF.BC1.C != 10) $stop; + if (DEF.BC2.B != 5) $stop; + if (DEF.BC2.C != 5) $stop; + + DEF = '{default:1, BC1:'{int:10}, BC2:'{default:5}}; + if (DEF.A != 1) $stop; + if (DEF.BC1.B != 10) $stop; + if (DEF.BC1.C != 10) $stop; + if (DEF.BC2.B != 5) $stop; + if (DEF.BC2.C != 5) $stop; + + $write("*-* All Finished *-*\n"); + $finish; + end + +endmodule diff --git a/test_regress/t/t_structu_dataType_assignment_bad.out b/test_regress/t/t_structu_dataType_assignment_bad.out new file mode 100644 index 000000000..1b430ffd4 --- /dev/null +++ b/test_regress/t/t_structu_dataType_assignment_bad.out @@ -0,0 +1,5 @@ +%Error: t/t_structu_dataType_assignment_bad.v:19:26: Assignment pattern key not supported/understood: CONST '?32?sh1' + : ... In instance top + 19 | DEF_struct DEF_bad = '{1: 5, default: 10}; + | ^ +%Error: Exiting due to diff --git a/test_regress/t/t_structu_dataType_assignment_bad.pl b/test_regress/t/t_structu_dataType_assignment_bad.pl new file mode 100755 index 000000000..bec0388e9 --- /dev/null +++ b/test_regress/t/t_structu_dataType_assignment_bad.pl @@ -0,0 +1,21 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2021 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + + +compile( + fails => 1, + expect_filename => $Self->{golden_filename}, + ); + + +ok(1); +1; diff --git a/test_regress/t/t_structu_dataType_assignment_bad.v b/test_regress/t/t_structu_dataType_assignment_bad.v new file mode 100644 index 000000000..6ab523bbf --- /dev/null +++ b/test_regress/t/t_structu_dataType_assignment_bad.v @@ -0,0 +1,21 @@ +// DESCRIPTION: Verilator: Verilog Test module for specialized type default values +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Mostafa Gamal. +// SPDX-License-Identifier: CC0-1.0 + +/* verilator lint_off UNPACKED */ + +module top(); + + + typedef struct { // IEEE 1800-2017 SV CH:10.9.2 + int A; + struct { + int B, C; + } BC1, BC2; + } DEF_struct; + + DEF_struct DEF_bad = '{1: 5, default: 10}; + +endmodule From e871cd8a4438432b9641d56b71c35fd2bc2e961e Mon Sep 17 00:00:00 2001 From: github action Date: Mon, 25 Jul 2022 21:47:29 +0000 Subject: [PATCH 056/119] Apply 'make format' --- src/V3Width.cpp | 35 +++++++++---------- .../t/t_structu_dataType_assignment.pl | 0 2 files changed, 16 insertions(+), 19 deletions(-) mode change 100644 => 100755 test_regress/t/t_structu_dataType_assignment.pl diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 9ce0b5b4c..237675b4a 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -3560,7 +3560,8 @@ private: // determine the dtypep for that PatMember's value, and then // width the initial value appropriately. 
using PatMap = std::map; // Store member: value - using DTypeMap = std::map; // Store data_type: default_value + using DTypeMap + = std::map; // Store data_type: default_value PatMap patmap; DTypeMap dtypemap; { @@ -3575,44 +3576,42 @@ private: memp = vdtypep->findMember(textp->text()); if (!memp) { patp->keyp()->v3error("Assignment pattern key '" - << textp->text() - << "' not found as member"); + << textp->text() << "' not found as member"); break; } else { - const std::pair ret = patmap.emplace(memp, patp); + const std::pair ret + = patmap.emplace(memp, patp); if (!ret.second) { patp->v3error("Assignment pattern contains duplicate entry: " - << VN_AS(patp->keyp(), Text)->text()); + << VN_AS(patp->keyp(), Text)->text()); } memp = VN_AS(memp->nextp(), MemberDType); } - } - else if (const AstNodeDType* nodedtypep = VN_CAST(patp->keyp(), NodeDType)){ + } else if (const AstNodeDType* nodedtypep + = VN_CAST(patp->keyp(), NodeDType)) { // data_type: default_value const string dtype = nodedtypep->dtypep()->prettyDTypeName(); auto it = dtypemap.find(dtype); if (it == dtypemap.end()) { dtypemap.emplace(dtype, patp); - } - else { + } else { // Override stored default_value it->second = patp->cloneTree(false); } - } - else { + } else { // Undefined pattern patp->keyp()->v3error( "Assignment pattern key not supported/understood: " << patp->keyp()->prettyTypeName()); } - } - else{ + } else { // constant expr if (memp) { - const std::pair ret = patmap.emplace(memp, patp); + const std::pair ret + = patmap.emplace(memp, patp); if (!ret.second) { patp->v3error("Assignment pattern contains duplicate entry: " - << VN_AS(patp->keyp(), Text)->text()); + << VN_AS(patp->keyp(), Text)->text()); } memp = VN_AS(memp->nextp(), MemberDType); } @@ -3637,16 +3636,14 @@ private: patp = it2->second; newpatp = patp->cloneTree(false); patp = newpatp; - } - else if (defaultp) { + } else if (defaultp) { // default_value for any unassigned member yet newpatp = defaultp->cloneTree(false); patp = 
newpatp; } else { if (!VN_IS(vdtypep, UnionDType)) { nodep->v3error("Assignment pattern missed initializing elements: " - << memp->virtRefDTypep()->prettyDTypeName() - << " " + << memp->virtRefDTypep()->prettyDTypeName() << " " << memp->prettyName()); } } diff --git a/test_regress/t/t_structu_dataType_assignment.pl b/test_regress/t/t_structu_dataType_assignment.pl old mode 100644 new mode 100755 From a5ddd10e31b089637f1e85712f3cdef3bb6025d9 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 27 Jul 2022 10:45:33 +0100 Subject: [PATCH 057/119] Tests: compare VCD files both ways vcddiff is a bit broken, and sometimes 'vcddiff a b' fails while the files are indeed equivalent. There is a chance however that 'vcddiff b a' will succeed in this case, so compare trace files both ways when checking test results and claim success if vcddiff succeeds in at least one direction. --- test_regress/driver.pl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/test_regress/driver.pl b/test_regress/driver.pl index ae0ed4f36..22a2a67f8 100755 --- a/test_regress/driver.pl +++ b/test_regress/driver.pl @@ -2259,10 +2259,15 @@ sub vcd_identical { print "\t$cmd\n" if $::Debug; $out = `$cmd`; if ($? != 0 || $out ne '') { - print $out; - $self->error("VCD miscompares $fn1 $fn2\n"); - $self->copy_if_golden($fn1, $fn2); - return 0; + $cmd = qq{vcddiff "$fn2" "$fn1"}; + print "\t$cmd\n" if $::Debug; + $out = `$cmd`; + if ($? 
!= 0 || $out ne '') { + print $out; + $self->error("VCD miscompares $fn2 $fn1\n"); + $self->copy_if_golden($fn1, $fn2); + return 0; + } } } { From 2a87387eb393f2a0ce434796311688e757fd451e Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Thu, 28 Jul 2022 08:41:01 -0400 Subject: [PATCH 058/119] Documentation fixes (#3514) --- docs/guide/files.rst | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/guide/files.rst b/docs/guide/files.rst index 796a4c756..a71214983 100644 --- a/docs/guide/files.rst +++ b/docs/guide/files.rst @@ -50,7 +50,7 @@ For --cc/--sc, it creates: - Make include file with class names (from --make gmake) * - *{prefix}*\ _hier.mk - Make file for hierarchy blocks (from --make gmake) - * - *{prefix|*\ _hierMkArgs.f + * - *{prefix}*\ _hierMkArgs.f - Arguments for hierarchical Verilation (from --make gmake) * - *{prefix}*\ _hierCMakeArgs.f - Arguments for hierarchical Verilation (from --make cmake) @@ -62,13 +62,17 @@ For --cc/--sc, it creates: - Top level (SystemVerilog $root) internal header file * - *{prefix}*\ ___024root.cpp - Top level (SystemVerilog $root) internal C++ file - * - *{prefix}*___024root*{__n}*\ .cpp - - Additional top level internal C++ files (from --output-split) + * - *{prefix}*\ ___024root\ *{__n}*\ .cpp + - Additional top level internal C++ files + * - *{prefix}*\ ___024root\ *{__DepSet_hash__n}*\ .cpp + - Additional top level internal C++ files (hashed to reduce build times) * - *{prefix}*\ ___024root__Slow\ *{__n}*\ .cpp - Infrequent cold routines - * - *{prefix}*\ ___024root__Trace{__n}*\ .cpp + * - *{prefix}*\ ___024root\ *{__DepSet_hash__n}*\ .cpp + - Infrequent cold routines (hashed to reduce build times) + * - *{prefix}*\ ___024root__Trace\ *{__n}*\ .cpp - Wave file generation code (from --trace) - * - *{prefix}*\ ___024root__Trace__Slow{__n}*\ .cpp + * - *{prefix}*\ ___024root__Trace__Slow\ *{__n}*\ .cpp - Wave file generation code (from --trace) * - *{prefix}*\ __Dpi.h - DPI import 
and export declarations (from --dpi) @@ -87,7 +91,9 @@ For --cc/--sc, it creates: * - *{prefix}{each_verilog_module}*\ .cpp - Lower level internal C++ files * - *{prefix}{each_verilog_module}{__n}*\ .cpp - - Additional lower C++ files (from --output-split) + - Additional lower C++ files + * - *{prefix}{each_verilog_module}{__DepSet_hash__n}*\ .cpp + - Additional lower C++ files (hashed to reduce build times) For --hierarchy mode, it creates: From 574dbfded1074b37e5d89272ca345dc3c067bf92 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Thu, 28 Jul 2022 12:54:28 +0100 Subject: [PATCH 059/119] V3MergeCond: Fix incorrect merge of assignments to the condition --- src/V3MergeCond.cpp | 2 ++ test_regress/t/t_merge_cond_no_extend.pl | 20 ++++++++++++++++++++ test_regress/t/t_merge_cond_no_extend.v | 22 ++++++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100755 test_regress/t/t_merge_cond_no_extend.pl create mode 100644 test_regress/t/t_merge_cond_no_extend.v diff --git a/src/V3MergeCond.cpp b/src/V3MergeCond.cpp index 210d34ca6..bb4251cbb 100644 --- a/src/V3MergeCond.cpp +++ b/src/V3MergeCond.cpp @@ -790,6 +790,8 @@ private: // otherwise end the current merge. Return ture if added, false if ended merge. bool addIfHelpfulElseEndMerge(AstNodeStmt* nodep) { UASSERT_OBJ(m_mgFirstp, nodep, "List must be open"); + if (!checkOrMakeMergeable(nodep)) return false; + if (!m_mgFirstp) return false; // If 'checkOrMakeMergeable' closed the list if (m_mgNextp == nodep) { if (isSimplifiableNode(nodep)) { if (addToList(nodep, nullptr)) return true; diff --git a/test_regress/t/t_merge_cond_no_extend.pl b/test_regress/t/t_merge_cond_no_extend.pl new file mode 100755 index 000000000..5fd0b644e --- /dev/null +++ b/test_regress/t/t_merge_cond_no_extend.pl @@ -0,0 +1,20 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2022 by Geza Lore. 
This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(vlt_all => 1); + +compile( + verilator_flags2 => ["--stats"], + ); + +file_grep($Self->{stats}, qr/Optimizations, MergeCond merges\s+(\d+)/i, 0); + +ok(1); +1; diff --git a/test_regress/t/t_merge_cond_no_extend.v b/test_regress/t/t_merge_cond_no_extend.v new file mode 100644 index 000000000..ede818af3 --- /dev/null +++ b/test_regress/t/t_merge_cond_no_extend.v @@ -0,0 +1,22 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Geza Lore. +// SPDX-License-Identifier: CC0-1.0 + +module t ( + input wire clk, + input wire [7:0] i, + input wire a, + output reg [7:0] o +); + + reg cond = 0; + + always @(posedge clk) begin + if (cond) o = i; + cond = a; + if (cond) o = ~i; + end + +endmodule From 1f9323d0868d2fb0f9c745fdbf74154239604856 Mon Sep 17 00:00:00 2001 From: Yutetsu TAKATSUKASA Date: Fri, 29 Jul 2022 07:05:04 +0900 Subject: [PATCH 060/119] Set correct dtype in replaceShiftSame() (#3520) * Tests: Add a test to reproduce bug3399 * Fix3399. Set the correct dtype in replaceShiftSame(). * Tests: update stats. * Update Changes --- Changes | 1 + src/V3Const.cpp | 1 + test_regress/t/t_const_opt.pl | 2 +- test_regress/t/t_const_opt.v | 30 ++++++++++++++++++++++++++++-- 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/Changes b/Changes index 9b7dd96e8..6cfd4b984 100644 --- a/Changes +++ b/Changes @@ -17,6 +17,7 @@ Verilator 4.225 devel * Fix empty string arguments to display (#3484). [Grulfen] * Fix table misoptimizing away display (#3488). [Stefan Post] * Fix wrong bit op tree optimization (#3509). 
[Nathan Graybeal] +* Fix incorrect tristate logic (#3399) [shareefj, Vighnesh Iyer] Verilator 4.224 2022-06-19 diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 6f543fdba..372757419 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -1764,6 +1764,7 @@ private: lp->rhsp(lrp); nodep->lhsp(llp); nodep->rhsp(rlp); + nodep->dtypep(llp->dtypep()); // dtype of Biop is before shift. VL_DO_DANGLING(rp->deleteTree(), rp); VL_DO_DANGLING(rrp->deleteTree(), rrp); // nodep->dumpTree(cout, " repShiftSame_new: "); diff --git a/test_regress/t/t_const_opt.pl b/test_regress/t/t_const_opt.pl index 837b5f74f..36f064cb4 100755 --- a/test_regress/t/t_const_opt.pl +++ b/test_regress/t/t_const_opt.pl @@ -19,7 +19,7 @@ execute( ); if ($Self->{vlt}) { - file_grep($Self->{stats}, qr/Optimizations, Const bit op reduction\s+(\d+)/i, 14); + file_grep($Self->{stats}, qr/Optimizations, Const bit op reduction\s+(\d+)/i, 15); } ok(1); 1; diff --git a/test_regress/t/t_const_opt.v b/test_regress/t/t_const_opt.v index 559477475..d1c545a61 100644 --- a/test_regress/t/t_const_opt.v +++ b/test_regress/t/t_const_opt.v @@ -62,7 +62,7 @@ module t(/*AUTOARG*/ $write("[%0t] cyc==%0d crc=%x sum=%x\n", $time, cyc, crc, sum); if (crc !== 64'hc77bb9b3784ea091) $stop; // What checksum will we end up with (above print should match) -`define EXPECTED_SUM 64'hde21e019a3e12039 +`define EXPECTED_SUM 64'h9366e49d91bfe942 if (sum !== `EXPECTED_SUM) $stop; $write("*-* All Finished *-*\n"); @@ -88,10 +88,12 @@ module Test(/*AUTOARG*/ logic bug3445_out; logic bug3470_out; logic bug3509_out; + wire bug3399_out0; + wire bug3399_out1; output logic o; - logic [9:0] tmp; + logic [11:0] tmp; assign o = ^tmp; always_ff @(posedge clk) begin @@ -117,6 +119,8 @@ module Test(/*AUTOARG*/ tmp[7] <= bug3445_out; tmp[8] <= bug3470_out; tmp[9] <= bug3509_out; + tmp[10]<= bug3399_out0; + tmp[11]<= bug3399_out1; end bug3182 i_bug3182(.in(d[4:0]), .out(bug3182_out)); @@ -124,6 +128,7 @@ module Test(/*AUTOARG*/ bug3445 
i_bug3445(.clk(clk), .in(d), .out(bug3445_out)); bug3470 i_bug3470(.clk(clk), .in(d), .out(bug3470_out)); bug3509 i_bug3509(.clk(clk), .in(d), .out(bug3509_out)); + bug3399 i_bug3399(.clk(clk), .in(d), .out0(bug3399_out0), .out1(bug3399_out1)); endmodule @@ -289,3 +294,24 @@ module bug3509(input wire clk, input wire [31:0] in, output reg out); end end endmodule + +// Bug3399 +// replaceShiftSame() in V3Const.cpp optimizes +// Or(Shift(ll,CONSTlr),Shift(rl,CONSTrr==lr)) -> Shift(Or(ll,rl),CONSTlr) +// (Or/And may also be reversed) +// +// dtype of Or after the transformation must be as same as ll and rl, but was dtype of Or BEFORE transformation. +// When the result of Shift was 1 bit width, bit op tree optimization +// optimized the tree even though the graph needs more width. +// Remember that the target of bit op tree optimization is 1 bit width. +module bug3399(input wire clk, input wire [31:0] in, inout wire out0, inout wire out1); + logic [1:0] driver = '0; + logic [1:0] d; + always_ff @(posedge clk) begin + driver <= 2'b11; + d <= in[1:0]; + end + + assign out0 = driver[0] ? d[0] : 1'bz; + assign out1 = driver[1] ? d[1] : 1'bz; +endmodule From b9d7819faa05b33ea61a1b548856ecdd948ecca8 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 30 Jul 2022 10:01:25 -0400 Subject: [PATCH 061/119] Internals: Fix some cppcheck issues. Some dump functions fixed. 
--- Makefile.in | 2 + include/verilated.cpp | 5 ++- include/verilated_cov.cpp | 10 +++-- include/verilated_fst_c.h | 12 +++--- include/verilated_fst_sc.h | 1 + include/verilated_save.h | 14 +++--- include/verilated_types.h | 10 ++--- include/verilated_vcd_c.h | 12 +++--- include/verilated_vcd_sc.h | 1 + include/verilated_vpi.cpp | 88 +++++++++++++++++++------------------- include/verilatedos.h | 3 +- src/V3Active.cpp | 16 +++---- src/V3Ast.h | 2 +- src/V3AstNodes.cpp | 27 ++++++------ src/V3AstNodes.h | 4 +- src/V3Broken.cpp | 2 +- src/V3Class.cpp | 16 ++++--- src/V3Combine.cpp | 4 +- src/V3Const.cpp | 4 +- src/V3DupFinder.h | 2 +- src/V3EmitCFunc.cpp | 4 +- src/V3EmitCHeaders.cpp | 3 +- src/V3EmitCMake.cpp | 10 ++--- src/V3EmitCModel.cpp | 2 +- src/V3EmitCSyms.cpp | 2 +- src/V3FileLine.cpp | 8 ++-- src/V3Gate.cpp | 6 +-- src/V3Graph.cpp | 2 +- src/V3Hasher.cpp | 4 +- src/V3LinkDot.cpp | 2 +- src/V3LinkParse.cpp | 2 +- src/V3LinkResolve.cpp | 2 +- src/V3Localize.cpp | 2 +- src/V3MergeCond.cpp | 2 +- src/V3Order.cpp | 10 ++--- src/V3Order.h | 2 +- src/V3Param.cpp | 2 +- src/V3ParseImp.cpp | 4 +- src/V3Partition.cpp | 8 ++-- src/V3Premit.cpp | 2 +- src/V3Randomize.cpp | 2 +- src/V3Reloop.cpp | 4 +- src/V3SplitVar.cpp | 9 ++-- src/V3TraceDecl.cpp | 10 ++--- src/V3Tristate.cpp | 8 ++-- src/V3VariableOrder.cpp | 2 +- src/V3Width.cpp | 13 ++++-- src/V3WidthSel.cpp | 2 +- src/Verilator.cpp | 2 +- 49 files changed, 192 insertions(+), 174 deletions(-) diff --git a/Makefile.in b/Makefile.in index 231fbf75b..dc14bddfd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -326,6 +326,8 @@ CLANGTIDY_FLAGS = -config='' -checks='-fuchsia-*,-cppcoreguidelines-avoid-c-arra CLANGTIDY_DEP = $(subst .h,.h.tidy,$(CPPCHECK_H)) \ $(subst .cpp,.cpp.tidy,$(CPPCHECK_CPP)) CLANGTIDY_DEFS = -DVL_DEBUG=1 -DVL_THREADED=1 -DVL_CPPCHECK=1 +# cppcoreguidelines-avoid-goto modernize-avoid-c-arrays readability-magic-numbers readability-simplify-boolean-expr +# cppcoreguidelines-macro-usage clang-tidy: 
$(CLANGTIDY_DEP) %.cpp.tidy: %.cpp diff --git a/include/verilated.cpp b/include/verilated.cpp index 49ef170dc..be9740700 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -679,7 +679,10 @@ std::string _vl_vsformat_time(char* tmp, T ld, int timeunit, bool left, size_t w if (std::numeric_limits::is_integer) { constexpr int b = 128; constexpr int w = VL_WORDS_I(b); - VlWide tmp0, tmp1, tmp2, tmp3; + VlWide tmp0; + VlWide tmp1; + VlWide tmp2; + VlWide tmp3; WDataInP shifted = VL_EXTEND_WQ(b, 0, tmp0, static_cast(ld)); if (shift < 0) { diff --git a/include/verilated_cov.cpp b/include/verilated_cov.cpp index fb0609be5..d6f03a7f8 100644 --- a/include/verilated_cov.cpp +++ b/include/verilated_cov.cpp @@ -76,15 +76,15 @@ private: public: // METHODS // cppcheck-suppress truncLongCastReturn - virtual uint64_t count() const override { return *m_countp; } - virtual void zero() const override { *m_countp = 0; } + uint64_t count() const override { return *m_countp; } + void zero() const override { *m_countp = 0; } // CONSTRUCTORS // cppcheck-suppress noExplicitConstructor explicit VerilatedCoverItemSpec(T* countp) : m_countp{countp} { *m_countp = 0; } - virtual ~VerilatedCoverItemSpec() override = default; + ~VerilatedCoverItemSpec() override = default; }; //============================================================================= @@ -122,7 +122,7 @@ public: protected: friend class VerilatedCovContext; - virtual ~VerilatedCovImp() override { clearGuts(); } + ~VerilatedCovImp() override { clearGuts(); } private: // PRIVATE METHODS @@ -511,8 +511,10 @@ VerilatedCovContext* VerilatedCov::threadCovp() VL_MT_SAFE { VerilatedCovContext* VerilatedContext::coveragep() VL_MT_SAFE { static VerilatedMutex s_mutex; + // cppcheck-suppress identicalInnerCondition if (VL_UNLIKELY(!m_coveragep)) { const VerilatedLockGuard lock{s_mutex}; + // cppcheck-suppress identicalInnerCondition if (VL_LIKELY(!m_coveragep)) { // Not redundant, prevents race m_coveragep.reset(new 
VerilatedCovImp); } diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h index c87b9763b..1d9b07db7 100644 --- a/include/verilated_fst_c.h +++ b/include/verilated_fst_c.h @@ -67,18 +67,18 @@ protected: // Implementation of VerilatedTrace interface // Called when the trace moves forward to a new time point - virtual void emitTimeChange(uint64_t timeui) override; + void emitTimeChange(uint64_t timeui) override; // Hooks called from VerilatedTrace - virtual bool preFullDump() override { return isOpen(); } - virtual bool preChangeDump() override { return isOpen(); } + bool preFullDump() override { return isOpen(); } + bool preChangeDump() override { return isOpen(); } // Trace buffer management - virtual Buffer* getTraceBuffer() override; - virtual void commitTraceBuffer(Buffer*) override; + Buffer* getTraceBuffer() override; + void commitTraceBuffer(Buffer*) override; // Configure sub-class - virtual void configure(const VerilatedTraceConfig&) override; + void configure(const VerilatedTraceConfig&) override; public: //========================================================================= diff --git a/include/verilated_fst_sc.h b/include/verilated_fst_sc.h index 5ae6b7631..878d0b867 100644 --- a/include/verilated_fst_sc.h +++ b/include/verilated_fst_sc.h @@ -60,6 +60,7 @@ public: } // Override VerilatedFstC. Must be called after starting simulation. 
+ // cppcheck-suppress missingOverride // GCC won't accept override virtual void open(const char* filename) /*override*/ VL_MT_SAFE; private: diff --git a/include/verilated_save.h b/include/verilated_save.h index c7230e74e..292bf39dd 100644 --- a/include/verilated_save.h +++ b/include/verilated_save.h @@ -194,16 +194,16 @@ public: /// Construct new object VerilatedSave() = default; /// Flush, close and destruct - virtual ~VerilatedSave() override { close(); } + ~VerilatedSave() override { close(); } // METHODS /// Open the file; call isOpen() to see if errors void open(const char* filenamep) VL_MT_UNSAFE_ONE; /// Open the file; call isOpen() to see if errors void open(const std::string& filename) VL_MT_UNSAFE_ONE { open(filename.c_str()); } /// Flush and close the file - virtual void close() override VL_MT_UNSAFE_ONE; + void close() override VL_MT_UNSAFE_ONE; /// Flush data to file - virtual void flush() override VL_MT_UNSAFE_ONE; + void flush() override VL_MT_UNSAFE_ONE; }; //============================================================================= @@ -221,7 +221,7 @@ public: /// Construct new object VerilatedRestore() = default; /// Flush, close and destruct - virtual ~VerilatedRestore() override { close(); } + ~VerilatedRestore() override { close(); } // METHODS /// Open the file; call isOpen() to see if errors @@ -229,9 +229,9 @@ public: /// Open the file; call isOpen() to see if errors void open(const std::string& filename) VL_MT_UNSAFE_ONE { open(filename.c_str()); } /// Close the file - virtual void close() override VL_MT_UNSAFE_ONE; - virtual void flush() override VL_MT_UNSAFE_ONE {} - virtual void fill() override VL_MT_UNSAFE_ONE; + void close() override VL_MT_UNSAFE_ONE; + void flush() override VL_MT_UNSAFE_ONE {} + void fill() override VL_MT_UNSAFE_ONE; }; //============================================================================= diff --git a/include/verilated_types.h b/include/verilated_types.h index d45477d00..93906b1d3 100644 --- 
a/include/verilated_types.h +++ b/include/verilated_types.h @@ -91,9 +91,9 @@ class VlReadMem final { const int m_bits; // Bit width of values const std::string& m_filename; // Filename const QData m_end; // End address (as specified by user) - FILE* m_fp; // File handle for filename - QData m_addr; // Next address to read - int m_linenum; // Line number last read from file + FILE* m_fp = nullptr; // File handle for filename + QData m_addr = 0; // Next address to read + int m_linenum = 0; // Line number last read from file bool m_anyAddr = false; // Had address directive in the file public: VlReadMem(bool hex, int bits, const std::string& filename, QData start, QData end); @@ -107,8 +107,8 @@ public: class VlWriteMem final { const bool m_hex; // Hex format const int m_bits; // Bit width of values - FILE* m_fp; // File handle for filename - QData m_addr; // Next address to write + FILE* m_fp = nullptr; // File handle for filename + QData m_addr = 0; // Next address to write public: VlWriteMem(bool hex, int bits, const std::string& filename, QData start, QData end); ~VlWriteMem(); diff --git a/include/verilated_vcd_c.h b/include/verilated_vcd_c.h index 20c5440f5..99b8ad48b 100644 --- a/include/verilated_vcd_c.h +++ b/include/verilated_vcd_c.h @@ -102,18 +102,18 @@ protected: // Implementation of VerilatedTrace interface // Called when the trace moves forward to a new time point - virtual void emitTimeChange(uint64_t timeui) override; + void emitTimeChange(uint64_t timeui) override; // Hooks called from VerilatedTrace - virtual bool preFullDump() override { return isOpen(); } - virtual bool preChangeDump() override; + bool preFullDump() override { return isOpen(); } + bool preChangeDump() override; // Trace buffer management - virtual Buffer* getTraceBuffer() override; - virtual void commitTraceBuffer(Buffer*) override; + Buffer* getTraceBuffer() override; + void commitTraceBuffer(Buffer*) override; // Configure sub-class - virtual void configure(const 
VerilatedTraceConfig&) override { return; }; + void configure(const VerilatedTraceConfig&) override { return; }; public: //========================================================================= diff --git a/include/verilated_vcd_sc.h b/include/verilated_vcd_sc.h index 5e71811c0..60504cf1e 100644 --- a/include/verilated_vcd_sc.h +++ b/include/verilated_vcd_sc.h @@ -63,6 +63,7 @@ public: } // Override VerilatedVcdC. Must be called after starting simulation. + // cppcheck-suppress missingOverride // GCC won't accept override virtual void open(const char* filename) /*override*/ VL_MT_SAFE; private: diff --git a/include/verilated_vpi.cpp b/include/verilated_vpi.cpp index 277230720..77c79412b 100644 --- a/include/verilated_vpi.cpp +++ b/include/verilated_vpi.cpp @@ -135,12 +135,12 @@ public: VerilatedVpioTimedCb(uint64_t id, QData time) : m_id{id} , m_time{time} {} - virtual ~VerilatedVpioTimedCb() override = default; + ~VerilatedVpioTimedCb() override = default; static VerilatedVpioTimedCb* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiCallback; } - virtual PLI_INT32 dovpi_remove_cb() override; + uint32_t type() const override { return vpiCallback; } + PLI_INT32 dovpi_remove_cb() override; }; class VerilatedVpioReasonCb final : public VerilatedVpio { @@ -154,12 +154,12 @@ public: VerilatedVpioReasonCb(uint64_t id, PLI_INT32 reason) : m_id{id} , m_reason{reason} {} - virtual ~VerilatedVpioReasonCb() override = default; + ~VerilatedVpioReasonCb() override = default; static VerilatedVpioReasonCb* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiCallback; } - virtual PLI_INT32 dovpi_remove_cb() override; + uint32_t type() const override { return vpiCallback; } + PLI_INT32 dovpi_remove_cb() override; }; class VerilatedVpioConst final : public VerilatedVpio { @@ -168,11 +168,11 @@ class VerilatedVpioConst final : public VerilatedVpio 
{ public: explicit VerilatedVpioConst(int32_t num) : m_num{num} {} - virtual ~VerilatedVpioConst() override = default; + ~VerilatedVpioConst() override = default; static VerilatedVpioConst* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiConstant; } + uint32_t type() const override { return vpiConstant; } int32_t num() const { return m_num; } }; @@ -200,10 +200,10 @@ public: } const VerilatedVar* varp() const { return m_varp; } const VerilatedScope* scopep() const { return m_scopep; } - virtual uint32_t size() const override { return get_range().elements(); } - virtual const VerilatedRange* rangep() const override { return &get_range(); } - virtual const char* name() const override { return m_varp->name(); } - virtual const char* fullname() const override { + uint32_t size() const override { return get_range().elements(); } + const VerilatedRange* rangep() const override { return &get_range(); } + const char* name() const override { return m_varp->name(); } + const char* fullname() const override { static VL_THREAD_LOCAL std::string t_out; t_out = std::string{m_scopep->name()} + "." 
+ name(); return t_out.c_str(); @@ -214,12 +214,12 @@ class VerilatedVpioParam final : public VerilatedVpioVarBase { public: VerilatedVpioParam(const VerilatedVar* varp, const VerilatedScope* scopep) : VerilatedVpioVarBase{varp, scopep} {} - virtual ~VerilatedVpioParam() override = default; + ~VerilatedVpioParam() override = default; static VerilatedVpioParam* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiParameter; } + uint32_t type() const override { return vpiParameter; } void* varDatap() const { return m_varp->datap(); } }; @@ -229,13 +229,13 @@ class VerilatedVpioRange final : public VerilatedVpio { public: explicit VerilatedVpioRange(const VerilatedRange* range) : m_range{range} {} - virtual ~VerilatedVpioRange() override = default; + ~VerilatedVpioRange() override = default; static VerilatedVpioRange* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiRange; } - virtual uint32_t size() const override { return m_range->elements(); } - virtual const VerilatedRange* rangep() const override { return m_range; } + uint32_t type() const override { return vpiRange; } + uint32_t size() const override { return m_range->elements(); } + const VerilatedRange* rangep() const override { return m_range; } }; class VerilatedVpioRangeIter final : public VerilatedVpio { @@ -246,12 +246,12 @@ class VerilatedVpioRangeIter final : public VerilatedVpio { public: explicit VerilatedVpioRangeIter(const VerilatedRange* range) : m_range{range} {} - virtual ~VerilatedVpioRangeIter() override = default; + ~VerilatedVpioRangeIter() override = default; static VerilatedVpioRangeIter* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiIterator; } - virtual vpiHandle dovpi_scan() override { + uint32_t type() const override { return vpiIterator; } + vpiHandle dovpi_scan() override { if 
(VL_UNLIKELY(m_done)) { delete this; // IEEE 37.2.2 vpi_scan at end does a vpi_release_handle return nullptr; @@ -268,14 +268,14 @@ protected: public: explicit VerilatedVpioScope(const VerilatedScope* scopep) : m_scopep{scopep} {} - virtual ~VerilatedVpioScope() override = default; + ~VerilatedVpioScope() override = default; static VerilatedVpioScope* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiScope; } + uint32_t type() const override { return vpiScope; } const VerilatedScope* scopep() const { return m_scopep; } - virtual const char* name() const override { return m_scopep->name(); } - virtual const char* fullname() const override { return m_scopep->name(); } + const char* name() const override { return m_scopep->name(); } + const char* fullname() const override { return m_scopep->name(); } }; class VerilatedVpioVar VL_NOT_FINAL : public VerilatedVpioVarBase { @@ -308,7 +308,7 @@ public: m_mask.u32 = 0; } } - virtual ~VerilatedVpioVar() override { + ~VerilatedVpioVar() override { if (m_prevDatap) VL_DO_CLEAR(delete[] m_prevDatap, m_prevDatap = nullptr); } static VerilatedVpioVar* castp(vpiHandle h) { @@ -318,7 +318,7 @@ public: uint8_t mask_byte(int idx) const { return m_mask.u8[idx & 3]; } uint32_t entSize() const { return m_entSize; } uint32_t index() const { return m_index; } - virtual uint32_t type() const override { + uint32_t type() const override { return (varp()->dims() > 1) ? 
vpiMemory : vpiReg; // but might be wire, logic } void* prevDatap() const { return m_prevDatap; } @@ -339,14 +339,14 @@ public: m_index = index; m_varDatap = (static_cast(varp->datap())) + entSize() * offset; } - virtual ~VerilatedVpioMemoryWord() override = default; + ~VerilatedVpioMemoryWord() override = default; static VerilatedVpioMemoryWord* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiMemoryWord; } - virtual uint32_t size() const override { return varp()->packed().elements(); } - virtual const VerilatedRange* rangep() const override { return &(varp()->packed()); } - virtual const char* fullname() const override { + uint32_t type() const override { return vpiMemoryWord; } + uint32_t size() const override { return varp()->packed().elements(); } + const VerilatedRange* rangep() const override { return &(varp()->packed()); } + const char* fullname() const override { static VL_THREAD_LOCAL std::string t_out; constexpr size_t LEN_MAX_INDEX = 25; char num[LEN_MAX_INDEX]; @@ -364,12 +364,12 @@ class VerilatedVpioVarIter final : public VerilatedVpio { public: explicit VerilatedVpioVarIter(const VerilatedScope* scopep) : m_scopep{scopep} {} - virtual ~VerilatedVpioVarIter() override = default; + ~VerilatedVpioVarIter() override = default; static VerilatedVpioVarIter* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiIterator; } - virtual vpiHandle dovpi_scan() override { + uint32_t type() const override { return vpiIterator; } + vpiHandle dovpi_scan() override { if (VL_LIKELY(m_scopep->varsp())) { const VerilatedVarNameMap* const varsp = m_scopep->varsp(); if (VL_UNLIKELY(!m_started)) { @@ -405,15 +405,15 @@ public: , m_varp{varp} , m_iteration{varp->unpacked().right()} , m_direction{VL_LIKELY(varp->unpacked().left() > varp->unpacked().right()) ? 
1 : -1} {} - virtual ~VerilatedVpioMemoryWordIter() override = default; + ~VerilatedVpioMemoryWordIter() override = default; static VerilatedVpioMemoryWordIter* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiIterator; } + uint32_t type() const override { return vpiIterator; } void iterationInc() { if (!(m_done = (m_iteration == m_varp->unpacked().left()))) m_iteration += m_direction; } - virtual vpiHandle dovpi_scan() override { + vpiHandle dovpi_scan() override { if (VL_UNLIKELY(m_done)) { delete this; // IEEE 37.2.2 vpi_scan at end does a vpi_release_handle return nullptr; @@ -438,9 +438,9 @@ public: static VerilatedVpioModule* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiModule; } - virtual const char* name() const override { return m_name; } - virtual const char* fullname() const override { return m_fullname; } + uint32_t type() const override { return vpiModule; } + const char* name() const override { return m_name; } + const char* fullname() const override { return m_fullname; } }; class VerilatedVpioModuleIter final : public VerilatedVpio { @@ -452,12 +452,12 @@ public: : m_vec{&vec} { m_it = m_vec->begin(); } - virtual ~VerilatedVpioModuleIter() override = default; + ~VerilatedVpioModuleIter() override = default; static VerilatedVpioModuleIter* castp(vpiHandle h) { return dynamic_cast(reinterpret_cast(h)); } - virtual uint32_t type() const override { return vpiIterator; } - virtual vpiHandle dovpi_scan() override { + uint32_t type() const override { return vpiIterator; } + vpiHandle dovpi_scan() override { if (m_it == m_vec->end()) { delete this; // IEEE 37.2.2 vpi_scan at end does a vpi_release_handle return nullptr; diff --git a/include/verilatedos.h b/include/verilatedos.h index 2b0cdd8ce..b01eac65c 100644 --- a/include/verilatedos.h +++ b/include/verilatedos.h @@ -456,7 +456,8 @@ using ssize_t = uint32_t; ///< 
signed size_t; returned from read() // or 0x0 if not implemented on this platform #define VL_GET_CPU_TICK(val) \ { \ - uint32_t hi, lo; \ + uint32_t hi; \ + uint32_t lo; \ asm volatile("rdtsc" : "=a"(lo), "=d"(hi)); \ (val) = ((uint64_t)lo) | (((uint64_t)hi) << 32); \ } diff --git a/src/V3Active.cpp b/src/V3Active.cpp index 1b9e9534c..ba69b1b27 100644 --- a/src/V3Active.cpp +++ b/src/V3Active.cpp @@ -75,7 +75,7 @@ public: class LatchDetectGraph final : public V3Graph { protected: - LatchDetectGraphVertex* m_curVertexp; // Current latch detection graph vertex + LatchDetectGraphVertex* m_curVertexp = nullptr; // Current latch detection graph vertex std::vector m_outputs; // Vector of lvalues encountered on this pass VL_DEBUG_FUNC; // Declare debug() @@ -290,13 +290,13 @@ private: // STATE LatchDetectGraph m_graph; // Graph used to detect latches in combo always // VISITORS - virtual void visit(AstVarRef* nodep) { + virtual void visit(AstVarRef* nodep) override { const AstVar* const varp = nodep->varp(); if (nodep->access().isWriteOrRW() && varp->isSignal() && !varp->isUsedLoopIdx()) { m_graph.addAssignment(nodep); } } - virtual void visit(AstNodeIf* nodep) { + virtual void visit(AstNodeIf* nodep) override { if (!nodep->isBoundsCheck()) { LatchDetectGraphVertex* const parentp = m_graph.currentp(); LatchDetectGraphVertex* const branchp = m_graph.addPathVertex(parentp, "BRANCH", true); @@ -308,7 +308,7 @@ private: } } //-------------------- - virtual void visit(AstNode* nodep) { iterateChildren(nodep); } + virtual void visit(AstNode* nodep) override { iterateChildren(nodep); } public: // CONSTRUCTORS @@ -317,7 +317,7 @@ public: iterate(nodep); m_graph.latchCheck(nodep, kwd == VAlwaysKwd::ALWAYS_LATCH); } - virtual ~ActiveLatchCheckVisitor() = default; + ~ActiveLatchCheckVisitor() override = default; }; //###################################################################### @@ -387,7 +387,7 @@ public: : m_check{check} { iterate(nodep); } - virtual ~ActiveDlyVisitor() 
override = default; + ~ActiveDlyVisitor() override = default; }; //###################################################################### @@ -530,7 +530,7 @@ private: // Warn and/or convert any delayed assignments if (combo && !sequent) { - ActiveDlyVisitor{nodep, ActiveDlyVisitor::CT_COMB}; + { ActiveDlyVisitor{nodep, ActiveDlyVisitor::CT_COMB}; } const ActiveLatchCheckVisitor latchvisitor{nodep, kwd}; } else if (!combo && sequent) { ActiveDlyVisitor{nodep, ActiveDlyVisitor::CT_SEQ}; @@ -598,7 +598,7 @@ private: public: // CONSTRUCTORS explicit ActiveVisitor(AstNetlist* nodep) { iterate(nodep); } - virtual ~ActiveVisitor() override = default; + ~ActiveVisitor() override = default; }; //###################################################################### diff --git a/src/V3Ast.h b/src/V3Ast.h index 58e940480..eb1a61705 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -2605,7 +2605,7 @@ protected: : AstNodeMath{t, fl} , m_access{access} , m_name{name} { - this->varp(nullptr); + varp(nullptr); } AstNodeVarRef(VNType t, FileLine* fl, const string& name, AstVar* varp, const VAccess& access) : AstNodeMath{t, fl} diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 95ce8ba24..cf226075d 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -489,7 +489,7 @@ string AstVar::cPubArgType(bool named, bool forReturn) const { class dpiTypesToStringConverter VL_NOT_FINAL { public: virtual string openArray(const AstVar*) const { return "const svOpenArrayHandle"; } - virtual string bitLogicVector(const AstVar* varp, bool isBit) const { + virtual string bitLogicVector(const AstVar* /*varp*/, bool isBit) const { return isBit ? 
"svBitVecVal" : "svLogicVecVal"; } virtual string primitive(const AstVar* varp) const { @@ -659,7 +659,7 @@ public: } }; -string AstNodeDType::cType(const string& name, bool forFunc, bool isRef) const { +string AstNodeDType::cType(const string& name, bool /*forFunc*/, bool isRef) const { const CTypeRecursed info = cTypeRecurse(false); return info.render(name, isRef); } @@ -1326,8 +1326,9 @@ void AstClass::repairCache() { clearCache(); for (auto* itemp = membersp(); itemp; itemp = itemp->nextp()) { if (const auto* const scopep = VN_CAST(itemp, Scope)) { - for (auto* itemp = scopep->blocksp(); itemp; itemp = itemp->nextp()) - insertCache(itemp); + for (auto* blockp = scopep->blocksp(); blockp; blockp = blockp->nextp()) { + insertCache(blockp); + } } else { insertCache(itemp); } @@ -1354,7 +1355,7 @@ AstClass* AstClassExtends::classp() const { return refp->classp(); } void AstClassRefDType::dump(std::ostream& str) const { - this->AstNode::dump(str); + this->AstNodeDType::dump(str); if (classOrPackagep()) str << " cpkg=" << nodeAddr(classOrPackagep()); if (classp()) { str << " -> "; @@ -1385,7 +1386,7 @@ void AstEnumItemRef::dump(std::ostream& str) const { } } void AstIfaceRefDType::dump(std::ostream& str) const { - this->AstNode::dump(str); + this->AstNodeDType::dump(str); if (cellName() != "") str << " cell=" << cellName(); if (ifaceName() != "") str << " if=" << ifaceName(); if (modportName() != "") str << " mp=" << modportName(); @@ -1434,7 +1435,7 @@ void AstJumpLabel::dump(std::ostream& str) const { } } void AstLogOr::dump(std::ostream& str) const { - this->AstNode::dump(str); + this->AstNodeMath::dump(str); if (sideEffect()) str << " [SIDE]"; } void AstMemberSel::dump(std::ostream& str) const { @@ -1447,7 +1448,7 @@ void AstMemberSel::dump(std::ostream& str) const { } } void AstMethodCall::dump(std::ostream& str) const { - this->AstNodeStmt::dump(str); + this->AstNodeFTaskRef::dump(str); if (isStatement()) str << " [STMT]"; str << " -> "; if (taskp()) { @@ 
-1533,7 +1534,7 @@ void AstRefDType::dump(std::ostream& str) const { } } void AstNodeUOrStructDType::dump(std::ostream& str) const { - this->AstNode::dump(str); + this->AstNodeDType::dump(str); if (packed()) str << " [PACKED]"; if (isFourstate()) str << " [4STATE]"; } @@ -1624,7 +1625,7 @@ void AstPackageImport::dump(std::ostream& str) const { str << " -> " << packagep(); } void AstPatMember::dump(std::ostream& str) const { - this->AstNode::dump(str); + this->AstNodeMath::dump(str); if (isDefault()) str << " [DEFAULT]"; } void AstNodeTriop::dump(std::ostream& str) const { this->AstNodeMath::dump(str); } @@ -1773,7 +1774,7 @@ void AstScope::dump(std::ostream& str) const { str << " [modp=" << reinterpret_cast(modp()) << "]"; } void AstScopeName::dump(std::ostream& str) const { - this->AstNode::dump(str); + this->AstNodeMath::dump(str); if (dpiExport()) str << " [DPIEX]"; if (forFormat()) str << " [FMT]"; } @@ -1840,7 +1841,7 @@ void AstNodeBlock::dump(std::ostream& str) const { if (unnamed()) str << " [UNNAMED]"; } void AstBegin::dump(std::ostream& str) const { - this->AstNode::dump(str); + this->AstNodeBlock::dump(str); if (generate()) str << " [GEN]"; if (genforp()) str << " [GENFOR]"; if (implied()) str << " [IMPLIED]"; @@ -1866,7 +1867,7 @@ void AstCoverInc::dump(std::ostream& str) const { } } void AstFork::dump(std::ostream& str) const { - this->AstNode::dump(str); + this->AstNodeBlock::dump(str); if (!joinType().join()) str << " [" << joinType() << "]"; } void AstTraceDecl::dump(std::ostream& str) const { diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index f86f8e350..4c2a468d9 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -186,7 +186,7 @@ public: class AstEmpty final : public AstNode { // Represents something missing, e.g. 
a missing argument in FOREACH public: - AstEmpty(FileLine* fl) + explicit AstEmpty(FileLine* fl) : ASTGEN_SUPER_Empty(fl) {} ASTNODE_NODE_FUNCS(Empty) virtual bool same(const AstNode* samep) const override { return true; } @@ -194,7 +194,7 @@ public: class AstEmptyQueue final : public AstNodeMath { public: - AstEmptyQueue(FileLine* fl) + explicit AstEmptyQueue(FileLine* fl) : ASTGEN_SUPER_EmptyQueue(fl) {} ASTNODE_NODE_FUNCS(EmptyQueue) virtual string emitC() override { V3ERROR_NA_RETURN(""); } diff --git a/src/V3Broken.cpp b/src/V3Broken.cpp index 42f57e4e8..2eb5de33b 100644 --- a/src/V3Broken.cpp +++ b/src/V3Broken.cpp @@ -48,7 +48,7 @@ static class BrokenCntGlobal { uint8_t m_count = MIN_VALUE; public: - uint8_t get() { + uint8_t get() const { UASSERT(MIN_VALUE <= m_count && m_count <= MAX_VALUE, "Invalid generation number"); return m_count; } diff --git a/src/V3Class.cpp b/src/V3Class.cpp index 5b29eccd6..f65fbfbf6 100644 --- a/src/V3Class.cpp +++ b/src/V3Class.cpp @@ -115,13 +115,13 @@ private: // Move later, or we wouldn't keep interating the class // We're really moving the VarScope but we might not // have a pointer to it yet - m_toScopeMoves.push_back(std::make_pair(nodep, m_packageScopep)); + m_toScopeMoves.emplace_back(std::make_pair(nodep, m_packageScopep)); } if (!m_ftaskp && nodep->lifetime().isStatic()) { - m_toPackageMoves.push_back(std::make_pair(nodep, m_classPackagep)); + m_toPackageMoves.emplace_back(std::make_pair(nodep, m_classPackagep)); // We're really moving the VarScope but we might not // have a pointer to it yet - m_toScopeMoves.push_back(std::make_pair(nodep, m_packageScopep)); + m_toScopeMoves.emplace_back(std::make_pair(nodep, m_packageScopep)); } } } @@ -137,7 +137,7 @@ private: m_ftaskp = nodep; iterateChildren(nodep); if (m_packageScopep && nodep->lifetime().isStatic()) { - m_toScopeMoves.push_back(std::make_pair(nodep, m_packageScopep)); + m_toScopeMoves.emplace_back(std::make_pair(nodep, m_packageScopep)); } } } @@ -152,12 
+152,16 @@ private: virtual void visit(AstInitial* nodep) override { // But not AstInitialAutomatic, which remains under the class iterateChildren(nodep); - if (m_packageScopep) { m_toScopeMoves.push_back(std::make_pair(nodep, m_packageScopep)); } + if (m_packageScopep) { + m_toScopeMoves.emplace_back(std::make_pair(nodep, m_packageScopep)); + } } virtual void visit(AstInitialStatic* nodep) override { // But not AstInitialAutomatic, which remains under the class iterateChildren(nodep); - if (m_packageScopep) { m_toScopeMoves.push_back(std::make_pair(nodep, m_packageScopep)); } + if (m_packageScopep) { + m_toScopeMoves.emplace_back(std::make_pair(nodep, m_packageScopep)); + } } virtual void visit(AstNodeMath* nodep) override {} // Short circuit diff --git a/src/V3Combine.cpp b/src/V3Combine.cpp index eaf5e3708..b1db26b9b 100644 --- a/src/V3Combine.cpp +++ b/src/V3Combine.cpp @@ -220,7 +220,9 @@ class CombineVisitor final : VNVisitor { // CONSTRUCTORS explicit CombineVisitor(AstNetlist* nodep) { iterate(nodep); } - ~CombineVisitor() { V3Stats::addStat("Optimizations, Combined CFuncs", m_cfuncsCombined); } + ~CombineVisitor() override { + V3Stats::addStat("Optimizations, Combined CFuncs", m_cfuncsCombined); + } public: static void apply(AstNetlist* netlistp) { CombineVisitor{netlistp}; } diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 372757419..c9250a3f6 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -325,7 +325,6 @@ class ConstBitOpTreeVisitor final : public VNVisitor { return ResultTerm{resultp, ops, clean}; } - public: // CONSTRUCTORS VarInfo(ConstBitOpTreeVisitor* parent, AstVarRef* refp, int width) : m_parentp{parent} @@ -754,10 +753,11 @@ public: if (debug() >= 9) { // LCOV_EXCL_START cout << "Bitop tree considered: " << endl; for (AstNode* const termp : termps) termp->dumpTree("Reduced term: "); - for (const std::pair& termp : visitor.m_frozenNodes) + for (const std::pair& termp : visitor.m_frozenNodes) { termp.first->dumpTree("Frozen term with lsb 
" + std::to_string(termp.second.m_lsb) + " polarity " + std::to_string(termp.second.m_polarity) + ": "); + } cout << "Needs flipping: " << needsFlip << endl; cout << "Needs cleaning: " << needsCleaning << endl; cout << "Size: " << resultOps << " input size: " << visitor.m_ops << endl; diff --git a/src/V3DupFinder.h b/src/V3DupFinder.h index 4cf4b485a..ed8d13097 100644 --- a/src/V3DupFinder.h +++ b/src/V3DupFinder.h @@ -52,7 +52,7 @@ public: V3DupFinder() : m_hasherp{new V3Hasher} , m_hasher{*m_hasherp} {} - V3DupFinder(const V3Hasher& hasher) + explicit V3DupFinder(const V3Hasher& hasher) : m_hasherp{nullptr} , m_hasher{hasher} {} ~V3DupFinder() { diff --git a/src/V3EmitCFunc.cpp b/src/V3EmitCFunc.cpp index 7117fee7e..b3a224c5c 100644 --- a/src/V3EmitCFunc.cpp +++ b/src/V3EmitCFunc.cpp @@ -600,7 +600,7 @@ void EmitCFunc::emitVarReset(AstVar* varp) { // If an ARRAYINIT we initialize it using an initial block similar to a signal // puts("// parameter "+varp->nameProtect()+" = "+varp->valuep()->name()+"\n"); } else if (const AstInitArray* const initarp = VN_CAST(varp->valuep(), InitArray)) { - if (AstAssocArrayDType* const adtypep = VN_CAST(dtypep, AssocArrayDType)) { + if (VN_IS(dtypep, AssocArrayDType)) { if (initarp->defaultp()) { emitSetVarConstant(varNameProtected + ".atDefault()", VN_AS(initarp->defaultp(), Const)); @@ -611,7 +611,7 @@ void EmitCFunc::emitVarReset(AstVar* varp) { emitSetVarConstant(varNameProtected + ".at(" + cvtToStr(itr.first) + ")", VN_AS(valuep, Const)); } - } else if (AstWildcardArrayDType* const adtypep = VN_CAST(dtypep, WildcardArrayDType)) { + } else if (VN_IS(dtypep, WildcardArrayDType)) { if (initarp->defaultp()) { emitSetVarConstant(varNameProtected + ".atDefault()", VN_AS(initarp->defaultp(), Const)); diff --git a/src/V3EmitCHeaders.cpp b/src/V3EmitCHeaders.cpp index 77835212b..f0734f670 100644 --- a/src/V3EmitCHeaders.cpp +++ b/src/V3EmitCHeaders.cpp @@ -236,10 +236,11 @@ class EmitCHeader final : public EmitCConstInit { void 
emitAll(const AstNodeModule* modp) { // Include files required by this AstNodeModule if (const AstClass* const classp = VN_CAST(modp, Class)) { - if (classp->extendsp()) + if (classp->extendsp()) { puts("#include \"" + prefixNameProtect(classp->extendsp()->classp()->classOrPackagep()) + ".h\"\n"); + } } emitModCUse(modp, VUseType::INT_INCLUDE); diff --git a/src/V3EmitCMake.cpp b/src/V3EmitCMake.cpp index 1e8f1700e..db74dcce9 100644 --- a/src/V3EmitCMake.cpp +++ b/src/V3EmitCMake.cpp @@ -208,19 +208,15 @@ class CMakeEmitter final { *of << "target_link_libraries(${TOP_TARGET_NAME} PRIVATE " << prefix << ")\n"; if (!children.empty()) { *of << "target_link_libraries(" << prefix << " INTERFACE"; - for (V3HierBlock::HierBlockSet::const_iterator child = children.begin(); - child != children.end(); ++child) { - *of << " " << (*child)->hierPrefix(); - } + for (const auto& childr : children) { *of << " " << (childr)->hierPrefix(); } *of << ")\n"; } *of << "verilate(" << prefix << " PREFIX " << prefix << " TOP_MODULE " << hblockp->modp()->name() << " DIRECTORY " << deslash(v3Global.opt.makeDir() + "/" + prefix) << " SOURCES "; - for (V3HierBlock::HierBlockSet::const_iterator child = children.begin(); - child != children.end(); ++child) { + for (const auto& childr : children) { *of << " " - << deslash(v3Global.opt.makeDir() + "/" + (*child)->hierWrapper(true)); + << deslash(v3Global.opt.makeDir() + "/" + childr->hierWrapper(true)); } *of << " "; const string vFile = hblockp->vFileIfNecessary(); diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index c0a8e452c..5ec26221f 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -689,5 +689,5 @@ public: void V3EmitC::emitcModel() { UINFO(2, __FUNCTION__ << ": " << endl); - { EmitCModel emit(v3Global.rootp()); } + { EmitCModel{v3Global.rootp()}; } } diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index e325aa79e..aaa920664 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -774,7 +774,7 @@ void 
EmitCSyms::emitSymImp() { puts(protectIf(aboveScopep->nameDotless(), aboveScopep->protect())); } puts("."); - puts(protName.substr(protName.rfind(".") + 1)); + puts(protName.substr(protName.rfind('.') + 1)); puts(" = &"); puts(protectIf(scopep->nameDotless(), scopep->protect()) + ";\n"); ++m_numStmts; diff --git a/src/V3FileLine.cpp b/src/V3FileLine.cpp index 90e3c85e7..028ee77d1 100644 --- a/src/V3FileLine.cpp +++ b/src/V3FileLine.cpp @@ -72,10 +72,10 @@ int FileLineSingleton::nameToNumber(const string& filename) { //! Experimental. Updated to also put out the language. void FileLineSingleton::fileNameNumMapDumpXml(std::ostream& os) { os << "\n"; - for (auto it = m_namemap.cbegin(); it != m_namemap.cend(); ++it) { - os << "second) << "\" filename=\"" - << V3OutFormatter::quoteNameControls(it->first, V3OutFormatter::LA_XML) - << "\" language=\"" << numberToLang(it->second).ascii() << "\"/>\n"; + for (const auto& itr : m_namemap) { + os << "\n"; } os << "\n"; } diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index cf3485121..e5cecd07a 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -81,16 +81,16 @@ public: AstScope* scopep() const { return m_scopep; } bool reducible() const { return m_reducible; } bool dedupable() const { return m_dedupable; } - void setConsumed(const char* consumedReason) { + void setConsumed(const char* /*consumedReason*/) { m_consumed = true; // UINFO(0, "\t\tSetConsumed "<user4(0); - HasherVisitor{nodep}; + { HasherVisitor{nodep}; } return V3Hash(nodep->user4()); } diff --git a/src/V3LinkDot.cpp b/src/V3LinkDot.cpp index 63a4eb301..7c0d5269b 100644 --- a/src/V3LinkDot.cpp +++ b/src/V3LinkDot.cpp @@ -1902,7 +1902,7 @@ private: static int debug() { return LinkDotState::debug(); } // METHODS - Variables - void createImplicitVar(VSymEnt* lookupSymp, AstVarRef* nodep, AstNodeModule* modp, + void createImplicitVar(VSymEnt* /*lookupSymp*/, AstVarRef* nodep, AstNodeModule* modp, VSymEnt* moduleSymp, bool noWarn) { // Create implicit after warning if 
(!nodep->varp()) { diff --git a/src/V3LinkParse.cpp b/src/V3LinkParse.cpp index 79be8fd05..67d82b7fb 100644 --- a/src/V3LinkParse.cpp +++ b/src/V3LinkParse.cpp @@ -445,7 +445,7 @@ private: selp->rhsp()->unlinkFrBackWithNext()}; selp->replaceWith(newp); VL_DO_DANGLING(selp->deleteTree(), selp); - } else if (AstSelLoopVars* const selp = VN_CAST(bracketp, SelLoopVars)) { + } else if (VN_IS(bracketp, SelLoopVars)) { // Ok } else { nodep->v3error( diff --git a/src/V3LinkResolve.cpp b/src/V3LinkResolve.cpp index 45ce55922..a20100a80 100644 --- a/src/V3LinkResolve.cpp +++ b/src/V3LinkResolve.cpp @@ -404,7 +404,7 @@ private: return newFormat; } - static void expectDescriptor(AstNode* nodep, AstNodeVarRef* filep) { + static void expectDescriptor(AstNode* /*nodep*/, AstNodeVarRef* filep) { // This might fail on complex expressions like arrays // We use attrFileDescr() only for lint suppression, so that's ok if (filep && filep->varp()) filep->varp()->attrFileDescr(true); diff --git a/src/V3Localize.cpp b/src/V3Localize.cpp index 618b010ab..c121d5387 100644 --- a/src/V3Localize.cpp +++ b/src/V3Localize.cpp @@ -69,7 +69,7 @@ private: && m_accessors(nodep).size() == 1); // .. 
a block temp used in a single CFunc } - bool existsNonLeaf(const std::unordered_set& funcps) { + static bool existsNonLeaf(const std::unordered_set& funcps) { for (const AstCFunc* const funcp : funcps) { if (funcp->user1()) return true; } diff --git a/src/V3MergeCond.cpp b/src/V3MergeCond.cpp index bb4251cbb..74dfaecb1 100644 --- a/src/V3MergeCond.cpp +++ b/src/V3MergeCond.cpp @@ -414,7 +414,7 @@ public: // Given an AstNode list (held via AstNode::nextp()), move conditional statements as close // together as possible static AstNode* optimize(AstNode* nodep, const StmtPropertiesAllocator& stmtProperties) { - CodeMotionOptimizeVisitor{nodep, stmtProperties}; + { CodeMotionOptimizeVisitor{nodep, stmtProperties}; } // It is possible for the head of the list to be moved later such that it is no longer // in head position. If so, rewind the list and return the new head. while (nodep->backp()->nextp() == nodep) nodep = nodep->backp(); diff --git a/src/V3Order.cpp b/src/V3Order.cpp index 032f8ef3f..311b4a381 100644 --- a/src/V3Order.cpp +++ b/src/V3Order.cpp @@ -737,7 +737,7 @@ class OrderBuildVisitor final : public VNVisitor { virtual void visit(AstNode* nodep) override { iterateChildren(nodep); } // CONSTRUCTOR - OrderBuildVisitor(AstNetlist* nodep) { + explicit OrderBuildVisitor(AstNetlist* nodep) { // Enable debugging (3 is default if global debug; we want acyc debugging) if (debug()) m_graphp->debug(5); @@ -753,7 +753,7 @@ class OrderBuildVisitor final : public VNVisitor { // Build the rest of the graph iterate(nodep); } - virtual ~OrderBuildVisitor() = default; + ~OrderBuildVisitor() override = default; public: // Process the netlist and return the constructed ordering graph. 
It's 'process' because @@ -1331,7 +1331,7 @@ class OrderProcess final : VNDeleter { pushDeletep(m_deleteDomainp); } - ~OrderProcess() { + ~OrderProcess() override { // Stats for (int type = 0; type < OrderVEdgeType::_ENUM_END; type++) { const double count = double(m_statCut[type]); @@ -2103,9 +2103,9 @@ void V3Order::orderAll(AstNetlist* netlistp) { // Build ordering graph std::unique_ptr orderGraph = OrderBuildVisitor::process(netlistp); // Order the netlist - OrderProcess::main(netlistp, *orderGraph.get()); + OrderProcess::main(netlistp, *orderGraph); // Reset debug level - orderGraph.get()->debug(V3Error::debugDefault()); + orderGraph->debug(V3Error::debugDefault()); // Dump tree V3Global::dumpCheckGlobalTree("order", 0, v3Global.opt.dumpTreeLevel(__FILE__) >= 3); } diff --git a/src/V3Order.h b/src/V3Order.h index 13b82ad69..fa9724473 100644 --- a/src/V3Order.h +++ b/src/V3Order.h @@ -26,7 +26,7 @@ class AstNetlist; class V3Order final { public: - static void orderAll(AstNetlist* nodep); + static void orderAll(AstNetlist* netlistp); }; #endif // Guard diff --git a/src/V3Param.cpp b/src/V3Param.cpp index 607639ed9..610bdba9a 100644 --- a/src/V3Param.cpp +++ b/src/V3Param.cpp @@ -289,7 +289,7 @@ class ParamProcessor final { } } } - string paramSmallName(AstNodeModule* modp, AstNode* varp) { + static string paramSmallName(AstNodeModule* modp, AstNode* varp) { if (varp->user4() <= 1) makeSmallNames(modp); int index = varp->user4() / 256; const char ch = varp->user4() & 255; diff --git a/src/V3ParseImp.cpp b/src/V3ParseImp.cpp index de03355ef..6eb40cf7c 100644 --- a/src/V3ParseImp.cpp +++ b/src/V3ParseImp.cpp @@ -468,13 +468,13 @@ void V3ParseImp::tokenPipeline() { if (nexttok == yP_COLONCOLON) { token = yaID__CC; } else if (nexttok == '#') { - const V3ParseBisonYYSType curValue + const V3ParseBisonYYSType curValueHold = yylval; // Remember value, as about to read ahead { const size_t depth = tokenPipeScanParam(0); if (tokenPeekp(depth)->token == yP_COLONCOLON) token 
= yaID__CC; } - yylval = curValue; + yylval = curValueHold; } } // If add to above "else if", also add to "if (token" further above diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 6b6aea6d7..4b37f53df 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -261,7 +261,7 @@ private: // METHODS protected: friend class PartPropagateCp; - void notifyEdgeCp(V3GraphVertex* vxp, GraphWay way, V3GraphVertex* throughp, + void notifyEdgeCp(V3GraphVertex* /*vxp*/, GraphWay way, V3GraphVertex* throughp, uint32_t cp) const { const uint32_t throughCost = critPathCost(throughp, way); UASSERT_SELFTEST(uint32_t, cp, (1 + throughCost)); @@ -272,7 +272,7 @@ private: // Don't need to check this in the self test; it supports an assert // that runs in production code. } - void setCritPathCost(V3GraphVertex* vxp, GraphWay way, uint32_t cost) { + void setCritPathCost(V3GraphVertex* vxp, GraphWay /*way*/, uint32_t cost) { m_cp[vxp] = cost; // Confirm that we only set each node's CP once. That's an // important property of PartPropagateCp which allows it to be far @@ -281,7 +281,7 @@ private: UASSERT_OBJ(it == m_seen.end(), vxp, "Set CP on node twice"); m_seen[vxp] = cost; } - uint32_t critPathCost(V3GraphVertex* vxp, GraphWay way) const { + uint32_t critPathCost(V3GraphVertex* vxp, GraphWay /*way*/) const { const auto it = m_cp.find(vxp); if (it != m_cp.end()) return it->second; return 0; @@ -743,7 +743,7 @@ public: bool mergeWouldCreateCycle() const; // Instead of virtual method bool removedFromSb() const { return (m_id & REMOVED_MASK) != 0; } - void removedFromSb(bool removed) { m_id |= REMOVED_MASK; } + void removedFromSb(bool /*removed*/) { m_id |= REMOVED_MASK; } bool operator<(const MergeCandidate& other) const { return m_id < other.m_id; } }; diff --git a/src/V3Premit.cpp b/src/V3Premit.cpp index 50254b1ca..936395f9d 100644 --- a/src/V3Premit.cpp +++ b/src/V3Premit.cpp @@ -378,7 +378,7 @@ public: : m_tempNames{"__Vtemp"} { iterate(nodep); } - virtual 
~PremitVisitor() { + ~PremitVisitor() override { V3Stats::addStat("Optimizations, Prelim extracted value to ConstPool", m_extractedToConstPool); } diff --git a/src/V3Randomize.cpp b/src/V3Randomize.cpp index 88aa10198..dbcdbd9ed 100644 --- a/src/V3Randomize.cpp +++ b/src/V3Randomize.cpp @@ -75,7 +75,7 @@ private: } } void markAllDerived() { - for (auto p : m_baseToDerivedMap) { + for (const auto& p : m_baseToDerivedMap) { if (p.first->user1()) markDerived(p.first); } } diff --git a/src/V3Reloop.cpp b/src/V3Reloop.cpp index 360f38769..3186b182a 100644 --- a/src/V3Reloop.cpp +++ b/src/V3Reloop.cpp @@ -67,11 +67,11 @@ private: // METHODS VL_DEBUG_FUNC; // Declare debug() - AstVar* findCreateVarTemp(FileLine* fl, AstCFunc* cfuncp) { + static AstVar* findCreateVarTemp(FileLine* fl, AstCFunc* cfuncp) { AstVar* varp = VN_AS(cfuncp->user1p(), Var); if (!varp) { const string newvarname = string("__Vilp"); - varp = new AstVar(fl, VVarType::STMTTEMP, newvarname, VFlagLogicPacked(), 32); + varp = new AstVar{fl, VVarType::STMTTEMP, newvarname, VFlagLogicPacked{}, 32}; UASSERT_OBJ(cfuncp, fl, "Assignment not under a function"); cfuncp->addInitsp(varp); cfuncp->user1p(varp); diff --git a/src/V3SplitVar.cpp b/src/V3SplitVar.cpp index 549d6ecbc..193b0e061 100644 --- a/src/V3SplitVar.cpp +++ b/src/V3SplitVar.cpp @@ -610,7 +610,7 @@ class SplitUnpackedVarVisitor final : public VNVisitor, public SplitVarImpl { } AstVarRef* createTempVar(AstNode* context, AstNode* nodep, AstUnpackArrayDType* dtypep, const std::string& name_prefix, std::vector& vars, - int start_idx, bool lvalue, bool ftask) { + int start_idx, bool lvalue, bool /*ftask*/) { FileLine* const fl = nodep->fileline(); const std::string name = m_tempNames.get(nodep) + "__" + name_prefix; AstNodeAssign* const assignp = VN_CAST(context, NodeAssign); @@ -773,8 +773,7 @@ class SplitUnpackedVarVisitor final : public VNVisitor, public SplitVarImpl { public: explicit SplitUnpackedVarVisitor(AstNetlist* nodep) - : m_refs{} - , 
m_tempNames{"__VsplitVar"} { + : m_tempNames{"__VsplitVar"} { iterate(nodep); } ~SplitUnpackedVarVisitor() override { @@ -1114,10 +1113,10 @@ class SplitPackedVarVisitor final : public VNVisitor, public SplitVarImpl { << " is added for " << varp->prettyNameQ() << '\n'); } } - static void updateReferences(AstVar* varp, PackedVarRef& ref, + static void updateReferences(AstVar* varp, PackedVarRef& pref, const std::vector& vars) { for (const bool lvalue : {false, true}) { // Refer the new split variables - std::vector& refs = lvalue ? ref.lhs() : ref.rhs(); + std::vector& refs = lvalue ? pref.lhs() : pref.rhs(); for (PackedVarRefEntry& ref : refs) { auto varit = std::upper_bound(vars.begin(), vars.end(), ref.lsb(), SplitNewVar::Match()); diff --git a/src/V3TraceDecl.cpp b/src/V3TraceDecl.cpp index a3f0643e0..791034307 100644 --- a/src/V3TraceDecl.cpp +++ b/src/V3TraceDecl.cpp @@ -112,7 +112,7 @@ private: AstVarScope* m_vscp; // AstVarScope being traced (non const to allow copy during sorting) std::string m_path; // Path to enclosing module in hierarchy std::string m_name; // Name of signal - Signal(AstVarScope* vscp) + explicit Signal(AstVarScope* vscp) : m_vscp{vscp} { // Compute path in hierarchy and signal name const string& vcdName = AstNode::vcdName(vscp->varp()->name()); @@ -312,13 +312,13 @@ private: const string intfScopeName = irpName.substr(0, scopeLen); if (scopeName != intfScopeName) continue; - string scopeName = AstNode::vcdName(irp->name()); - if (scopeName.substr(0, 4) == "TOP ") scopeName.erase(0, 4); + string iscopeName = AstNode::vcdName(irp->name()); + if (iscopeName.substr(0, 4) == "TOP ") iscopeName.erase(0, 4); // Note this insert doesn't know what above is interfaces. // Perhaps all scopes should be changed to include the VLT_TRACE_SCOPE characters. 
// Instead we fix up when printing m_scopeSubFuncps - scopeName += getScopeChar(VLT_TRACE_SCOPE_INTERFACE) + ' '; - m_scopeSubFuncps.emplace(scopeName, m_subFuncps); + iscopeName += getScopeChar(VLT_TRACE_SCOPE_INTERFACE) + ' '; + m_scopeSubFuncps.emplace(iscopeName, m_subFuncps); VL_DO_DANGLING(irp->unlinkFrBack(), irp); } diff --git a/src/V3Tristate.cpp b/src/V3Tristate.cpp index d09acd272..6a9d7f4a8 100644 --- a/src/V3Tristate.cpp +++ b/src/V3Tristate.cpp @@ -607,7 +607,6 @@ class TristateVisitor final : public TristateBaseVisitor { if (!outvarp) { // This is the final pre-forced resolution of the tristate, so we apply // the pull direction to any undriven pins. - V3Number pull(invarp, lhsp->width()); const AstPull* const pullp = static_cast(lhsp->user3p()); bool pull1 = pullp && pullp->direction() == 1; // Else default is down undrivenp @@ -989,12 +988,13 @@ class TristateVisitor final : public TristateBaseVisitor { if (!dropop[2]) iterateAndNextNull(nodep->fhsp()); } else { AstNode* nonXp = nullptr; - if (!dropop[0]) + if (!dropop[0]) { nonXp = nodep->rhsp(); - else if (!dropop[1]) + } else if (!dropop[1]) { nonXp = nodep->thsp(); - else if (!dropop[2]) + } else if (!dropop[2]) { nonXp = nodep->fhsp(); + } // Replace 'z with non-Z if (dropop[0] || dropop[1] || dropop[2]) { // Unsupported: A $countones('0) should compare with the enables, but we don't diff --git a/src/V3VariableOrder.cpp b/src/V3VariableOrder.cpp index 8c6d91edd..6628cb7ed 100644 --- a/src/V3VariableOrder.cpp +++ b/src/V3VariableOrder.cpp @@ -48,7 +48,7 @@ public: : m_mtaskIds(mtaskIds) { // Cannot be {} or GCC 4.8 false warning m_serial = ++s_serialNext; // Cannot be ()/{} or GCC 4.8 false warning } - virtual ~VarTspSorter() = default; + ~VarTspSorter() override = default; // METHODS virtual bool operator<(const TspStateBase& other) const override { return operator<(dynamic_cast(other)); diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 237675b4a..f24380c2c 100644 --- a/src/V3Width.cpp +++ 
b/src/V3Width.cpp @@ -674,10 +674,11 @@ private: AstNodeDType* const vdtypep = m_vup->dtypeNullSkipRefp(); if (VN_IS(vdtypep, QueueDType) || VN_IS(vdtypep, DynArrayDType) || VN_IS(vdtypep, UnpackArrayDType)) { - if (times != 1) + if (times != 1) { nodep->v3warn(E_UNSUPPORTED, "Unsupported: Non-1 replication to form " << vdtypep->prettyDTypeNameQ() << " data type"); + } // Don't iterate lhsp as SELF, the potential Concat below needs // the adtypep passed down to recognize the QueueDType userIterateAndNext(nodep->lhsp(), WidthVP(vdtypep, BOTH).p()); @@ -1125,9 +1126,10 @@ private: } virtual void visit(AstEmptyQueue* nodep) override { nodep->dtypeSetEmptyQueue(); - if (!VN_IS(nodep->backp(), Assign)) + if (!VN_IS(nodep->backp(), Assign)) { nodep->v3warn(E_UNSUPPORTED, "Unsupported/Illegal: empty queue ('{}') in this context"); + } } virtual void visit(AstFell* nodep) override { if (m_vup->prelim()) { @@ -4040,6 +4042,7 @@ private: argsp->v3error("Illegal to foreach loop on basic '" + fromDtp->prettyTypeName() + "'"); VL_DO_DANGLING(nodep->unlinkFrBack()->deleteTree(), nodep); + VL_DO_DANGLING(bodyPointp->deleteTree(), bodyPointp); return; } if (varp) { @@ -4592,9 +4595,11 @@ private: // TOP LEVEL NODE if (nodep->modVarp() && nodep->modVarp()->isGParam()) { // Widthing handled as special init() case - if (auto* const patternp = VN_CAST(nodep->exprp(), Pattern)) - if (const auto* modVarp = nodep->modVarp()) + if (auto* const patternp = VN_CAST(nodep->exprp(), Pattern)) { + if (const auto* modVarp = nodep->modVarp()) { patternp->childDTypep(modVarp->childDTypep()->cloneTree(false)); + } + } userIterateChildren(nodep, WidthVP(SELF, BOTH).p()); } else if (!m_paramsOnly) { if (!nodep->modVarp()->didWidth()) { diff --git a/src/V3WidthSel.cpp b/src/V3WidthSel.cpp index 4d6b7de01..b4afe191a 100644 --- a/src/V3WidthSel.cpp +++ b/src/V3WidthSel.cpp @@ -70,7 +70,7 @@ private: , m_fromRange{fromRange} {} ~FromData() = default; }; - FromData fromDataForArray(AstNode* nodep, 
AstNode* basefromp) { + static FromData fromDataForArray(AstNode* nodep, AstNode* basefromp) { // What is the data type and information for this SEL-ish's from()? UINFO(9, " fromData start ddtypep = " << basefromp << endl); VNumRange fromRange; // constructs to isRanged(false) diff --git a/src/Verilator.cpp b/src/Verilator.cpp index f503ee6b9..f28faabf6 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -703,7 +703,7 @@ static void execHierVerilation() { //###################################################################### -int main(int argc, char** argv, char** env) { +int main(int argc, char** argv, char** /*env*/) { // General initialization std::ios::sync_with_stdio(); From 38e5b6c1ad067b0aa2e365394226e617f3af3445 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 27 Jul 2022 18:42:45 +0100 Subject: [PATCH 062/119] Replace __gcov_flush with __gcov_dump __gcov_flush was a private function and was removed from later GCC versions (at least from 11.2.0, possibly earlier). Replace with the documented public __gcov_dump. --- include/verilated.cpp | 2 +- include/verilatedos.h | 11 ++++------- src/V3Error.cpp | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/verilated.cpp b/include/verilated.cpp index be9740700..6dfc844e1 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -2809,7 +2809,7 @@ void Verilated::runFlushCallbacks() VL_MT_SAFE { // When running internal code coverage (gcc --coverage, as opposed to // verilator --coverage), dump coverage data to properly cover failing // tests. 
- VL_GCOV_FLUSH(); + VL_GCOV_DUMP(); } void Verilated::addExitCb(VoidPCb cb, void* datap) VL_MT_SAFE { diff --git a/include/verilatedos.h b/include/verilatedos.h index b01eac65c..5c74bc065 100644 --- a/include/verilatedos.h +++ b/include/verilatedos.h @@ -255,14 +255,11 @@ // Internal coverage #ifdef VL_GCOV -extern "C" { -void __gcov_flush(); // gcc sources gcc/gcov-io.h has the prototype -} -// Flush internal code coverage data before e.g. std::abort() -# define VL_GCOV_FLUSH() \ - __gcov_flush() +extern "C" void __gcov_dump(); +// Dump internal code coverage data before e.g. std::abort() +# define VL_GCOV_DUMP() __gcov_dump() #else -# define VL_GCOV_FLUSH() +# define VL_GCOV_DUMP() #endif //========================================================================= diff --git a/src/V3Error.cpp b/src/V3Error.cpp index 1f1f8d08f..f032e7c06 100644 --- a/src/V3Error.cpp +++ b/src/V3Error.cpp @@ -167,7 +167,7 @@ void V3Error::vlAbortOrExit() { } void V3Error::vlAbort() { - VL_GCOV_FLUSH(); + VL_GCOV_DUMP(); std::abort(); } From dce8f3d25d8a9db9a8556634747d7171b1721bcb Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 30 Jul 2022 10:26:47 -0400 Subject: [PATCH 063/119] Internals: Spacing from develop-v5. No functional change. --- include/verilated.mk.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/verilated.mk.in b/include/verilated.mk.in index 03a6ef35d..c86e93676 100644 --- a/include/verilated.mk.in +++ b/include/verilated.mk.in @@ -167,7 +167,7 @@ VM_SLOW += $(VM_CLASSES_SLOW) $(VM_SUPPORT_SLOW) VK_FAST_OBJS = $(addsuffix .o, $(VM_FAST)) VK_SLOW_OBJS = $(addsuffix .o, $(VM_SLOW)) -VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES)) +VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES)) # Note VM_GLOBAL_FAST and VM_GLOBAL_SLOW holds the files required from the # run-time library. 
In practice everything is actually in VM_GLOBAL_FAST, From a2d26b45bbfa736a5a8845a6a06a9bce229a89ee Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 30 Jul 2022 11:52:35 -0400 Subject: [PATCH 064/119] Internals: Fix some clang-tidy issues. No functional change intended. --- Makefile.in | 7 +- include/verilated.cpp | 11 ++- include/verilated.h | 4 +- include/verilated_cov.cpp | 2 +- include/verilated_cov.h | 5 +- include/verilated_fst_c.cpp | 3 +- include/verilated_fst_c.h | 2 +- include/verilated_fst_sc.h | 2 +- include/verilated_imp.h | 37 ++++----- include/verilated_profiler.h | 4 +- include/verilated_save.h | 5 +- include/verilated_sym_props.h | 1 - include/verilated_threads.h | 13 ++-- include/verilated_trace.h | 4 +- include/verilated_vcd_c.h | 3 +- include/verilated_vcd_sc.h | 2 +- include/verilated_vpi.h | 2 +- src/V3Ast.h | 12 +-- src/V3AstNodes.h | 142 +++++++++++++++++----------------- src/V3Dead.cpp | 4 +- src/V3DupFinder.h | 5 +- src/V3FileLine.cpp | 2 +- src/V3FileLine.h | 4 +- src/V3HierBlock.cpp | 2 +- src/V3Options.cpp | 8 +- src/V3ParseGrammar.cpp | 3 +- src/V3ParseImp.h | 5 +- src/V3PartitionGraph.h | 4 +- src/V3PreProc.cpp | 2 +- src/V3Simulate.h | 7 +- src/V3TraceDecl.cpp | 4 +- src/V3Tristate.cpp | 2 +- src/V3Unroll.cpp | 2 +- src/VlcBucket.h | 10 +-- 34 files changed, 160 insertions(+), 165 deletions(-) diff --git a/Makefile.in b/Makefile.in index dc14bddfd..b805f4886 100644 --- a/Makefile.in +++ b/Makefile.in @@ -322,12 +322,13 @@ cppcheck: $(CPPCHECK_DEP) $(CPPCHECK) $(CPPCHECK_FLAGS) -DVL_DEBUG=1 -DVL_CPPCHECK=1 -DVL_THREADED=1 $(CPPCHECK_INC) $< CLANGTIDY = clang-tidy -CLANGTIDY_FLAGS = -config='' -checks='-fuchsia-*,-cppcoreguidelines-avoid-c-arrays,-cppcoreguidelines-init-variables' +CLANGTIDY_FLAGS = -config='' \ + -header-filter='.*' \ + 
-checks='-fuchsia-*,-cppcoreguidelines-avoid-c-arrays,-cppcoreguidelines-init-variables,-cppcoreguidelines-avoid-goto,-modernize-avoid-c-arrays,-readability-magic-numbers,-readability-simplify-boolean-expr,-cppcoreguidelines-macro-usage' \ + CLANGTIDY_DEP = $(subst .h,.h.tidy,$(CPPCHECK_H)) \ $(subst .cpp,.cpp.tidy,$(CPPCHECK_CPP)) CLANGTIDY_DEFS = -DVL_DEBUG=1 -DVL_THREADED=1 -DVL_CPPCHECK=1 -# cppcoreguidelines-avoid-goto modernize-avoid-c-arrays readability-magic-numbers readability-simplify-boolean-expr -# cppcoreguidelines-macro-usage clang-tidy: $(CLANGTIDY_DEP) %.cpp.tidy: %.cpp diff --git a/include/verilated.cpp b/include/verilated.cpp index 6dfc844e1..90b5f46c9 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -700,7 +700,8 @@ std::string _vl_vsformat_time(char* tmp, T ld, int timeunit, bool left, size_t w = VL_EXTEND_WQ(b, 0, tmp2, std::numeric_limits::max()); // breaks shifted if (VL_GT_W(w, integer, max64Bit)) { WDataOutP v = VL_ASSIGN_W(b, tmp3, integer); // breaks fracDigitsPow10 - VlWide zero, ten; + VlWide zero; + VlWide ten; VL_ZERO_W(b, zero); VL_EXTEND_WI(b, 0, ten, 10); char buf[128]; // 128B is obviously long enough to represent 128bit integer in decimal @@ -1850,8 +1851,7 @@ VlReadMem::VlReadMem(bool hex, int bits, const std::string& filename, QData star , m_bits{bits} , m_filename(filename) // Need () or GCC 4.8 false warning , m_end{end} - , m_addr{start} - , m_linenum{0} { + , m_addr{start} { m_fp = std::fopen(filename.c_str(), "r"); if (VL_UNLIKELY(!m_fp)) { // We don't report the Verilog source filename as it slow to have to pass it down @@ -1985,8 +1985,7 @@ void VlReadMem::setData(void* valuep, const std::string& rhs) { VlWriteMem::VlWriteMem(bool hex, int bits, const std::string& filename, QData start, QData end) : m_hex{hex} - , m_bits{bits} - , m_addr{0} { + , m_bits{bits} { if (VL_UNLIKELY(start > end)) { VL_FATAL_MT(filename.c_str(), 0, "", "$writemem invalid address range"); return; @@ -2311,7 +2310,7 @@ 
std::string VerilatedContext::dumpfile() const VL_MT_SAFE_EXCLUDES(m_timeDumpMut return m_dumpfile; } std::string VerilatedContext::dumpfileCheck() const VL_MT_SAFE_EXCLUDES(m_timeDumpMutex) { - const std::string out = dumpfile(); + std::string out = dumpfile(); if (VL_UNLIKELY(out.empty())) { VL_PRINTF_MT("%%Warning: $dumpvar ignored as not proceeded by $dumpfile\n"); return ""; diff --git a/include/verilated.h b/include/verilated.h index 8943fc523..bed666c7d 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -558,7 +558,8 @@ public: /// releases - contact the authors before production use. void scopesDump() const VL_MT_SAFE; -public: // But for internal use only + // METHODS - public but for internal use only + // Internal: access to implementation class VerilatedContextImp* impp() { return reinterpret_cast(this); } const VerilatedContextImp* impp() const { @@ -884,7 +885,6 @@ public: } #endif -public: // METHODS - INTERNAL USE ONLY (but public due to what uses it) // Internal: Create a new module name by concatenating two strings // Returns pointer to thread-local static data (overwritten on next call) diff --git a/include/verilated_cov.cpp b/include/verilated_cov.cpp index d6f03a7f8..6c0f72340 100644 --- a/include/verilated_cov.cpp +++ b/include/verilated_cov.cpp @@ -205,7 +205,7 @@ private: // Forward to . so we have a whole word const std::string suffix = *bpost ? 
std::string{bpost + 1} : ""; - const std::string out = prefix + "*" + suffix; + std::string out = prefix + "*" + suffix; // cout << "\nch pre="<seedp() && reset() == samep->reset() && urandom() == samep->urandom(); @@ -5772,7 +5772,7 @@ public: virtual bool isGateOptimizable() const override { return false; } virtual bool isPredictOptimizable() const override { return false; } virtual int instrCount() const override { return INSTR_COUNT_TIME; } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } virtual void dump(std::ostream& str = std::cout) const override; void timeunit(const VTimescale& flag) { m_timeunit = flag; } VTimescale timeunit() const { return m_timeunit; } @@ -5793,7 +5793,7 @@ public: virtual bool isGateOptimizable() const override { return false; } virtual bool isPredictOptimizable() const override { return false; } virtual int instrCount() const override { return INSTR_COUNT_TIME; } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } virtual void dump(std::ostream& str = std::cout) const override; void timeunit(const VTimescale& flag) { m_timeunit = flag; } VTimescale timeunit() const { return m_timeunit; } @@ -5818,7 +5818,7 @@ public: virtual bool isSubstOptimizable() const override { return false; } virtual bool isPredictOptimizable() const override { return false; } virtual int instrCount() const override { return INSTR_COUNT_PLI; } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; //====================================================================== @@ -6406,7 +6406,7 @@ public: virtual bool cleanOut() const override { return true; } virtual bool cleanLhs() const override { return true; } virtual bool sizeMattersLhs() const override { return 
false; } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; class AstFEof final : public AstNodeUniop { @@ -6448,7 +6448,7 @@ public: AstNode* filep() const { return op1p(); } void strp(AstNode* nodep) { setOp2p(nodep); } AstNode* strp() const { return op2p(); } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; class AstFGetC final : public AstNodeUniop { @@ -8573,7 +8573,7 @@ public: AstNode* exprp() const { return op1p(); } // op1 = expression AstSenTree* sentreep() const { return VN_AS(op2p(), SenTree); } // op2 = clock domain void sentreep(AstSenTree* sentreep) { addOp2p(sentreep); } // op2 = clock domain - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; class AstPast final : public AstNodeMath { @@ -8596,7 +8596,7 @@ public: AstNode* ticksp() const { return op2p(); } // op2 = ticks or nullptr means 1 AstSenTree* sentreep() const { return VN_AS(op4p(), SenTree); } // op4 = clock domain void sentreep(AstSenTree* sentreep) { addOp4p(sentreep); } // op4 = clock domain - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; class AstRose final : public AstNodeMath { @@ -8617,7 +8617,7 @@ public: AstNode* exprp() const { return op1p(); } // op1 = expression AstSenTree* sentreep() const { return VN_AS(op2p(), SenTree); } // op2 = clock domain void sentreep(AstSenTree* sentreep) { addOp2p(sentreep); } // op2 = clock domain - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; class AstSampled final : public AstNodeMath { @@ -8636,7 +8636,7 @@ public: virtual bool 
cleanOut() const override { V3ERROR_NA_RETURN(""); } virtual int instrCount() const override { return 0; } AstNode* exprp() const { return op1p(); } // op1 = expression - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; class AstStable final : public AstNodeMath { @@ -8657,7 +8657,7 @@ public: AstNode* exprp() const { return op1p(); } // op1 = expression AstSenTree* sentreep() const { return VN_AS(op2p(), SenTree); } // op2 = clock domain void sentreep(AstSenTree* sentreep) { addOp2p(sentreep); } // op2 = clock domain - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; class AstPattern final : public AstNodeMath { @@ -8732,7 +8732,7 @@ public: void rhsp(AstNode* nodep) { return setOp2p(nodep); } AstSenTree* sentreep() const { return VN_AS(op4p(), SenTree); } // op4 = clock domain void sentreep(AstSenTree* sentreep) { addOp4p(sentreep); } // op4 = clock domain - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; //====================================================================== @@ -8953,7 +8953,7 @@ public: virtual bool isPredictOptimizable() const override { return false; } virtual bool isPure() const override { return false; } virtual bool isOutputter() const override { return true; } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } }; //====================================================================== @@ -8973,7 +8973,7 @@ public: ASTNODE_BASE_FUNCS(NodeFile) virtual void dump(std::ostream& str) const override; virtual string name() const override { return m_name; } - virtual bool same(const AstNode* samep) const override { return true; } + virtual 
bool same(const AstNode* /*samep*/) const override { return true; } void tblockp(AstTextBlock* tblockp) { setOp1p(tblockp); } AstTextBlock* tblockp() { return VN_AS(op1p(), TextBlock); } }; @@ -9241,7 +9241,7 @@ public: } ASTNODE_NODE_FUNCS(CReturn) virtual int instrCount() const override { return widthInstrs(); } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } // AstNode* lhsp() const { return op1p(); } }; @@ -9272,7 +9272,7 @@ public: virtual bool cleanOut() const override { return m_cleanOut; } virtual string emitVerilog() override { V3ERROR_NA_RETURN(""); } virtual string emitC() override { V3ERROR_NA_RETURN(""); } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } void addBodysp(AstNode* nodep) { addNOp1p(nodep); } AstNode* bodysp() const { return op1p(); } // op1 = expressions to print bool pure() const { return m_pure; } @@ -9289,7 +9289,7 @@ public: ASTNODE_NODE_FUNCS(CReset) virtual bool isGateOptimizable() const override { return false; } virtual bool isPredictOptimizable() const override { return false; } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } AstVarRef* varrefp() const { return VN_AS(op1p(), VarRef); } // op1 = varref to reset }; @@ -9307,7 +9307,7 @@ public: ASTNODE_NODE_FUNCS(CStmt) virtual bool isGateOptimizable() const override { return false; } virtual bool isPredictOptimizable() const override { return false; } - virtual bool same(const AstNode* samep) const override { return true; } + virtual bool same(const AstNode* /*samep*/) const override { return true; } void addBodysp(AstNode* nodep) { addNOp1p(nodep); } AstNode* bodysp() const { return op1p(); } // op1 = expressions to print }; @@ -9403,7 +9403,7 @@ class AstTypeTable final : 
public AstNode { AstEmptyQueueDType* m_emptyQueuep = nullptr; AstQueueDType* m_queueIndexp = nullptr; AstVoidDType* m_voidp = nullptr; - AstBasicDType* m_basicps[VBasicDTypeKwd::_ENUM_MAX]; + AstBasicDType* m_basicps[VBasicDTypeKwd::_ENUM_MAX]{}; // using DetailedMap = std::map; DetailedMap m_detailedMap; diff --git a/src/V3Dead.cpp b/src/V3Dead.cpp index 6347fc098..8018e94e9 100644 --- a/src/V3Dead.cpp +++ b/src/V3Dead.cpp @@ -85,7 +85,7 @@ private: } if (AstNode* const subnodep = nodep->getChildDTypep()) subnodep->user1Inc(); } - void checkVarRef(AstNodeVarRef* nodep) { + void checkVarRef(AstNodeVarRef* nodep) const { if (nodep->classOrPackagep() && m_elimCells) nodep->classOrPackagep(nullptr); } void checkDType(AstNodeDType* nodep) { @@ -316,7 +316,7 @@ private: } } } - bool mightElimVar(AstVar* nodep) { + bool mightElimVar(AstVar* nodep) const { if (nodep->isSigPublic()) return false; // Can't elim publics! if (nodep->isIO() || nodep->isClassMember()) return false; if (nodep->isTemp() && !nodep->isTrace()) return true; diff --git a/src/V3DupFinder.h b/src/V3DupFinder.h index ed8d13097..1ee51a992 100644 --- a/src/V3DupFinder.h +++ b/src/V3DupFinder.h @@ -44,7 +44,7 @@ class V3DupFinder final : private std::multimap { using Super = std::multimap; // MEMBERS - const V3Hasher* const m_hasherp; // Pointer to owned hasher + const V3Hasher* const m_hasherp = nullptr; // Pointer to owned hasher const V3Hasher& m_hasher; // Reference to hasher public: @@ -53,8 +53,7 @@ public: : m_hasherp{new V3Hasher} , m_hasher{*m_hasherp} {} explicit V3DupFinder(const V3Hasher& hasher) - : m_hasherp{nullptr} - , m_hasher{hasher} {} + : m_hasher{hasher} {} ~V3DupFinder() { if (m_hasherp) delete m_hasherp; } diff --git a/src/V3FileLine.cpp b/src/V3FileLine.cpp index 028ee77d1..2e5a5fbef 100644 --- a/src/V3FileLine.cpp +++ b/src/V3FileLine.cpp @@ -126,7 +126,7 @@ string VFileContent::getLine(int lineno) const { return ""; } } - const string text = m_lines[lineno]; + string text = 
m_lines[lineno]; UINFO(9, "Get Stream[ct" << m_id << "+" << lineno << "]: " << text); return text; } diff --git a/src/V3FileLine.h b/src/V3FileLine.h index 328b682cd..d49d5e44a 100644 --- a/src/V3FileLine.h +++ b/src/V3FileLine.h @@ -162,7 +162,7 @@ public: m_firstColumn = firstColumn; m_lastColumn = lastColumn; } - void language(V3LangCode lang) { singleton().numberToLang(filenameno(), lang); } + void language(V3LangCode lang) const { singleton().numberToLang(filenameno(), lang); } void filename(const string& name) { m_filenameno = singleton().nameToNumber(name); } void parent(FileLine* fileline) { m_parent = fileline; } void lineDirective(const char* textp, int& enterExitRef); @@ -213,7 +213,7 @@ public: void warnStyleOff(bool flag); void warnStateFrom(const FileLine& from) { m_warnOn = from.m_warnOn; } void warnResetDefault() { warnStateFrom(defaultFileLine()); } - bool lastWarnWaived() { return m_waive; } + bool lastWarnWaived() const { return m_waive; } // Specific flag ACCESSORS/METHODS bool celldefineOn() const { return m_warnOn.test(V3ErrorCode::I_CELLDEFINE); } diff --git a/src/V3HierBlock.cpp b/src/V3HierBlock.cpp index 22006fa23..2430e19db 100644 --- a/src/V3HierBlock.cpp +++ b/src/V3HierBlock.cpp @@ -201,7 +201,7 @@ string V3HierBlock::hierGenerated(bool withDir) const { } string V3HierBlock::vFileIfNecessary() const { - const string filename = V3Os::filenameRealPath(m_modp->fileline()->filename()); + string filename = V3Os::filenameRealPath(m_modp->fileline()->filename()); for (const string& v : v3Global.opt.vFiles()) { // Already listed in vFiles, so no need to add the file. 
if (filename == V3Os::filenameRealPath(v)) return ""; diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 29c4a4ca3..d54b96c11 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -339,7 +339,7 @@ bool V3Options::hasParameter(const string& name) { } string V3Options::parameter(const string& name) { - const string value = m_parameters.find(name)->second; + string value = m_parameters.find(name)->second; m_parameters.erase(m_parameters.find(name)); return value; } @@ -477,7 +477,7 @@ string V3Options::fileExists(const string& filename) { return ""; // Not found } // Check if it is a directory, ignore if so - const string filenameOut = V3Os::filenameFromDirBase(dir, basename); + string filenameOut = V3Os::filenameFromDirBase(dir, basename); if (!fileStatNormal(filenameOut)) return ""; // Directory return filenameOut; } @@ -518,11 +518,11 @@ string V3Options::filePath(FileLine* fl, const string& modname, const string& la // using the incdir and libext's. // Return "" if not found. for (const string& dir : m_impp->m_incDirUsers) { - const string exists = filePathCheckOneDir(modname, dir); + string exists = filePathCheckOneDir(modname, dir); if (exists != "") return exists; } for (const string& dir : m_impp->m_incDirFallbacks) { - const string exists = filePathCheckOneDir(modname, dir); + string exists = filePathCheckOneDir(modname, dir); if (exists != "") return exists; } diff --git a/src/V3ParseGrammar.cpp b/src/V3ParseGrammar.cpp index f2d57f289..38bae0374 100644 --- a/src/V3ParseGrammar.cpp +++ b/src/V3ParseGrammar.cpp @@ -270,7 +270,8 @@ string V3ParseGrammar::deQuote(FileLine* fileline, string text) { } else if (*cp == 'x' && isxdigit(cp[1]) && isxdigit(cp[2])) { // SystemVerilog 3.1 #define vl_decodexdigit(c) ((isdigit(c) ? 
((c) - '0') : (tolower((c)) - 'a' + 10))) - newtext += (char)(16 * vl_decodexdigit(cp[1]) + vl_decodexdigit(cp[2])); + newtext + += static_cast(16 * vl_decodexdigit(cp[1]) + vl_decodexdigit(cp[2])); cp += 2; } else if (isalnum(*cp)) { fileline->v3error("Unknown escape sequence: \\" << *cp); diff --git a/src/V3ParseImp.h b/src/V3ParseImp.h index 777e205cc..cc1918865 100644 --- a/src/V3ParseImp.h +++ b/src/V3ParseImp.h @@ -173,9 +173,10 @@ public: } static int debug() { static int level = -1; - if (VL_UNLIKELY(level < 0)) + if (VL_UNLIKELY(level < 0)) { level = std::max(std::max(debugBison(), debugFlex()), v3Global.opt.debugSrcLevel("V3ParseImp")); + } return level; } @@ -273,7 +274,7 @@ public: //==== Symbol tables V3ParseSym* symp() { return m_symp; } - AstPackage* unitPackage(FileLine* fl) { + AstPackage* unitPackage(FileLine* /*fl*/) { // Find one made earlier? const VSymEnt* const rootSymp = symp()->symRootp()->findIdFlat(AstPackage::dollarUnitName()); diff --git a/src/V3PartitionGraph.h b/src/V3PartitionGraph.h index 2fba23172..262a2b286 100644 --- a/src/V3PartitionGraph.h +++ b/src/V3PartitionGraph.h @@ -30,7 +30,7 @@ class AbstractMTask VL_NOT_FINAL : public V3GraphVertex { public: - AbstractMTask(V3Graph* graphp) + explicit AbstractMTask(V3Graph* graphp) : V3GraphVertex{graphp} {} virtual ~AbstractMTask() override = default; virtual uint32_t id() const = 0; @@ -42,7 +42,7 @@ public: // TYPES using VxList = std::list; // CONSTRUCTORS - AbstractLogicMTask(V3Graph* graphp) + explicit AbstractLogicMTask(V3Graph* graphp) : AbstractMTask{graphp} {} virtual ~AbstractLogicMTask() override = default; // METHODS diff --git a/src/V3PreProc.cpp b/src/V3PreProc.cpp index 7301d6b1e..7dfeb9c12 100644 --- a/src/V3PreProc.cpp +++ b/src/V3PreProc.cpp @@ -1612,7 +1612,7 @@ string V3PreProcImp::getline() { // Make new string with data up to the newline. 
const int len = rtnp - m_lineChars.c_str() + 1; - const string theLine(m_lineChars, 0, len); + string theLine(m_lineChars, 0, len); m_lineChars = m_lineChars.erase(0, len); // Remove returned characters if (debug() >= 4) { const string lncln = V3PreLex::cleanDbgStrg(theLine); diff --git a/src/V3Simulate.h b/src/V3Simulate.h index 195139cd2..378cdd0e4 100644 --- a/src/V3Simulate.h +++ b/src/V3Simulate.h @@ -183,7 +183,7 @@ public: } m_whyNotOptimizable = why; std::ostringstream stack; - for (auto& callstack : vlstd::reverse_view(m_callStack)) { + for (const auto& callstack : vlstd::reverse_view(m_callStack)) { AstFuncRef* const funcp = callstack->m_funcp; stack << "\n " << funcp->fileline() << "... Called from " << funcp->prettyName() << "() with parameters:"; @@ -193,9 +193,10 @@ public: AstVar* const portp = conIt->first; AstNode* const pinp = conIt->second->exprp(); AstNodeDType* const dtypep = pinp->dtypep(); - if (AstConst* const valp = fetchConstNull(pinp)) + if (AstConst* const valp = fetchConstNull(pinp)) { stack << "\n " << portp->prettyName() << " = " << prettyNumber(&valp->num(), dtypep); + } } } m_whyNotOptimizable += stack.str(); @@ -380,7 +381,7 @@ private: UASSERT_OBJ(vscp, nodep, "Not linked"); return vscp; } - int unrollCount() { + int unrollCount() const { return m_params ? 
v3Global.opt.unrollCount() * 16 : v3Global.opt.unrollCount(); } bool jumpingOver(AstNode* nodep) { diff --git a/src/V3TraceDecl.cpp b/src/V3TraceDecl.cpp index 791034307..09c2ac8f5 100644 --- a/src/V3TraceDecl.cpp +++ b/src/V3TraceDecl.cpp @@ -192,7 +192,9 @@ private: m_subFuncSize += stmtp->nodeCount(); } - std::string getScopeChar(VltTraceScope sct) { return std::string(1, (char)(0x80 + sct)); } + std::string getScopeChar(VltTraceScope sct) { + return std::string(1, static_cast(0x80 + sct)); + } std::string addAboveInterface(const std::string& scopeName) { std::string out; diff --git a/src/V3Tristate.cpp b/src/V3Tristate.cpp index 6a9d7f4a8..da76ddb17 100644 --- a/src/V3Tristate.cpp +++ b/src/V3Tristate.cpp @@ -360,7 +360,7 @@ class TristateVisitor final : public TristateBaseVisitor { VDouble0 m_statTriSigs; // stat tracking // METHODS - string dbgState() { + string dbgState() const { string o = (m_graphing ? " gr " : " ng "); if (m_alhs) o += "alhs "; return o; diff --git a/src/V3Unroll.cpp b/src/V3Unroll.cpp index 429708301..769faf369 100644 --- a/src/V3Unroll.cpp +++ b/src/V3Unroll.cpp @@ -58,7 +58,7 @@ private: VL_DEBUG_FUNC; // Declare debug() // VISITORS - bool cantUnroll(AstNode* nodep, const char* reason) { + bool cantUnroll(AstNode* nodep, const char* reason) const { if (m_generate) nodep->v3warn(E_UNSUPPORTED, "Unsupported: Can't unroll generate for; " << reason); UINFO(3, " Can't Unroll: " << reason << " :" << nodep << endl); diff --git a/src/VlcBucket.h b/src/VlcBucket.h index ef5fdcdc7..0160ae1b2 100644 --- a/src/VlcBucket.h +++ b/src/VlcBucket.h @@ -96,20 +96,20 @@ public: } uint64_t popCount() const { uint64_t pop = 0; - for (uint64_t i = 0; i < m_dataSize; i++) { - if (hits(i)) pop++; + for (uint64_t i = 0; i < m_dataSize; ++i) { + if (hits(i)) ++pop; } return pop; } uint64_t dataPopCount(const VlcBuckets& remaining) { uint64_t pop = 0; - for (uint64_t i = 0; i < m_dataSize; i++) { - if (hits(i) && remaining.hits(i)) pop++; + for (uint64_t i = 0; 
i < m_dataSize; ++i) { + if (hits(i) && remaining.hits(i)) ++pop; } return pop; } void orData(const VlcBuckets& ordata) { - for (uint64_t i = 0; i < m_dataSize; i++) { + for (uint64_t i = 0; i < m_dataSize; ++i) { if (hits(i) && ordata.hits(i)) clearHits(i); } } From f91793e931e56b8218ab7f54d068d90e7ca28436 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 30 Jul 2022 13:53:54 -0400 Subject: [PATCH 065/119] Revert - SC overrides cause non-override clang error. --- include/verilated_fst_sc.h | 2 +- include/verilated_vcd_sc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/verilated_fst_sc.h b/include/verilated_fst_sc.h index 872fe2e0b..9c7c07c4c 100644 --- a/include/verilated_fst_sc.h +++ b/include/verilated_fst_sc.h @@ -51,7 +51,7 @@ public: } spTrace()->set_time_resolution(sc_get_time_resolution().to_string()); } - ~VerilatedFstSc() override { close(); } + virtual ~VerilatedFstSc() /*override*/ { close(); } // METHODS /// Called by SystemC simulate() diff --git a/include/verilated_vcd_sc.h b/include/verilated_vcd_sc.h index 5610fcd9a..6f8edf3a1 100644 --- a/include/verilated_vcd_sc.h +++ b/include/verilated_vcd_sc.h @@ -54,7 +54,7 @@ public: spTrace()->set_time_resolution(sc_get_time_resolution().to_string()); } /// Destruct, flush, and close the dump - ~VerilatedVcdSc() override { close(); } + virtual ~VerilatedVcdSc() /*override*/ { close(); } // METHODS - for SC kernel // Called by SystemC simulate() From 152a6cd886dfb5417eb2baa30819832b083beada Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sun, 31 Jul 2022 18:13:55 +0100 Subject: [PATCH 066/119] Improve AstNode::foreach (also exists and forall) Speed improvements: - Use a direct, recursion-free implementation - Improve pre-fetching Functionality: - Support remove/replace of currently iterated node --- src/V3Ast.h | 344 +++++++++++++++++++++++++++++++----------------- src/V3Force.cpp | 8 +- 2 files changed, 230 insertions(+), 122 deletions(-) diff --git a/src/V3Ast.h 
b/src/V3Ast.h index 1302c1e76..4d1880ffd 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1334,25 +1334,27 @@ inline std::ostream& operator<<(std::ostream& os, const VNRelinker& rhs) { return os; } -//###################################################################### -// Callback base class to determine if node matches some formula +// ###################################################################### +// Callback base class to determine if node matches some formula class VNodeMatcher VL_NOT_FINAL { public: virtual bool nodeMatch(const AstNode* nodep) const { return true; } }; -//###################################################################### -// AstNode -- Base type of all Ast types +// ###################################################################### +// AstNode -- Base type of all Ast types // Prefetch a node. +#define ASTNODE_PREFETCH_NON_NULL(nodep) \ + do { \ + VL_PREFETCH_RD(&((nodep)->m_nextp)); \ + VL_PREFETCH_RD(&((nodep)->m_type)); \ + } while (false) // The if() makes it faster, even though prefetch won't fault on null pointers #define ASTNODE_PREFETCH(nodep) \ do { \ - if (nodep) { \ - VL_PREFETCH_RD(&((nodep)->m_nextp)); \ - VL_PREFETCH_RD(&((nodep)->m_type)); \ - } \ + if (nodep) ASTNODE_PREFETCH_NON_NULL(nodep); \ } while (false) class AstNode VL_NOT_FINAL { @@ -1859,12 +1861,6 @@ private: // Note: specializations for particular node types are provided by 'astgen' template inline static bool privateTypeTest(const AstNode* nodep); - // For internal use only. - // Note: specializations for particular node types are provided below - template inline static bool privateMayBeUnder(const AstNode* nodep) { - return true; - } - // For internal use only. template constexpr static bool uselessCast() { using NonRef = typename std::remove_reference::type; @@ -1923,102 +1919,39 @@ public: // Predicate that returns true if the given 'nodep' might have a descendant of type 'T_Node'. // This is conservative and is used to speed up traversals. 
- template inline static bool mayBeUnder(const AstNode* nodep) { + // Note: specializations for particular node types are provided below + template static bool mayBeUnder(const AstNode* nodep) { static_assert(!std::is_const::value, "Type parameter 'T_Node' should not be const qualified"); static_assert(std::is_base_of::value, "Type parameter 'T_Node' must be a subtype of AstNode"); - return privateMayBeUnder(nodep); + return true; + } + + // Predicate that is true for node subtypes 'T_Node' that do not have any children + // This is conservative and is used to speed up traversals. + // Note: specializations for particular node types are provided below + template static constexpr bool isLeaf() { + static_assert(!std::is_const::value, + "Type parameter 'T_Node' should not be const qualified"); + static_assert(std::is_base_of::value, + "Type parameter 'T_Node' must be a subtype of AstNode"); + return false; } private: - template - static void foreachImpl( - // Using std::conditional for const correctness in the public 'foreach' functions - typename std::conditional::value, const AstNode*, AstNode*>::type - nodep, - std::function f) { + // Using std::conditional for const correctness in the public 'foreach' functions + template + using ConstCorrectAstNode = + typename std::conditional::value, const AstNode, AstNode>::type; - // Note: Using a loop to iterate the nextp() chain, instead of tail recursion, because - // debug builds don't eliminate tail calls, causing stack overflow on long lists of nodes. 
- do { - // Prefetch children and next - ASTNODE_PREFETCH(nodep->op1p()); - ASTNODE_PREFETCH(nodep->op2p()); - ASTNODE_PREFETCH(nodep->op3p()); - ASTNODE_PREFETCH(nodep->op4p()); - if VL_CONSTEXPR_CXX17 (VisitNext) ASTNODE_PREFETCH(nodep->nextp()); + template + inline static void foreachImpl(ConstCorrectAstNode* nodep, + const std::function& f, bool visitNext); - // Apply function in pre-order - if (privateTypeTest::type>(nodep)) { - f(static_cast(nodep)); - } - - // Traverse children (including their 'nextp()' chains), unless futile - if (mayBeUnder::type>(nodep)) { - if (AstNode* const op1p = nodep->op1p()) foreachImpl(op1p, f); - if (AstNode* const op2p = nodep->op2p()) foreachImpl(op2p, f); - if (AstNode* const op3p = nodep->op3p()) foreachImpl(op3p, f); - if (AstNode* const op4p = nodep->op4p()) foreachImpl(op4p, f); - } - - // Traverse 'nextp()' chain if requested - if VL_CONSTEXPR_CXX17 (VisitNext) { - nodep = nodep->nextp(); - } else { - break; - } - } while (nodep); - } - - template - static bool predicateImpl( - // Using std::conditional for const correctness in the public 'foreach' functions - typename std::conditional::value, const AstNode*, AstNode*>::type - nodep, - std::function p) { - - // Note: Using a loop to iterate the nextp() chain, instead of tail recursion, because - // debug builds don't eliminate tail calls, causing stack overflow on long lists of nodes. 
- do { - // Prefetch children and next - ASTNODE_PREFETCH(nodep->op1p()); - ASTNODE_PREFETCH(nodep->op2p()); - ASTNODE_PREFETCH(nodep->op3p()); - ASTNODE_PREFETCH(nodep->op4p()); - if VL_CONSTEXPR_CXX17 (VisitNext) ASTNODE_PREFETCH(nodep->nextp()); - - // Apply function in pre-order - if (privateTypeTest::type>(nodep)) { - if (p(static_cast(nodep)) != Default) return !Default; - } - - // Traverse children (including their 'nextp()' chains), unless futile - if (mayBeUnder::type>(nodep)) { - if (AstNode* const op1p = nodep->op1p()) { - if (predicateImpl(op1p, p) != Default) return !Default; - } - if (AstNode* const op2p = nodep->op2p()) { - if (predicateImpl(op2p, p) != Default) return !Default; - } - if (AstNode* const op3p = nodep->op3p()) { - if (predicateImpl(op3p, p) != Default) return !Default; - } - if (AstNode* const op4p = nodep->op4p()) { - if (predicateImpl(op4p, p) != Default) return !Default; - } - } - - // Traverse 'nextp()' chain if requested - if VL_CONSTEXPR_CXX17 (VisitNext) { - nodep = nodep->nextp(); - } else { - break; - } - } while (nodep); - - return Default; - } + template + inline static bool predicateImpl(ConstCorrectAstNode* nodep, + const std::function& p); template constexpr static bool checkTypeParameter() { static_assert(!std::is_const::value, @@ -2030,31 +1963,32 @@ private: public: // Traverse subtree and call given function 'f' in pre-order on each node that has type - // 'T_Node'. Prefer 'foreach' over simple VNVisitor that only needs to handle a single (or a - // few) node types, as it's easier to write, but more importantly, the dispatch to the - // operation function in 'foreach' should be completely predictable by branch target caches in - // modern CPUs, while it is basically unpredictable for VNVisitor. + // 'T_Node'. The node passd to the function 'f' can be removed or replaced, but other editing + // of the iterated tree is not safe. 
Prefer 'foreach' over simple VNVisitor that only needs to + // handle a single (or a few) node types, as it's easier to write, but more importantly, the + // dispatch to the operation function in 'foreach' should be completely predictable by branch + // target caches in modern CPUs, while it is basically unpredictable for VNVisitor. template void foreach (std::function f) { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); - foreachImpl(this, f); + foreachImpl(this, f, /* visitNext: */ false); } // Same as above, but for 'const' nodes template void foreach (std::function f) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); - foreachImpl(this, f); + foreachImpl(this, f, /* visitNext: */ false); } // Same as 'foreach' but also follows 'this->nextp()' template void foreachAndNext(std::function f) { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); - foreachImpl(this, f); + foreachImpl(this, f, /* visitNext: */ true); } // Same as 'foreach' but also follows 'this->nextp()' template void foreachAndNext(std::function f) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); - foreachImpl(this, f); + foreachImpl(this, f, /* visitNext: */ true); } // Given a predicate function 'p' return true if and only if there exists a node of type @@ -2063,13 +1997,13 @@ public: // result can be determined. template bool exists(std::function p) { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); - return predicateImpl(this, p); + return predicateImpl(this, p); } // Same as above, but for 'const' nodes template void exists(std::function p) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); - return predicateImpl(this, p); + return predicateImpl(this, p); } // Given a predicate function 'p' return true if and only if all nodes of type @@ -2078,13 +2012,13 @@ public: // result can be determined. 
template bool forall(std::function p) { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); - return predicateImpl(this, p); + return predicateImpl(this, p); } // Same as above, but for 'const' nodes template void forall(std::function p) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); - return predicateImpl(this, p); + return predicateImpl(this, p); } int nodeCount() const { @@ -2098,22 +2032,196 @@ public: // Specialisations of privateTypeTest #include "V3Ast__gen_impl.h" // From ./astgen -// Specializations of privateMayBeUnder -template <> inline bool AstNode::privateMayBeUnder(const AstNode* nodep) { +// Specializations of AstNode::mayBeUnder +template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { return !VN_IS(nodep, NodeStmt) && !VN_IS(nodep, NodeMath); } -template <> inline bool AstNode::privateMayBeUnder(const AstNode* nodep) { +template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { return !VN_IS(nodep, NodeMath); } -template <> inline bool AstNode::privateMayBeUnder(const AstNode* nodep) { +template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { return !VN_IS(nodep, NodeStmt) && !VN_IS(nodep, NodeMath); } -template <> inline bool AstNode::privateMayBeUnder(const AstNode* nodep) { +template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { if (VN_IS(nodep, ExecGraph)) return false; // Should not nest if (VN_IS(nodep, NodeStmt)) return false; // Should be directly under CFunc return true; } +// Specializations of AstNode::isLeaf +template <> constexpr bool AstNode::isLeaf() { return true; } +template <> constexpr bool AstNode::isLeaf() { return true; } +template <> constexpr bool AstNode::isLeaf() { return true; } + +// foreach implementation +template +void AstNode::foreachImpl(ConstCorrectAstNode* nodep, const std::function& f, + bool visitNext) { + // Checking the function is bound up front eliminates this check from the loop at invocation + if (!f) { + 
nodep->v3fatal("AstNode::foreach called with unbound function"); // LCOV_EXCL_LINE + } else { + // Pre-order traversal implemented directly (without recursion) for speed reasons. The very + // first iteration (the one that operates on the input nodep) is special, as we might or + // might not need to enqueue nodep->nextp() depending on VisitNext, while in all other + // iterations, we do want to enqueue nodep->nextp(). Duplicating code (via + // 'foreachImplVisit') for the initial iteration here to avoid an extra branch in the loop + + using T_Arg_NonConst = typename std::remove_const::type; + using Node = ConstCorrectAstNode; + + // Traversal stack + std::vector stack; // Kept as a vector for easy resizing + Node** basep = nullptr; // Pointer to base of stack + Node** topp = nullptr; // Pointer to top of stack + Node** limp = nullptr; // Pointer to stack limit (when need growing) + + // We prefetch this far into the stack + constexpr int prefetchDistance = 2; + + // Grow stack to given size + const auto grow = [&](size_t size) VL_ATTR_ALWINLINE { + const ptrdiff_t occupancy = topp - basep; + stack.resize(size); + basep = stack.data() + prefetchDistance; + topp = basep + occupancy; + limp = basep + size - 5; // We push max 5 items per iteration + }; + + // Initial stack size + grow(32); + + // We want some non-null pointers at the beginning. These will be prefetched, but not + // visited, so the root node will suffice. This eliminates needing branches in the loop. 
+ for (int i = -prefetchDistance; i; ++i) basep[i] = nodep; + + // Visit given node, enqueue children for traversal + const auto visit = [&](Node* currp) VL_ATTR_ALWINLINE { + // Type test this node + if (AstNode::privateTypeTest(currp)) { + // Call the client function + f(static_cast(currp)); + // Short circuit if iterating leaf nodes + if VL_CONSTEXPR_CXX17 (isLeaf()) return; + } + + // Enqueue children for traversal, unless futile + if (mayBeUnder(currp)) { + if (AstNode* const op4p = currp->op4p()) *topp++ = op4p; + if (AstNode* const op3p = currp->op3p()) *topp++ = op3p; + if (AstNode* const op2p = currp->op2p()) *topp++ = op2p; + if (AstNode* const op1p = currp->op1p()) *topp++ = op1p; + } + }; + + // Enqueue the next of the root node, if required + if (visitNext && nodep->nextp()) *topp++ = nodep->nextp(); + + // Visit the root node + visit(nodep); + + // Visit the rest of the tree + while (VL_LIKELY(topp > basep)) { + // Pop next node in the traversal + Node* const headp = *--topp; + + // Prefetch in case we are ascending the tree + ASTNODE_PREFETCH_NON_NULL(topp[-prefetchDistance]); + + // Ensure we have stack space for nextp and the 4 children + if (VL_UNLIKELY(topp >= limp)) grow(stack.size() * 2); + + // Enqueue the next node + if (headp->nextp()) *topp++ = headp->nextp(); + + // Visit the head node + visit(headp); + } + } +} + +// predicate implementation +template +bool AstNode::predicateImpl(ConstCorrectAstNode* nodep, + const std::function& p) { + // Implementation similar to foreach, but abort traversal as soon as result is determined. 
+ if (!p) { + nodep->v3fatal("AstNode::foreach called with unbound function"); // LCOV_EXCL_LINE + } else { + using T_Arg_NonConst = typename std::remove_const::type; + using Node = ConstCorrectAstNode; + + // Traversal stack + std::vector stack; // Kept as a vector for easy resizing + Node** basep = nullptr; // Pointer to base of stack + Node** topp = nullptr; // Pointer to top of stack + Node** limp = nullptr; // Pointer to stack limit (when need growing) + + // We prefetch this far into the stack + constexpr int prefetchDistance = 2; + + // Grow stack to given size + const auto grow = [&](size_t size) VL_ATTR_ALWINLINE { + const ptrdiff_t occupancy = topp - basep; + stack.resize(size); + basep = stack.data() + prefetchDistance; + topp = basep + occupancy; + limp = basep + size - 5; // We push max 5 items per iteration + }; + + // Initial stack size + grow(32); + + // We want some non-null pointers at the beginning. These will be prefetched, but not + // visited, so the root node will suffice. This eliminates needing branches in the loop. + for (int i = -prefetchDistance; i; ++i) basep[i] = nodep; + + // Visit given node, enqueue children for traversal, return true if result determined. 
+ const auto visit = [&](Node* currp) VL_ATTR_ALWINLINE { + // Type test this node + if (AstNode::privateTypeTest(currp)) { + // Call the client function + if (p(static_cast(currp)) != Default) return true; + // Short circuit if iterating leaf nodes + if VL_CONSTEXPR_CXX17 (isLeaf()) return false; + } + + // Enqueue children for traversal, unless futile + if (mayBeUnder(currp)) { + if (AstNode* const op4p = currp->op4p()) *topp++ = op4p; + if (AstNode* const op3p = currp->op3p()) *topp++ = op3p; + if (AstNode* const op2p = currp->op2p()) *topp++ = op2p; + if (AstNode* const op1p = currp->op1p()) *topp++ = op1p; + } + + return false; + }; + + // Visit the root node + if (visit(nodep)) return !Default; + + // Visit the rest of the tree + while (VL_LIKELY(topp > basep)) { + // Pop next node in the traversal + Node* const headp = *--topp; + + // Prefetch in case we are ascending the tree + ASTNODE_PREFETCH_NON_NULL(topp[-prefetchDistance]); + + // Ensure we have stack space for nextp and the 4 children + if (VL_UNLIKELY(topp >= limp)) grow(stack.size() * 2); + + // Enqueue the next node + if (headp->nextp()) *topp++ = headp->nextp(); + + // Visit the head node + if (visit(headp)) return !Default; + } + + return Default; + } +} + inline std::ostream& operator<<(std::ostream& os, const AstNode* rhs) { if (!rhs) { os << "nullptr"; diff --git a/src/V3Force.cpp b/src/V3Force.cpp index 137f74fde..2c21b842c 100644 --- a/src/V3Force.cpp +++ b/src/V3Force.cpp @@ -151,14 +151,14 @@ class ForceConvertVisitor final : public VNVisitor { // referenced AstVarScope with the given function. 
void transformWritenVarScopes(AstNode* nodep, std::function f) { UASSERT_OBJ(nodep->backp(), nodep, "Must have backp, otherwise will be lost if replaced"); - nodep->foreach([this, &f](AstNodeVarRef* refp) { + nodep->foreach([&f](AstNodeVarRef* refp) { if (refp->access() != VAccess::WRITE) return; // TODO: this is not strictly speaking safe for some complicated lvalues, eg.: // 'force foo[a(cnt)] = 1;', where 'cnt' is an out parameter, but it will // do for now... refp->replaceWith( new AstVarRef{refp->fileline(), f(refp->varScopep()), VAccess::WRITE}); - pushDeletep(refp); + VL_DO_DANGLING(refp->deleteTree(), refp); }); } @@ -238,7 +238,7 @@ class ForceConvertVisitor final : public VNVisitor { flp->warnOff(V3ErrorCode::BLKANDNBLK, true); AstVarRef* const newpRefp = new AstVarRef{flp, newVscp, VAccess::WRITE}; refp->replaceWith(newpRefp); - pushDeletep(refp); + VL_DO_DANGLING(refp->deleteTree(), refp); }); // Replace write refs on RHS resetRdp->rhsp()->foreach([this](AstNodeVarRef* refp) { @@ -249,7 +249,7 @@ class ForceConvertVisitor final : public VNVisitor { AstVarRef* const newpRefp = new AstVarRef{refp->fileline(), newVscp, VAccess::READ}; newpRefp->user2(1); // Don't replace this read ref with the read signal refp->replaceWith(newpRefp); - pushDeletep(refp); + VL_DO_DANGLING(refp->deleteTree(), refp); }); resetEnp->addNext(resetRdp); From 2ab6272cc7c62cffdab8500031083cfdc0186166 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sun, 31 Jul 2022 18:36:56 +0100 Subject: [PATCH 067/119] Use AstNode::foreach in V3Gate This yields a little speedup. 
--- src/V3Ast.h | 7 +- src/V3Gate.cpp | 181 ++++++++++++++++++++----------------------------- 2 files changed, 78 insertions(+), 110 deletions(-) diff --git a/src/V3Ast.h b/src/V3Ast.h index 4d1880ffd..e93dc4d86 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -2040,7 +2040,12 @@ template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) return !VN_IS(nodep, NodeMath); } template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { - return !VN_IS(nodep, NodeStmt) && !VN_IS(nodep, NodeMath); + if (VN_IS(nodep, VarScope)) return false; // Should not nest + if (VN_IS(nodep, Var)) return false; + if (VN_IS(nodep, Active)) return false; + if (VN_IS(nodep, NodeStmt)) return false; + if (VN_IS(nodep, NodeMath)) return false; + return true; } template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { if (VN_IS(nodep, ExecGraph)) return false; // Should not nest diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index e5cecd07a..f70f122b0 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -34,9 +34,10 @@ #include #include -#include #include +class GateDedupeVarVisitor; + using GateVarRefList = std::list; constexpr int GATE_DEDUP_MAX_DEPTH = 20; @@ -297,6 +298,12 @@ public: const GateVarRefList& rhsVarRefs() const { return m_rhsVarRefs; } }; +//###################################################################### +// Replace refs to 'varscp' with 'substp' in 'consumerp' + +static bool eliminate(AstNode* consumerp, AstVarScope* varscp, AstNode* substp, + GateDedupeVarVisitor* varVisp); + //###################################################################### // Gate class functions @@ -368,9 +375,23 @@ private: return vertexp; } + void optimizeElimVar(AstVarScope* varscp, AstNode* substp, AstNode* consumerp) { + if (debug() >= 5) consumerp->dumpTree(cout, " elimUsePre: "); + const bool replaced = eliminate(consumerp, varscp, substp, nullptr); + if (!replaced) return; + + if (debug() >= 9) consumerp->dumpTree(cout, " elimUseCns: "); + // Caution: Can't let 
V3Const change our handle to consumerp, such as by + // optimizing away this assignment, etc. + consumerp = V3Const::constifyEdit(consumerp); + if (debug() >= 5) consumerp->dumpTree(cout, " elimUseDne: "); + // Some previous input edges may have disappeared, perhaps all of them. + // If we remove the edges we can further optimize + // See e.g t_var_overzero.v. + } + void optimizeSignals(bool allowMultiIn); bool elimLogicOkOutputs(GateLogicVertex* consumeVertexp, const GateOkVisitor& okVisitor); - void optimizeElimVar(AstVarScope* varscp, AstNode* substp, AstNode* consumerp); void warnSignals(); void consumedMark(); void consumedMarkRecurse(GateEitherVertex* vertexp); @@ -817,83 +838,6 @@ void GateVisitor::warnSignals() { } } -//###################################################################### -// Push constant into expressions and reevaluate - -class GateDedupeVarVisitor; - -class GateElimVisitor final : public GateBaseVisitor { -private: - // NODE STATE - // STATE - const AstVarScope* const m_elimVarScp; // Variable being eliminated - AstNode* const m_replaceTreep; // What to replace the variable with - bool m_didReplace = false; // Did we do any replacements - GateDedupeVarVisitor* const m_varVisp; // Callback to keep hash up to date - - // METHODS - void hashReplace(AstNode* oldp, AstNode* newp); - - // VISITORS - virtual void visit(AstNodeVarRef* nodep) override { - if (nodep->varScopep() == m_elimVarScp) { - // Substitute in the new tree - // It's possible we substitute into something that will be reduced more later, - // however, as we never delete the top Always/initial statement, all should be well. - m_didReplace = true; - UASSERT_OBJ(nodep->access().isReadOnly(), nodep, - "Can't replace lvalue assignments with const var"); - AstNode* const substp = m_replaceTreep->cloneTree(false); - UASSERT_OBJ(!(VN_IS(substp, NodeVarRef) && nodep->same(substp)), - // Prevent an infinite loop... 
- substp, "Replacing node with itself; perhaps circular logic?"); - // Which fileline() to use? - // If replacing with logic, an error/warning is likely to want to point to the logic - // IE what we're replacing with. - // However a VARREF should point to the original as it's otherwise confusing - // to throw warnings that point to a PIN rather than where the pin us used. - if (VN_IS(substp, VarRef)) substp->fileline(nodep->fileline()); - // Make the substp an rvalue like nodep. This facilitates the hashing in dedupe. - if (AstNodeVarRef* const varrefp = VN_CAST(substp, NodeVarRef)) - varrefp->access(VAccess::READ); - hashReplace(nodep, substp); - nodep->replaceWith(substp); - VL_DO_DANGLING(nodep->deleteTree(), nodep); - } - } - virtual void visit(AstNode* nodep) override { iterateChildren(nodep); } - -public: - // CONSTRUCTORS - virtual ~GateElimVisitor() override = default; - GateElimVisitor(AstNode* nodep, AstVarScope* varscp, AstNode* replaceTreep, - GateDedupeVarVisitor* varVisp) - : m_elimVarScp{varscp} - , m_replaceTreep{replaceTreep} - , m_varVisp{varVisp} { - UINFO(9, " elimvisitor " << nodep << endl); - UINFO(9, " elim varscp " << varscp << endl); - UINFO(9, " elim repce " << replaceTreep << endl); - iterate(nodep); - } - bool didReplace() const { return m_didReplace; } -}; - -void GateVisitor::optimizeElimVar(AstVarScope* varscp, AstNode* substp, AstNode* consumerp) { - if (debug() >= 5) consumerp->dumpTree(cout, " elimUsePre: "); - const GateElimVisitor elimVisitor{consumerp, varscp, substp, nullptr}; - if (elimVisitor.didReplace()) { - if (debug() >= 9) consumerp->dumpTree(cout, " elimUseCns: "); - // Caution: Can't let V3Const change our handle to consumerp, such as by - // optimizing away this assignment, etc. - consumerp = V3Const::constifyEdit(consumerp); - if (debug() >= 5) consumerp->dumpTree(cout, " elimUseDne: "); - // Some previous input edges may have disappeared, perhaps all of them. 
- // If we remove the edges we can further optimize - // See e.g t_var_overzero.v. - } -} - //###################################################################### // Auxiliary hash class for GateDedupeVarVisitor @@ -1113,9 +1057,47 @@ public: //###################################################################### -void GateElimVisitor::hashReplace(AstNode* oldp, AstNode* newp) { - UINFO(9, "hashReplace " << (void*)oldp << " -> " << (void*)newp << endl); - if (m_varVisp) m_varVisp->hashReplace(oldp, newp); +static bool eliminate(AstNode* consumerp, AstVarScope* varscp, AstNode* substp, + GateDedupeVarVisitor* varVisp) { + UINFO(9, " Eliminating inside: " << consumerp << endl); + UINFO(9, " Eliminating varscopep: " << varscp << endl); + UINFO(9, " Eliminating substitute: " << substp << endl); + + bool didReplace = false; // Did we do any replacements + consumerp->foreach([=, &didReplace](AstNodeVarRef* nodep) { + if (nodep->varScopep() != varscp) return; + + // Substitute in the new tree +#ifdef VL_DEBUG // Can be hot code, so expensive + UASSERT_OBJ(nodep->access().isReadOnly(), nodep, + "Can't replace lvalue assignments with const var"); + UASSERT_OBJ(!(VN_IS(substp, NodeVarRef) && nodep->same(substp)), + // Prevent an infinite loop... + substp, "Replacing node with itself; perhaps circular logic?"); +#endif + // It's possible we substitute into something that will be reduced more later, + // however, as we never delete the top Always/initial statement, all should be well. + didReplace = true; + + // The replacement + AstNode* const newp = substp->cloneTree(false); + // Which fileline() to use? If replacing with logic, an error/warning is likely to want + // to point to the logic IE what we're replacing with. However, a VARREF should point + // to the original as it's otherwise confusing to throw warnings that point to a PIN + // rather than where the pin us used. 
+ if (VN_IS(newp, VarRef)) newp->fileline(nodep->fileline()); + // Make the newp an rvalue like nodep. This facilitates the hashing in dedupe. + if (AstNodeVarRef* const varrefp = VN_CAST(newp, NodeVarRef)) { + varrefp->access(VAccess::READ); + } + // Update hash? + if (varVisp) varVisp->hashReplace(nodep, newp); + // Replace the node + nodep->replaceWith(newp); + VL_DO_DANGLING(nodep->deleteTree(), nodep); + }); + + return didReplace; } //###################################################################### @@ -1170,8 +1152,7 @@ private: if (lvertexp == consumeVertexp) { UINFO(9, "skipping as self-recirculates\n"); } else { - const GateElimVisitor elimVisitor(consumerp, vvertexp->varScp(), - dupVarRefp, &m_varVisitor); + eliminate(consumerp, vvertexp->varScp(), dupVarRefp, &m_varVisitor); } outedgep = outedgep->relinkFromp(dupVvertexp); } @@ -1571,12 +1552,13 @@ void GateVisitor::decomposeClkVectors() { } //###################################################################### -// Convert VARSCOPE(ASSIGN(default, VARREF)) to just VARSCOPE(default) +// Gate class functions -class GateDeassignVisitor final : public GateBaseVisitor { -private: - // VISITORS - virtual void visit(AstVarScope* nodep) override { +void V3Gate::gateAll(AstNetlist* nodep) { + UINFO(2, __FUNCTION__ << ": " << endl); + { const GateVisitor visitor{nodep}; } // Destruct before checking + + nodep->foreach([](AstVarScope* nodep) { if (AstNodeAssign* const assp = VN_CAST(nodep->valuep(), NodeAssign)) { UINFO(5, " Removeassign " << assp << endl); AstNode* const valuep = assp->rhsp(); @@ -1584,26 +1566,7 @@ private: assp->replaceWith(valuep); VL_DO_DANGLING(assp->deleteTree(), assp); } - } - // Speedups - virtual void visit(AstVar*) override {} // Accelerate - virtual void visit(AstActive*) override {} // Accelerate - virtual void visit(AstNode* nodep) override { iterateChildren(nodep); } + }); -public: - // CONSTRUCTORS - explicit GateDeassignVisitor(AstNode* nodep) { iterate(nodep); } - virtual 
~GateDeassignVisitor() override = default; -}; - -//###################################################################### -// Gate class functions - -void V3Gate::gateAll(AstNetlist* nodep) { - UINFO(2, __FUNCTION__ << ": " << endl); - { - const GateVisitor visitor{nodep}; - GateDeassignVisitor{nodep}; - } // Destruct before checking V3Global::dumpCheckGlobalTree("gate", 0, v3Global.opt.dumpTreeLevel(__FILE__) >= 3); } From 682a60e32547f743bdbbc9852485e639e272f741 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sun, 31 Jul 2022 20:04:39 +0100 Subject: [PATCH 068/119] Cleanup V3Gate, no functional change --- src/V3Gate.cpp | 215 +++++++++++++++++++------------------------------ 1 file changed, 85 insertions(+), 130 deletions(-) diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index f70f122b0..a5256c3c9 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -558,141 +558,96 @@ public: void GateVisitor::optimizeSignals(bool allowMultiIn) { for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp = itp->verticesNextp()) { - if (GateVarVertex* const vvertexp = dynamic_cast(itp)) { - if (vvertexp->inEmpty()) { - vvertexp->clearReducibleAndDedupable("inEmpty"); // Can't deal with no sources - if (!vvertexp->isTop() // Ok if top inputs are driverless - && !vvertexp->varScp()->varp()->valuep() - && !vvertexp->varScp()->varp()->isSigPublic()) { - UINFO(4, "No drivers " << vvertexp->varScp() << endl); - if (false) { - // If we warned here after constant propagation, what the user considered - // reasonable logic may have disappeared. Issuing a warning would - // thus be confusing. V3Undriven now handles this. - vvertexp->varScp()->varp()->v3warn( - UNDRIVEN, "Signal has no drivers: '" - << vvertexp->scopep()->prettyName() << "." 
- << vvertexp->varScp()->varp()->prettyName() << "'"); - } - } - } else if (!vvertexp->inSize1()) { - // Can't deal with more than one src - vvertexp->clearReducibleAndDedupable("size!1"); + GateVarVertex* const vvertexp = dynamic_cast(itp); + + // Consider "inlining" variables + if (!vvertexp) continue; + + if (vvertexp->inEmpty()) { // Can't deal with no sources + vvertexp->clearReducibleAndDedupable("inEmpty"); + } else if (!vvertexp->inSize1()) { // Can't deal with more than one src + vvertexp->clearReducibleAndDedupable("size!1"); + } + + // Reduce it? + if (!vvertexp->reducible()) continue; + + // Grab the driving logic + GateLogicVertex* const logicVertexp + = static_cast(vvertexp->inBeginp()->fromp()); + if (!logicVertexp->reducible()) continue; + + // Can we eliminate? + AstNode* const logicp = logicVertexp->nodep(); + const GateOkVisitor okVisitor{logicp, vvertexp->isClock(), false}; + + // Was it ok? + if (!okVisitor.isSimple()) continue; + + // Does it read multiple source variables? + if (okVisitor.rhsVarRefs().size() > 1) { + if (!allowMultiIn) continue; + // Do it if not used, or used only once, ignoring traces + int n = 0; + for (V3GraphEdge* edgep = vvertexp->outBeginp(); edgep; edgep = edgep->outNextp()) { + const GateLogicVertex* const consumeVertexp + = static_cast(edgep->top()); + // Ignore tracing or other slow path junk, or if the destination is not used + if (!consumeVertexp->slow() && consumeVertexp->outBeginp()) n += edgep->weight(); + if (n > 1) break; } - // Reduce it? 
- if (!vvertexp->reducible()) { - UINFO(8, "SigNotRed " << vvertexp->name() << endl); + + if (n > 1) continue; + } + + // Process it + AstNode* const substp = okVisitor.substTree(); + if (debug() >= 5) logicp->dumpTree(cout, " elimVar: "); + if (debug() >= 5) substp->dumpTree(cout, " subst: "); + ++m_statSigs; + bool removedAllUsages = true; + for (V3GraphEdge* edgep = vvertexp->outBeginp(); edgep;) { + GateLogicVertex* const consumeVertexp = static_cast(edgep->top()); + AstNode* const consumerp = consumeVertexp->nodep(); + if (!elimLogicOkOutputs(consumeVertexp, okVisitor /*ref*/)) { + // Cannot optimize this replacement + removedAllUsages = false; + edgep = edgep->outNextp(); } else { - UINFO(8, "Sig " << vvertexp->name() << endl); - GateLogicVertex* const logicVertexp - = dynamic_cast(vvertexp->inBeginp()->fromp()); - UINFO(8, " From " << logicVertexp->name() << endl); - AstNode* logicp = logicVertexp->nodep(); - if (logicVertexp->reducible()) { - // Can we eliminate? - const GateOkVisitor okVisitor{logicp, vvertexp->isClock(), false}; - const bool multiInputs = okVisitor.rhsVarRefs().size() > 1; - // Was it ok? - bool doit = okVisitor.isSimple(); - if (doit && multiInputs) { - if (!allowMultiIn) doit = false; - // Doit if one input, or not used, or used only once, ignoring traces - int n = 0; - for (V3GraphEdge* edgep = vvertexp->outBeginp(); edgep; - edgep = edgep->outNextp()) { - const GateLogicVertex* const consumeVertexp - = dynamic_cast(edgep->top()); - if (!consumeVertexp->slow()) { // Not tracing or other slow path junk - if (edgep->top()->outBeginp()) { // Destination is itself used - n += edgep->weight(); - } - } - if (n > 1) { - doit = false; - break; - } - } - } - // Process it - if (!doit) { - if (allowMultiIn && (debug() >= 9)) { - UINFO(9, "Not ok simp" << okVisitor.isSimple() << " mi" << multiInputs - << " ob" << vvertexp->outBeginp() << " on" - << (vvertexp->outBeginp() - ? 
vvertexp->outBeginp()->outNextp() - : nullptr) - << " " << vvertexp->name() << endl); - for (V3GraphEdge* edgep = vvertexp->outBeginp(); edgep; - edgep = edgep->outNextp()) { - const GateLogicVertex* const consumeVertexp - = dynamic_cast(edgep->top()); - UINFO(9, " edge " << edgep << " to: " << consumeVertexp->nodep() - << endl); - } - for (V3GraphEdge* edgep = vvertexp->inBeginp(); edgep; - edgep = edgep->inNextp()) { - const GateLogicVertex* const consumeVertexp - = dynamic_cast(edgep->fromp()); - UINFO(9, " edge " << edgep << " from: " - << consumeVertexp->nodep() << endl); - } - } - } else { - AstNode* const substp = okVisitor.substTree(); - if (debug() >= 5) logicp->dumpTree(cout, " elimVar: "); - if (debug() >= 5) substp->dumpTree(cout, " subst: "); - ++m_statSigs; - bool removedAllUsages = true; - for (V3GraphEdge* edgep = vvertexp->outBeginp(); edgep;) { - GateLogicVertex* const consumeVertexp - = dynamic_cast(edgep->top()); - AstNode* const consumerp = consumeVertexp->nodep(); - if (!elimLogicOkOutputs(consumeVertexp, okVisitor /*ref*/)) { - // Cannot optimize this replacement - removedAllUsages = false; - edgep = edgep->outNextp(); - } else { - optimizeElimVar(vvertexp->varScp(), substp, consumerp); - // If the new replacement referred to a signal, - // Correct the graph to point to this new generating variable - const GateVarRefList& rhsVarRefs = okVisitor.rhsVarRefs(); - for (GateVarRefList::const_iterator it = rhsVarRefs.begin(); - it != rhsVarRefs.end(); ++it) { - AstVarScope* const newvarscp = (*it)->varScopep(); - UINFO(9, " Point-to-new vertex " << newvarscp << endl); - GateVarVertex* const varvertexp = makeVarVertex(newvarscp); - new V3GraphEdge(&m_graph, varvertexp, consumeVertexp, 1); - // Propagate clock attribute onto generating node - varvertexp->propagateAttrClocksFrom(vvertexp); - } - // Remove the edge - VL_DO_DANGLING(edgep->unlinkDelete(), edgep); - ++m_statRefs; - edgep = vvertexp->outBeginp(); - } - } - if (removedAllUsages) { - // 
Remove input links - while (V3GraphEdge* const edgep = vvertexp->inBeginp()) { - VL_DO_DANGLING(edgep->unlinkDelete(), edgep); - } - // Clone tree so we remember it for tracing, and keep the pointer - // to the "ALWAYS" part of the tree as part of this statement - // That way if a later signal optimization that - // retained a pointer to the always can - // optimize it further - logicp->unlinkFrBack(); - vvertexp->varScp()->valuep(logicp); - logicp = nullptr; - // Mark the vertex so we don't mark it as being - // unconsumed in the next step - vvertexp->user(true); - logicVertexp->user(true); - } - } + optimizeElimVar(vvertexp->varScp(), substp, consumerp); + // If the new replacement referred to a signal, + // Correct the graph to point to this new generating variable + const GateVarRefList& rhsVarRefs = okVisitor.rhsVarRefs(); + for (AstNodeVarRef* const refp : rhsVarRefs) { + AstVarScope* const newvarscp = refp->varScopep(); + GateVarVertex* const varvertexp = makeVarVertex(newvarscp); + new V3GraphEdge(&m_graph, varvertexp, consumeVertexp, 1); + // Propagate clock attribute onto generating node + varvertexp->propagateAttrClocksFrom(vvertexp); } + // Remove the edge + VL_DO_DANGLING(edgep->unlinkDelete(), edgep); + ++m_statRefs; + edgep = vvertexp->outBeginp(); } } + + if (removedAllUsages) { + // Remove input links + while (V3GraphEdge* const edgep = vvertexp->inBeginp()) { + VL_DO_DANGLING(edgep->unlinkDelete(), edgep); + } + // Clone tree so we remember it for tracing, and keep the pointer + // to the "ALWAYS" part of the tree as part of this statement + // That way if a later signal optimization that + // retained a pointer to the always can + // optimize it further + VL_DO_DANGLING(vvertexp->varScp()->valuep(logicp->unlinkFrBack()), logicp); + // Mark the vertex so we don't mark it as being + // unconsumed in the next step + vvertexp->user(true); + logicVertexp->user(true); + } } } From 0d2bf23d820e4b96da3f6480e7114da8928ff765 Mon Sep 17 00:00:00 2001 From: 
Geza Lore Date: Sun, 31 Jul 2022 19:28:54 +0100 Subject: [PATCH 069/119] V3Gate: Defer constant folding until required Rather than constant folding each logic block after every substitution, only constant fold updated blocks when re-analysed, or at the end. This removes a lot of invocations of V3Const on large blocks that can be optimized well, and should yield the same result. This speeds up V3Gate by ~4x on a design where V3Gate dominates. --- src/V3Gate.cpp | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index a5256c3c9..15bc772f2 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -312,7 +312,7 @@ private: // NODE STATE // Entire netlist: // AstVarScope::user1p -> GateVarVertex* for usage var, 0=not set yet - // {statement}Node::user1p -> GateLogicVertex* for this statement + // {logic}Node::user1 -> bool: Some signals were optimized, hence needs constant folding // AstVarScope::user2 -> bool: Signal used in SenItem in *this* always statement // AstVar::user2 -> bool: Warned about SYNCASYNCNET // AstNodeVarRef::user2 -> bool: ConcatOffset visited @@ -328,6 +328,8 @@ private: bool m_activeReducible = true; // Is activation block reducible? 
bool m_inSenItem = false; // Underneath AstSenItem; any varrefs are clocks bool m_inSlow = false; // Inside a slow structure + std::vector m_optimized; // Logic blocks optimized + VDouble0 m_statSigs; // Statistic tracking VDouble0 m_statRefs; // Statistic tracking VDouble0 m_statDedupLogic; // Statistic tracking @@ -378,16 +380,8 @@ private: void optimizeElimVar(AstVarScope* varscp, AstNode* substp, AstNode* consumerp) { if (debug() >= 5) consumerp->dumpTree(cout, " elimUsePre: "); const bool replaced = eliminate(consumerp, varscp, substp, nullptr); - if (!replaced) return; - - if (debug() >= 9) consumerp->dumpTree(cout, " elimUseCns: "); - // Caution: Can't let V3Const change our handle to consumerp, such as by - // optimizing away this assignment, etc. - consumerp = V3Const::constifyEdit(consumerp); - if (debug() >= 5) consumerp->dumpTree(cout, " elimUseDne: "); - // Some previous input edges may have disappeared, perhaps all of them. - // If we remove the edges we can further optimize - // See e.g t_var_overzero.v. + if (replaced && !consumerp->user1()) m_optimized.push_back(consumerp); + consumerp->user1(2); // Added to m_optimized and needs folding } void optimizeSignals(bool allowMultiIn); @@ -417,6 +411,17 @@ private: optimizeSignals(false); // Then propagate more complicated equations optimizeSignals(true); + + // Constant fold optimized logic + for (AstNode* const logicp : m_optimized) { + // Ignore if already simplified + if (logicp->user1() != 2) continue; + AstNode* const foldedp = V3Const::constifyEdit(logicp); + // Caution: Can't let V3Const change our handle to consumerp, such as by + // optimizing away this assignment, etc. + UASSERT_OBJ(foldedp == logicp, foldedp, "should not remove node"); + } + // Remove redundant logic if (v3Global.opt.fDedupe()) { dedupe(); @@ -577,8 +582,15 @@ void GateVisitor::optimizeSignals(bool allowMultiIn) { = static_cast(vvertexp->inBeginp()->fromp()); if (!logicVertexp->reducible()) continue; - // Can we eliminate? 
+ // Constant fold driving logic if itself has been optimized, but not yet folded AstNode* const logicp = logicVertexp->nodep(); + if (logicp->user1() == 2) { + logicp->user1(1); // Added to m_optimized but already folded + AstNode* const foldedp = V3Const::constifyEdit(logicp); + UASSERT_OBJ(foldedp == logicp, foldedp, "Should not remove whole logic"); + } + + // Can we eliminate? const GateOkVisitor okVisitor{logicp, vvertexp->isClock(), false}; // Was it ok? From cb60663d49f1f7c77d136a7c5df1dfc95a8a6c4e Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 1 Aug 2022 12:22:56 +0100 Subject: [PATCH 070/119] V3Gate: Defer substitutions until required as well Similarly to the earlier patch that defers constant folding on optimized logic, now we also defer the variable substitutions as well. This again eliminates a lot of traversals, and yields another ~10x speedup of V3Gate on a design where V3Gate used to dominate while producing identical results. --- src/V3Gate.cpp | 96 ++++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index 15bc772f2..4073c5462 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -27,6 +27,7 @@ #include "V3Global.h" #include "V3Gate.h" #include "V3Ast.h" +#include "V3AstUserAllocator.h" #include "V3Graph.h" #include "V3Const.h" #include "V3Stats.h" @@ -34,6 +35,7 @@ #include #include +#include #include class GateDedupeVarVisitor; @@ -298,27 +300,31 @@ public: const GateVarRefList& rhsVarRefs() const { return m_rhsVarRefs; } }; -//###################################################################### -// Replace refs to 'varscp' with 'substp' in 'consumerp' +// ###################################################################### +// Replace refs to 'varscp' with 'substp' in 'consumerp' -static bool eliminate(AstNode* consumerp, AstVarScope* varscp, AstNode* substp, +static void eliminate(AstNode* logicp, + const std::unordered_map& substitutions, 
GateDedupeVarVisitor* varVisp); -//###################################################################### -// Gate class functions +// ###################################################################### +// Gate class functions class GateVisitor final : public GateBaseVisitor { private: // NODE STATE // Entire netlist: // AstVarScope::user1p -> GateVarVertex* for usage var, 0=not set yet - // {logic}Node::user1 -> bool: Some signals were optimized, hence needs constant folding + // {logic}Node::user1 -> map of substitutions, via m_substitutions // AstVarScope::user2 -> bool: Signal used in SenItem in *this* always statement // AstVar::user2 -> bool: Warned about SYNCASYNCNET // AstNodeVarRef::user2 -> bool: ConcatOffset visited const VNUser1InUse m_inuser1; const VNUser2InUse m_inuser2; + // Variable substitutions to apply to a given logic block + AstUser1Allocator> m_substitutions; + // STATE V3Graph m_graph; // Scoreboard of var usages/dependencies GateLogicVertex* m_logicVertexp = nullptr; // Current statement being tracked, nullptr=ignored @@ -379,9 +385,20 @@ private: void optimizeElimVar(AstVarScope* varscp, AstNode* substp, AstNode* consumerp) { if (debug() >= 5) consumerp->dumpTree(cout, " elimUsePre: "); - const bool replaced = eliminate(consumerp, varscp, substp, nullptr); - if (replaced && !consumerp->user1()) m_optimized.push_back(consumerp); - consumerp->user1(2); // Added to m_optimized and needs folding + if (!m_substitutions.tryGet(consumerp)) m_optimized.push_back(consumerp); + m_substitutions(consumerp).emplace(varscp, substp->cloneTree(false)); + } + + void commitElimVar(AstNode* logicp) { + if (auto* const substitutionsp = m_substitutions.tryGet(logicp)) { + if (!substitutionsp->empty()) { + eliminate(logicp, *substitutionsp, nullptr); + AstNode* const foldedp = V3Const::constifyEdit(logicp); + UASSERT_OBJ(foldedp == logicp, foldedp, "Should not remove whole logic"); + for (const auto& pair : *substitutionsp) pair.second->deleteTree(); + 
substitutionsp->clear(); + } + } } void optimizeSignals(bool allowMultiIn); @@ -411,17 +428,8 @@ private: optimizeSignals(false); // Then propagate more complicated equations optimizeSignals(true); - - // Constant fold optimized logic - for (AstNode* const logicp : m_optimized) { - // Ignore if already simplified - if (logicp->user1() != 2) continue; - AstNode* const foldedp = V3Const::constifyEdit(logicp); - // Caution: Can't let V3Const change our handle to consumerp, such as by - // optimizing away this assignment, etc. - UASSERT_OBJ(foldedp == logicp, foldedp, "should not remove node"); - } - + // Commit substitutions on the optimized logic + for (AstNode* const logicp : m_optimized) commitElimVar(logicp); // Remove redundant logic if (v3Global.opt.fDedupe()) { dedupe(); @@ -581,14 +589,10 @@ void GateVisitor::optimizeSignals(bool allowMultiIn) { GateLogicVertex* const logicVertexp = static_cast(vvertexp->inBeginp()->fromp()); if (!logicVertexp->reducible()) continue; - - // Constant fold driving logic if itself has been optimized, but not yet folded AstNode* const logicp = logicVertexp->nodep(); - if (logicp->user1() == 2) { - logicp->user1(1); // Added to m_optimized but already folded - AstNode* const foldedp = V3Const::constifyEdit(logicp); - UASSERT_OBJ(foldedp == logicp, foldedp, "Should not remove whole logic"); - } + + // Commit pending optimizations to driving logic, as we will re-analyse + commitElimVar(logicp); // Can we eliminate? 
const GateOkVisitor okVisitor{logicp, vvertexp->isClock(), false}; @@ -1022,30 +1026,25 @@ public: void hashReplace(AstNode* oldp, AstNode* newp) { m_ghash.hashReplace(oldp, newp); } }; -//###################################################################### +// ###################################################################### -static bool eliminate(AstNode* consumerp, AstVarScope* varscp, AstNode* substp, +static void eliminate(AstNode* logicp, + const std::unordered_map& substitutions, GateDedupeVarVisitor* varVisp) { - UINFO(9, " Eliminating inside: " << consumerp << endl); - UINFO(9, " Eliminating varscopep: " << varscp << endl); - UINFO(9, " Eliminating substitute: " << substp << endl); - bool didReplace = false; // Did we do any replacements - consumerp->foreach([=, &didReplace](AstNodeVarRef* nodep) { - if (nodep->varScopep() != varscp) return; + const std::function visit + = [&substitutions, &visit, varVisp](AstNodeVarRef* nodep) -> void { + // See if this variable has a substitution + const auto& it = substitutions.find(nodep->varScopep()); + if (it == substitutions.end()) return; + AstNode* const substp = it->second; - // Substitute in the new tree -#ifdef VL_DEBUG // Can be hot code, so expensive + // Substitute in the new tree UASSERT_OBJ(nodep->access().isReadOnly(), nodep, "Can't replace lvalue assignments with const var"); UASSERT_OBJ(!(VN_IS(substp, NodeVarRef) && nodep->same(substp)), // Prevent an infinite loop... substp, "Replacing node with itself; perhaps circular logic?"); -#endif - // It's possible we substitute into something that will be reduced more later, - // however, as we never delete the top Always/initial statement, all should be well. - didReplace = true; - // The replacement AstNode* const newp = substp->cloneTree(false); // Which fileline() to use? 
If replacing with logic, an error/warning is likely to want @@ -1062,13 +1061,15 @@ static bool eliminate(AstNode* consumerp, AstVarScope* varscp, AstNode* substp, // Replace the node nodep->replaceWith(newp); VL_DO_DANGLING(nodep->deleteTree(), nodep); - }); + // Recursively substitute the new tree + newp->foreach(visit); + }; - return didReplace; + logicp->foreach(visit); } -//###################################################################### -// Recurse through the graph, looking for duplicate expressions on the rhs of an assign +// ###################################################################### +// Recurse through the graph, looking for duplicate expressions on the rhs of an assign class GateDedupeGraphVisitor final : public GateGraphBaseVisitor { private: @@ -1119,7 +1120,8 @@ private: if (lvertexp == consumeVertexp) { UINFO(9, "skipping as self-recirculates\n"); } else { - eliminate(consumerp, vvertexp->varScp(), dupVarRefp, &m_varVisitor); + eliminate(consumerp, {std::make_pair(vvertexp->varScp(), dupVarRefp)}, + &m_varVisitor); } outedgep = outedgep->relinkFromp(dupVvertexp); } From cfb6fd8b3407357470193e6f302c4062be59098b Mon Sep 17 00:00:00 2001 From: Kamil Rakoczy Date: Tue, 2 Aug 2022 14:36:14 +0200 Subject: [PATCH 071/119] Reduce max RSS usage (#3483) By constant folding nodes earlier in V3Expand, we can save some max RSS on large designs. 
--- src/V3Const.cpp | 12 +++++++++++- src/V3Const.h | 3 +++ src/V3Expand.cpp | 10 +++++++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/V3Const.cpp b/src/V3Const.cpp index c9250a3f6..8a7670c14 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -3583,7 +3583,11 @@ public: } virtual ~ConstVisitor() override { if (m_doCpp) { - V3Stats::addStat("Optimizations, Const bit op reduction", m_statBitOpReduction); + if (m_globalPass) { + V3Stats::addStat("Optimizations, Const bit op reduction", m_statBitOpReduction); + } else { + V3Stats::addStatSum("Optimizations, Const bit op reduction", m_statBitOpReduction); + } } } @@ -3676,6 +3680,12 @@ AstNode* V3Const::constifyEdit(AstNode* nodep) { return nodep; } +AstNode* V3Const::constifyEditCpp(AstNode* nodep) { + ConstVisitor visitor{ConstVisitor::PROC_CPP, /* globalPass: */ false}; + nodep = visitor.mainAcceptEdit(nodep); + return nodep; +} + void V3Const::constifyAllLive(AstNetlist* nodep) { // Only call from Verilator.cpp, as it uses user#'s // This only pushes constants up, doesn't make any other edits diff --git a/src/V3Const.h b/src/V3Const.h index 59f5a5f76..ada396efd 100644 --- a/src/V3Const.h +++ b/src/V3Const.h @@ -39,6 +39,9 @@ public: static void constifyCpp(AstNetlist* nodep); // Only the current node and lower // Return new node that may have replaced nodep + static AstNode* constifyEditCpp(AstNode* nodep); + // Only the current node and lower + // Return new node that may have replaced nodep static AstNode* constifyEdit(AstNode* nodep); // Only the current node and lower, with special SenTree optimization // Return new node that may have replaced nodep diff --git a/src/V3Expand.cpp b/src/V3Expand.cpp index 5c341db26..c10c3c85f 100644 --- a/src/V3Expand.cpp +++ b/src/V3Expand.cpp @@ -32,6 +32,7 @@ #include "V3Expand.h" #include "V3Stats.h" #include "V3Ast.h" +#include "V3Const.h" #include @@ -160,6 +161,7 @@ private: new AstShiftL{fl, llowp, new AstConst{fl, static_cast(loffset)}, 
VL_EDATASIZE}}}; + newp = V3Const::constifyEditCpp(newp); } else { newp = llowp; } @@ -520,8 +522,9 @@ private: cleanmask.setMask(VL_BITBIT_E(destp->widthMin())); newp = new AstAnd{lfl, newp, new AstConst{lfl, cleanmask}}; } - - addWordAssign(nodep, w, destp, new AstOr{lfl, oldvalp, newp}); + AstNode* const orp + = V3Const::constifyEditCpp(new AstOr{lfl, oldvalp, newp}); + addWordAssign(nodep, w, destp, orp); } } VL_DO_DANGLING(rhsp->deleteTree(), rhsp); @@ -541,7 +544,8 @@ private: AstNode* const shifted = new AstShiftL{ lfl, rhsp, new AstConst{lfl, static_cast(lsb)}, destp->width()}; AstNode* const cleaned = new AstAnd{lfl, shifted, new AstConst{lfl, cleanmask}}; - AstNode* const newp = new AstAssign{nfl, destp, new AstOr{lfl, oldvalp, cleaned}}; + AstNode* const orp = V3Const::constifyEditCpp(new AstOr{lfl, oldvalp, cleaned}); + AstNode* newp = new AstAssign{nfl, destp, orp}; insertBefore(nodep, newp); } return true; From 6c33e6e889bbc59eda8ae822a8a2db340f2cb8aa Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 2 Aug 2022 16:31:45 +0100 Subject: [PATCH 072/119] Tell clang-tidy .h files are C++ (not C) headers --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index b805f4886..fc9b6b1a9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -334,7 +334,7 @@ clang-tidy: $(CLANGTIDY_DEP) %.cpp.tidy: %.cpp $(CLANGTIDY) $(CLANGTIDY_FLAGS) $< -- $(CLANGTIDY_DEFS) $(CPPCHECK_INC) | 2>&1 tee $@ %.h.tidy: %.h - $(CLANGTIDY) $(CLANGTIDY_FLAGS) $< -- $(CLANGTIDY_DEFS) $(CPPCHECK_INC) | 2>&1 tee $@ + $(CLANGTIDY) $(CLANGTIDY_FLAGS) $< -- $(CLANGTIDY_DEFS) $(CPPCHECK_INC) -x c++-header | 2>&1 tee $@ analyzer-src: -rm -rf src/obj_dbg From bd211c87aa2f5ae30c1c01dd31f582c46c79dfd9 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 2 Aug 2022 16:46:31 +0100 Subject: [PATCH 073/119] astgen: split 'visit' method declarations from definitions Add definitions to V3Ast.cpp, and use static_cast. This fixes a lot of clang-tidy noise. 
--- src/V3Ast.cpp | 5 +++++ src/V3Ast.h | 6 ++---- src/astgen | 21 ++++++++++++++------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/V3Ast.cpp b/src/V3Ast.cpp index 9a6e7fca8..de595be61 100644 --- a/src/V3Ast.cpp +++ b/src/V3Ast.cpp @@ -1293,3 +1293,8 @@ void VNDeleter::doDeletes() { for (AstNode* const nodep : m_deleteps) nodep->deleteTree(); m_deleteps.clear(); } + +//###################################################################### +// VNVisitor + +#include "V3Ast__gen_visitor_defns.h" // From ./astgen diff --git a/src/V3Ast.h b/src/V3Ast.h index e93dc4d86..45fe1fc98 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1297,10 +1297,8 @@ public: /// Return edited nodep; see comments in V3Ast.cpp AstNode* iterateSubtreeReturnEdits(AstNode* nodep); -#include "V3Ast__gen_visitor.h" // From ./astgen - // Things like: - // virtual void visit(AstBreak* nodep) { visit((AstNodeStmt*)(nodep)); } - // virtual void visit(AstNodeStmt* nodep) { visit((AstNode*)(nodep)); } + virtual void visit(AstNode* nodep) = 0; +#include "V3Ast__gen_visitor_decls.h" // From ./astgen }; //###################################################################### diff --git a/src/astgen b/src/astgen index b6071c06b..44529a0e4 100755 --- a/src/astgen +++ b/src/astgen @@ -507,15 +507,21 @@ def write_classes(filename): fh.write("\n") -def write_visitor(filename): +def write_visitor_decls(filename): with open_file(filename) as fh: for typen in sorted(Classes.keys()): - if typen == "Node": - fh.write(" virtual void visit(Ast" + typen + "*) = 0;\n") - else: + if typen != "Node": + fh.write("virtual void visit(Ast" + typen + "*);\n") + + +def write_visitor_defns(filename): + with open_file(filename) as fh: + for typen in sorted(Classes.keys()): + if typen != "Node": base = Classes[typen] - fh.write(" virtual void visit(Ast" + typen + - "* nodep) { visit((Ast" + base + "*)(nodep)); }\n") + fh.write("void VNVisitor::visit(Ast" + typen + + "* nodep) { visit(static_cast(nodep)); 
}\n") def write_impl(filename): @@ -692,7 +698,8 @@ for filename in source_files: if Args.classes: write_report("V3Ast__gen_report.txt") write_classes("V3Ast__gen_classes.h") - write_visitor("V3Ast__gen_visitor.h") + write_visitor_decls("V3Ast__gen_visitor_decls.h") + write_visitor_defns("V3Ast__gen_visitor_defns.h") write_impl("V3Ast__gen_impl.h") write_types("V3Ast__gen_types.h") write_yystype("V3Ast__gen_yystype.h") From f9f66d787e5b386c89cb9c9714bcf18a55c4090d Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 3 Aug 2022 09:41:30 +0100 Subject: [PATCH 074/119] Fix integer overflow in V3Unroll (#3451) --- src/V3Unroll.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/V3Unroll.cpp b/src/V3Unroll.cpp index 769faf369..469f5656e 100644 --- a/src/V3Unroll.cpp +++ b/src/V3Unroll.cpp @@ -50,6 +50,7 @@ private: bool m_varModeReplace; // Replacing varrefs bool m_varAssignHit; // Assign var hit bool m_generate; // Expand single generate For loop + int m_unrollLimit; // Unrolling limit string m_beginName; // What name to give begin iterations VDouble0 m_statLoops; // Statistic tracking VDouble0 m_statIters; // Statistic tracking @@ -67,10 +68,6 @@ private: return false; } - int unrollCount() const { - return m_generate ? v3Global.opt.unrollCount() * 16 : v3Global.opt.unrollCount(); - } - bool bodySizeOverRecurse(AstNode* nodep, int& bodySize, int bodyLimit) { if (!nodep) return false; bodySize++; @@ -163,7 +160,7 @@ private: // Check whether to we actually want to try and unroll. 
int loops; - if (!countLoops(initAssp, condp, incp, unrollCount(), loops)) { + if (!countLoops(initAssp, condp, incp, m_unrollLimit, loops)) { return cantUnroll(nodep, "Unable to simulate loop"); } @@ -336,11 +333,11 @@ private: } ++m_statIters; - if (++times > unrollCount() * 3) { + if (++times / 3 > m_unrollLimit) { nodep->v3error( "Loop unrolling took too long;" " probably this is an infinite loop, or set --unroll-count above " - << unrollCount()); + << m_unrollLimit); break; } @@ -485,6 +482,12 @@ public: m_varModeReplace = false; m_varAssignHit = false; m_generate = generate; + m_unrollLimit = v3Global.opt.unrollCount(); + if (generate) { + m_unrollLimit = std::numeric_limits::max() / 16 > m_unrollLimit + ? m_unrollLimit * 16 + : std::numeric_limits::max(); + } m_beginName = beginName; } void process(AstNode* nodep, bool generate, const string& beginName) { From b864f5f5baef23df633b9a3017026c27198c37f1 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 3 Aug 2022 16:33:55 +0100 Subject: [PATCH 075/119] V3Partition: use static_cast with LogicMTaskVertex dynamic_cast is not free, and the mtask graph contains only LogicMTaskVertex vertices, use static_cast instead for some speedup. 
--- src/V3Partition.cpp | 52 ++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 4b37f53df..3263b8493 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -375,17 +375,17 @@ public: ~CpCostAccessor() = default; // Return cost of this node uint32_t cost(const V3GraphVertex* vxp) const { - const LogicMTask* const mtaskp = dynamic_cast(vxp); + const LogicMTask* const mtaskp = static_cast(vxp); return mtaskp->stepCost(); } // Return stored CP to this node uint32_t critPathCost(const V3GraphVertex* vxp, GraphWay way) const { - const LogicMTask* const mtaskp = dynamic_cast(vxp); + const LogicMTask* const mtaskp = static_cast(vxp); return mtaskp->critPathCost(way); } // Store a new CP to this node void setCritPathCost(V3GraphVertex* vxp, GraphWay way, uint32_t cost) const { - LogicMTask* const mtaskp = dynamic_cast(vxp); + LogicMTask* const mtaskp = static_cast(vxp); mtaskp->setCritPathCost(way, cost); } // Notify vxp that the wayward CP at the throughp-->vxp edge @@ -393,15 +393,15 @@ public: // This is our cue to update vxp's m_edges[!way][throughp]. void notifyEdgeCp(V3GraphVertex* vxp, GraphWay way, V3GraphVertex* throuvhVxp, uint32_t cp) const { - LogicMTask* const updateVxp = dynamic_cast(vxp); - LogicMTask* const lthrouvhVxp = dynamic_cast(throuvhVxp); + LogicMTask* const updateVxp = static_cast(vxp); + LogicMTask* const lthrouvhVxp = static_cast(throuvhVxp); EdgeSet& edges = updateVxp->m_edges[way.invert()]; const uint32_t edgeCp = edges.at(lthrouvhVxp); if (cp > edgeCp) edges.set(lthrouvhVxp, cp); } // Check that CP matches that of the longest edge wayward of vxp. 
void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const { - LogicMTask* const mtaskp = dynamic_cast(vxp); + LogicMTask* const mtaskp = static_cast(vxp); const EdgeSet& edges = mtaskp->m_edges[way.invert()]; // This is mtaskp's relative with longest !wayward inclusive CP: const auto edgeIt = edges.rbegin(); @@ -603,7 +603,7 @@ private: for (const V3GraphEdge* followp = fromp->outBeginp(); followp; followp = followp->outNextp()) { if (followp == excludedEdgep) continue; - LogicMTask* const nextp = dynamic_cast(followp->top()); + LogicMTask* const nextp = static_cast(followp->top()); if (pathExistsFromInternal(nextp, top, nullptr, generation)) return true; } return false; @@ -634,7 +634,7 @@ public: const LogicMTask* startp = nullptr; for (const V3GraphVertex* vxp = graphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - const LogicMTask* const mtaskp = dynamic_cast(vxp); + const LogicMTask* const mtaskp = static_cast(vxp); if (!startp) { startp = mtaskp; continue; @@ -805,10 +805,10 @@ public: } // METHODS LogicMTask* furtherMTaskp(GraphWay way) const { - return dynamic_cast(this->furtherp(way)); + return static_cast(this->furtherp(way)); } - LogicMTask* fromMTaskp() const { return dynamic_cast(fromp()); } - LogicMTask* toMTaskp() const { return dynamic_cast(top()); } + LogicMTask* fromMTaskp() const { return static_cast(fromp()); } + LogicMTask* toMTaskp() const { return static_cast(top()); } bool mergeWouldCreateCycle() const { return LogicMTask::pathExistsFrom(fromMTaskp(), toMTaskp(), this); } @@ -962,7 +962,7 @@ static void partInitHalfCriticalPaths(GraphWay way, V3Graph* mtasksp, bool check GraphStreamUnordered order(mtasksp, way); const GraphWay rev = way.invert(); for (const V3GraphVertex* vertexp; (vertexp = order.nextp());) { - const LogicMTask* const mtaskcp = dynamic_cast(vertexp); + const LogicMTask* const mtaskcp = static_cast(vertexp); LogicMTask* const mtaskp = const_cast(mtaskcp); uint32_t cpCost = 0; #if VL_DEBUG @@ 
-977,7 +977,7 @@ static void partInitHalfCriticalPaths(GraphWay way, V3Graph* mtasksp, bool check "Should be no redundant edges in mtasks graph"); relatives.insert(edgep->furtherp(rev)); #endif - const LogicMTask* const relativep = dynamic_cast(edgep->furtherp(rev)); + const LogicMTask* const relativep = static_cast(edgep->furtherp(rev)); cpCost = std::max(cpCost, (relativep->critPathCost(way) + static_cast(relativep->stepCost()))); } @@ -1010,7 +1010,7 @@ static void partCheckCriticalPaths(V3Graph* mtasksp) { partInitHalfCriticalPaths(GraphWay::FORWARD, mtasksp, true); partInitHalfCriticalPaths(GraphWay::REVERSE, mtasksp, true); for (V3GraphVertex* vxp = mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - const LogicMTask* const mtaskp = dynamic_cast(vxp); + const LogicMTask* const mtaskp = static_cast(vxp); mtaskp->checkRelativesCp(GraphWay::FORWARD); mtaskp->checkRelativesCp(GraphWay::REVERSE); } @@ -1326,8 +1326,8 @@ private: MTaskEdge* mergeEdgep = mergeCanp->toMTaskEdge(); const SiblingMC* mergeSibsp = nullptr; if (mergeEdgep) { - top = dynamic_cast(mergeEdgep->top()); - fromp = dynamic_cast(mergeEdgep->fromp()); + top = static_cast(mergeEdgep->top()); + fromp = static_cast(mergeEdgep->fromp()); } else { mergeSibsp = mergeCanp->toSiblingMC(); UASSERT(mergeSibsp, "Failed to cast mergeCanp to either MTaskEdge or SiblingMC"); @@ -1437,14 +1437,14 @@ private: siblingPairFromRelatives(GraphWay::FORWARD, recipientp, true); unsigned edges = 0; for (V3GraphEdge* edgep = recipientp->outBeginp(); edgep; edgep = edgep->outNextp()) { - LogicMTask* const postreqp = dynamic_cast(edgep->top()); + LogicMTask* const postreqp = static_cast(edgep->top()); siblingPairFromRelatives(GraphWay::REVERSE, postreqp, false); ++edges; if (edges > PART_SIBLING_EDGE_LIMIT) break; } edges = 0; for (V3GraphEdge* edgep = recipientp->inBeginp(); edgep; edgep = edgep->inNextp()) { - LogicMTask* const prereqp = dynamic_cast(edgep->fromp()); + LogicMTask* const prereqp = 
static_cast(edgep->fromp()); siblingPairFromRelatives(GraphWay::FORWARD, prereqp, false); ++edges; if (edges > PART_SIBLING_EDGE_LIMIT) break; @@ -1491,8 +1491,8 @@ private: // Score this edge. Lower is better. The score is the new local CP // length if we merge these mtasks. ("Local" means the longest // critical path running through the merged node.) - const LogicMTask* const top = dynamic_cast(edgep->top()); - const LogicMTask* const fromp = dynamic_cast(edgep->fromp()); + const LogicMTask* const top = static_cast(edgep->top()); + const LogicMTask* const fromp = static_cast(edgep->fromp()); const uint32_t mergedCpCostFwd = std::max(fromp->critPathCost(GraphWay::FORWARD), top->critPathCostWithout(GraphWay::FORWARD, edgep)); @@ -1534,7 +1534,7 @@ private: std::vector shortestPrereqs; for (V3GraphEdge* edgep = mtaskp->beginp(way); edgep; edgep = edgep->nextp(way)) { - LogicMTask* const prereqp = dynamic_cast(edgep->furtherp(way)); + LogicMTask* const prereqp = static_cast(edgep->furtherp(way)); shortestPrereqs.push_back(prereqp); // Prevent nodes with huge numbers of edges from massively // slowing down the partitioner: @@ -1932,7 +1932,7 @@ public: OvvSet ovvSetSystemC(ovvOrder); for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - LogicMTask* const mtaskp = dynamic_cast(vxp); + LogicMTask* const mtaskp = static_cast(vxp); // Should be only one MTaskMoveVertex in each mtask at this // stage, but whatever, write it as a loop: for (LogicMTask::VxList::const_iterator it = mtaskp->vertexListp()->begin(); @@ -2042,7 +2042,7 @@ public: TasksByRank tasksByRank; for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - LogicMTask* const mtaskp = dynamic_cast(vxp); + LogicMTask* const mtaskp = static_cast(vxp); if (hasDpiHazard(mtaskp)) tasksByRank[vxp->rank()].insert(mtaskp); } mergeSameRankTasks(&tasksByRank); @@ -2521,7 +2521,7 @@ void V3Partition::hashGraphDebug(const V3Graph* graphp, const char* 
debugName) { void V3Partition::setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp) { // Look at each mtask for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) { - LogicMTask* const mtaskp = dynamic_cast(itp); + LogicMTask* const mtaskp = static_cast(itp); const LogicMTask::VxList* vertexListp = mtaskp->vertexListp(); // For each logic vertex in this mtask, create an mtask-to-mtask @@ -2651,7 +2651,7 @@ void V3Partition::go(V3Graph* mtasksp) { using SortedMTaskSet = std::set; SortedMTaskSet sorted; for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) { - LogicMTask* const mtaskp = dynamic_cast(itp); + LogicMTask* const mtaskp = static_cast(itp); sorted.insert(mtaskp); } for (auto it = sorted.begin(); it != sorted.end(); ++it) { @@ -2667,7 +2667,7 @@ void V3Partition::go(V3Graph* mtasksp) { // Set color to indicate an mtaskId on every underlying MTaskMoveVertex. for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) { - const LogicMTask* const mtaskp = dynamic_cast(itp); + const LogicMTask* const mtaskp = static_cast(itp); for (LogicMTask::VxList::const_iterator it = mtaskp->vertexListp()->begin(); it != mtaskp->vertexListp()->end(); ++it) { MTaskMoveVertex* const mvertexp = *it; From fac8e76923e40c7d20feffb3b058da53a50b09e9 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 3 Aug 2022 18:59:40 +0100 Subject: [PATCH 076/119] Rework SortByValueMap for better performance Keep a single std::set of key/value pairs, and a single unordered_map from key to iterators into the set. Also improve some of the accessing mechanisms using modern C++. This speeds up multi-threaded ordering by about 10%. 
--- src/V3Partition.cpp | 56 +++---- src/V3Scoreboard.h | 375 +++++++++++--------------------------------- 2 files changed, 114 insertions(+), 317 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 3263b8493..d9a071a96 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -195,12 +195,9 @@ public: // longest !wayward edge. Schedule that to be resolved. const uint32_t newPendingVal = newInclusiveCp - m_accessp->critPathCost(relativep, m_way); - if (m_pending.has(relativep)) { - if (newPendingVal > m_pending.at(relativep)) { - m_pending.set(relativep, newPendingVal); - } - } else { - m_pending.set(relativep, newPendingVal); + const auto pair = m_pending.emplace(relativep, newPendingVal); + if (!pair.second && (newPendingVal > pair.first->second)) { + m_pending.update(pair.first, newPendingVal); } } } @@ -225,8 +222,8 @@ public: // This generalizes to multiple seed nodes also. while (!m_pending.empty()) { const auto it = m_pending.rbegin(); - V3GraphVertex* const updateMep = (*it).key(); - const uint32_t cpGrowBy = (*it).value(); + V3GraphVertex* const updateMep = it->first; + const uint32_t cpGrowBy = it->second; m_pending.erase(it); // For *updateMep, whose critPathCost was out-of-date with respect @@ -396,8 +393,8 @@ public: LogicMTask* const updateVxp = static_cast(vxp); LogicMTask* const lthrouvhVxp = static_cast(throuvhVxp); EdgeSet& edges = updateVxp->m_edges[way.invert()]; - const uint32_t edgeCp = edges.at(lthrouvhVxp); - if (cp > edgeCp) edges.set(lthrouvhVxp, cp); + const auto it = edges.find(lthrouvhVxp); + if (cp > it->second) edges.update(it, cp); } // Check that CP matches that of the longest edge wayward of vxp. 
void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const { @@ -405,7 +402,7 @@ public: const EdgeSet& edges = mtaskp->m_edges[way.invert()]; // This is mtaskp's relative with longest !wayward inclusive CP: const auto edgeIt = edges.rbegin(); - const uint32_t edgeCp = (*edgeIt).value(); + const uint32_t edgeCp = edgeIt->second; UASSERT_OBJ(edgeCp == cp, vxp, "CP doesn't match longest wayward edge"); } @@ -512,26 +509,21 @@ public: } void addRelative(GraphWay way, LogicMTask* relativep) { - EdgeSet& edges = m_edges[way]; - UASSERT(!edges.has(relativep), "Adding existing edge"); // value is !way cp to this edge - edges.set(relativep, relativep->stepCost() + relativep->critPathCost(way.invert())); - } - void removeRelative(GraphWay way, LogicMTask* relativep) { - EdgeSet& edges = m_edges[way]; - edges.erase(relativep); - } - bool hasRelative(GraphWay way, LogicMTask* relativep) { - const EdgeSet& edges = m_edges[way]; - return edges.has(relativep); + const uint32_t cp = relativep->stepCost() + relativep->critPathCost(way.invert()); + VL_ATTR_UNUSED const bool exits = !m_edges[way].emplace(relativep, cp).second; +#if VL_DEBUG + UASSERT(!exits, "Adding existing edge"); +#endif } + void removeRelative(GraphWay way, LogicMTask* relativep) { m_edges[way].erase(relativep); } + bool hasRelative(GraphWay way, LogicMTask* relativep) { return m_edges[way].has(relativep); } void checkRelativesCp(GraphWay way) const { - const EdgeSet& edges = m_edges[way]; - for (const auto& edge : vlstd::reverse_view(edges)) { - const LogicMTask* const relativep = edge.key(); - const uint32_t cachedCp = edge.value(); - partCheckCachedScoreVsActual(cachedCp, relativep->critPathCost(way.invert()) - + relativep->stepCost()); + for (const auto& edge : vlstd::reverse_view(m_edges[way])) { + const LogicMTask* const relativep = edge.first; + const uint32_t cachedCp = edge.second; + const uint32_t cp = relativep->critPathCost(way.invert()) + relativep->stepCost(); + 
partCheckCachedScoreVsActual(cachedCp, cp); } } @@ -557,11 +549,11 @@ public: const EdgeSet& edges = m_edges[way.invert()]; uint32_t result = 0; for (const auto& edge : vlstd::reverse_view(edges)) { - if (edge.key() != withoutp->furtherp(way.invert())) { + if (edge.first != withoutp->furtherp(way.invert())) { // Use the cached cost. It could be a small overestimate // due to stepping. This is consistent with critPathCost() // which also returns the cached cost. - result = edge.value(); + result = edge.second; break; } } @@ -657,7 +649,7 @@ public: if (it == children.rend()) { nextp = nullptr; } else { - nextp = (*it).key(); + nextp = it->first; } } @@ -1477,6 +1469,7 @@ private: return 0; } + VL_ATTR_NOINLINE static uint32_t siblingScore(const SiblingMC* sibsp) { const LogicMTask* const ap = sibsp->ap(); const LogicMTask* const bp = sibsp->bp(); @@ -1487,6 +1480,7 @@ private: return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost()); } + VL_ATTR_NOINLINE static uint32_t edgeScore(const V3GraphEdge* edgep) { // Score this edge. Lower is better. The score is the new local CP // length if we merge these mtasks. ("Local" means the longest diff --git a/src/V3Scoreboard.h b/src/V3Scoreboard.h index 7130e7284..43d5be804 100644 --- a/src/V3Scoreboard.h +++ b/src/V3Scoreboard.h @@ -34,298 +34,103 @@ #include #include -//###################################################################### -// SortByValueMap +// ###################################################################### +// SortByValueMap -/// A generic key-value map, except it also supports iterating in -/// value-sorted order. Values need not be unique. Uses T_KeyCompare to -/// break ties in the sort when values collide. +// A generic key-value map, except iteration is in *value* sorted order. Values need not be unique. +// Uses T_KeyCompare to break ties in the sort when values collide. 
Note: Only const iteration is +// possible, as updating mapped values via iterators is not safe. template > class SortByValueMap final { - // TYPES -private: - using KeySet = std::set; - using Val2Keys = std::map; + // Current implementation is a std::set of key/value pairs, plus a std::unordered_map from keys + // to iterators into the set. This keeps most operations fairly cheap and also has the benefit + // of being able to re-use the std::set iterators. + // TYPES + + using Pair = std::pair; + + struct PairCmp final { + bool operator()(const Pair& a, const Pair& b) const { + // First compare values + if (a.second != b.second) return a.second < b.second; + // Then compare keys + return T_KeyCompare{}(a.first, b.first); + } + }; + + using PairSet = std::set; + +public: + using const_iterator = typename PairSet::const_iterator; + using const_reverse_iterator = typename PairSet::const_reverse_iterator; + +private: // MEMBERS - std::unordered_map m_keys; // Map each key to its value. Not sorted. - Val2Keys m_vals; // Map each value to its keys. Sorted.
+ PairSet m_pairs; // The contents of the map, stored directly as key-value pairs + std::unordered_map m_kiMap; // Key to iterator map + + VL_UNCOPYABLE(SortByValueMap); public: // CONSTRUCTORS SortByValueMap() = default; - class const_iterator VL_NOT_FINAL { - // TYPES - public: - using value_type = const_iterator; - using reference = const_iterator; // See comment on operator*() - using pointer = void; - using difference_type = std::ptrdiff_t; - using iterator_category = std::bidirectional_iterator_tag; + // Only const iteration is possible + const_iterator begin() const { return m_pairs.begin(); } + const_iterator end() const { return m_pairs.end(); } + const_iterator cbegin() const { m_pairs.cbegin(); } + const_iterator cend() const { return m_pairs.cend(); } + const_reverse_iterator rbegin() const { return m_pairs.rbegin(); } + const_reverse_iterator rend() const { return m_pairs.rend(); } + const_reverse_iterator crbegin() const { return m_pairs.crbegin(); } + const_reverse_iterator crend() const { return m_pairs.crend(); } - protected: - friend class SortByValueMap; - - // MEMBERS - typename KeySet::iterator m_keyIt; - typename Val2Keys::iterator m_valIt; - SortByValueMap* const m_sbvmp; - bool m_end = true; // At the end() - - // CONSTRUCTORS - explicit const_iterator(SortByValueMap* sbmvp) // for end() - : m_sbvmp{sbmvp} {} - const_iterator(typename Val2Keys::iterator valIt, typename KeySet::iterator keyIt, - SortByValueMap* sbvmp) - : m_keyIt{keyIt} - , m_valIt{valIt} - , m_sbvmp{sbvmp} - , m_end{false} {} - - // METHODS - void advanceUntilValid() { - ++m_keyIt; - if (m_keyIt != m_valIt->second.end()) { // Valid iterator, done. - return; - } - // Try the next value? - ++m_valIt; - if (m_valIt == m_sbvmp->m_vals.end()) { // No more values - m_end = true; - return; - } - // Should find a value here, as every value bucket is supposed - // to have at least one key, even after keys get removed. 
- m_keyIt = m_valIt->second.begin(); - UASSERT(m_keyIt != m_valIt->second.end(), "Algorithm should have key left"); - } - void reverseUntilValid() { - if (m_end) { - UASSERT(!m_sbvmp->m_vals.empty(), "Reverse iterator causes underflow"); - m_valIt = m_sbvmp->m_vals.end(); - --m_valIt; - - UASSERT(!m_valIt->second.empty(), "Reverse iterator causes underflow"); - m_keyIt = m_valIt->second.end(); - --m_keyIt; - - m_end = false; - return; - } - if (m_keyIt != m_valIt->second.begin()) { - // Valid iterator, we're done. - --m_keyIt; - return; - } - // Try the previous value? - if (VL_UNCOVERABLE(m_valIt == m_sbvmp->m_vals.begin())) { - // No more values but it's not defined to decrement an - // iterator past the beginning. - v3fatalSrc("Decremented iterator past beginning"); - return; // LCOV_EXCL_LINE - } - --m_valIt; - // Should find a value here, as Every value bucket is supposed - // to have at least one key, even after keys get removed. - UASSERT(!m_valIt->second.empty(), "Value bucket should have key"); - m_keyIt = m_valIt->second.end(); - --m_keyIt; - UASSERT(m_keyIt != m_valIt->second.end(), "Value bucket should have key"); - } - - public: - const T_Key& key() const { return *m_keyIt; } - const T_Value& value() const { return m_valIt->first; } - const_iterator& operator++() { - advanceUntilValid(); - return *this; - } - const_iterator& operator--() { - reverseUntilValid(); - return *this; - } - bool operator==(const const_iterator& other) const { - // It's not legal to compare iterators from different - // sequences. So check m_end before comparing m_valIt, and - // compare m_valIt's before comparing m_keyIt to ensure nothing - // here is undefined. - if (m_end || other.m_end) return m_end && other.m_end; - return ((m_valIt == other.m_valIt) && (m_keyIt == other.m_keyIt)); - } - bool operator!=(const const_iterator& other) const { return (!this->operator==(other)); } - - // WARNING: Cleverness. 
- // - // The "reference" returned by *it must remain valid after 'it' - // gets destroyed. The reverse_iterator relies on this for its - // operator*(), so it's not just a theoretical requirement, it's a - // real requirement. - // - // To make that work, define the "reference" type to be the - // iterator itself. So clients can do (*it).key() and - // (*it).value(). This is the clever part. - // - // That's mostly useful for a reverse iterator, where *rit returns - // the forward iterator pointing the to same element, so - // (*rit).key() and (*rit).value() work where rit.key() and - // rit.value() cannot. - // - // It would be nice to support it->key() and it->value(), however - // uncertain what would be an appropriate 'pointer' type define - // that makes this work safely through a reverse iterator. So this - // class does not provide an operator->(). - // - // Q) Why not make our value_type be a pair like a - // normal map, and return a reference to that? This could - // return a reference to one of the pairs inside m_keys, that - // would satisfy the constraint above. - // - // A) It would take a lookup to find that pair within m_keys. This - // iterator is designed to minimize the number of hashtable and - // tree lookups. Increment, decrement, key(), value(), erase() - // by iterator, begin(), end() -- none of these require a - // container lookup. That's true for reverse_iterators too. 
- reference operator*() const { - UASSERT(!m_end, "Dereferencing iterator that is at end()"); - return *this; - } - }; - - class iterator final : public const_iterator { - public: - // TYPES - using value_type = iterator; - using reference = iterator; - // pointer, difference_type, and iterator_category inherit from - // const_iterator - - // CONSTRUCTORS - explicit iterator(SortByValueMap* sbvmp) - : const_iterator{sbvmp} {} - iterator(typename Val2Keys::iterator valIt, typename KeySet::iterator keyIt, - SortByValueMap* sbvmp) - : const_iterator{valIt, keyIt, sbvmp} {} - - // METHODS - iterator& operator++() { - this->advanceUntilValid(); - return *this; - } - iterator& operator--() { - this->reverseUntilValid(); - return *this; - } - reference operator*() const { - UASSERT(!this->m_end, "Dereferencing iterator that is at end()"); - return *this; - } - }; - - using reverse_iterator = std::reverse_iterator; - using const_reverse_iterator = std::reverse_iterator; - - // METHODS -private: - void removeKeyFromOldVal(const T_Key& k, const T_Value& oldVal) { - // The value of 'k' is about to change, or, 'k' is about to be - // removed from the map. - // Clear the m_vals mapping for k. - KeySet& keysAtOldVal = m_vals[oldVal]; - const size_t erased = keysAtOldVal.erase(k); - UASSERT(erased == 1, "removeKeyFromOldVal() removal key not found"); - if (keysAtOldVal.empty()) { - // Don't keep empty sets in the value map. 
- m_vals.erase(oldVal); - } + const_iterator find(const T_Key& key) const { + const auto kiIt = m_kiMap.find(key); + if (kiIt == m_kiMap.end()) return cend(); + return kiIt->second; } - void removeKeyFromOldVal(iterator it) { - it.m_valIt->second.erase(it.m_keyIt); - if (it.m_valIt->second.empty()) m_vals.erase(it.m_valIt); - } - -public: - iterator begin() { - const auto valIt = m_vals.begin(); - if (valIt == m_vals.end()) return end(); - const auto keyIt = valIt->second.begin(); - return iterator(valIt, keyIt, this); - } - const_iterator begin() const { - SortByValueMap* const mutp = const_cast(this); - const auto valIt = mutp->m_vals.begin(); - if (valIt == mutp->m_vals.end()) return end(); - const auto keyIt = valIt->second.begin(); - return const_iterator(valIt, keyIt, mutp); - } - iterator end() { return iterator(this); } - const_iterator end() const { - // Safe to cast away const; the const_iterator will still enforce - // it. Same for the const begin() below. - return const_iterator(const_cast(this)); - } - reverse_iterator rbegin() { return reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - - iterator find(const T_Key& k) { - const auto kvit = m_keys.find(k); - if (kvit == m_keys.end()) return end(); - - const auto valIt = m_vals.find(kvit->second); - const auto keyIt = valIt->second.find(k); - return iterator(valIt, keyIt, this); - } - const_iterator find(const T_Key& k) const { - SortByValueMap* const mutp = const_cast(this); - const auto kvit = mutp->m_keys.find(k); - if (kvit == mutp->m_keys.end()) return end(); - - const auto valIt = mutp->m_vals.find(kvit->second); - const auto keyIt = valIt->second.find(k); - return const_iterator(valIt, keyIt, mutp); - } - void set(const T_Key& k, const T_Value& v) { - const auto kvit = m_keys.find(k); - if (kvit 
!= m_keys.end()) { - if (kvit->second == v) { - return; // LCOV_EXCL_LINE // Same value already present; stop. - } - // Must remove element from m_vals[oldValue] - removeKeyFromOldVal(k, kvit->second); - } - m_keys[k] = v; - m_vals[v].insert(k); - } - size_t erase(const T_Key& k) { - const auto kvit = m_keys.find(k); - if (kvit == m_keys.end()) return 0; - removeKeyFromOldVal(k, kvit->second); - m_keys.erase(kvit); + size_t erase(const T_Key& key) { + const auto kiIt = m_kiMap.find(key); + if (kiIt == m_kiMap.end()) return 0; + m_pairs.erase(kiIt->second); + m_kiMap.erase(kiIt); return 1; } - void erase(const iterator& it) { - m_keys.erase(it.key()); - removeKeyFromOldVal(it); + void erase(const_iterator it) { + m_kiMap.erase(it->first); + m_pairs.erase(it); } - void erase(const reverse_iterator& it) { - erase(*it); // Dereferencing returns a copy of the forward iterator + void erase(const_reverse_iterator rit) { + m_kiMap.erase(rit->first); + m_pairs.erase(std::next(rit).base()); } - bool has(const T_Key& k) const { return (m_keys.find(k) != m_keys.end()); } - bool empty() const { return m_keys.empty(); } - // Look up a value. Returns a reference for efficiency. Note this must - // be a const reference, otherwise the client could corrupt the sorted - // order of m_byValue by reaching through and changing the value. - const T_Value& at(const T_Key& k) const { - const auto kvit = m_keys.find(k); - UASSERT(kvit != m_keys.end(), "at() lookup key not found"); - return kvit->second; + bool has(const T_Key& key) const { return m_kiMap.count(key); } + bool empty() const { return m_pairs.empty(); } + // Returns const reference. + const T_Value& at(const T_Key& key) const { return m_kiMap.at(key)->second; } + // Note this returns const_iterator + template // + std::pair emplace(const T_Key& key, Args&&... 
args) { + const auto kiEmp = m_kiMap.emplace(key, end()); + if (kiEmp.second) { + const auto result = m_pairs.emplace(key, std::forward(args)...); +#if VL_DEBUG + UASSERT(result.second, "Should not be in set yet"); +#endif + kiEmp.first->second = result.first; + return result; + } + return {kiEmp.first->second, false}; + } + // Invalidates iterators + void update(const_iterator it, T_Value value) { + const auto kiIt = m_kiMap.find(it->first); + m_pairs.erase(it); + kiIt->second = m_pairs.emplace(kiIt->first, value).first; } - -private: - VL_UNCOPYABLE(SortByValueMap); }; //###################################################################### @@ -333,7 +138,7 @@ private: /// V3Scoreboard takes a set of Elem*'s, each having some score. /// Scores are assigned by a user-supplied scoring function. /// -/// At any time, the V3Scoreboard can return the elem with the "best" score +/// At any time, the V3Scoreboard can return the elem with the "best" score /// among those elements whose scores are known. /// /// The best score is the _lowest_ score. This makes sense in contexts @@ -418,9 +223,9 @@ public: // reflected in the result of bestp(). Otherwise, bestp() only // considers elements that aren't pending rescore. const T_Elem* bestp() { - const auto result = m_sorted.begin(); - if (VL_UNLIKELY(result == m_sorted.end())) return nullptr; - return (*result).key(); + const auto it = m_sorted.begin(); + if (VL_UNLIKELY(it == m_sorted.end())) return nullptr; + return it->first; } // Tell the scoreboard that this element's score may have changed. @@ -444,20 +249,18 @@ public: bool needsRescore() { return !m_unknown.empty(); } // False if elp's score is known to V3Scoreboard, // else true if elp's score is unknown until the next rescore(). - bool needsRescore(const T_Elem* elp) { return (m_unknown.find(elp) != m_unknown.end()); } + bool needsRescore(const T_Elem* elp) { return m_unknown.count(elp); } // Retrieve the last known score for an element.
- T_Score cachedScore(const T_Elem* elp) { - const auto result = m_sorted.find(elp); - UASSERT(result != m_sorted.end(), "V3Scoreboard::cachedScore() failed to find element"); - return (*result).value(); - } + T_Score cachedScore(const T_Elem* elp) { return m_sorted.at(elp); } // For each element whose score is unknown to V3Scoreboard, // call the client's scoring function to get a new score, // and sort all elements by their current score. void rescore() { for (const T_Elem* elp : m_unknown) { - const T_Score sortScore = m_scoreFnp(elp); - m_sorted.set(elp, sortScore); + VL_ATTR_UNUSED const bool exists = !m_sorted.emplace(elp, m_scoreFnp(elp)).second; +#if VL_DEBUG + UASSERT(!exists, "Should not be in both m_unknown and m_sorted"); +#endif } m_unknown.clear(); } From 96a4b3e5a5d69bbfeec8911128a3c095ff82adaa Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Fri, 5 Aug 2022 10:56:57 +0100 Subject: [PATCH 077/119] Update clang-format config and apply - Regroup and sort #include directives (like we used to, but automatic) - Set AlwaysBreakTemplateDeclarations to true --- .clang-format | 31 ++++-- include/verilated.cpp | 13 +-- include/verilated.h | 6 +- include/verilated_cov.cpp | 7 +- include/verilated_cov.h | 4 +- include/verilated_dpi.cpp | 2 + include/verilated_dpi.h | 1 + include/verilated_fst_c.h | 18 ++-- include/verilated_fst_sc.cpp | 1 + include/verilated_fst_sc.h | 3 +- include/verilated_profiler.cpp | 4 +- include/verilated_profiler.h | 4 +- include/verilated_save.cpp | 4 +- include/verilated_save.h | 1 + include/verilated_syms.h | 1 + include/verilated_threads.cpp | 1 + include/verilated_threads.h | 3 +- include/verilated_trace.h | 16 +-- include/verilated_trace_imp.h | 58 ++++++---- include/verilated_types.h | 93 ++++++++++------ include/verilated_vcd_c.cpp | 3 +- include/verilated_vcd_c.h | 18 ++-- include/verilated_vcd_sc.cpp | 1 + include/verilated_vcd_sc.h | 1 + include/verilated_vpi.cpp | 3 +- include/verilated_vpi.h | 1 + include/verilatedos.h | 13 
++- src/V3Active.cpp | 3 +- src/V3ActiveTop.cpp | 5 +- src/V3Assert.cpp | 3 +- src/V3Assert.h | 2 +- src/V3AssertPre.cpp | 3 +- src/V3Ast.cpp | 5 +- src/V3Ast.h | 100 ++++++++++++------ src/V3AstUserAllocator.h | 5 +- src/V3Begin.cpp | 3 +- src/V3Branch.cpp | 3 +- src/V3Broken.cpp | 3 +- src/V3CCtors.cpp | 5 +- src/V3CUse.cpp | 3 +- src/V3Case.cpp | 3 +- src/V3Cast.cpp | 3 +- src/V3Cdc.cpp | 5 +- src/V3Changed.cpp | 5 +- src/V3Class.cpp | 3 +- src/V3Clean.cpp | 3 +- src/V3Clock.cpp | 3 +- src/V3Combine.cpp | 7 +- src/V3Common.cpp | 3 +- src/V3Config.cpp | 6 +- src/V3Config.h | 2 +- src/V3Const.cpp | 7 +- src/V3Coverage.cpp | 3 +- src/V3CoverageJoin.cpp | 3 +- src/V3Dead.cpp | 3 +- src/V3Delayed.cpp | 3 +- src/V3Depth.cpp | 3 +- src/V3DepthBlock.cpp | 3 +- src/V3Descope.cpp | 3 +- src/V3DupFinder.cpp | 3 +- src/V3DupFinder.h | 2 +- src/V3EmitCBase.cpp | 1 + src/V3EmitCBase.h | 6 +- src/V3EmitCConstPool.cpp | 2 +- src/V3EmitCFunc.cpp | 3 +- src/V3EmitCFunc.h | 2 +- src/V3EmitCHeaders.cpp | 2 +- src/V3EmitCImp.cpp | 4 +- src/V3EmitCInlines.cpp | 2 +- src/V3EmitCMain.cpp | 5 +- src/V3EmitCMake.cpp | 8 +- src/V3EmitCModel.cpp | 2 +- src/V3EmitCSyms.cpp | 2 +- src/V3EmitMk.cpp | 5 +- src/V3EmitV.cpp | 3 +- src/V3EmitXml.cpp | 5 +- src/V3Expand.cpp | 5 +- src/V3File.cpp | 6 +- src/V3File.h | 8 +- src/V3FileLine.h | 4 +- src/V3Force.cpp | 4 +- src/V3Gate.cpp | 7 +- src/V3GenClk.cpp | 3 +- src/V3Global.cpp | 1 + src/V3Global.h | 3 +- src/V3Graph.cpp | 5 +- src/V3GraphAlg.cpp | 7 +- src/V3GraphPathChecker.cpp | 5 +- src/V3GraphStream.h | 3 +- src/V3Hash.h | 8 +- src/V3Hasher.h | 2 +- src/V3HierBlock.cpp | 13 +-- src/V3HierBlock.h | 2 +- src/V3Inline.cpp | 8 +- src/V3Inst.cpp | 3 +- src/V3InstrCount.cpp | 3 +- src/V3LangCode.h | 2 +- src/V3Life.cpp | 5 +- src/V3LifePost.cpp | 9 +- src/V3LinkCells.cpp | 9 +- src/V3LinkDot.cpp | 7 +- src/V3LinkDot.h | 2 +- src/V3LinkInc.cpp | 3 +- src/V3LinkJump.cpp | 3 +- src/V3LinkLValue.cpp | 3 +- src/V3LinkLevel.cpp | 3 +- src/V3LinkLevel.h 
| 2 +- src/V3LinkParse.cpp | 3 +- src/V3LinkResolve.cpp | 5 +- src/V3List.h | 12 ++- src/V3Localize.cpp | 5 +- src/V3MergeCond.cpp | 7 +- src/V3Name.cpp | 3 +- src/V3Number.cpp | 3 +- src/V3Number_test.cpp | 2 +- src/V3OptionParser.cpp | 15 ++- src/V3OptionParser.h | 3 +- src/V3Options.cpp | 9 +- src/V3Order.cpp | 11 +- src/V3Param.cpp | 7 +- src/V3ParseImp.cpp | 14 +-- src/V3Partition.cpp | 10 +- src/V3PreProc.cpp | 5 +- src/V3PreProc.h | 4 +- src/V3PreShell.cpp | 7 +- src/V3Premit.cpp | 3 +- src/V3ProtectLib.cpp | 5 +- src/V3Randomize.cpp | 3 +- src/V3Reloop.cpp | 5 +- src/V3Scope.cpp | 3 +- src/V3Scoreboard.h | 2 +- src/V3Simulate.h | 4 +- src/V3Slice.cpp | 3 +- src/V3Split.cpp | 7 +- src/V3SplitAs.cpp | 5 +- src/V3SplitVar.cpp | 3 +- src/V3Stats.cpp | 3 +- src/V3StatsReport.cpp | 4 +- src/V3String.cpp | 2 +- src/V3String.h | 3 +- src/V3Subst.cpp | 5 +- src/V3SymTable.h | 2 +- src/V3TSP.cpp | 17 +-- src/V3Table.cpp | 5 +- src/V3Task.cpp | 5 +- src/V3Task.h | 2 +- src/V3Trace.cpp | 9 +- src/V3TraceDecl.cpp | 6 +- src/V3Tristate.cpp | 7 +- src/V3Undriven.cpp | 5 +- src/V3Unknown.cpp | 3 +- src/V3Unroll.cpp | 7 +- src/V3Unroll.h | 2 +- src/V3VariableOrder.cpp | 3 +- src/V3Waiver.cpp | 3 +- src/V3Waiver.h | 2 +- src/V3Width.cpp | 5 +- src/V3WidthCommit.h | 2 +- src/V3WidthSel.cpp | 4 +- src/Verilator.cpp | 7 +- src/VlcPoint.h | 5 +- src/VlcTest.h | 2 +- src/VlcTop.cpp | 4 +- src/VlcTop.h | 2 +- test_regress/t/TestSimulator.h | 1 + test_regress/t/t_const_bitoptree_bug3096.cpp | 3 +- test_regress/t/t_cover_lib_1.out | 12 +-- test_regress/t/t_cover_lib_1_per_instance.out | 14 +-- test_regress/t/t_cover_lib_2.out | 4 +- test_regress/t/t_cover_lib_3.out | 4 +- test_regress/t/t_cover_lib_c.cpp | 8 +- test_regress/t/t_dpi_accessors.cpp | 9 +- test_regress/t/t_dpi_arg_inout_unpack.cpp | 59 +++++++---- test_regress/t/t_dpi_arg_input_unpack.cpp | 26 +++-- test_regress/t/t_dpi_arg_output_unpack.cpp | 43 +++++--- test_regress/t/t_dpi_context_c.cpp | 3 +- 
test_regress/t/t_dpi_display_c.cpp | 3 +- test_regress/t/t_dpi_export_c.cpp | 3 +- test_regress/t/t_dpi_imp_gen_c.cpp | 3 +- test_regress/t/t_dpi_import_c.cpp | 3 +- test_regress/t/t_dpi_lib_c.cpp | 4 +- test_regress/t/t_dpi_open_c.cpp | 4 +- test_regress/t/t_dpi_open_elem_c.cpp | 4 +- test_regress/t/t_dpi_open_oob_bad_c.cpp | 4 +- test_regress/t/t_dpi_open_vecval_c.cpp | 4 +- test_regress/t/t_dpi_openfirst_c.cpp | 4 +- test_regress/t/t_dpi_qw_c.cpp | 3 +- test_regress/t/t_dpi_result_type.cpp | 4 +- test_regress/t/t_dpi_shortcircuit_c.cpp | 3 +- test_regress/t/t_dpi_string_c.cpp | 3 +- test_regress/t/t_dpi_sys_c.cpp | 3 +- test_regress/t/t_dpi_threads_c.cpp | 3 +- test_regress/t/t_dpi_vams.cpp | 1 + test_regress/t/t_dpi_var.cpp | 4 +- test_regress/t/t_embed1_c.cpp | 4 +- test_regress/t/t_enum_public.cpp | 2 +- test_regress/t/t_flag_fi.cpp | 1 + test_regress/t/t_flag_ldflags_c.cpp | 3 +- test_regress/t/t_forceable_net.cpp | 5 +- test_regress/t/t_forceable_var.cpp | 5 +- test_regress/t/t_func_rand.cpp | 1 + test_regress/t/t_gantt_two.cpp | 4 +- test_regress/t/t_leak.cpp | 5 +- test_regress/t/t_math_imm2.cpp | 1 + test_regress/t/t_mem_slot.cpp | 4 +- test_regress/t/t_multitop_sig.cpp | 5 +- test_regress/t/t_order_dpi_export_1.cpp | 3 +- test_regress/t/t_order_dpi_export_2.cpp | 3 +- test_regress/t/t_order_dpi_export_3.cpp | 3 +- test_regress/t/t_order_dpi_export_4.cpp | 3 +- test_regress/t/t_order_dpi_export_5.cpp | 3 +- test_regress/t/t_order_multidriven.cpp | 3 +- test_regress/t/t_order_quad.cpp | 1 + test_regress/t/t_param_public.cpp | 2 +- test_regress/t/t_protect_ids_c.cpp | 3 +- test_regress/t/t_runflag_uninit_bad.cpp | 4 +- test_regress/t/t_savable_open_bad2.cpp | 5 +- test_regress/t/t_scope_map.cpp | 5 +- test_regress/t/t_time_vpi_c.cpp | 5 +- test_regress/t/t_trace_cat.cpp | 3 +- test_regress/t/t_trace_cat_fst.cpp | 3 +- test_regress/t/t_trace_dumpvars_dyn.cpp | 3 +- test_regress/t/t_trace_public_func.cpp | 3 +- test_regress/t/t_var_overwidth_bad.cpp | 3 +- 
test_regress/t/t_vpi_cb_iter.cpp | 9 +- test_regress/t/t_vpi_cbs_called.cpp | 9 +- test_regress/t/t_vpi_finish_c.cpp | 5 +- test_regress/t/t_vpi_get.cpp | 13 +-- test_regress/t/t_vpi_memory.cpp | 15 +-- test_regress/t/t_vpi_module.cpp | 13 +-- test_regress/t/t_vpi_param.cpp | 13 +-- test_regress/t/t_vpi_release_dup_bad_c.cpp | 3 +- test_regress/t/t_vpi_stop_bad_c.cpp | 5 +- test_regress/t/t_vpi_time_cb.cpp | 16 +-- test_regress/t/t_vpi_time_cb_c.cpp | 3 +- test_regress/t/t_vpi_unimpl.cpp | 10 +- test_regress/t/t_vpi_var.cpp | 15 +-- test_regress/t/t_vpi_zero_time_cb.cpp | 17 +-- test_regress/t/t_wrapper_context.cpp | 6 +- test_regress/t/t_wrapper_legacy.cpp | 3 +- test_regress/t/t_x_assign.cpp | 4 +- 241 files changed, 947 insertions(+), 580 deletions(-) diff --git a/.clang-format b/.clang-format index 251eebe52..b41d0c25a 100644 --- a/.clang-format +++ b/.clang-format @@ -17,7 +17,7 @@ AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: false +AlwaysBreakTemplateDeclarations: true BinPackArguments: true BinPackParameters: true BraceWrapping: @@ -59,15 +59,27 @@ ForEachMacros: - foreach - Q_FOREACH - BOOST_FOREACH -IncludeBlocks: Preserve + +# Include grouping/sorting +SortIncludes: true +IncludeBlocks: Regroup IncludeCategories: - - Regex: '^"(llvm|llvm-c|clang|clang-c)/' - Priority: 2 - - Regex: '^(<|"(gtest|gmock|isl|json)/)' - Priority: 3 - - Regex: '.*' - Priority: 1 -IncludeIsMainRegex: '(Test)?$' + - Regex: '"(config_build|verilated_config|verilatedos)\.h"' + Priority: -1 # Sepecials before main header + - Regex: '(<|")verilated.*' + Priority: 1 # Runtime headers + - Regex: '"V3.*__gen.*\.h"' + Priority: 3 # Generated internal headers separately + - Regex: '"V3.*"' + Priority: 2 # Internal header + - Regex: '".*"' + Priority: 4 # Other non-system headers + - Regex: '<[[:alnum:]_.]+>' + Priority: 5 # Simple system headers next 
+ - Regex: '<.*>' + Priority: 6 # Other system headers next +IncludeIsMainRegex: '$' + IndentCaseLabels: false IndentPPDirectives: None IndentWidth: 4 @@ -91,7 +103,6 @@ PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left ReflowComments: true -SortIncludes: false SortUsingDeclarations: true SpaceAfterCStyleCast: false SpaceAfterTemplateKeyword: true diff --git a/include/verilated.cpp b/include/verilated.cpp index 90b5f46c9..ee5090507 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -47,21 +47,22 @@ #define VERILATOR_VERILATED_CPP_ -#include "verilatedos.h" -#include "verilated_imp.h" - #include "verilated_config.h" +#include "verilatedos.h" + +#include "verilated_imp.h" #include #include #include #include -#include -#include // mkdir -#include #include +#include +#include #include +#include // mkdir + // clang-format off #if defined(_WIN32) || defined(__MINGW32__) # include // mkdir diff --git a/include/verilated.h b/include/verilated.h index bed666c7d..5700e67be 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -91,7 +91,8 @@ class VerilatedFstC; class VerilatedFstSc; class VerilatedScope; class VerilatedScopeNameMap; -template class VerilatedTrace; +template +class VerilatedTrace; class VerilatedTraceConfig; class VerilatedVar; class VerilatedVarNameMap; @@ -283,7 +284,8 @@ public: private: // The following are for use by Verilator internals only - template friend class VerilatedTrace; + template + friend class VerilatedTrace; // Run-time trace configuration requested by this model virtual std::unique_ptr traceConfig() const; }; diff --git a/include/verilated_cov.cpp b/include/verilated_cov.cpp index 6c0f72340..3932afef6 100644 --- a/include/verilated_cov.cpp +++ b/include/verilated_cov.cpp @@ -22,8 +22,10 @@ //============================================================================= #include "verilatedos.h" -#include "verilated.h" + #include "verilated_cov.h" + +#include "verilated.h" #include 
"verilated_cov_key.h" #include @@ -69,7 +71,8 @@ public: // But only local to this file // This isn't in the header file for auto-magic conversion because it // inlines to too much code and makes compilation too slow. -template class VerilatedCoverItemSpec final : public VerilatedCovImpItem { +template +class VerilatedCoverItemSpec final : public VerilatedCovImpItem { private: // MEMBERS T* m_countp; // Count value diff --git a/include/verilated_cov.h b/include/verilated_cov.h index d110e91c3..4c3b1550d 100644 --- a/include/verilated_cov.h +++ b/include/verilated_cov.h @@ -26,6 +26,7 @@ #define VERILATOR_VERILATED_COV_H_ #include "verilatedos.h" + #include "verilated.h" #include @@ -88,7 +89,8 @@ class VerilatedCovImp; //============================================================================= // Convert VL_COVER_INSERT value arguments to strings, is \internal -template std::string vlCovCvtToStr(const T& t) VL_PURE { +template +std::string vlCovCvtToStr(const T& t) VL_PURE { std::ostringstream os; os << t; return os.str(); diff --git a/include/verilated_dpi.cpp b/include/verilated_dpi.cpp index 0eefa6e5b..caa72af85 100644 --- a/include/verilated_dpi.cpp +++ b/include/verilated_dpi.cpp @@ -28,7 +28,9 @@ #define VERILATOR_VERILATED_DPI_CPP_ #include "verilatedos.h" + #include "verilated_dpi.h" + #include "verilated_imp.h" // On MSVC++ we need svdpi.h to declare exports, not imports diff --git a/include/verilated_dpi.h b/include/verilated_dpi.h index f37679f91..31989b0b5 100644 --- a/include/verilated_dpi.h +++ b/include/verilated_dpi.h @@ -27,6 +27,7 @@ #define VERILATOR_VERILATED_DPI_H_ #include "verilatedos.h" + #include "verilated.h" // Also presumably included by caller #include "verilated_sym_props.h" diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h index b41b5a92c..73c821a60 100644 --- a/include/verilated_fst_c.h +++ b/include/verilated_fst_c.h @@ -118,12 +118,18 @@ public: #ifndef DOXYGEN // Declare specialization here as it's used in 
VerilatedFstC just below -template <> void VerilatedFst::Super::dump(uint64_t time); -template <> void VerilatedFst::Super::set_time_unit(const char* unitp); -template <> void VerilatedFst::Super::set_time_unit(const std::string& unit); -template <> void VerilatedFst::Super::set_time_resolution(const char* unitp); -template <> void VerilatedFst::Super::set_time_resolution(const std::string& unit); -template <> void VerilatedFst::Super::dumpvars(int level, const std::string& hier); +template <> +void VerilatedFst::Super::dump(uint64_t time); +template <> +void VerilatedFst::Super::set_time_unit(const char* unitp); +template <> +void VerilatedFst::Super::set_time_unit(const std::string& unit); +template <> +void VerilatedFst::Super::set_time_resolution(const char* unitp); +template <> +void VerilatedFst::Super::set_time_resolution(const std::string& unit); +template <> +void VerilatedFst::Super::dumpvars(int level, const std::string& hier); #endif //============================================================================= diff --git a/include/verilated_fst_sc.cpp b/include/verilated_fst_sc.cpp index 084546435..2acb38781 100644 --- a/include/verilated_fst_sc.cpp +++ b/include/verilated_fst_sc.cpp @@ -22,6 +22,7 @@ //============================================================================= #include "verilatedos.h" + #include "verilated_fst_sc.h" //====================================================================== diff --git a/include/verilated_fst_sc.h b/include/verilated_fst_sc.h index 9c7c07c4c..ae0943d8b 100644 --- a/include/verilated_fst_sc.h +++ b/include/verilated_fst_sc.h @@ -23,8 +23,9 @@ #define _VERILATED_FST_SC_H_ 1 #include "verilatedos.h" -#include "verilated_sc.h" + #include "verilated_fst_c.h" +#include "verilated_sc.h" #include diff --git a/include/verilated_profiler.cpp b/include/verilated_profiler.cpp index d65442f44..b04cc34e3 100644 --- a/include/verilated_profiler.cpp +++ b/include/verilated_profiler.cpp @@ -17,6 +17,7 @@ 
//============================================================================= #include "verilatedos.h" + #include "verilated_profiler.h" #if VL_THREADED @@ -60,7 +61,8 @@ uint16_t VlExecutionRecord::getcpu() { //============================================================================= // VlExecutionProfiler implementation -template static size_t roundUptoMultipleOf(size_t value) { +template +static size_t roundUptoMultipleOf(size_t value) { static_assert((N & (N - 1)) == 0, "'N' must be a power of 2"); size_t mask = N - 1; return (value + mask) & ~mask; diff --git a/include/verilated_profiler.h b/include/verilated_profiler.h index dcb906b45..a237ee5cf 100644 --- a/include/verilated_profiler.h +++ b/include/verilated_profiler.h @@ -23,6 +23,7 @@ #define VERILATOR_VERILATED_PROFILER_H_ #include "verilatedos.h" + #include "verilated.h" #include @@ -192,7 +193,8 @@ public: //============================================================================= // VlPgoProfiler is for collecting profiling data for PGO -template class VlPgoProfiler final { +template +class VlPgoProfiler final { // TYPES struct Record final { const std::string m_name; // Hashed name of mtask/etc diff --git a/include/verilated_save.cpp b/include/verilated_save.cpp index 20d4014ad..f2c80363d 100644 --- a/include/verilated_save.cpp +++ b/include/verilated_save.cpp @@ -24,8 +24,10 @@ #define VERILATOR_VERILATED_SAVE_CPP_ #include "verilatedos.h" -#include "verilated.h" + #include "verilated_save.h" + +#include "verilated.h" #include "verilated_imp.h" #include diff --git a/include/verilated_save.h b/include/verilated_save.h index 728c2aead..71d01fa49 100644 --- a/include/verilated_save.h +++ b/include/verilated_save.h @@ -23,6 +23,7 @@ #define VERILATOR_VERILATED_SAVE_C_H_ #include "verilatedos.h" + #include "verilated.h" #include diff --git a/include/verilated_syms.h b/include/verilated_syms.h index 72243c268..b2ca74849 100644 --- a/include/verilated_syms.h +++ b/include/verilated_syms.h @@ 
-30,6 +30,7 @@ #define VERILATOR_VERILATED_SYMS_H_ #include "verilatedos.h" + #include "verilated.h" #include "verilated_sym_props.h" diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index 6696d738d..9d6bdedc9 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -22,6 +22,7 @@ //============================================================================= #include "verilatedos.h" + #include "verilated_threads.h" #include diff --git a/include/verilated_threads.h b/include/verilated_threads.h index 5a0e35b9d..59658bf20 100644 --- a/include/verilated_threads.h +++ b/include/verilated_threads.h @@ -24,6 +24,7 @@ #define VERILATOR_VERILATED_THREADS_H_ #include "verilatedos.h" + #include "verilated.h" // for VerilatedMutex and clang annotations #ifndef VL_THREADED @@ -169,7 +170,7 @@ public: ~VlWorkerThread(); // METHODS - template // + template inline void dequeWork(ExecRec* workp) VL_MT_SAFE_EXCLUDES(m_mutex) { // Spin for a while, waiting for new data if VL_CONSTEXPR_CXX17 (SpinWait) { diff --git a/include/verilated_trace.h b/include/verilated_trace.h index a66166363..03ca2ba2c 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -43,15 +43,18 @@ // clang-format on class VlThreadPool; -template class VerilatedTraceBuffer; -template class VerilatedTraceOffloadBuffer; +template +class VerilatedTraceBuffer; +template +class VerilatedTraceOffloadBuffer; #ifdef VL_THREADED //============================================================================= // Offloaded tracing // A simple synchronized first in first out queue -template class VerilatedThreadQueue final { // LCOV_EXCL_LINE // lcov bug +template +class VerilatedThreadQueue final { // LCOV_EXCL_LINE // lcov bug private: mutable VerilatedMutex m_mutex; // Protects m_queue std::condition_variable_any m_cv; @@ -137,7 +140,8 @@ public: // T_Trace is the format specific subclass of VerilatedTrace. 
// T_Buffer is the format specific base class of VerilatedTraceBuffer. -template class VerilatedTrace VL_NOT_FINAL { +template +class VerilatedTrace VL_NOT_FINAL { public: using Buffer = VerilatedTraceBuffer; using OffloadBuffer = VerilatedTraceOffloadBuffer; @@ -384,7 +388,7 @@ public: // T_Buffer is the format specific base class of VerilatedTraceBuffer. // The format-specific hot-path methods use duck-typing via T_Buffer for performance. -template // +template class VerilatedTraceBuffer VL_NOT_FINAL : public T_Buffer { protected: // Type of the owner trace file @@ -477,7 +481,7 @@ public: // T_Buffer is the format specific base class of VerilatedTraceBuffer. // The format-specific hot-path methods use duck-typing via T_Buffer for performance. -template // +template class VerilatedTraceOffloadBuffer final : public VerilatedTraceBuffer { using typename VerilatedTraceBuffer::Trace; diff --git a/include/verilated_trace_imp.h b/include/verilated_trace_imp.h index 1401f67eb..0663986f1 100644 --- a/include/verilated_trace_imp.h +++ b/include/verilated_trace_imp.h @@ -82,7 +82,8 @@ static std::string doubleToTimescale(double value) { //========================================================================= // Buffer management -template <> uint32_t* VerilatedTrace::getOffloadBuffer() { +template <> +uint32_t* VerilatedTrace::getOffloadBuffer() { uint32_t* bufferp; // Some jitter is expected, so some number of alternative offlaod buffers are // required, but don't allocate more than 8 buffers. @@ -101,7 +102,8 @@ template <> uint32_t* VerilatedTrace::getOffloadBuffer() { return bufferp; } -template <> void VerilatedTrace::waitForOffloadBuffer(const uint32_t* buffp) { +template <> +void VerilatedTrace::waitForOffloadBuffer(const uint32_t* buffp) { // Slow path code only called on flush/shutdown, so use a simple algorithm. // Collect buffers from worker and stash them until we get the one we want. 
std::deque stash; @@ -116,7 +118,8 @@ template <> void VerilatedTrace::waitForOffloadBuffer(const //========================================================================= // Worker thread -template <> void VerilatedTrace::offloadWorkerThreadMain() { +template <> +void VerilatedTrace::offloadWorkerThreadMain() { bool shutdown = false; do { @@ -231,7 +234,8 @@ template <> void VerilatedTrace::offloadWorkerThreadMain() { } while (VL_LIKELY(!shutdown)); } -template <> void VerilatedTrace::shutdownOffloadWorker() { +template <> +void VerilatedTrace::shutdownOffloadWorker() { // If the worker thread is not running, done.. if (!m_workerThread) return; @@ -251,7 +255,8 @@ template <> void VerilatedTrace::shutdownOffloadWorker() { //============================================================================= // Life cycle -template <> void VerilatedTrace::closeBase() { +template <> +void VerilatedTrace::closeBase() { #ifdef VL_THREADED if (offload()) { shutdownOffloadWorker(); @@ -263,7 +268,8 @@ template <> void VerilatedTrace::closeBase() { #endif } -template <> void VerilatedTrace::flushBase() { +template <> +void VerilatedTrace::flushBase() { #ifdef VL_THREADED if (offload()) { // Hand an empty buffer to the worker thread @@ -280,12 +286,14 @@ template <> void VerilatedTrace::flushBase() { //============================================================================= // Callbacks to run on global events -template <> void VerilatedTrace::onFlush(void* selfp) { +template <> +void VerilatedTrace::onFlush(void* selfp) { // This calls 'flush' on the derived class (which must then get any mutex) reinterpret_cast(selfp)->flush(); } -template <> void VerilatedTrace::onExit(void* selfp) { +template <> +void VerilatedTrace::onExit(void* selfp) { // This calls 'close' on the derived class (which must then get any mutex) reinterpret_cast(selfp)->close(); } @@ -293,12 +301,14 @@ template <> void VerilatedTrace::onExit(void* selfp) { 
//============================================================================= // VerilatedTrace -template <> VerilatedTrace::VerilatedTrace() { +template <> +VerilatedTrace::VerilatedTrace() { set_time_unit(Verilated::threadContextp()->timeunitString()); set_time_resolution(Verilated::threadContextp()->timeprecisionString()); } -template <> VerilatedTrace::~VerilatedTrace() { +template <> +VerilatedTrace::~VerilatedTrace() { if (m_sigs_oldvalp) VL_DO_CLEAR(delete[] m_sigs_oldvalp, m_sigs_oldvalp = nullptr); if (m_sigs_enabledp) VL_DO_CLEAR(delete[] m_sigs_enabledp, m_sigs_enabledp = nullptr); Verilated::removeFlushCb(VerilatedTrace::onFlush, this); @@ -309,7 +319,8 @@ template <> VerilatedTrace::~VerilatedTrace() { //========================================================================= // Internals available to format specific implementations -template <> void VerilatedTrace::traceInit() VL_MT_UNSAFE { +template <> +void VerilatedTrace::traceInit() VL_MT_UNSAFE { // Note: It is possible to re-open a trace file (VCD in particular), // so we must reset the next code here, but it must have the same number // of codes on re-open @@ -419,14 +430,16 @@ bool VerilatedTrace::declCode(uint32_t code, const char* nam //========================================================================= // Internals available to format specific implementations -template <> std::string VerilatedTrace::timeResStr() const { +template <> +std::string VerilatedTrace::timeResStr() const { return doubleToTimescale(m_timeRes); } //========================================================================= // External interface to client code -template <> void VerilatedTrace::set_time_unit(const char* unitp) VL_MT_SAFE { +template <> +void VerilatedTrace::set_time_unit(const char* unitp) VL_MT_SAFE { m_timeUnit = timescaleToDouble(unitp); } template <> @@ -456,7 +469,7 @@ void VerilatedTrace::dumpvars(int level, const std::string& } #ifdef VL_THREADED -template <> // +template <> void 
VerilatedTrace::parallelWorkerTask(void* datap, bool) { ParallelWorkerData* const wdp = reinterpret_cast(datap); // Run the task @@ -467,7 +480,8 @@ void VerilatedTrace::parallelWorkerTask(void* datap, bool) { if (wdp->m_waiting) wdp->m_cv.notify_one(); } -template <> VL_ATTR_NOINLINE void VerilatedTrace::ParallelWorkerData::wait() { +template <> +VL_ATTR_NOINLINE void VerilatedTrace::ParallelWorkerData::wait() { // Spin for a while, waiting for the buffer to become ready for (int i = 0; i < VL_LOCK_SPINS; ++i) { if (VL_LIKELY(m_ready.load(std::memory_order_relaxed))) return; @@ -723,11 +737,13 @@ void VerilatedTrace::addCleanupCb(cleanupCb_t cb, void* user addCallbackRecord(m_cleanupCbs, CallbackRecord{cb, userp}); } -template <> void VerilatedTrace::pushNamePrefix(const std::string& prefix) { +template <> +void VerilatedTrace::pushNamePrefix(const std::string& prefix) { m_namePrefixStack.push_back(m_namePrefixStack.back() + prefix); } -template <> void VerilatedTrace::popNamePrefix(unsigned count) { +template <> +void VerilatedTrace::popNamePrefix(unsigned count) { while (count--) m_namePrefixStack.pop_back(); assert(!m_namePrefixStack.empty()); } @@ -825,7 +841,7 @@ static inline void cvtQDataToStr(char* dstp, QData value) { //========================================================================= // VerilatedTraceBuffer -template <> // +template <> VerilatedTraceBuffer::VerilatedTraceBuffer(Trace& owner) : VL_BUF_T{owner} , m_sigs_oldvalp{owner.m_sigs_oldvalp} @@ -836,7 +852,7 @@ VerilatedTraceBuffer::VerilatedTraceBuffer(Trace& owner) // that this file must be included in the format specific implementation, so // the emit* functions can be inlined for performance. 
-template <> // +template <> void VerilatedTraceBuffer::fullBit(uint32_t* oldp, CData newval) { const uint32_t code = oldp - m_sigs_oldvalp; *oldp = newval; // Still copy even if not tracing so chg doesn't call full @@ -884,7 +900,7 @@ void VerilatedTraceBuffer::fullWData(uint32_t* oldp, const WData* newv emitWData(code, newvalp, bits); } -template <> // +template <> void VerilatedTraceBuffer::fullDouble(uint32_t* oldp, double newval) { const uint32_t code = oldp - m_sigs_oldvalp; *reinterpret_cast(oldp) = newval; @@ -897,7 +913,7 @@ void VerilatedTraceBuffer::fullDouble(uint32_t* oldp, double newval) { //========================================================================= // VerilatedTraceOffloadBuffer -template <> // +template <> VerilatedTraceOffloadBuffer::VerilatedTraceOffloadBuffer(VL_SUB_T& owner) : VerilatedTraceBuffer{owner} , m_offloadBufferWritep{owner.m_offloadBufferWritep} diff --git a/include/verilated_types.h b/include/verilated_types.h index 93906b1d3..d11aee5d7 100644 --- a/include/verilated_types.h +++ b/include/verilated_types.h @@ -131,7 +131,8 @@ public: static int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE; -template struct VlWide final { +template +struct VlWide final { // MEMBERS // This should be the only data member, otherwise generated static initializers need updating EData m_storage[T_Words]; // Contents of the packed array @@ -163,7 +164,8 @@ VlWide& VL_CVT_W_A(const WDataInP inp, const VlWide&) { return *((VlWide*)inp); } -template std::string VL_TO_STRING(const VlWide& obj) { +template +std::string VL_TO_STRING(const VlWide& obj) { return VL_TO_STRING_W(T_Words, obj.data()); } @@ -174,7 +176,8 @@ template std::string VL_TO_STRING(const VlWide& o // // Bound here is the maximum size() allowed, e.g. 
1 + SystemVerilog bound // For dynamic arrays it is always zero -template class VlQueue final { +template +class VlQueue final { private: // TYPES using Deque = std::deque; @@ -199,7 +202,8 @@ public: // Standard copy constructor works. Verilog: assoca = assocb // Also must allow conversion from a different T_MaxSize queue - template VlQueue operator=(const VlQueue& rhs) { + template + VlQueue operator=(const VlQueue& rhs) { m_deque = rhs.privateDeque(); if (VL_UNLIKELY(T_MaxSize && T_MaxSize < m_deque.size())) m_deque.resize(T_MaxSize - 1); return *this; @@ -330,7 +334,8 @@ public: // Methods void sort() { std::sort(m_deque.begin(), m_deque.end()); } - template void sort(Func with_func) { + template + void sort(Func with_func) { // with_func returns arbitrary type to use for the sort comparison std::sort(m_deque.begin(), m_deque.end(), [=](const T_Value& a, const T_Value& b) { // index number is meaninless with sort, as it changes @@ -338,7 +343,8 @@ public: }); } void rsort() { std::sort(m_deque.rbegin(), m_deque.rend()); } - template void rsort(Func with_func) { + template + void rsort(Func with_func) { // with_func returns arbitrary type to use for the sort comparison std::sort(m_deque.rbegin(), m_deque.rend(), [=](const T_Value& a, const T_Value& b) { // index number is meaninless with sort, as it changes @@ -373,7 +379,8 @@ public: } return out; } - template VlQueue find(Func with_func) const { + template + VlQueue find(Func with_func) const { VlQueue out; IData index = 0; for (const auto& i : m_deque) { @@ -382,7 +389,8 @@ public: } return out; } - template VlQueue find_index(Func with_func) const { + template + VlQueue find_index(Func with_func) const { VlQueue out; IData index = 0; for (const auto& i : m_deque) { @@ -391,7 +399,8 @@ public: } return out; } - template VlQueue find_first(Func with_func) const { + template + VlQueue find_first(Func with_func) const { // Can't use std::find_if as need index number IData index = 0; for (const auto& i : m_deque) 
{ @@ -400,7 +409,8 @@ public: } return VlQueue{}; } - template VlQueue find_first_index(Func with_func) const { + template + VlQueue find_first_index(Func with_func) const { IData index = 0; for (const auto& i : m_deque) { if (with_func(index, i)) return VlQueue::cons(index); @@ -408,7 +418,8 @@ public: } return VlQueue{}; } - template VlQueue find_last(Func with_func) const { + template + VlQueue find_last(Func with_func) const { IData index = m_deque.size() - 1; for (auto& item : vlstd::reverse_view(m_deque)) { if (with_func(index, item)) return VlQueue::cons(item); @@ -416,7 +427,8 @@ public: } return VlQueue{}; } - template VlQueue find_last_index(Func with_func) const { + template + VlQueue find_last_index(Func with_func) const { IData index = m_deque.size() - 1; for (auto& item : vlstd::reverse_view(m_deque)) { if (with_func(index, item)) return VlQueue::cons(index); @@ -442,7 +454,8 @@ public: for (const auto& i : m_deque) out += i; return out; } - template T_Value r_sum(Func with_func) const { + template + T_Value r_sum(Func with_func) const { T_Value out(0); // Type must have assignment operator IData index = 0; for (const auto& i : m_deque) out += with_func(index++, i); @@ -456,7 +469,8 @@ public: for (; it != m_deque.end(); ++it) out *= *it; return out; } - template T_Value r_product(Func with_func) const { + template + T_Value r_product(Func with_func) const { if (m_deque.empty()) return T_Value(0); auto it = m_deque.begin(); IData index = 0; @@ -474,7 +488,8 @@ public: for (; it != m_deque.end(); ++it) out &= *it; return out; } - template T_Value r_and(Func with_func) const { + template + T_Value r_and(Func with_func) const { if (m_deque.empty()) return T_Value(0); auto it = m_deque.begin(); IData index = 0; @@ -489,7 +504,8 @@ public: for (const auto& i : m_deque) out |= i; return out; } - template T_Value r_or(Func with_func) const { + template + T_Value r_or(Func with_func) const { T_Value out(0); // Type must have assignment operator IData index = 
0; for (const auto& i : m_deque) out |= with_func(index++, i); @@ -500,7 +516,8 @@ public: for (const auto& i : m_deque) out ^= i; return out; } - template T_Value r_xor(Func with_func) const { + template + T_Value r_xor(Func with_func) const { T_Value out(0); // Type must have assignment operator IData index = 0; for (const auto& i : m_deque) out ^= with_func(index++, i); @@ -520,7 +537,8 @@ public: } }; -template std::string VL_TO_STRING(const VlQueue& obj) { +template +std::string VL_TO_STRING(const VlQueue& obj) { return obj.to_string(); } @@ -529,7 +547,8 @@ template std::string VL_TO_STRING(const VlQueue& obj) { // There are no multithreaded locks on this; the base variable must // be protected by other means // -template class VlAssocArray final { +template +class VlAssocArray final { private: // TYPES using Map = std::map; @@ -654,19 +673,22 @@ public: } return out; } - template VlQueue find(Func with_func) const { + template + VlQueue find(Func with_func) const { VlQueue out; for (const auto& i : m_map) if (with_func(i.first, i.second)) out.push_back(i.second); return out; } - template VlQueue find_index(Func with_func) const { + template + VlQueue find_index(Func with_func) const { VlQueue out; for (const auto& i : m_map) if (with_func(i.first, i.second)) out.push_back(i.first); return out; } - template VlQueue find_first(Func with_func) const { + template + VlQueue find_first(Func with_func) const { const auto it = std::find_if(m_map.begin(), m_map.end(), [=](const std::pair& i) { return with_func(i.first, i.second); @@ -674,7 +696,8 @@ public: if (it == m_map.end()) return VlQueue{}; return VlQueue::cons(it->second); } - template VlQueue find_first_index(Func with_func) const { + template + VlQueue find_first_index(Func with_func) const { const auto it = std::find_if(m_map.begin(), m_map.end(), [=](const std::pair& i) { return with_func(i.first, i.second); @@ -682,7 +705,8 @@ public: if (it == m_map.end()) return VlQueue{}; return 
VlQueue::cons(it->first); } - template VlQueue find_last(Func with_func) const { + template + VlQueue find_last(Func with_func) const { const auto it = std::find_if(m_map.rbegin(), m_map.rend(), [=](const std::pair& i) { return with_func(i.first, i.second); @@ -690,7 +714,8 @@ public: if (it == m_map.rend()) return VlQueue{}; return VlQueue::cons(it->second); } - template VlQueue find_last_index(Func with_func) const { + template + VlQueue find_last_index(Func with_func) const { const auto it = std::find_if(m_map.rbegin(), m_map.rend(), [=](const std::pair& i) { return with_func(i.first, i.second); @@ -724,7 +749,8 @@ public: for (const auto& i : m_map) out += i.second; return out; } - template T_Value r_sum(Func with_func) const { + template + T_Value r_sum(Func with_func) const { T_Value out(0); // Type must have assignment operator for (const auto& i : m_map) out += with_func(i.first, i.second); return out; @@ -737,7 +763,8 @@ public: for (; it != m_map.end(); ++it) out *= it->second; return out; } - template T_Value r_product(Func with_func) const { + template + T_Value r_product(Func with_func) const { if (m_map.empty()) return T_Value(0); auto it = m_map.begin(); T_Value out{with_func(it->first, it->second)}; @@ -753,7 +780,8 @@ public: for (; it != m_map.end(); ++it) out &= it->second; return out; } - template T_Value r_and(Func with_func) const { + template + T_Value r_and(Func with_func) const { if (m_map.empty()) return T_Value(0); auto it = m_map.begin(); T_Value out{with_func(it->first, it->second)}; @@ -766,7 +794,8 @@ public: for (const auto& i : m_map) out |= i.second; return out; } - template T_Value r_or(Func with_func) const { + template + T_Value r_or(Func with_func) const { T_Value out(0); // Type must have assignment operator for (const auto& i : m_map) out |= with_func(i.first, i.second); return out; @@ -776,7 +805,8 @@ public: for (const auto& i : m_map) out ^= i.second; return out; } - template T_Value r_xor(Func with_func) const { + 
template + T_Value r_xor(Func with_func) const { T_Value out(0); // Type must have assignment operator for (const auto& i : m_map) out ^= with_func(i.first, i.second); return out; @@ -839,7 +869,8 @@ void VL_WRITEMEM_N(bool hex, int bits, const std::string& filename, /// This class may get exposed to a Verilated Model's top I/O, if the top /// IO has an unpacked array. -template struct VlUnpacked final { +template +struct VlUnpacked final { // MEMBERS // This should be the only data member, otherwise generated static initializers need updating T_Value m_storage[T_Depth]; // Contents of the unpacked array diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index 342ba7b2c..9681e1883 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -65,7 +65,8 @@ constexpr unsigned VL_TRACE_SUFFIX_ENTRY_SIZE = 8; // Size of a suffix entry //============================================================================= // Utility functions: TODO: put these in a common place and share them. 
-template static size_t roundUpToMultipleOf(size_t value) { +template +static size_t roundUpToMultipleOf(size_t value) { static_assert((N & (N - 1)) == 0, "'N' must be a power of 2"); size_t mask = N - 1; return (value + mask) & ~mask; diff --git a/include/verilated_vcd_c.h b/include/verilated_vcd_c.h index 0d2003eba..153401b29 100644 --- a/include/verilated_vcd_c.h +++ b/include/verilated_vcd_c.h @@ -151,12 +151,18 @@ public: #ifndef DOXYGEN // Declare specialization here as it's used in VerilatedFstC just below -template <> void VerilatedVcd::Super::dump(uint64_t time); -template <> void VerilatedVcd::Super::set_time_unit(const char* unitp); -template <> void VerilatedVcd::Super::set_time_unit(const std::string& unit); -template <> void VerilatedVcd::Super::set_time_resolution(const char* unitp); -template <> void VerilatedVcd::Super::set_time_resolution(const std::string& unit); -template <> void VerilatedVcd::Super::dumpvars(int level, const std::string& hier); +template <> +void VerilatedVcd::Super::dump(uint64_t time); +template <> +void VerilatedVcd::Super::set_time_unit(const char* unitp); +template <> +void VerilatedVcd::Super::set_time_unit(const std::string& unit); +template <> +void VerilatedVcd::Super::set_time_resolution(const char* unitp); +template <> +void VerilatedVcd::Super::set_time_resolution(const std::string& unit); +template <> +void VerilatedVcd::Super::dumpvars(int level, const std::string& hier); #endif // DOXYGEN //============================================================================= diff --git a/include/verilated_vcd_sc.cpp b/include/verilated_vcd_sc.cpp index 0ebff7554..20f9a426a 100644 --- a/include/verilated_vcd_sc.cpp +++ b/include/verilated_vcd_sc.cpp @@ -22,6 +22,7 @@ //============================================================================= #include "verilatedos.h" + #include "verilated_vcd_sc.h" //====================================================================== diff --git a/include/verilated_vcd_sc.h 
b/include/verilated_vcd_sc.h index 6f8edf3a1..a32134b23 100644 --- a/include/verilated_vcd_sc.h +++ b/include/verilated_vcd_sc.h @@ -23,6 +23,7 @@ #define VERILATOR_VERILATED_VCD_SC_H_ #include "verilatedos.h" + #include "verilated_sc.h" #include "verilated_vcd_c.h" diff --git a/include/verilated_vpi.cpp b/include/verilated_vpi.cpp index 77c79412b..ff2d25c27 100644 --- a/include/verilated_vpi.cpp +++ b/include/verilated_vpi.cpp @@ -26,8 +26,9 @@ #define VERILATOR_VERILATED_VPI_CPP_ -#include "verilated.h" #include "verilated_vpi.h" + +#include "verilated.h" #include "verilated_imp.h" #include diff --git a/include/verilated_vpi.h b/include/verilated_vpi.h index 9097b0c35..aeb58e210 100644 --- a/include/verilated_vpi.h +++ b/include/verilated_vpi.h @@ -25,6 +25,7 @@ #define VERILATOR_VERILATED_VPI_H_ #include "verilatedos.h" + #include "verilated.h" #include "verilated_syms.h" diff --git a/include/verilatedos.h b/include/verilatedos.h index 5c74bc065..d06d5fd99 100644 --- a/include/verilatedos.h +++ b/include/verilatedos.h @@ -534,7 +534,8 @@ using ssize_t = uint32_t; ///< signed size_t; returned from read() namespace vlstd { -template struct reverse_wrapper { +template +struct reverse_wrapper { const T& m_v; explicit reverse_wrapper(const T& a_v) @@ -544,10 +545,16 @@ template struct reverse_wrapper { }; // C++20's std::ranges::reverse_view -template reverse_wrapper reverse_view(const T& v) { return reverse_wrapper(v); } +template +reverse_wrapper reverse_view(const T& v) { + return reverse_wrapper(v); +} // C++17's std::as_const -template T const& as_const(T& v) { return v; } +template +T const& as_const(T& v) { + return v; +} }; // namespace vlstd //========================================================================= diff --git a/src/V3Active.cpp b/src/V3Active.cpp index ba69b1b27..fa629c11f 100644 --- a/src/V3Active.cpp +++ b/src/V3Active.cpp @@ -29,10 +29,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Active.h" + 
#include "V3Ast.h" #include "V3Const.h" +#include "V3Global.h" #include "V3Graph.h" #include diff --git a/src/V3ActiveTop.cpp b/src/V3ActiveTop.cpp index f4f9782b1..e39742dbd 100644 --- a/src/V3ActiveTop.cpp +++ b/src/V3ActiveTop.cpp @@ -26,11 +26,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3ActiveTop.h" + #include "V3Ast.h" -#include "V3SenTree.h" #include "V3Const.h" +#include "V3Global.h" +#include "V3SenTree.h" //###################################################################### // Active class functions diff --git a/src/V3Assert.cpp b/src/V3Assert.cpp index dd58d3674..86e8710a8 100644 --- a/src/V3Assert.cpp +++ b/src/V3Assert.cpp @@ -17,9 +17,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Assert.h" + #include "V3Ast.h" +#include "V3Global.h" #include "V3Stats.h" //###################################################################### diff --git a/src/V3Assert.h b/src/V3Assert.h index 3846ef589..d20b9b436 100644 --- a/src/V3Assert.h +++ b/src/V3Assert.h @@ -20,8 +20,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" +#include "V3Error.h" //============================================================================ diff --git a/src/V3AssertPre.cpp b/src/V3AssertPre.cpp index a0ed59aee..705dfcbb2 100644 --- a/src/V3AssertPre.cpp +++ b/src/V3AssertPre.cpp @@ -20,9 +20,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3AssertPre.h" + #include "V3Ast.h" +#include "V3Global.h" //###################################################################### // Assert class functions diff --git a/src/V3Ast.cpp b/src/V3Ast.cpp index de595be61..83b5052ad 100644 --- a/src/V3Ast.cpp +++ b/src/V3Ast.cpp @@ -18,10 +18,11 @@ #include "verilatedos.h" #include "V3Ast.h" -#include "V3File.h" -#include "V3Global.h" + #include "V3Broken.h" #include "V3EmitV.h" +#include "V3File.h" +#include "V3Global.h" 
#include "V3String.h" #include diff --git a/src/V3Ast.h b/src/V3Ast.h index 45fe1fc98..c527c71f9 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -20,11 +20,13 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3Broken.h" #include "V3Error.h" #include "V3FileLine.h" -#include "V3Number.h" #include "V3Global.h" -#include "V3Broken.h" +#include "V3Number.h" + +#include "V3Ast__gen_classes.h" // From ./astgen #include #include @@ -34,8 +36,6 @@ #include #include #include - -#include "V3Ast__gen_classes.h" // From ./astgen // Things like: // class V3AstNode; @@ -1133,7 +1133,7 @@ public: explicit VNUser(void* p) { m_u.up = p; } ~VNUser() = default; // Casters - template // + template typename std::enable_if::value, T>::type to() const { return reinterpret_cast(m_u.up); } @@ -1857,10 +1857,12 @@ private: // For internal use only. // Note: specializations for particular node types are provided by 'astgen' - template inline static bool privateTypeTest(const AstNode* nodep); + template + inline static bool privateTypeTest(const AstNode* nodep); // For internal use only. - template constexpr static bool uselessCast() { + template + constexpr static bool uselessCast() { using NonRef = typename std::remove_reference::type; using NonPtr = typename std::remove_pointer::type; using NonCV = typename std::remove_cv::type; @@ -1868,7 +1870,8 @@ private: } // For internal use only. 
- template constexpr static bool impossibleCast() { + template + constexpr static bool impossibleCast() { using NonRef = typename std::remove_reference::type; using NonPtr = typename std::remove_pointer::type; using NonCV = typename std::remove_cv::type; @@ -1877,20 +1880,23 @@ private: public: // For use via the VN_IS macro only - template inline static bool privateIs(const AstNode* nodep) { + template + inline static bool privateIs(const AstNode* nodep) { static_assert(!uselessCast(), "Unnecessary VN_IS, node known to have target type."); static_assert(!impossibleCast(), "Unnecessary VN_IS, node cannot be this type."); return nodep && privateTypeTest(nodep); } // For use via the VN_CAST macro only - template inline static T* privateCast(AstNode* nodep) { + template + inline static T* privateCast(AstNode* nodep) { static_assert(!uselessCast(), "Unnecessary VN_CAST, node known to have target type."); static_assert(!impossibleCast(), "Unnecessary VN_CAST, node cannot be this type."); return nodep && privateTypeTest(nodep) ? 
reinterpret_cast(nodep) : nullptr; } - template inline static const T* privateCast(const AstNode* nodep) { + template + inline static const T* privateCast(const AstNode* nodep) { static_assert(!uselessCast(), "Unnecessary VN_CAST, node known to have target type."); static_assert(!impossibleCast(), "Unnecessary VN_CAST, node cannot be this type."); @@ -1898,7 +1904,8 @@ public: } // For use via the VN_AS macro only - template inline static T* privateAs(AstNode* nodep) { + template + inline static T* privateAs(AstNode* nodep) { static_assert(!uselessCast(), "Unnecessary VN_AS, node known to have target type."); static_assert(!impossibleCast(), "Unnecessary VN_AS, node cannot be this type."); UASSERT_OBJ(!nodep || privateTypeTest(nodep), nodep, @@ -1906,7 +1913,8 @@ public: << "'"); return reinterpret_cast(nodep); } - template inline static const T* privateAs(const AstNode* nodep) { + template + inline static const T* privateAs(const AstNode* nodep) { static_assert(!uselessCast(), "Unnecessary VN_AS, node known to have target type."); static_assert(!impossibleCast(), "Unnecessary VN_AS, node cannot be this type."); UASSERT_OBJ(!nodep || privateTypeTest(nodep), nodep, @@ -1918,7 +1926,8 @@ public: // Predicate that returns true if the given 'nodep' might have a descendant of type 'T_Node'. // This is conservative and is used to speed up traversals. // Note: specializations for particular node types are provided below - template static bool mayBeUnder(const AstNode* nodep) { + template + static bool mayBeUnder(const AstNode* nodep) { static_assert(!std::is_const::value, "Type parameter 'T_Node' should not be const qualified"); static_assert(std::is_base_of::value, @@ -1929,7 +1938,8 @@ public: // Predicate that is true for node subtypes 'T_Node' that do not have any children // This is conservative and is used to speed up traversals. 
// Note: specializations for particular node types are provided below - template static constexpr bool isLeaf() { + template + static constexpr bool isLeaf() { static_assert(!std::is_const::value, "Type parameter 'T_Node' should not be const qualified"); static_assert(std::is_base_of::value, @@ -1951,7 +1961,8 @@ private: inline static bool predicateImpl(ConstCorrectAstNode* nodep, const std::function& p); - template constexpr static bool checkTypeParameter() { + template + constexpr static bool checkTypeParameter() { static_assert(!std::is_const::value, "Type parameter 'T_Node' should not be const qualified"); static_assert(std::is_base_of::value, @@ -1966,25 +1977,29 @@ public: // handle a single (or a few) node types, as it's easier to write, but more importantly, the // dispatch to the operation function in 'foreach' should be completely predictable by branch // target caches in modern CPUs, while it is basically unpredictable for VNVisitor. - template void foreach (std::function f) { + template + void foreach (std::function f) { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); foreachImpl(this, f, /* visitNext: */ false); } // Same as above, but for 'const' nodes - template void foreach (std::function f) const { + template + void foreach (std::function f) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); foreachImpl(this, f, /* visitNext: */ false); } // Same as 'foreach' but also follows 'this->nextp()' - template void foreachAndNext(std::function f) { + template + void foreachAndNext(std::function f) { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); foreachImpl(this, f, /* visitNext: */ true); } // Same as 'foreach' but also follows 'this->nextp()' - template void foreachAndNext(std::function f) const { + template + void foreachAndNext(std::function f) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); foreachImpl(this, f, /* visitNext: */ true); } @@ 
-1993,13 +2008,15 @@ public: // 'T_Node' that satisfies the predicate 'p'. Returns false if no node of type 'T_Node' is // present. Traversal is performed in some arbitrary order and is terminated as soon as the // result can be determined. - template bool exists(std::function p) { + template + bool exists(std::function p) { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); return predicateImpl(this, p); } // Same as above, but for 'const' nodes - template void exists(std::function p) const { + template + void exists(std::function p) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); return predicateImpl(this, p); } @@ -2008,13 +2025,15 @@ public: // 'T_Node' satisfy the predicate 'p'. Returns true if no node of type 'T_Node' is // present. Traversal is performed in some arbitrary order and is terminated as soon as the // result can be determined. - template bool forall(std::function p) { + template + bool forall(std::function p) { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); return predicateImpl(this, p); } // Same as above, but for 'const' nodes - template void forall(std::function p) const { + template + void forall(std::function p) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); return predicateImpl(this, p); } @@ -2031,13 +2050,16 @@ public: #include "V3Ast__gen_impl.h" // From ./astgen // Specializations of AstNode::mayBeUnder -template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { +template <> +inline bool AstNode::mayBeUnder(const AstNode* nodep) { return !VN_IS(nodep, NodeStmt) && !VN_IS(nodep, NodeMath); } -template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { +template <> +inline bool AstNode::mayBeUnder(const AstNode* nodep) { return !VN_IS(nodep, NodeMath); } -template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { +template <> +inline bool AstNode::mayBeUnder(const AstNode* nodep) { if (VN_IS(nodep, 
VarScope)) return false; // Should not nest if (VN_IS(nodep, Var)) return false; if (VN_IS(nodep, Active)) return false; @@ -2045,16 +2067,26 @@ template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { if (VN_IS(nodep, NodeMath)) return false; return true; } -template <> inline bool AstNode::mayBeUnder(const AstNode* nodep) { +template <> +inline bool AstNode::mayBeUnder(const AstNode* nodep) { if (VN_IS(nodep, ExecGraph)) return false; // Should not nest if (VN_IS(nodep, NodeStmt)) return false; // Should be directly under CFunc return true; } // Specializations of AstNode::isLeaf -template <> constexpr bool AstNode::isLeaf() { return true; } -template <> constexpr bool AstNode::isLeaf() { return true; } -template <> constexpr bool AstNode::isLeaf() { return true; } +template <> +constexpr bool AstNode::isLeaf() { + return true; +} +template <> +constexpr bool AstNode::isLeaf() { + return true; +} +template <> +constexpr bool AstNode::isLeaf() { + return true; +} // foreach implementation template @@ -2238,7 +2270,7 @@ inline void VNRelinker::relink(AstNode* newp) { newp->AstNode::relink(this); } //###################################################################### // VNRef is std::reference_wrapper that can only hold AstNode subtypes -template // +template class VNRef final : public std::reference_wrapper { static_assert(std::is_base_of::value, "Type parameter 'T_Node' must be a subtype of AstNode"); @@ -2264,13 +2296,13 @@ static_assert(sizeof(VNRef) == sizeof(std::reference_wrapper), size_t V3HasherUncachedHash(const AstNode&); // Specialization of std::hash for VNRef -template // +template struct std::hash> final { size_t operator()(VNRef r) const { return V3HasherUncachedHash(r); } }; // Specialization of std::equal_to for VNRef -template // +template struct std::equal_to> final { size_t operator()(VNRef ra, VNRef rb) const { return ra.get().sameTree(&(rb.get())); diff --git a/src/V3AstUserAllocator.h b/src/V3AstUserAllocator.h index 
f8982bf16..76ec4fc5d 100644 --- a/src/V3AstUserAllocator.h +++ b/src/V3AstUserAllocator.h @@ -27,7 +27,8 @@ #include #include -template class AstUserAllocatorBase VL_NOT_FINAL { +template +class AstUserAllocatorBase VL_NOT_FINAL { static_assert(1 <= T_UserN && T_UserN <= 5, "Wrong user pointer number"); static_assert(std::is_base_of::value, "T_Node must be an AstNode type"); @@ -91,7 +92,7 @@ protected: public: // Get a reference to the user data. If does not exist, construct it with given arguments. - template // + template T_Data& operator()(T_Node* nodep, Args&&... args) { T_Data* userp = getUserp(nodep); if (!userp) { diff --git a/src/V3Begin.cpp b/src/V3Begin.cpp index f2d74b678..0e4176739 100644 --- a/src/V3Begin.cpp +++ b/src/V3Begin.cpp @@ -28,9 +28,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Begin.h" + #include "V3Ast.h" +#include "V3Global.h" #include diff --git a/src/V3Branch.cpp b/src/V3Branch.cpp index a7d751d37..51c749eb4 100644 --- a/src/V3Branch.cpp +++ b/src/V3Branch.cpp @@ -26,9 +26,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Branch.h" + #include "V3Ast.h" +#include "V3Global.h" #include diff --git a/src/V3Broken.cpp b/src/V3Broken.cpp index 2eb5de33b..6f1f906e8 100644 --- a/src/V3Broken.cpp +++ b/src/V3Broken.cpp @@ -25,9 +25,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Broken.h" + #include "V3Ast.h" +#include "V3Global.h" // This visitor does not edit nodes, and is called at error-exit, so should use constant iterators #include "V3AstConstOnly.h" diff --git a/src/V3CCtors.cpp b/src/V3CCtors.cpp index 2ebb9f73c..f28e959fc 100644 --- a/src/V3CCtors.cpp +++ b/src/V3CCtors.cpp @@ -27,10 +27,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3EmitCBase.h" #include "V3CCtors.h" +#include "V3EmitCBase.h" +#include "V3Global.h" + #include #include diff --git 
a/src/V3CUse.cpp b/src/V3CUse.cpp index cf52c03da..2d301567e 100644 --- a/src/V3CUse.cpp +++ b/src/V3CUse.cpp @@ -25,9 +25,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3CUse.h" + #include "V3Ast.h" +#include "V3Global.h" #include diff --git a/src/V3Case.cpp b/src/V3Case.cpp index c65fb3e7d..c8a2e639f 100644 --- a/src/V3Case.cpp +++ b/src/V3Case.cpp @@ -37,9 +37,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Case.h" + #include "V3Ast.h" +#include "V3Global.h" #include "V3Stats.h" #include diff --git a/src/V3Cast.cpp b/src/V3Cast.cpp index d0a26e6c9..fa5b748e9 100644 --- a/src/V3Cast.cpp +++ b/src/V3Cast.cpp @@ -40,9 +40,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Cast.h" + #include "V3Ast.h" +#include "V3Global.h" #include diff --git a/src/V3Cdc.cpp b/src/V3Cdc.cpp index acb305ab0..56e26ebe8 100644 --- a/src/V3Cdc.cpp +++ b/src/V3Cdc.cpp @@ -24,13 +24,14 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Cdc.h" + #include "V3Ast.h" -#include "V3Graph.h" #include "V3Const.h" #include "V3EmitV.h" #include "V3File.h" +#include "V3Global.h" +#include "V3Graph.h" #include #include diff --git a/src/V3Changed.cpp b/src/V3Changed.cpp index bddcf76a7..32bf3d4c5 100644 --- a/src/V3Changed.cpp +++ b/src/V3Changed.cpp @@ -29,10 +29,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3Ast.h" #include "V3Changed.h" +#include "V3Ast.h" +#include "V3Global.h" + #include //###################################################################### diff --git a/src/V3Class.cpp b/src/V3Class.cpp index f65fbfbf6..94f513802 100644 --- a/src/V3Class.cpp +++ b/src/V3Class.cpp @@ -23,9 +23,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Class.h" + #include "V3Ast.h" +#include "V3Global.h" 
//###################################################################### diff --git a/src/V3Clean.cpp b/src/V3Clean.cpp index eaf7c1f39..3cca98d7f 100644 --- a/src/V3Clean.cpp +++ b/src/V3Clean.cpp @@ -26,9 +26,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Clean.h" + #include "V3Ast.h" +#include "V3Global.h" #include diff --git a/src/V3Clock.cpp b/src/V3Clock.cpp index a8b8f7006..d94728f35 100644 --- a/src/V3Clock.cpp +++ b/src/V3Clock.cpp @@ -30,9 +30,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Clock.h" + #include "V3Ast.h" +#include "V3Global.h" #include diff --git a/src/V3Combine.cpp b/src/V3Combine.cpp index b1db26b9b..8fa0edbeb 100644 --- a/src/V3Combine.cpp +++ b/src/V3Combine.cpp @@ -22,12 +22,13 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Combine.h" -#include "V3DupFinder.h" -#include "V3Stats.h" + #include "V3Ast.h" #include "V3AstUserAllocator.h" +#include "V3DupFinder.h" +#include "V3Global.h" +#include "V3Stats.h" #include #include diff --git a/src/V3Common.cpp b/src/V3Common.cpp index 7915126a4..0d5ca45f3 100644 --- a/src/V3Common.cpp +++ b/src/V3Common.cpp @@ -24,9 +24,10 @@ #include "verilatedos.h" #include "V3Common.h" + #include "V3Ast.h" -#include "V3Global.h" #include "V3EmitCBase.h" +#include "V3Global.h" //###################################################################### // Common component builders diff --git a/src/V3Config.cpp b/src/V3Config.cpp index d27db65ca..5961bf7d0 100644 --- a/src/V3Config.cpp +++ b/src/V3Config.cpp @@ -17,9 +17,10 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3Config.h" + #include "V3Global.h" #include "V3String.h" -#include "V3Config.h" #include #include @@ -33,7 +34,8 @@ // as wildcards and are accessed by a resolved name. It rebuilds a name lookup // cache of resolved entities. 
Entities stored in this container need an update // function that takes a reference of this type to join multiple entities into one. -template class V3ConfigWildcardResolver { +template +class V3ConfigWildcardResolver final { using Map = std::map; Map m_mapWildcard; // Wildcard strings to entities diff --git a/src/V3Config.h b/src/V3Config.h index 80d1fb9f2..effe2da08 100644 --- a/src/V3Config.h +++ b/src/V3Config.h @@ -20,9 +20,9 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3Ast.h" #include "V3Error.h" #include "V3FileLine.h" -#include "V3Ast.h" //###################################################################### diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 8a7670c14..31e9e6360 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -23,14 +23,15 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3String.h" #include "V3Const.h" + #include "V3Ast.h" -#include "V3Width.h" +#include "V3Global.h" #include "V3Simulate.h" #include "V3Stats.h" +#include "V3String.h" #include "V3UniqueNames.h" +#include "V3Width.h" #include #include diff --git a/src/V3Coverage.cpp b/src/V3Coverage.cpp index cac65eb31..ae4088f4f 100644 --- a/src/V3Coverage.cpp +++ b/src/V3Coverage.cpp @@ -27,9 +27,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Coverage.h" + #include "V3Ast.h" +#include "V3Global.h" #include #include diff --git a/src/V3CoverageJoin.cpp b/src/V3CoverageJoin.cpp index 864e3b6fe..1b16971d6 100644 --- a/src/V3CoverageJoin.cpp +++ b/src/V3CoverageJoin.cpp @@ -20,9 +20,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3CoverageJoin.h" + #include "V3DupFinder.h" +#include "V3Global.h" #include "V3Stats.h" #include diff --git a/src/V3Dead.cpp b/src/V3Dead.cpp index 8018e94e9..5c6c11fb4 100644 --- a/src/V3Dead.cpp +++ b/src/V3Dead.cpp @@ -36,9 +36,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" 
#include "V3Dead.h" + #include "V3Ast.h" +#include "V3Global.h" #include #include diff --git a/src/V3Delayed.cpp b/src/V3Delayed.cpp index 668fff8a4..ce06a16a2 100644 --- a/src/V3Delayed.cpp +++ b/src/V3Delayed.cpp @@ -51,9 +51,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Delayed.h" + #include "V3Ast.h" +#include "V3Global.h" #include "V3Stats.h" #include diff --git a/src/V3Depth.cpp b/src/V3Depth.cpp index 0c3b791e3..7b81e22ca 100644 --- a/src/V3Depth.cpp +++ b/src/V3Depth.cpp @@ -26,9 +26,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Depth.h" + #include "V3Ast.h" +#include "V3Global.h" #include "V3UniqueNames.h" #include diff --git a/src/V3DepthBlock.cpp b/src/V3DepthBlock.cpp index 13ebc929c..22da37e91 100644 --- a/src/V3DepthBlock.cpp +++ b/src/V3DepthBlock.cpp @@ -23,10 +23,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3DepthBlock.h" + #include "V3Ast.h" #include "V3EmitCBase.h" +#include "V3Global.h" #include diff --git a/src/V3Descope.cpp b/src/V3Descope.cpp index c43c0352f..4fe90837f 100644 --- a/src/V3Descope.cpp +++ b/src/V3Descope.cpp @@ -25,10 +25,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Descope.h" + #include "V3Ast.h" #include "V3EmitCBase.h" +#include "V3Global.h" #include diff --git a/src/V3DupFinder.cpp b/src/V3DupFinder.cpp index 809c06670..17c498646 100644 --- a/src/V3DupFinder.cpp +++ b/src/V3DupFinder.cpp @@ -17,10 +17,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3DupFinder.h" + #include "V3Ast.h" #include "V3File.h" +#include "V3Global.h" #include #include diff --git a/src/V3DupFinder.h b/src/V3DupFinder.h index 1ee51a992..3853fa35f 100644 --- a/src/V3DupFinder.h +++ b/src/V3DupFinder.h @@ -23,8 +23,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" +#include 
"V3Error.h" #include "V3Hasher.h" #include diff --git a/src/V3EmitCBase.cpp b/src/V3EmitCBase.cpp index 0fa62cc25..1e3e9383f 100644 --- a/src/V3EmitCBase.cpp +++ b/src/V3EmitCBase.cpp @@ -18,6 +18,7 @@ #include "verilatedos.h" #include "V3EmitCBase.h" + #include "V3Task.h" //###################################################################### diff --git a/src/V3EmitCBase.h b/src/V3EmitCBase.h index 5f3f09f33..66b954f35 100644 --- a/src/V3EmitCBase.h +++ b/src/V3EmitCBase.h @@ -20,12 +20,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3File.h" #include "V3Ast.h" +#include "V3File.h" +#include "V3Global.h" -#include #include +#include //###################################################################### // Set user4p in all CFunc and Var to point to the containing AstNodeModule diff --git a/src/V3EmitCConstPool.cpp b/src/V3EmitCConstPool.cpp index 3b11b4236..2fd22ccc4 100644 --- a/src/V3EmitCConstPool.cpp +++ b/src/V3EmitCConstPool.cpp @@ -21,8 +21,8 @@ #include "V3EmitCConstInit.h" #include "V3File.h" #include "V3Global.h" -#include "V3String.h" #include "V3Stats.h" +#include "V3String.h" #include #include diff --git a/src/V3EmitCFunc.cpp b/src/V3EmitCFunc.cpp index b3a224c5c..8f97b1139 100644 --- a/src/V3EmitCFunc.cpp +++ b/src/V3EmitCFunc.cpp @@ -17,9 +17,10 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3EmitCFunc.h" + #include "V3Global.h" #include "V3String.h" -#include "V3EmitCFunc.h" #include "V3TSP.h" #include diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index 31d1f5a11..9d863c7e5 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -20,8 +20,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3EmitCConstInit.h" +#include "V3Global.h" #include #include diff --git a/src/V3EmitCHeaders.cpp b/src/V3EmitCHeaders.cpp index f0734f670..a32f261d0 100644 --- a/src/V3EmitCHeaders.cpp +++ b/src/V3EmitCHeaders.cpp @@ -17,9 +17,9 @@ #include 
"config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3EmitC.h" #include "V3EmitCConstInit.h" +#include "V3Global.h" #include #include diff --git a/src/V3EmitCImp.cpp b/src/V3EmitCImp.cpp index c88648d3f..c6985dd30 100644 --- a/src/V3EmitCImp.cpp +++ b/src/V3EmitCImp.cpp @@ -17,10 +17,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3EmitC.h" #include "V3Ast.h" +#include "V3EmitC.h" #include "V3EmitCFunc.h" +#include "V3Global.h" #include "V3String.h" #include "V3UniqueNames.h" diff --git a/src/V3EmitCInlines.cpp b/src/V3EmitCInlines.cpp index d2002bf0d..f9a667e25 100644 --- a/src/V3EmitCInlines.cpp +++ b/src/V3EmitCInlines.cpp @@ -17,9 +17,9 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3EmitC.h" #include "V3EmitCBase.h" +#include "V3Global.h" #include "V3Stats.h" #include diff --git a/src/V3EmitCMain.cpp b/src/V3EmitCMain.cpp index 6e0ee7674..c717b7751 100644 --- a/src/V3EmitCMain.cpp +++ b/src/V3EmitCMain.cpp @@ -17,10 +17,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" +#include "V3EmitCMain.h" + #include "V3EmitC.h" #include "V3EmitCBase.h" -#include "V3EmitCMain.h" +#include "V3Global.h" #include diff --git a/src/V3EmitCMake.cpp b/src/V3EmitCMake.cpp index db74dcce9..c053cc45a 100644 --- a/src/V3EmitCMake.cpp +++ b/src/V3EmitCMake.cpp @@ -17,11 +17,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3Os.h" #include "V3EmitCMake.h" + #include "V3EmitCBase.h" +#include "V3Global.h" #include "V3HierBlock.h" +#include "V3Os.h" #include @@ -36,7 +37,8 @@ class CMakeEmitter final { // STATIC FUNCTIONS // Concatenate all strings in 'strs' with ' ' between them. 
- template static string cmake_list(const List& strs) { + template + static string cmake_list(const List& strs) { string s; if (strs.begin() != strs.end()) { s.append("\""); diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index 5ec26221f..28687c19c 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -17,9 +17,9 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3EmitC.h" #include "V3EmitCFunc.h" +#include "V3Global.h" #include "V3UniqueNames.h" #include diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index aaa920664..de8be89b6 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -17,9 +17,9 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3EmitC.h" #include "V3EmitCBase.h" +#include "V3Global.h" #include "V3LanguageWords.h" #include "V3PartitionGraph.h" diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index 75dabd3ff..e4db53ede 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -17,11 +17,12 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3EmitMk.h" + +#include "V3EmitCBase.h" #include "V3Global.h" #include "V3HierBlock.h" #include "V3Os.h" -#include "V3EmitMk.h" -#include "V3EmitCBase.h" //###################################################################### // Emit statements and math operators diff --git a/src/V3EmitV.cpp b/src/V3EmitV.cpp index 82015f46d..940277a8d 100644 --- a/src/V3EmitV.cpp +++ b/src/V3EmitV.cpp @@ -17,9 +17,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3EmitV.h" + #include "V3EmitCBase.h" +#include "V3Global.h" #include #include diff --git a/src/V3EmitXml.cpp b/src/V3EmitXml.cpp index d77a959ad..933f27079 100644 --- a/src/V3EmitXml.cpp +++ b/src/V3EmitXml.cpp @@ -17,10 +17,11 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3EmitXml.h" + +#include "V3EmitCBase.h" #include "V3Global.h" #include "V3String.h" -#include "V3EmitXml.h" -#include 
"V3EmitCBase.h" #include #include diff --git a/src/V3Expand.cpp b/src/V3Expand.cpp index c10c3c85f..6e8e2c1ec 100644 --- a/src/V3Expand.cpp +++ b/src/V3Expand.cpp @@ -28,11 +28,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Expand.h" -#include "V3Stats.h" + #include "V3Ast.h" #include "V3Const.h" +#include "V3Global.h" +#include "V3Stats.h" #include diff --git a/src/V3File.cpp b/src/V3File.cpp index b19d1f868..63f264b57 100644 --- a/src/V3File.cpp +++ b/src/V3File.cpp @@ -17,11 +17,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3File.h" + +#include "V3Ast.h" +#include "V3Global.h" #include "V3Os.h" #include "V3String.h" -#include "V3Ast.h" #include #include @@ -29,6 +30,7 @@ #include #include #include + #include #include diff --git a/src/V3File.h b/src/V3File.h index 6a4ded0de..91bfa90f9 100644 --- a/src/V3File.h +++ b/src/V3File.h @@ -23,12 +23,12 @@ #include "V3Error.h" #include -#include -#include -#include -#include #include +#include #include +#include +#include +#include //============================================================================ // V3File: Create streams, recording dependency information diff --git a/src/V3FileLine.h b/src/V3FileLine.h index d49d5e44a..f16bc5bca 100644 --- a/src/V3FileLine.h +++ b/src/V3FileLine.h @@ -23,12 +23,12 @@ #include "V3Error.h" #include "V3LangCode.h" -#include #include +#include #include #include #include -#include +#include //###################################################################### diff --git a/src/V3Force.cpp b/src/V3Force.cpp index 2c21b842c..ea4c61a02 100644 --- a/src/V3Force.cpp +++ b/src/V3Force.cpp @@ -40,11 +40,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" -#include "V3Global.h" #include "V3Force.h" #include "V3AstUserAllocator.h" +#include "V3Error.h" +#include "V3Global.h" //###################################################################### // Convert force/release 
statements and signals marked 'forceable' diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index 4073c5462..88a9332d4 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -24,14 +24,15 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Gate.h" + #include "V3Ast.h" #include "V3AstUserAllocator.h" -#include "V3Graph.h" #include "V3Const.h" -#include "V3Stats.h" #include "V3DupFinder.h" +#include "V3Global.h" +#include "V3Graph.h" +#include "V3Stats.h" #include #include diff --git a/src/V3GenClk.cpp b/src/V3GenClk.cpp index a358ca2aa..dd3985b6f 100644 --- a/src/V3GenClk.cpp +++ b/src/V3GenClk.cpp @@ -22,9 +22,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3GenClk.h" + #include "V3Ast.h" +#include "V3Global.h" //###################################################################### // GenClk state, as a visitor of each AstNode diff --git a/src/V3Global.cpp b/src/V3Global.cpp index cb42337e4..c52243d74 100644 --- a/src/V3Global.cpp +++ b/src/V3Global.cpp @@ -18,6 +18,7 @@ #include "verilatedos.h" #include "V3Global.h" + #include "V3Ast.h" #include "V3File.h" #include "V3HierBlock.h" diff --git a/src/V3Global.h b/src/V3Global.h index a7b54e866..1341332f5 100644 --- a/src/V3Global.h +++ b/src/V3Global.h @@ -48,7 +48,8 @@ class V3HierBlockPlan; // Object used by VL_RESTORER. This object must be an auto variable, not // allocated on the heap or otherwise. 
-template class VRestorer { +template +class VRestorer final { T& m_ref; // Reference to object we're saving and restoring const T m_saved; // Value saved, for later restore diff --git a/src/V3Graph.cpp b/src/V3Graph.cpp index 9bb30a6d0..1c4169e7a 100644 --- a/src/V3Graph.cpp +++ b/src/V3Graph.cpp @@ -17,10 +17,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3File.h" #include "V3Graph.h" +#include "V3File.h" +#include "V3Global.h" + #include #include #include diff --git a/src/V3GraphAlg.cpp b/src/V3GraphAlg.cpp index 9d4aa776f..c5e7dcec9 100644 --- a/src/V3GraphAlg.cpp +++ b/src/V3GraphAlg.cpp @@ -17,14 +17,15 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3GraphAlg.h" + +#include "V3Global.h" #include "V3GraphPathChecker.h" #include -#include -#include #include +#include +#include //###################################################################### //###################################################################### diff --git a/src/V3GraphPathChecker.cpp b/src/V3GraphPathChecker.cpp index a88ab6221..6f40a9aa7 100644 --- a/src/V3GraphPathChecker.cpp +++ b/src/V3GraphPathChecker.cpp @@ -17,10 +17,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3GraphStream.h" -#include "V3Global.h" #include "V3GraphPathChecker.h" +#include "V3Global.h" +#include "V3GraphStream.h" + //###################################################################### // GraphPCNode diff --git a/src/V3GraphStream.h b/src/V3GraphStream.h index f05e9a37e..37d68ca31 100644 --- a/src/V3GraphStream.h +++ b/src/V3GraphStream.h @@ -39,7 +39,8 @@ // not generally safe. If you want a raw pointer compare, see // GraphStreamUnordered below. 
-template class GraphStream { +template +class GraphStream final { private: // TYPES class VxHolder final { diff --git a/src/V3Hash.h b/src/V3Hash.h index 8c4d75e08..13f86dd6d 100644 --- a/src/V3Hash.h +++ b/src/V3Hash.h @@ -55,12 +55,16 @@ public: bool operator<(const V3Hash& rh) const { return m_value < rh.m_value; } // '+' combines hashes - template V3Hash operator+(T that) const { + template + V3Hash operator+(T that) const { return V3Hash(combine(m_value, V3Hash(that).m_value)); } // '+=' combines in place - template V3Hash& operator+=(T that) { return *this = *this + that; } + template + V3Hash& operator+=(T that) { + return *this = *this + that; + } }; std::ostream& operator<<(std::ostream& os, const V3Hash& rhs); diff --git a/src/V3Hasher.h b/src/V3Hasher.h index 57e3176a0..9944f84b1 100644 --- a/src/V3Hasher.h +++ b/src/V3Hasher.h @@ -23,8 +23,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" +#include "V3Error.h" #include "V3Hash.h" //============================================================================ diff --git a/src/V3HierBlock.cpp b/src/V3HierBlock.cpp index 2430e19db..ef45c5bd9 100644 --- a/src/V3HierBlock.cpp +++ b/src/V3HierBlock.cpp @@ -72,18 +72,19 @@ // Used for b) and c). // This options is repeated for all instantiating hierarchical blocks. 
-#include -#include -#include -#include +#include "V3HierBlock.h" #include "V3Ast.h" #include "V3Error.h" #include "V3File.h" -#include "V3HierBlock.h" #include "V3Os.h" -#include "V3String.h" #include "V3Stats.h" +#include "V3String.h" + +#include +#include +#include +#include static string V3HierCommandArgsFileName(const string& prefix, bool forCMake) { return v3Global.opt.makeDir() + "/" + prefix diff --git a/src/V3HierBlock.h b/src/V3HierBlock.h index c247dce52..b4fe14374 100644 --- a/src/V3HierBlock.h +++ b/src/V3HierBlock.h @@ -28,9 +28,9 @@ #include #include #include -#include #include #include +#include #include class AstNetlist; diff --git a/src/V3Inline.cpp b/src/V3Inline.cpp index de62ca20d..6ab1daf9d 100644 --- a/src/V3Inline.cpp +++ b/src/V3Inline.cpp @@ -27,15 +27,15 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Inline.h" + +#include "V3Ast.h" +#include "V3AstUserAllocator.h" +#include "V3Global.h" #include "V3Inst.h" #include "V3Stats.h" -#include "V3Ast.h" #include "V3String.h" -#include "V3AstUserAllocator.h" - #include #include #include diff --git a/src/V3Inst.cpp b/src/V3Inst.cpp index 339a5e0a5..e66936eec 100644 --- a/src/V3Inst.cpp +++ b/src/V3Inst.cpp @@ -24,10 +24,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Inst.h" + #include "V3Ast.h" #include "V3Const.h" +#include "V3Global.h" #include diff --git a/src/V3InstrCount.cpp b/src/V3InstrCount.cpp index a517bee15..d0ec37562 100644 --- a/src/V3InstrCount.cpp +++ b/src/V3InstrCount.cpp @@ -18,9 +18,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Ast.h" #include "V3InstrCount.h" +#include "V3Ast.h" + #include /// Estimate the instruction cost for executing all logic within and below diff --git a/src/V3LangCode.h b/src/V3LangCode.h index 5fa1b133e..3877dcf3d 100644 --- a/src/V3LangCode.h +++ b/src/V3LangCode.h @@ -20,9 +20,9 @@ #include "config_build.h" #include "verilatedos.h" -#include 
#include #include +#include //###################################################################### //! Class for the different languages supported. diff --git a/src/V3Life.cpp b/src/V3Life.cpp index 57ab26d63..112c3920e 100644 --- a/src/V3Life.cpp +++ b/src/V3Life.cpp @@ -26,11 +26,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Life.h" -#include "V3Stats.h" + #include "V3Ast.h" #include "V3Const.h" +#include "V3Global.h" +#include "V3Stats.h" #include #include diff --git a/src/V3LifePost.cpp b/src/V3LifePost.cpp index 14405ee3d..51b2e0e2c 100644 --- a/src/V3LifePost.cpp +++ b/src/V3LifePost.cpp @@ -27,12 +27,13 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3PartitionGraph.h" -#include "V3GraphPathChecker.h" #include "V3LifePost.h" -#include "V3Stats.h" + #include "V3Ast.h" +#include "V3Global.h" +#include "V3GraphPathChecker.h" +#include "V3PartitionGraph.h" +#include "V3Stats.h" #include // for std::unique_ptr -> auto_ptr or unique_ptr #include diff --git a/src/V3LinkCells.cpp b/src/V3LinkCells.cpp index da4dabaa4..ec0da2f09 100644 --- a/src/V3LinkCells.cpp +++ b/src/V3LinkCells.cpp @@ -26,17 +26,18 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3LinkCells.h" -#include "V3SymTable.h" -#include "V3Parse.h" + #include "V3Ast.h" +#include "V3Global.h" #include "V3Graph.h" +#include "V3Parse.h" +#include "V3SymTable.h" #include #include -#include #include +#include //###################################################################### // Graph subclasses diff --git a/src/V3LinkDot.cpp b/src/V3LinkDot.cpp index 7c0d5269b..45e5a2038 100644 --- a/src/V3LinkDot.cpp +++ b/src/V3LinkDot.cpp @@ -64,12 +64,13 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3LinkDot.h" -#include "V3SymTable.h" -#include "V3Graph.h" + #include "V3Ast.h" +#include "V3Global.h" +#include "V3Graph.h" #include "V3String.h" 
+#include "V3SymTable.h" #include #include diff --git a/src/V3LinkDot.h b/src/V3LinkDot.h index 08c4bc2ec..260c8cbcf 100644 --- a/src/V3LinkDot.h +++ b/src/V3LinkDot.h @@ -20,8 +20,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" +#include "V3Error.h" //============================================================================ diff --git a/src/V3LinkInc.cpp b/src/V3LinkInc.cpp index f21764919..66dcbf8e1 100644 --- a/src/V3LinkInc.cpp +++ b/src/V3LinkInc.cpp @@ -39,9 +39,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3LinkInc.h" + #include "V3Ast.h" +#include "V3Global.h" #include diff --git a/src/V3LinkJump.cpp b/src/V3LinkJump.cpp index f7f68c28a..9b85ac6f6 100644 --- a/src/V3LinkJump.cpp +++ b/src/V3LinkJump.cpp @@ -32,9 +32,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3LinkJump.h" + #include "V3Ast.h" +#include "V3Global.h" #include #include diff --git a/src/V3LinkLValue.cpp b/src/V3LinkLValue.cpp index 1f955e361..0e8f4ed65 100644 --- a/src/V3LinkLValue.cpp +++ b/src/V3LinkLValue.cpp @@ -21,9 +21,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3LinkLValue.h" + #include "V3Ast.h" +#include "V3Global.h" #include diff --git a/src/V3LinkLevel.cpp b/src/V3LinkLevel.cpp index d96892762..b9ecca249 100644 --- a/src/V3LinkLevel.cpp +++ b/src/V3LinkLevel.cpp @@ -22,9 +22,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3LinkLevel.h" + #include "V3Ast.h" +#include "V3Global.h" #include #include diff --git a/src/V3LinkLevel.h b/src/V3LinkLevel.h index 49e56254b..6a9ecdd11 100644 --- a/src/V3LinkLevel.h +++ b/src/V3LinkLevel.h @@ -20,8 +20,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" +#include "V3Error.h" #include diff --git a/src/V3LinkParse.cpp b/src/V3LinkParse.cpp index 67d82b7fb..2acc01f3e 100644 
--- a/src/V3LinkParse.cpp +++ b/src/V3LinkParse.cpp @@ -21,10 +21,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3LinkParse.h" + #include "V3Ast.h" #include "V3Config.h" +#include "V3Global.h" #include #include diff --git a/src/V3LinkResolve.cpp b/src/V3LinkResolve.cpp index a20100a80..f1b859b51 100644 --- a/src/V3LinkResolve.cpp +++ b/src/V3LinkResolve.cpp @@ -27,10 +27,11 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3LinkResolve.h" + +#include "V3Ast.h" #include "V3Global.h" #include "V3String.h" -#include "V3LinkResolve.h" -#include "V3Ast.h" #include #include diff --git a/src/V3List.h b/src/V3List.h index 7871cd282..883db89af 100644 --- a/src/V3List.h +++ b/src/V3List.h @@ -24,10 +24,13 @@ //============================================================================ -template class V3List; -template class V3ListEnt; +template +class V3List; +template +class V3ListEnt; -template class V3List { +template +class V3List final { // List container for linked list of elements of type *T (T is a pointer type) private: // MEMBERS @@ -50,7 +53,8 @@ public: //============================================================================ -template class V3ListEnt { +template +class V3ListEnt final { // List entry for linked list of elements of type *T (T is a pointer type) private: // MEMBERS diff --git a/src/V3Localize.cpp b/src/V3Localize.cpp index c121d5387..27d652110 100644 --- a/src/V3Localize.cpp +++ b/src/V3Localize.cpp @@ -25,11 +25,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Localize.h" -#include "V3Stats.h" + #include "V3Ast.h" #include "V3AstUserAllocator.h" +#include "V3Global.h" +#include "V3Stats.h" #include diff --git a/src/V3MergeCond.cpp b/src/V3MergeCond.cpp index 74dfaecb1..bb3389861 100644 --- a/src/V3MergeCond.cpp +++ b/src/V3MergeCond.cpp @@ -75,13 +75,14 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include 
"V3MergeCond.h" -#include "V3Stats.h" + #include "V3Ast.h" #include "V3AstUserAllocator.h" -#include "V3Hasher.h" #include "V3DupFinder.h" +#include "V3Global.h" +#include "V3Hasher.h" +#include "V3Stats.h" #include #include diff --git a/src/V3Name.cpp b/src/V3Name.cpp index 73e0a1839..d958c1ddb 100644 --- a/src/V3Name.cpp +++ b/src/V3Name.cpp @@ -22,9 +22,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Name.h" + #include "V3Ast.h" +#include "V3Global.h" #include "V3LanguageWords.h" //###################################################################### diff --git a/src/V3Number.cpp b/src/V3Number.cpp index 801d98c2d..9ff6bf5b1 100644 --- a/src/V3Number.cpp +++ b/src/V3Number.cpp @@ -17,9 +17,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Number.h" + #include "V3Ast.h" +#include "V3Global.h" #include #include diff --git a/src/V3Number_test.cpp b/src/V3Number_test.cpp index f7dc63484..9aa4ba26c 100644 --- a/src/V3Number_test.cpp +++ b/src/V3Number_test.cpp @@ -18,7 +18,7 @@ #define V3NUMBER_ASCII_BINARY #define V3ERROR_NO_GLOBAL_ -#include +#include "config_build.h" #include "verilatedos.h" #include "V3Error.cpp" diff --git a/src/V3OptionParser.cpp b/src/V3OptionParser.cpp index d98b4fd90..f9d5c721d 100644 --- a/src/V3OptionParser.cpp +++ b/src/V3OptionParser.cpp @@ -48,12 +48,16 @@ struct V3OptionParser::Impl { }; // Actual action classes - template class ActionSet; // "-opt" for bool-ish, "-opt val" for int and string - template class ActionFOnOff; // "-fopt" and "-fno-opt" for bool-ish - template class ActionOnOff; // "-opt" and "-no-opt" for bool-ish + template + class ActionSet; // "-opt" for bool-ish, "-opt val" for int and string + template + class ActionFOnOff; // "-fopt" and "-fno-opt" for bool-ish + template + class ActionOnOff; // "-opt" and "-no-opt" for bool-ish class ActionCbCall; // Callback without argument for "-opt" class ActionCbOnOff; // Callback for "-opt" 
and "-no-opt" - template class ActionCbVal; // Callback for "-opt val" + template + class ActionCbVal; // Callback for "-opt val" class ActionCbPartialMatch; // Callback "-O3" for "-O" class ActionCbPartialMatchVal; // Callback "-debugi-V3Options 3" for "-debugi-" @@ -67,7 +71,8 @@ struct V3OptionParser::Impl { // Action classes in V3OptionParser::Impl #define V3OPTION_PARSER_DEF_ACT_CLASS(className, type, body, enType) \ - template <> class V3OptionParser::Impl::className final : public ActionBase { \ + template <> \ + class V3OptionParser::Impl::className final : public ActionBase { \ type* const m_valp; /* Pointer to a option variable*/ \ \ public: \ diff --git a/src/V3OptionParser.h b/src/V3OptionParser.h index e77f43a26..2c5dbf6fb 100644 --- a/src/V3OptionParser.h +++ b/src/V3OptionParser.h @@ -65,7 +65,8 @@ private: // METHODS ActionIfs* find(const char* optp); - template ActionIfs& add(const string& opt, ARG arg); + template + ActionIfs& add(const string& opt, ARG arg); static bool hasPrefixFNo(const char* strp); // Returns true if strp starts with "-fno" static bool hasPrefixNo(const char* strp); // Returns true if strp starts with "-no" diff --git a/src/V3Options.cpp b/src/V3Options.cpp index d54b96c11..8ef8038b6 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -17,13 +17,14 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3Ast.h" -#include "V3Os.h" #include "V3Options.h" -#include "V3OptionParser.h" + +#include "V3Ast.h" #include "V3Error.h" #include "V3File.h" +#include "V3Global.h" +#include "V3OptionParser.h" +#include "V3Os.h" #include "V3PreShell.h" #include "V3String.h" diff --git a/src/V3Order.cpp b/src/V3Order.cpp index 311b4a381..1d83983ea 100644 --- a/src/V3Order.cpp +++ b/src/V3Order.cpp @@ -79,6 +79,8 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3Order.h" + #include "V3Ast.h" #include "V3AstUserAllocator.h" #include "V3Const.h" @@ -88,24 +90,22 @@ #include "V3Graph.h" 
#include "V3GraphStream.h" #include "V3List.h" +#include "V3OrderGraph.h" #include "V3Partition.h" #include "V3PartitionGraph.h" #include "V3SenTree.h" #include "V3SplitVar.h" #include "V3Stats.h" -#include "V3Order.h" -#include "V3OrderGraph.h" - #include #include #include #include #include #include -#include #include #include +#include //###################################################################### // Utilities @@ -824,7 +824,8 @@ inline std::ostream& operator<<(std::ostream& lhs, const OrderMoveDomScope& rhs) //###################################################################### // ProcessMoveBuildGraph -template class ProcessMoveBuildGraph { +template +class ProcessMoveBuildGraph final { // ProcessMoveBuildGraph takes as input the fine-grained graph of // OrderLogicVertex, OrderVarVertex, etc; this is 'm_graph' in // OrderVisitor. It produces a slightly coarsened graph to drive the diff --git a/src/V3Param.cpp b/src/V3Param.cpp index 610bdba9a..8a2b5a415 100644 --- a/src/V3Param.cpp +++ b/src/V3Param.cpp @@ -47,16 +47,17 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Param.h" + #include "V3Ast.h" #include "V3Case.h" #include "V3Const.h" +#include "V3Global.h" +#include "V3Hasher.h" #include "V3Os.h" #include "V3Parse.h" -#include "V3Width.h" #include "V3Unroll.h" -#include "V3Hasher.h" +#include "V3Width.h" #include #include diff --git a/src/V3ParseImp.cpp b/src/V3ParseImp.cpp index 6eb40cf7c..924a5062b 100644 --- a/src/V3ParseImp.cpp +++ b/src/V3ParseImp.cpp @@ -25,16 +25,16 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" -#include "V3Global.h" -#include "V3Os.h" -#include "V3Ast.h" -#include "V3File.h" #include "V3ParseImp.h" -#include "V3PreShell.h" -#include "V3LanguageWords.h" +#include "V3Ast.h" +#include "V3Error.h" +#include "V3File.h" +#include "V3Global.h" +#include "V3LanguageWords.h" +#include "V3Os.h" #include "V3ParseBison.h" // Generated by bison +#include 
"V3PreShell.h" #include diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index d9a071a96..b410b9bea 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -17,14 +17,15 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3EmitCBase.h" +#include "V3Partition.h" + #include "V3Config.h" -#include "V3Os.h" +#include "V3EmitCBase.h" #include "V3File.h" #include "V3GraphAlg.h" #include "V3GraphStream.h" #include "V3InstrCount.h" -#include "V3Partition.h" +#include "V3Os.h" #include "V3PartitionGraph.h" #include "V3Scoreboard.h" #include "V3Stats.h" @@ -160,7 +161,8 @@ static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) { // * Client calls PartPropagateCp::go(). Internally, this iteratively // propagates the new CPs wayward through the graph. // -template class PartPropagateCp : GraphAlg<> { +template +class PartPropagateCp final : GraphAlg<> { private: // MEMBERS const GraphWay m_way; // CPs oriented in this direction: either FORWARD diff --git a/src/V3PreProc.cpp b/src/V3PreProc.cpp index 7dfeb9c12..33fffc337 100644 --- a/src/V3PreProc.cpp +++ b/src/V3PreProc.cpp @@ -17,12 +17,13 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3PreProc.h" + #include "V3Error.h" -#include "V3Global.h" #include "V3File.h" +#include "V3Global.h" #include "V3LanguageWords.h" #include "V3PreLex.h" -#include "V3PreProc.h" #include "V3PreShell.h" #include "V3String.h" diff --git a/src/V3PreProc.h b/src/V3PreProc.h index 17b8d202f..f446a4f35 100644 --- a/src/V3PreProc.h +++ b/src/V3PreProc.h @@ -24,9 +24,9 @@ #include "V3FileLine.h" #include "V3Global.h" -#include -#include #include +#include +#include // Compatibility with Verilog-Perl's preprocessor #define fatalSrc(msg) v3fatalSrc(msg) diff --git a/src/V3PreShell.cpp b/src/V3PreShell.cpp index 4b817a53b..752ce3b8b 100644 --- a/src/V3PreShell.cpp +++ b/src/V3PreShell.cpp @@ -17,12 +17,13 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include 
"V3PreShell.h" -#include "V3PreProc.h" + #include "V3File.h" -#include "V3Parse.h" +#include "V3Global.h" #include "V3Os.h" +#include "V3Parse.h" +#include "V3PreProc.h" #include #include diff --git a/src/V3Premit.cpp b/src/V3Premit.cpp index 936395f9d..e10f13458 100644 --- a/src/V3Premit.cpp +++ b/src/V3Premit.cpp @@ -27,9 +27,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Premit.h" + #include "V3Ast.h" +#include "V3Global.h" #include "V3Stats.h" #include "V3UniqueNames.h" diff --git a/src/V3ProtectLib.cpp b/src/V3ProtectLib.cpp index c89d9ce5c..1012b25eb 100644 --- a/src/V3ProtectLib.cpp +++ b/src/V3ProtectLib.cpp @@ -17,10 +17,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3String.h" #include "V3ProtectLib.h" + +#include "V3Global.h" #include "V3Hasher.h" +#include "V3String.h" #include "V3Task.h" #include diff --git a/src/V3Randomize.cpp b/src/V3Randomize.cpp index dbcdbd9ed..0ac20c3b7 100644 --- a/src/V3Randomize.cpp +++ b/src/V3Randomize.cpp @@ -27,9 +27,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Ast.h" #include "V3Randomize.h" +#include "V3Ast.h" + //###################################################################### // Visitor that marks classes needing a randomize() method diff --git a/src/V3Reloop.cpp b/src/V3Reloop.cpp index 3186b182a..6851717c9 100644 --- a/src/V3Reloop.cpp +++ b/src/V3Reloop.cpp @@ -32,10 +32,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Reloop.h" -#include "V3Stats.h" + #include "V3Ast.h" +#include "V3Global.h" +#include "V3Stats.h" #include diff --git a/src/V3Scope.cpp b/src/V3Scope.cpp index 576931d75..aa0915815 100644 --- a/src/V3Scope.cpp +++ b/src/V3Scope.cpp @@ -24,9 +24,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Scope.h" + #include "V3Ast.h" +#include "V3Global.h" #include #include diff --git a/src/V3Scoreboard.h 
b/src/V3Scoreboard.h index 43d5be804..dc5fce0b0 100644 --- a/src/V3Scoreboard.h +++ b/src/V3Scoreboard.h @@ -112,7 +112,7 @@ public: // Returns const reference. const T_Value& at(const T_Key& key) const { return m_kiMap.at(key)->second; } // Note this returns const_iterator - template // + template std::pair emplace(const T_Key& key, Args&&... args) { const auto kiEmp = m_kiMap.emplace(key, end()); if (kiEmp.second) { diff --git a/src/V3Simulate.h b/src/V3Simulate.h index 378cdd0e4..e2418e60d 100644 --- a/src/V3Simulate.h +++ b/src/V3Simulate.h @@ -35,10 +35,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" -#include "V3Width.h" +#include "V3Error.h" #include "V3Task.h" +#include "V3Width.h" #include #include diff --git a/src/V3Slice.cpp b/src/V3Slice.cpp index a70aabde1..94a70fe99 100644 --- a/src/V3Slice.cpp +++ b/src/V3Slice.cpp @@ -38,9 +38,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Slice.h" + #include "V3Ast.h" +#include "V3Global.h" //************************************************************************* diff --git a/src/V3Split.cpp b/src/V3Split.cpp index bd3355fb5..83dc6ac5b 100644 --- a/src/V3Split.cpp +++ b/src/V3Split.cpp @@ -80,17 +80,18 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Split.h" -#include "V3Stats.h" + #include "V3Ast.h" +#include "V3Global.h" #include "V3Graph.h" +#include "V3Stats.h" #include #include -#include #include #include +#include //###################################################################### // Support classes diff --git a/src/V3SplitAs.cpp b/src/V3SplitAs.cpp index caea1a33a..ebdac1d7a 100644 --- a/src/V3SplitAs.cpp +++ b/src/V3SplitAs.cpp @@ -24,10 +24,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3SplitAs.h" -#include "V3Stats.h" + #include "V3Ast.h" +#include "V3Global.h" +#include "V3Stats.h" #include diff --git 
a/src/V3SplitVar.cpp b/src/V3SplitVar.cpp index 193b0e061..7ca9464d0 100644 --- a/src/V3SplitVar.cpp +++ b/src/V3SplitVar.cpp @@ -113,9 +113,10 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3SplitVar.h" + #include "V3Ast.h" #include "V3Global.h" -#include "V3SplitVar.h" #include "V3Stats.h" #include "V3UniqueNames.h" diff --git a/src/V3Stats.cpp b/src/V3Stats.cpp index 4eeda8e53..7eb07363e 100644 --- a/src/V3Stats.cpp +++ b/src/V3Stats.cpp @@ -17,9 +17,10 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Stats.h" + #include "V3Ast.h" +#include "V3Global.h" // This visitor does not edit nodes, and is called at error-exit, so should use constant iterators #include "V3AstConstOnly.h" diff --git a/src/V3StatsReport.cpp b/src/V3StatsReport.cpp index 86dfe15f7..ad12bd812 100644 --- a/src/V3StatsReport.cpp +++ b/src/V3StatsReport.cpp @@ -17,11 +17,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3Stats.h" #include "V3Ast.h" #include "V3File.h" +#include "V3Global.h" #include "V3Os.h" +#include "V3Stats.h" #include #include diff --git a/src/V3String.cpp b/src/V3String.cpp index dcdb0a042..9d20bd078 100644 --- a/src/V3String.cpp +++ b/src/V3String.cpp @@ -18,8 +18,8 @@ #include "verilatedos.h" // Limited V3 headers here - this is a base class for Vlc etc -#include "V3String.h" #include "V3Error.h" +#include "V3String.h" #include diff --git a/src/V3String.h b/src/V3String.h index fd349f9ae..0da8bdfc7 100644 --- a/src/V3String.h +++ b/src/V3String.h @@ -33,7 +33,8 @@ //###################################################################### // Global string-related functions -template std::string cvtToStr(const T& t) { +template +std::string cvtToStr(const T& t) { std::ostringstream os; os << t; return os.str(); diff --git a/src/V3Subst.cpp b/src/V3Subst.cpp index ff8122533..ef7562ac5 100644 --- a/src/V3Subst.cpp +++ b/src/V3Subst.cpp @@ -25,10 +25,11 @@ #include 
"config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Subst.h" -#include "V3Stats.h" + #include "V3Ast.h" +#include "V3Global.h" +#include "V3Stats.h" #include #include diff --git a/src/V3SymTable.h b/src/V3SymTable.h index a594a4541..c1607f5f4 100644 --- a/src/V3SymTable.h +++ b/src/V3SymTable.h @@ -20,9 +20,9 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Ast.h" #include "V3File.h" +#include "V3Global.h" #include "V3String.h" #include diff --git a/src/V3TSP.cpp b/src/V3TSP.cpp index db80fbb82..27d877369 100644 --- a/src/V3TSP.cpp +++ b/src/V3TSP.cpp @@ -22,20 +22,21 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" -#include "V3Global.h" -#include "V3File.h" -#include "V3Graph.h" #include "V3TSP.h" +#include "V3Error.h" +#include "V3File.h" +#include "V3Global.h" +#include "V3Graph.h" + #include #include #include #include #include #include -#include #include +#include #include //###################################################################### @@ -51,7 +52,8 @@ VL_DEBUG_FUNC; // Declare debug() } // namespace V3TSP // Vertex that tracks a per-vertex key -template class TspVertexTmpl : public V3GraphVertex { +template +class TspVertexTmpl final : public V3GraphVertex { private: const T_Key m_key; @@ -68,7 +70,8 @@ private: // TspGraphTmpl represents a complete graph, templatized to work with // different T_Key types. 
-template class TspGraphTmpl : public V3Graph { +template +class TspGraphTmpl final : public V3Graph { public: // TYPES using Vertex = TspVertexTmpl; diff --git a/src/V3Table.cpp b/src/V3Table.cpp index 30a9cb587..e1f6cc2f0 100644 --- a/src/V3Table.cpp +++ b/src/V3Table.cpp @@ -24,11 +24,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Table.h" + +#include "V3Ast.h" +#include "V3Global.h" #include "V3Simulate.h" #include "V3Stats.h" -#include "V3Ast.h" #include #include diff --git a/src/V3Task.cpp b/src/V3Task.cpp index 0e150d31f..c736bb86b 100644 --- a/src/V3Task.cpp +++ b/src/V3Task.cpp @@ -26,11 +26,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" -#include "V3Const.h" #include "V3Task.h" + #include "V3Ast.h" +#include "V3Const.h" #include "V3EmitCBase.h" +#include "V3Global.h" #include "V3Graph.h" #include "V3LinkLValue.h" diff --git a/src/V3Task.h b/src/V3Task.h index 33e11512d..a2ae41769 100644 --- a/src/V3Task.h +++ b/src/V3Task.h @@ -20,8 +20,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" +#include "V3Error.h" #include #include diff --git a/src/V3Trace.cpp b/src/V3Trace.cpp index c8a665f3e..b6fbb95ce 100644 --- a/src/V3Trace.cpp +++ b/src/V3Trace.cpp @@ -38,15 +38,16 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Trace.h" -#include "V3EmitCBase.h" -#include "V3Graph.h" + #include "V3DupFinder.h" +#include "V3EmitCBase.h" +#include "V3Global.h" +#include "V3Graph.h" #include "V3Stats.h" -#include #include +#include #include //###################################################################### diff --git a/src/V3TraceDecl.cpp b/src/V3TraceDecl.cpp index 09c2ac8f5..3f9c1ae6a 100644 --- a/src/V3TraceDecl.cpp +++ b/src/V3TraceDecl.cpp @@ -22,13 +22,15 @@ #include "config_build.h" #include "verilatedos.h" + +#include "V3TraceDecl.h" + #include "verilated_trace_defs.h" // For VLT_TRACE_SCOPE_* 
-#include "V3Global.h" #include "V3Config.h" #include "V3EmitCBase.h" +#include "V3Global.h" #include "V3Stats.h" -#include "V3TraceDecl.h" #include #include diff --git a/src/V3Tristate.cpp b/src/V3Tristate.cpp index da76ddb17..6f80c2f82 100644 --- a/src/V3Tristate.cpp +++ b/src/V3Tristate.cpp @@ -60,12 +60,13 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Tristate.h" + #include "V3Ast.h" -#include "V3Stats.h" -#include "V3Inst.h" +#include "V3Global.h" #include "V3Graph.h" +#include "V3Inst.h" +#include "V3Stats.h" #include #include diff --git a/src/V3Undriven.cpp b/src/V3Undriven.cpp index dc8283469..0732dafa1 100644 --- a/src/V3Undriven.cpp +++ b/src/V3Undriven.cpp @@ -26,10 +26,11 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3Undriven.h" + +#include "V3Ast.h" #include "V3Global.h" #include "V3String.h" -#include "V3Undriven.h" -#include "V3Ast.h" #include #include diff --git a/src/V3Unknown.cpp b/src/V3Unknown.cpp index 0df1ff3a6..12f011366 100644 --- a/src/V3Unknown.cpp +++ b/src/V3Unknown.cpp @@ -31,10 +31,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Unknown.h" + #include "V3Ast.h" #include "V3Const.h" +#include "V3Global.h" #include "V3Stats.h" #include "V3UniqueNames.h" diff --git a/src/V3Unroll.cpp b/src/V3Unroll.cpp index 469f5656e..719f3cbc0 100644 --- a/src/V3Unroll.cpp +++ b/src/V3Unroll.cpp @@ -27,12 +27,13 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Unroll.h" -#include "V3Stats.h" -#include "V3Const.h" + #include "V3Ast.h" +#include "V3Const.h" +#include "V3Global.h" #include "V3Simulate.h" +#include "V3Stats.h" #include diff --git a/src/V3Unroll.h b/src/V3Unroll.h index 91c5b3386..6cdece617 100644 --- a/src/V3Unroll.h +++ b/src/V3Unroll.h @@ -20,8 +20,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" +#include "V3Error.h" 
//============================================================================ /// Unroller with saved state, so caller can determine when pushDelete's are executed. diff --git a/src/V3VariableOrder.cpp b/src/V3VariableOrder.cpp index 6628cb7ed..6dda8290b 100644 --- a/src/V3VariableOrder.cpp +++ b/src/V3VariableOrder.cpp @@ -23,11 +23,12 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3VariableOrder.h" + #include "V3Ast.h" #include "V3AstUserAllocator.h" #include "V3EmitCBase.h" +#include "V3Global.h" #include "V3TSP.h" #include diff --git a/src/V3Waiver.cpp b/src/V3Waiver.cpp index 2e08c57c8..42d222fe8 100644 --- a/src/V3Waiver.cpp +++ b/src/V3Waiver.cpp @@ -15,9 +15,10 @@ #include "verilatedos.h" -#include "V3File.h" #include "V3Waiver.h" +#include "V3File.h" + #include #include diff --git a/src/V3Waiver.h b/src/V3Waiver.h index 2203cc52b..ba714c6c9 100644 --- a/src/V3Waiver.h +++ b/src/V3Waiver.h @@ -19,8 +19,8 @@ #include "V3Error.h" -#include #include +#include class V3Waiver final { // TYPES diff --git a/src/V3Width.cpp b/src/V3Width.cpp index f24380c2c..215e282b6 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -66,10 +66,11 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Global.h" #include "V3Width.h" -#include "V3Number.h" + #include "V3Const.h" +#include "V3Global.h" +#include "V3Number.h" #include "V3Randomize.h" #include "V3String.h" #include "V3Task.h" diff --git a/src/V3WidthCommit.h b/src/V3WidthCommit.h index b4c745123..5b784387d 100644 --- a/src/V3WidthCommit.h +++ b/src/V3WidthCommit.h @@ -20,8 +20,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "V3Error.h" #include "V3Ast.h" +#include "V3Error.h" // clang-format off #ifndef VERILATOR_V3WIDTH_CPP_ diff --git a/src/V3WidthSel.cpp b/src/V3WidthSel.cpp index b4afe191a..d958f6d52 100644 --- a/src/V3WidthSel.cpp +++ b/src/V3WidthSel.cpp @@ -29,10 +29,10 @@ #include "config_build.h" #include "verilatedos.h" -#include 
"V3Global.h" -#include "V3Width.h" #include "V3Ast.h" #include "V3Const.h" +#include "V3Global.h" +#include "V3Width.h" //###################################################################### // Width state, as a visitor of each AstNode diff --git a/src/Verilator.cpp b/src/Verilator.cpp index f28faabf6..b5d2df2cb 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -14,13 +14,11 @@ // //************************************************************************* -#include "V3Global.h" -#include "V3Ast.h" - #include "V3Active.h" #include "V3ActiveTop.h" #include "V3Assert.h" #include "V3AssertPre.h" +#include "V3Ast.h" #include "V3Begin.h" #include "V3Branch.h" #include "V3Broken.h" @@ -54,6 +52,7 @@ #include "V3Force.h" #include "V3Gate.h" #include "V3GenClk.h" +#include "V3Global.h" #include "V3Graph.h" #include "V3HierBlock.h" #include "V3Inline.h" @@ -61,8 +60,8 @@ #include "V3Life.h" #include "V3LifePost.h" #include "V3LinkDot.h" -#include "V3LinkJump.h" #include "V3LinkInc.h" +#include "V3LinkJump.h" #include "V3LinkLValue.h" #include "V3LinkLevel.h" #include "V3LinkParse.h" diff --git a/src/VlcPoint.h b/src/VlcPoint.h index f4bc6ffba..b5c9598fc 100644 --- a/src/VlcPoint.h +++ b/src/VlcPoint.h @@ -20,15 +20,16 @@ #include "config_build.h" #include "verilatedos.h" -#include #include #include #include +#include #define V3ERROR_NO_GLOBAL_ -#include "V3Error.h" #include "verilated_cov_key.h" +#include "V3Error.h" + //******************************************************************** // VlcPoint - A coverage point (across all tests) diff --git a/src/VlcTest.h b/src/VlcTest.h index d4717d16c..2c85950dc 100644 --- a/src/VlcTest.h +++ b/src/VlcTest.h @@ -20,8 +20,8 @@ #include "config_build.h" #include "verilatedos.h" -#include "VlcPoint.h" #include "VlcBucket.h" +#include "VlcPoint.h" #include #include diff --git a/src/VlcTop.cpp b/src/VlcTop.cpp index ba08eb6d0..fe94464ea 100644 --- a/src/VlcTop.cpp +++ b/src/VlcTop.cpp @@ -14,10 +14,12 @@ // 
//************************************************************************* +#include "VlcTop.h" + #include "V3Error.h" #include "V3Os.h" + #include "VlcOptions.h" -#include "VlcTop.h" #include #include diff --git a/src/VlcTop.h b/src/VlcTop.h index 2d9ccc39c..2f453b5ff 100644 --- a/src/VlcTop.h +++ b/src/VlcTop.h @@ -21,9 +21,9 @@ #include "verilatedos.h" #include "VlcOptions.h" -#include "VlcTest.h" #include "VlcPoint.h" #include "VlcSource.h" +#include "VlcTest.h" //###################################################################### // VlcTop - Top level options container diff --git a/test_regress/t/TestSimulator.h b/test_regress/t/TestSimulator.h index 2e0098e73..6272887d8 100644 --- a/test_regress/t/TestSimulator.h +++ b/test_regress/t/TestSimulator.h @@ -10,6 +10,7 @@ //************************************************************************* #include "vpi_user.h" + #include class TestSimulator { diff --git a/test_regress/t/t_const_bitoptree_bug3096.cpp b/test_regress/t/t_const_bitoptree_bug3096.cpp index 27a3ca385..deb9e1ee9 100644 --- a/test_regress/t/t_const_bitoptree_bug3096.cpp +++ b/test_regress/t/t_const_bitoptree_bug3096.cpp @@ -9,11 +9,10 @@ // //************************************************************************* +#include #include #include -#include - int main(int argc, char* argv[]) { Vt_const_bitoptree_bug3096* const tb = new Vt_const_bitoptree_bug3096; diff --git a/test_regress/t/t_cover_lib_1.out b/test_regress/t/t_cover_lib_1.out index 919cea884..c6846d4cc 100644 --- a/test_regress/t/t_cover_lib_1.out +++ b/test_regress/t/t_cover_lib_1.out @@ -1,7 +1,7 @@ # SystemC::Coverage-3 -C 'f../../t/t_cover_lib_c.cppl37pagesp_user/t_cover_lib_cP0htop.a*.pi' 500 -C 'f../../t/t_cover_lib_c.cppl37pagesp_user/t_cover_lib_cP1htop.a0.npi' 200 -C 'f../../t/t_cover_lib_c.cppl37pagesp_user/t_cover_lib_cP1htop.a1.npi' 300 -C 'f../../t/t_cover_lib_c.cppl46pagesp_user/t_cover_lib_cokept_onehmain' 100 -C 
'f../../t/t_cover_lib_c.cppl47pagesp_user/t_cover_lib_cokept_twohmain' 210 -C 'f../../t/t_cover_lib_c.cppl48pagesp_user/t_cover_lib_colost_threehmain' 220 +C 'f../../t/t_cover_lib_c.cppl39pagesp_user/t_cover_lib_cP0htop.a*.pi' 500 +C 'f../../t/t_cover_lib_c.cppl39pagesp_user/t_cover_lib_cP1htop.a0.npi' 200 +C 'f../../t/t_cover_lib_c.cppl39pagesp_user/t_cover_lib_cP1htop.a1.npi' 300 +C 'f../../t/t_cover_lib_c.cppl48pagesp_user/t_cover_lib_cokept_onehmain' 100 +C 'f../../t/t_cover_lib_c.cppl49pagesp_user/t_cover_lib_cokept_twohmain' 210 +C 'f../../t/t_cover_lib_c.cppl50pagesp_user/t_cover_lib_colost_threehmain' 220 diff --git a/test_regress/t/t_cover_lib_1_per_instance.out b/test_regress/t/t_cover_lib_1_per_instance.out index dd3dfec0d..8ed9d2b81 100644 --- a/test_regress/t/t_cover_lib_1_per_instance.out +++ b/test_regress/t/t_cover_lib_1_per_instance.out @@ -1,8 +1,8 @@ # SystemC::Coverage-3 -C 'f../../t/t_cover_lib_c.cppl37pagesp_user/t_cover_lib_cP0htop.a0.pi' 200 -C 'f../../t/t_cover_lib_c.cppl37pagesp_user/t_cover_lib_cP0htop.a1.pi' 300 -C 'f../../t/t_cover_lib_c.cppl37pagesp_user/t_cover_lib_cP1htop.a0.npi' 200 -C 'f../../t/t_cover_lib_c.cppl37pagesp_user/t_cover_lib_cP1htop.a1.npi' 300 -C 'f../../t/t_cover_lib_c.cppl46pagesp_user/t_cover_lib_cokept_onehmain' 100 -C 'f../../t/t_cover_lib_c.cppl47pagesp_user/t_cover_lib_cokept_twohmain' 210 -C 'f../../t/t_cover_lib_c.cppl48pagesp_user/t_cover_lib_colost_threehmain' 220 +C 'f../../t/t_cover_lib_c.cppl39pagesp_user/t_cover_lib_cP0htop.a0.pi' 200 +C 'f../../t/t_cover_lib_c.cppl39pagesp_user/t_cover_lib_cP0htop.a1.pi' 300 +C 'f../../t/t_cover_lib_c.cppl39pagesp_user/t_cover_lib_cP1htop.a0.npi' 200 +C 'f../../t/t_cover_lib_c.cppl39pagesp_user/t_cover_lib_cP1htop.a1.npi' 300 +C 'f../../t/t_cover_lib_c.cppl48pagesp_user/t_cover_lib_cokept_onehmain' 100 +C 'f../../t/t_cover_lib_c.cppl49pagesp_user/t_cover_lib_cokept_twohmain' 210 +C 'f../../t/t_cover_lib_c.cppl50pagesp_user/t_cover_lib_colost_threehmain' 220 diff --git 
a/test_regress/t/t_cover_lib_2.out b/test_regress/t/t_cover_lib_2.out index e9fa447e8..fcb461bba 100644 --- a/test_regress/t/t_cover_lib_2.out +++ b/test_regress/t/t_cover_lib_2.out @@ -1,3 +1,3 @@ # SystemC::Coverage-3 -C 'f../../t/t_cover_lib_c.cppl46pagesp_user/t_cover_lib_cokept_onehmain' 100 -C 'f../../t/t_cover_lib_c.cppl47pagesp_user/t_cover_lib_cokept_twohmain' 210 +C 'f../../t/t_cover_lib_c.cppl48pagesp_user/t_cover_lib_cokept_onehmain' 100 +C 'f../../t/t_cover_lib_c.cppl49pagesp_user/t_cover_lib_cokept_twohmain' 210 diff --git a/test_regress/t/t_cover_lib_3.out b/test_regress/t/t_cover_lib_3.out index 7db8b9cb0..5c1bd2c8c 100644 --- a/test_regress/t/t_cover_lib_3.out +++ b/test_regress/t/t_cover_lib_3.out @@ -1,3 +1,3 @@ # SystemC::Coverage-3 -C 'f../../t/t_cover_lib_c.cppl46pagesp_user/t_cover_lib_cokept_onehmain' 0 -C 'f../../t/t_cover_lib_c.cppl47pagesp_user/t_cover_lib_cokept_twohmain' 0 +C 'f../../t/t_cover_lib_c.cppl48pagesp_user/t_cover_lib_cokept_onehmain' 0 +C 'f../../t/t_cover_lib_c.cppl49pagesp_user/t_cover_lib_cokept_twohmain' 0 diff --git a/test_regress/t/t_cover_lib_c.cpp b/test_regress/t/t_cover_lib_c.cpp index ddc94db07..3ba805720 100644 --- a/test_regress/t/t_cover_lib_c.cpp +++ b/test_regress/t/t_cover_lib_c.cpp @@ -9,15 +9,17 @@ // //************************************************************************* +#include "verilated_cov.h" + +#include "svdpi.h" + #include #include #include -#include "svdpi.h" +// These require the above. 
Comment prevents clang-format moving them #include "TestCheck.h" -#include "verilated_cov.h" - #include VM_PREFIX_INCLUDE //====================================================================== diff --git a/test_regress/t/t_dpi_accessors.cpp b/test_regress/t/t_dpi_accessors.cpp index f160a888a..3ccae802c 100644 --- a/test_regress/t/t_dpi_accessors.cpp +++ b/test_regress/t/t_dpi_accessors.cpp @@ -11,13 +11,12 @@ // //************************************************************************* -#include -#include - -#include "svdpi.h" - #include "Vt_dpi_accessors.h" #include "Vt_dpi_accessors__Dpi.h" +#include "svdpi.h" + +#include +#include using std::cout; using std::dec; diff --git a/test_regress/t/t_dpi_arg_inout_unpack.cpp b/test_regress/t/t_dpi_arg_inout_unpack.cpp index fffe3fcb1..4c3ffdc7f 100644 --- a/test_regress/t/t_dpi_arg_inout_unpack.cpp +++ b/test_regress/t/t_dpi_arg_inout_unpack.cpp @@ -89,7 +89,8 @@ void set_uint(svBitVecVal* v0, sv_longint_unsigned_t val, int bitwidth) { } } -template bool compare(const T& act, const T& exp) { +template +bool compare(const T& act, const T& exp) { if (exp == act) { if (VERBOSE_MESSAGE) std::cout << "OK Exp:" << exp << " actual:" << act << std::endl; return true; @@ -140,13 +141,15 @@ bool compare(const svBitVecVal* v0, sv_longint_unsigned_t val, int bitwidth) { return true; } -template bool update_0d(T* v) { +template +bool update_0d(T* v) { if (!compare(*v, 42)) return false; ++(*v); return true; } -template bool update_1d(T* v) { +template +bool update_1d(T* v) { if (!compare(v[0], 43)) return false; if (!compare(v[1], 44)) return false; ++v[0]; @@ -154,7 +157,8 @@ template bool update_1d(T* v) { return true; } -template bool update_2d(T* v) { +template +bool update_2d(T* v) { if (!compare(v[0 * 2 + 1], 45)) return false; if (!compare(v[1 * 2 + 1], 46)) return false; if (!compare(v[2 * 2 + 1], 47)) return false; @@ -164,7 +168,8 @@ template bool update_2d(T* v) { return true; } -template bool update_3d(T* v) { 
+template +bool update_3d(T* v) { if (!compare(v[(0 * 3 + 0) * 2 + 0], 48)) return false; if (!compare(v[(1 * 3 + 0) * 2 + 0], 49)) return false; if (!compare(v[(2 * 3 + 0) * 2 + 0], 50)) return false; @@ -209,12 +214,14 @@ bool update_3d_scalar(svScalar* v) { return true; } -template bool update_0d(T* v, int bitwidth) { +template +bool update_0d(T* v, int bitwidth) { if (!compare(v, 42, bitwidth)) return false; set_uint(v, 43, bitwidth); return true; } -template bool update_1d(T* v, int bitwidth) { +template +bool update_1d(T* v, int bitwidth) { const int unit = (bitwidth + 31) / 32; if (!compare(v + unit * 0, 43, bitwidth)) return false; if (!compare(v + unit * 1, 44, bitwidth)) return false; @@ -222,7 +229,8 @@ template bool update_1d(T* v, int bitwidth) { set_uint(v + unit * 1, 45, bitwidth); return true; } -template bool update_2d(T* v, int bitwidth) { +template +bool update_2d(T* v, int bitwidth) { const int unit = (bitwidth + 31) / 32; if (!compare(v + unit * (0 * 2 + 1), 45, bitwidth)) return false; if (!compare(v + unit * (1 * 2 + 1), 46, bitwidth)) return false; @@ -232,7 +240,8 @@ template bool update_2d(T* v, int bitwidth) { set_uint(v + unit * (2 * 2 + 1), 48, bitwidth); return true; } -template bool update_3d(T* v, int bitwidth) { +template +bool update_3d(T* v, int bitwidth) { const int unit = (bitwidth + 31) / 32; if (!compare(v + unit * ((0 * 3 + 0) * 2 + 0), 48, bitwidth)) return false; if (!compare(v + unit * ((1 * 3 + 0) * 2 + 0), 49, bitwidth)) return false; @@ -245,7 +254,8 @@ template bool update_3d(T* v, int bitwidth) { return true; } -template void set_values(T (&v)[4][3][2]) { +template +void set_values(T (&v)[4][3][2]) { for (int i = 0; i < 4; ++i) for (int j = 0; j < 3; ++j) for (int k = 0; k < 2; ++k) v[i][j][k] = 0; @@ -261,7 +271,8 @@ template void set_values(T (&v)[4][3][2]) { v[3][0][0] = 51; } -template void set_values(T (&v)[4][3][2][N], int bitwidth) { +template +void set_values(T (&v)[4][3][2][N], int bitwidth) { for (int i = 0; 
i < 4; ++i) for (int j = 0; j < 3; ++j) for (int k = 0; k < 2; ++k) set_uint(v[i][j][k], 0, bitwidth); @@ -277,29 +288,39 @@ template void set_values(T (&v)[4][3][2][N], int bitwidth set_uint(v[3][0][0], 51, bitwidth); } -template bool check_0d(T v) { return compare(v, 43); } -template bool check_1d(const T (&v)[2]) { +template +bool check_0d(T v) { + return compare(v, 43); +} +template +bool check_1d(const T (&v)[2]) { return compare(v[0], 44) && compare(v[1], 45); } -template bool check_2d(const T (&v)[3][2]) { +template +bool check_2d(const T (&v)[3][2]) { return compare(v[0][1], 46) && compare(v[1][1], 47) && compare(v[2][1], 48); } -template bool check_3d(const T (&v)[4][3][2]) { +template +bool check_3d(const T (&v)[4][3][2]) { return compare(v[0][0][0], 49) && compare(v[1][0][0], 50) && compare(v[2][0][0], 51) && compare(v[3][0][0], 52); } -template bool check_0d(const T (&v)[N], unsigned int bitwidth) { +template +bool check_0d(const T (&v)[N], unsigned int bitwidth) { return compare(v, 43, bitwidth); } -template bool check_1d(const T (&v)[2][N], unsigned int bitwidth) { +template +bool check_1d(const T (&v)[2][N], unsigned int bitwidth) { return compare(v[0], 44, bitwidth) && compare(v[1], 45, bitwidth); } -template bool check_2d(const T (&v)[3][2][N], unsigned int bitwidth) { +template +bool check_2d(const T (&v)[3][2][N], unsigned int bitwidth) { return compare(v[0][1], 46, bitwidth) && compare(v[1][1], 47, bitwidth) && compare(v[2][1], 48, bitwidth); } -template bool check_3d(const T (&v)[4][3][2][N], unsigned int bitwidth) { +template +bool check_3d(const T (&v)[4][3][2][N], unsigned int bitwidth) { return compare(v[0][0][0], 49, bitwidth) && compare(v[1][0][0], 50, bitwidth) && compare(v[2][0][0], 51, bitwidth) && compare(v[3][0][0], 52, bitwidth); } diff --git a/test_regress/t/t_dpi_arg_input_unpack.cpp b/test_regress/t/t_dpi_arg_input_unpack.cpp index 2442f9578..3fe37bf1b 100644 --- a/test_regress/t/t_dpi_arg_input_unpack.cpp +++ 
b/test_regress/t/t_dpi_arg_input_unpack.cpp @@ -75,7 +75,8 @@ const bool VERBOSE_MESSAGE = false; abort(); \ } while (0) -template bool compare(const T& act, const T& exp) { +template +bool compare(const T& act, const T& exp) { if (exp == act) { if (VERBOSE_MESSAGE) std::cout << "OK Exp:" << exp << " actual:" << act << std::endl; return true; @@ -115,15 +116,21 @@ bool compare(const svBitVecVal* v0, sv_longint_unsigned_t val, int bitwidth) { return true; } -template bool check_0d(T v) { return compare(v, 42); } -template bool check_1d(const T* v) { +template +bool check_0d(T v) { + return compare(v, 42); +} +template +bool check_1d(const T* v) { return compare(v[0], 43) && compare(v[1], 44); } -template bool check_2d(const T* v) { +template +bool check_2d(const T* v) { return compare(v[0 * 2 + 1], 45) && compare(v[1 * 2 + 1], 46) && compare(v[2 * 2 + 1], 47); } -template bool check_3d(const T* v) { +template +bool check_3d(const T* v) { return compare(v[(0 * 3 + 0) * 2 + 0], 48) && compare(v[(1 * 3 + 0) * 2 + 0], 49) && compare(v[(2 * 3 + 0) * 2 + 0], 50) && compare(v[(3 * 3 + 0) * 2 + 0], 51); } @@ -181,7 +188,8 @@ bool check_3d(const char** v) { && compare(v[(3 * 3 + 0) * 2 + 0], "51"); } -template void set_values(T (&v)[4][3][2]) { +template +void set_values(T (&v)[4][3][2]) { for (int i = 0; i < 4; ++i) for (int j = 0; j < 3; ++j) for (int k = 0; k < 2; ++k) v[i][j][k] = 0; @@ -215,7 +223,8 @@ void set_uint(svBitVecVal* v0, sv_longint_unsigned_t val, int bitwidth) { } } -template void set_values(svLogicVecVal (&v)[4][3][2][N], int bitwidth) { +template +void set_values(svLogicVecVal (&v)[4][3][2][N], int bitwidth) { for (int i = 0; i < 4; ++i) for (int j = 0; j < 3; ++j) for (int k = 0; k < 2; ++k) set_uint(v[i][j][k], 0, bitwidth); @@ -234,7 +243,8 @@ template void set_values(svLogicVecVal (&v)[4][3][2][N], int bitwidth set_uint(v[3][0][0], 51, bitwidth); } -template void set_values(svBitVecVal (&v)[4][3][2][N], int bitwidth) { +template +void 
set_values(svBitVecVal (&v)[4][3][2][N], int bitwidth) { for (int i = 0; i < 4; ++i) for (int j = 0; j < 3; ++j) for (int k = 0; k < 2; ++k) set_uint(v[i][j][k], 0, bitwidth); diff --git a/test_regress/t/t_dpi_arg_output_unpack.cpp b/test_regress/t/t_dpi_arg_output_unpack.cpp index 918081f8e..4b288f565 100644 --- a/test_regress/t/t_dpi_arg_output_unpack.cpp +++ b/test_regress/t/t_dpi_arg_output_unpack.cpp @@ -89,18 +89,24 @@ void set_uint(svBitVecVal* v0, sv_longint_unsigned_t val, int bitwidth) { } } -template void set_0d(T* v) { *v = 42; } -template void set_1d(T* v) { +template +void set_0d(T* v) { + *v = 42; +} +template +void set_1d(T* v) { v[0] = 43; v[1] = 44; } -template void set_2d(T* v) { +template +void set_2d(T* v) { v[0 * 2 + 1] = 45; v[1 * 2 + 1] = 46; v[2 * 2 + 1] = 47; } -template void set_3d(T* v) { +template +void set_3d(T* v) { v[(0 * 3 + 0) * 2 + 0] = 48; v[(1 * 3 + 0) * 2 + 0] = 49; v[(2 * 3 + 0) * 2 + 0] = 50; @@ -173,7 +179,8 @@ void set_3d(svBitVecVal* v, int bitwidth) { set_uint(v + ((3 * 3 + 0) * 2 + 0) * unit, 51, bitwidth); } -template bool compare(const T& act, const T& exp) { +template +bool compare(const T& act, const T& exp) { if (exp == act) { if (VERBOSE_MESSAGE) std::cout << "OK Exp:" << exp << " actual:" << act << std::endl; return true; @@ -183,14 +190,20 @@ template bool compare(const T& act, const T& exp) { } } -template bool check_0d(T v) { return compare(v, 42); } -template bool check_1d(const T (&v)[2]) { +template +bool check_0d(T v) { + return compare(v, 42); +} +template +bool check_1d(const T (&v)[2]) { return compare(v[0], 43) && compare(v[1], 44); } -template bool check_2d(const T (&v)[3][2]) { +template +bool check_2d(const T (&v)[3][2]) { return compare(v[0][1], 45) && compare(v[1][1], 46) && compare(v[2][1], 47); } -template bool check_3d(const T (&v)[4][3][2]) { +template +bool check_3d(const T (&v)[4][3][2]) { return compare(v[0][0][0], 48) && compare(v[1][0][0], 49) && compare(v[2][0][0], 50) && 
compare(v[3][0][0], 51); } @@ -225,17 +238,21 @@ bool compare(const svBitVecVal* v0, sv_longint_unsigned_t val, int bitwidth) { return true; } -template bool check_0d(const T (&v)[N], int bitwidth) { +template +bool check_0d(const T (&v)[N], int bitwidth) { return compare(v, 42, bitwidth); } -template bool check_1d(const T (&v)[2][N], int bitwidth) { +template +bool check_1d(const T (&v)[2][N], int bitwidth) { return compare(v[0], 43, bitwidth) && compare(v[1], 44, bitwidth); } -template bool check_2d(const T (&v)[3][2][N], int bitwidth) { +template +bool check_2d(const T (&v)[3][2][N], int bitwidth) { return compare(v[0][1], 45, bitwidth) && compare(v[1][1], 46, bitwidth) && compare(v[2][1], 47, bitwidth); } -template bool check_3d(const T (&v)[4][3][2][N], int bitwidth) { +template +bool check_3d(const T (&v)[4][3][2][N], int bitwidth) { return compare(v[0][0][0], 48, bitwidth) && compare(v[1][0][0], 49, bitwidth) && compare(v[2][0][0], 50, bitwidth) && compare(v[3][0][0], 51, bitwidth); } diff --git a/test_regress/t/t_dpi_context_c.cpp b/test_regress/t/t_dpi_context_c.cpp index 5c1060ed4..e21d79363 100644 --- a/test_regress/t/t_dpi_context_c.cpp +++ b/test_regress/t/t_dpi_context_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* -#include #include "svdpi.h" +#include + //====================================================================== // clang-format off diff --git a/test_regress/t/t_dpi_display_c.cpp b/test_regress/t/t_dpi_display_c.cpp index 41d292027..3a8f93c92 100644 --- a/test_regress/t/t_dpi_display_c.cpp +++ b/test_regress/t/t_dpi_display_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* -#include #include "svdpi.h" +#include + //====================================================================== // clang-format off diff --git a/test_regress/t/t_dpi_export_c.cpp b/test_regress/t/t_dpi_export_c.cpp index 9493e4af6..da9fee221 100644 --- 
a/test_regress/t/t_dpi_export_c.cpp +++ b/test_regress/t/t_dpi_export_c.cpp @@ -9,11 +9,12 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include #include -#include "svdpi.h" //====================================================================== diff --git a/test_regress/t/t_dpi_imp_gen_c.cpp b/test_regress/t/t_dpi_imp_gen_c.cpp index 15a03eea5..b55252b19 100644 --- a/test_regress/t/t_dpi_imp_gen_c.cpp +++ b/test_regress/t/t_dpi_imp_gen_c.cpp @@ -9,9 +9,10 @@ // SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 //************************************************************************* +#include "svdpi.h" + #include #include -#include "svdpi.h" //====================================================================== diff --git a/test_regress/t/t_dpi_import_c.cpp b/test_regress/t/t_dpi_import_c.cpp index c28c58d0e..71554035e 100644 --- a/test_regress/t/t_dpi_import_c.cpp +++ b/test_regress/t/t_dpi_import_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* +#include "svdpi.h" + #include #include -#include "svdpi.h" //====================================================================== diff --git a/test_regress/t/t_dpi_lib_c.cpp b/test_regress/t/t_dpi_lib_c.cpp index 29a4d36b4..2667681bf 100644 --- a/test_regress/t/t_dpi_lib_c.cpp +++ b/test_regress/t/t_dpi_lib_c.cpp @@ -9,11 +9,13 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include -#include "svdpi.h" +// These require the above. 
Comment prevents clang-format moving them #include "TestCheck.h" //====================================================================== diff --git a/test_regress/t/t_dpi_open_c.cpp b/test_regress/t/t_dpi_open_c.cpp index 60b6b1d33..a09efcbab 100644 --- a/test_regress/t/t_dpi_open_c.cpp +++ b/test_regress/t/t_dpi_open_c.cpp @@ -9,11 +9,13 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include -#include "svdpi.h" +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" //====================================================================== diff --git a/test_regress/t/t_dpi_open_elem_c.cpp b/test_regress/t/t_dpi_open_elem_c.cpp index 6897375e6..0af199d79 100644 --- a/test_regress/t/t_dpi_open_elem_c.cpp +++ b/test_regress/t/t_dpi_open_elem_c.cpp @@ -9,11 +9,13 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include -#include "svdpi.h" +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" //====================================================================== diff --git a/test_regress/t/t_dpi_open_oob_bad_c.cpp b/test_regress/t/t_dpi_open_oob_bad_c.cpp index 2dd22cb86..f3b44189c 100644 --- a/test_regress/t/t_dpi_open_oob_bad_c.cpp +++ b/test_regress/t/t_dpi_open_oob_bad_c.cpp @@ -9,11 +9,13 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include -#include "svdpi.h" +// These require the above. 
Comment prevents clang-format moving them #include "TestCheck.h" //====================================================================== diff --git a/test_regress/t/t_dpi_open_vecval_c.cpp b/test_regress/t/t_dpi_open_vecval_c.cpp index b95fb6615..014db3f38 100644 --- a/test_regress/t/t_dpi_open_vecval_c.cpp +++ b/test_regress/t/t_dpi_open_vecval_c.cpp @@ -9,11 +9,13 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include -#include "svdpi.h" +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" //====================================================================== diff --git a/test_regress/t/t_dpi_openfirst_c.cpp b/test_regress/t/t_dpi_openfirst_c.cpp index 2d07a60c5..d33630fe2 100644 --- a/test_regress/t/t_dpi_openfirst_c.cpp +++ b/test_regress/t/t_dpi_openfirst_c.cpp @@ -9,11 +9,13 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include -#include "svdpi.h" +// These require the above. 
Comment prevents clang-format moving them #include "TestCheck.h" //====================================================================== diff --git a/test_regress/t/t_dpi_qw_c.cpp b/test_regress/t/t_dpi_qw_c.cpp index 49e15cf0f..7a18e6b83 100644 --- a/test_regress/t/t_dpi_qw_c.cpp +++ b/test_regress/t/t_dpi_qw_c.cpp @@ -9,9 +9,10 @@ // SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 //************************************************************************* +#include "svdpi.h" + #include #include -#include "svdpi.h" //====================================================================== diff --git a/test_regress/t/t_dpi_result_type.cpp b/test_regress/t/t_dpi_result_type.cpp index 21ea9e772..a0d7b3cb5 100644 --- a/test_regress/t/t_dpi_result_type.cpp +++ b/test_regress/t/t_dpi_result_type.cpp @@ -9,12 +9,12 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include -#include "svdpi.h" - // clang-format off #if defined(VERILATOR) // Verilator # include "Vt_dpi_result_type__Dpi.h" diff --git a/test_regress/t/t_dpi_shortcircuit_c.cpp b/test_regress/t/t_dpi_shortcircuit_c.cpp index dad4af8aa..9abe7e53d 100644 --- a/test_regress/t/t_dpi_shortcircuit_c.cpp +++ b/test_regress/t/t_dpi_shortcircuit_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* +#include "svdpi.h" + #include #include -#include "svdpi.h" //====================================================================== diff --git a/test_regress/t/t_dpi_string_c.cpp b/test_regress/t/t_dpi_string_c.cpp index ba6583d79..05b001886 100644 --- a/test_regress/t/t_dpi_string_c.cpp +++ b/test_regress/t/t_dpi_string_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* +#include "svdpi.h" + #include #include -#include "svdpi.h" //====================================================================== diff --git a/test_regress/t/t_dpi_sys_c.cpp 
b/test_regress/t/t_dpi_sys_c.cpp index 156609f16..d1e6baf8f 100644 --- a/test_regress/t/t_dpi_sys_c.cpp +++ b/test_regress/t/t_dpi_sys_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* -#include #include "svdpi.h" +#include + //====================================================================== // clang-format off diff --git a/test_regress/t/t_dpi_threads_c.cpp b/test_regress/t/t_dpi_threads_c.cpp index 0b2900352..9e02f5416 100644 --- a/test_regress/t/t_dpi_threads_c.cpp +++ b/test_regress/t/t_dpi_threads_c.cpp @@ -9,11 +9,12 @@ // //************************************************************************* +#include "svdpi.h" + #include #include #include #include -#include "svdpi.h" //====================================================================== diff --git a/test_regress/t/t_dpi_vams.cpp b/test_regress/t/t_dpi_vams.cpp index 264eb18cd..13ee8b82b 100644 --- a/test_regress/t/t_dpi_vams.cpp +++ b/test_regress/t/t_dpi_vams.cpp @@ -7,6 +7,7 @@ // SPDX-License-Identifier: CC0-1.0 #include + #include "Vt_dpi_vams.h" //====================================================================== diff --git a/test_regress/t/t_dpi_var.cpp b/test_regress/t/t_dpi_var.cpp index 190044f69..855e022b7 100644 --- a/test_regress/t/t_dpi_var.cpp +++ b/test_regress/t/t_dpi_var.cpp @@ -11,10 +11,10 @@ #include VM_PREFIX_INCLUDE #include "verilated.h" -#include "svdpi.h" - #include "verilated_syms.h" +#include "svdpi.h" + //====================================================================== struct MyMon { diff --git a/test_regress/t/t_embed1_c.cpp b/test_regress/t/t_embed1_c.cpp index e9deda818..0e6e075cb 100644 --- a/test_regress/t/t_embed1_c.cpp +++ b/test_regress/t/t_embed1_c.cpp @@ -9,10 +9,10 @@ // //************************************************************************* -#include +#include "../t_embed1_child/Vt_embed1_child.h" #include "svdpi.h" -#include "../t_embed1_child/Vt_embed1_child.h" +#include 
//====================================================================== diff --git a/test_regress/t/t_enum_public.cpp b/test_regress/t/t_enum_public.cpp index e46b8d57d..2ff06eea5 100644 --- a/test_regress/t/t_enum_public.cpp +++ b/test_regress/t/t_enum_public.cpp @@ -7,8 +7,8 @@ // SPDX-License-Identifier: CC0-1.0 #include -#include "Vt_enum_public.h" +#include "Vt_enum_public.h" #include "Vt_enum_public_p3.h" #include "Vt_enum_public_p62.h" diff --git a/test_regress/t/t_flag_fi.cpp b/test_regress/t/t_flag_fi.cpp index f91a5f303..176c434b7 100644 --- a/test_regress/t/t_flag_fi.cpp +++ b/test_regress/t/t_flag_fi.cpp @@ -7,6 +7,7 @@ // SPDX-License-Identifier: CC0-1.0 #include + #include "Vt_flag_fi.h" //====================================================================== diff --git a/test_regress/t/t_flag_ldflags_c.cpp b/test_regress/t/t_flag_ldflags_c.cpp index 6b32be9b9..113017b52 100644 --- a/test_regress/t/t_flag_ldflags_c.cpp +++ b/test_regress/t/t_flag_ldflags_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* -#include #include "svdpi.h" +#include + //====================================================================== // clang-format off diff --git a/test_regress/t/t_forceable_net.cpp b/test_regress/t/t_forceable_net.cpp index da9388a75..278020f0b 100644 --- a/test_regress/t/t_forceable_net.cpp +++ b/test_regress/t/t_forceable_net.cpp @@ -4,10 +4,11 @@ // any use, without warranty, 2021 by Geza Lore. // SPDX-License-Identifier: CC0-1.0 -#include - #include "verilatedos.h" + #include "verilated.h" + +#include #if VM_TRACE #include "verilated_vcd_c.h" #endif diff --git a/test_regress/t/t_forceable_var.cpp b/test_regress/t/t_forceable_var.cpp index 931903300..9286d3606 100644 --- a/test_regress/t/t_forceable_var.cpp +++ b/test_regress/t/t_forceable_var.cpp @@ -4,10 +4,11 @@ // any use, without warranty, 2021 by Geza Lore. 
// SPDX-License-Identifier: CC0-1.0 -#include - #include "verilatedos.h" + #include "verilated.h" + +#include #if VM_TRACE #include "verilated_vcd_c.h" #endif diff --git a/test_regress/t/t_func_rand.cpp b/test_regress/t/t_func_rand.cpp index c3b43a385..d2be39ab9 100644 --- a/test_regress/t/t_func_rand.cpp +++ b/test_regress/t/t_func_rand.cpp @@ -7,6 +7,7 @@ // SPDX-License-Identifier: CC0-1.0 #include + #include "Vt_func_rand.h" double sc_time_stamp() { return 0; } diff --git a/test_regress/t/t_gantt_two.cpp b/test_regress/t/t_gantt_two.cpp index da253fab7..ecd0be219 100644 --- a/test_regress/t/t_gantt_two.cpp +++ b/test_regress/t/t_gantt_two.cpp @@ -6,10 +6,12 @@ // SPDX-License-Identifier: CC0-1.0 // -#include #include "verilated.h" + #include "Vt_gantt_two.h" +#include + int main(int argc, char** argv, char** env) { srand48(5); diff --git a/test_regress/t/t_leak.cpp b/test_regress/t/t_leak.cpp index 247dce41a..decf48891 100644 --- a/test_regress/t/t_leak.cpp +++ b/test_regress/t/t_leak.cpp @@ -8,9 +8,10 @@ // Version 2.0. // SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 -#include -#include #include + +#include +#include #include VM_PREFIX_INCLUDE unsigned int main_time = 0; diff --git a/test_regress/t/t_math_imm2.cpp b/test_regress/t/t_math_imm2.cpp index 2736601e0..54c1657e5 100644 --- a/test_regress/t/t_math_imm2.cpp +++ b/test_regress/t/t_math_imm2.cpp @@ -4,6 +4,7 @@ // SPDX-License-Identifier: CC0-1.0 #include + #include "Vt_math_imm2.h" double sc_time_stamp() { return 0; } diff --git a/test_regress/t/t_mem_slot.cpp b/test_regress/t/t_mem_slot.cpp index 02f228c9b..5c32d9ec8 100644 --- a/test_regress/t/t_mem_slot.cpp +++ b/test_regress/t/t_mem_slot.cpp @@ -3,10 +3,12 @@ // any use, without warranty, 2020 by Wilson Snyder. 
// SPDX-License-Identifier: CC0-1.0 -#include #include + #include "Vt_mem_slot.h" +#include + double sc_time_stamp() { return 0; } unsigned int Array[3]; diff --git a/test_regress/t/t_multitop_sig.cpp b/test_regress/t/t_multitop_sig.cpp index aa3f7459f..91115cc41 100644 --- a/test_regress/t/t_multitop_sig.cpp +++ b/test_regress/t/t_multitop_sig.cpp @@ -6,10 +6,13 @@ // any use, without warranty, 2006 by Wilson Snyder. // SPDX-License-Identifier: CC0-1.0 -#include #include + #include "Vt_multitop_sig.h" +#include + +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" double sc_time_stamp() { return 0; } diff --git a/test_regress/t/t_order_dpi_export_1.cpp b/test_regress/t/t_order_dpi_export_1.cpp index 1dcec5c1e..7a261f661 100644 --- a/test_regress/t/t_order_dpi_export_1.cpp +++ b/test_regress/t/t_order_dpi_export_1.cpp @@ -9,10 +9,9 @@ // //************************************************************************* -#include - #include #include +#include int main(int argc, char* argv[]) { Vt_order_dpi_export_1* const tb = new Vt_order_dpi_export_1; diff --git a/test_regress/t/t_order_dpi_export_2.cpp b/test_regress/t/t_order_dpi_export_2.cpp index 3ba47b0d3..fb134ddeb 100644 --- a/test_regress/t/t_order_dpi_export_2.cpp +++ b/test_regress/t/t_order_dpi_export_2.cpp @@ -9,10 +9,9 @@ // //************************************************************************* -#include - #include #include +#include void toggle_other_clk(svBit val) { set_other_clk(val); } diff --git a/test_regress/t/t_order_dpi_export_3.cpp b/test_regress/t/t_order_dpi_export_3.cpp index 2855845c1..c8a5fcb25 100644 --- a/test_regress/t/t_order_dpi_export_3.cpp +++ b/test_regress/t/t_order_dpi_export_3.cpp @@ -9,10 +9,9 @@ // //************************************************************************* -#include - #include #include +#include void toggle_other_clk(svBit val) { set_other_clk(val); } diff --git a/test_regress/t/t_order_dpi_export_4.cpp 
b/test_regress/t/t_order_dpi_export_4.cpp index e4ecd5c89..95c169f47 100644 --- a/test_regress/t/t_order_dpi_export_4.cpp +++ b/test_regress/t/t_order_dpi_export_4.cpp @@ -9,10 +9,9 @@ // //************************************************************************* -#include - #include #include +#include void toggle_other_clk(svBit val) { set_other_clk(val); } diff --git a/test_regress/t/t_order_dpi_export_5.cpp b/test_regress/t/t_order_dpi_export_5.cpp index c14bf9ef3..494b161a8 100644 --- a/test_regress/t/t_order_dpi_export_5.cpp +++ b/test_regress/t/t_order_dpi_export_5.cpp @@ -9,10 +9,9 @@ // //************************************************************************* -#include - #include #include +#include int main(int argc, char* argv[]) { Vt_order_dpi_export_5* const tb = new Vt_order_dpi_export_5; diff --git a/test_regress/t/t_order_multidriven.cpp b/test_regress/t/t_order_multidriven.cpp index 628fbbc74..d8cd782a6 100644 --- a/test_regress/t/t_order_multidriven.cpp +++ b/test_regress/t/t_order_multidriven.cpp @@ -4,10 +4,11 @@ // without warranty, 2013 by Ted Campbell. 
// SPDX-License-Identifier: CC0-1.0 -#include "Vt_order_multidriven.h" #include "verilated.h" #include "verilated_vcd_c.h" +#include "Vt_order_multidriven.h" + double sc_time_stamp() { return 0; } Vt_order_multidriven* vcore; diff --git a/test_regress/t/t_order_quad.cpp b/test_regress/t/t_order_quad.cpp index dab33bd3d..95eddc4fa 100644 --- a/test_regress/t/t_order_quad.cpp +++ b/test_regress/t/t_order_quad.cpp @@ -7,6 +7,7 @@ // SPDX-License-Identifier: CC0-1.0 #include + #include "Vt_order_quad.h" //====================================================================== diff --git a/test_regress/t/t_param_public.cpp b/test_regress/t/t_param_public.cpp index c206441e3..66bfd20cf 100644 --- a/test_regress/t/t_param_public.cpp +++ b/test_regress/t/t_param_public.cpp @@ -7,8 +7,8 @@ // SPDX-License-Identifier: CC0-1.0 #include -#include "Vt_param_public.h" +#include "Vt_param_public.h" #include "Vt_param_public_p.h" #include "Vt_param_public_t.h" diff --git a/test_regress/t/t_protect_ids_c.cpp b/test_regress/t/t_protect_ids_c.cpp index 484ba07b8..8c14a7cc8 100644 --- a/test_regress/t/t_protect_ids_c.cpp +++ b/test_regress/t/t_protect_ids_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* +#include "svdpi.h" + #include #include -#include "svdpi.h" //====================================================================== diff --git a/test_regress/t/t_runflag_uninit_bad.cpp b/test_regress/t/t_runflag_uninit_bad.cpp index e1484272f..93b8eaf62 100644 --- a/test_regress/t/t_runflag_uninit_bad.cpp +++ b/test_regress/t/t_runflag_uninit_bad.cpp @@ -6,10 +6,10 @@ // any use, without warranty, 2010 by Wilson Snyder. 
// SPDX-License-Identifier: CC0-1.0 -#include - #include #include + +#include #include VM_PREFIX_INCLUDE //====================================================================== diff --git a/test_regress/t/t_savable_open_bad2.cpp b/test_regress/t/t_savable_open_bad2.cpp index f9faddd91..c3a278118 100644 --- a/test_regress/t/t_savable_open_bad2.cpp +++ b/test_regress/t/t_savable_open_bad2.cpp @@ -6,12 +6,13 @@ // any use, without warranty, 2010 by Wilson Snyder. // SPDX-License-Identifier: CC0-1.0 -#include - #include #include + +#include #include VM_PREFIX_INCLUDE +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" //====================================================================== diff --git a/test_regress/t/t_scope_map.cpp b/test_regress/t/t_scope_map.cpp index 14e7d5812..9136c8ee3 100644 --- a/test_regress/t/t_scope_map.cpp +++ b/test_regress/t/t_scope_map.cpp @@ -9,11 +9,12 @@ #include #include #include -#include -#include #include "Vt_scope_map.h" +#include +#include + const unsigned long long dt_2 = 3; int main(int argc, char** argv, char** env) { diff --git a/test_regress/t/t_time_vpi_c.cpp b/test_regress/t/t_time_vpi_c.cpp index 901de71cd..9b520afe4 100644 --- a/test_regress/t/t_time_vpi_c.cpp +++ b/test_regress/t/t_time_vpi_c.cpp @@ -9,9 +9,12 @@ // //************************************************************************* -#include #include "svdpi.h" #include "vpi_user.h" + +#include + +// These require the above. Comment prevents clang-format moving them #include "TestVpi.h" //====================================================================== diff --git a/test_regress/t/t_trace_cat.cpp b/test_regress/t/t_trace_cat.cpp index 8f851daf7..914f87a84 100644 --- a/test_regress/t/t_trace_cat.cpp +++ b/test_regress/t/t_trace_cat.cpp @@ -6,10 +6,11 @@ // any use, without warranty, 2008 by Wilson Snyder. 
// SPDX-License-Identifier: CC0-1.0 -#include #include #include +#include + #include VM_PREFIX_INCLUDE unsigned long long main_time = 0; diff --git a/test_regress/t/t_trace_cat_fst.cpp b/test_regress/t/t_trace_cat_fst.cpp index 8432da0e3..bb35df44b 100644 --- a/test_regress/t/t_trace_cat_fst.cpp +++ b/test_regress/t/t_trace_cat_fst.cpp @@ -6,10 +6,11 @@ // any use, without warranty, 2008 by Wilson Snyder. // SPDX-License-Identifier: CC0-1.0 -#include #include #include +#include + #include VM_PREFIX_INCLUDE unsigned long long main_time = 0; diff --git a/test_regress/t/t_trace_dumpvars_dyn.cpp b/test_regress/t/t_trace_dumpvars_dyn.cpp index f5d5f986e..02634e50a 100644 --- a/test_regress/t/t_trace_dumpvars_dyn.cpp +++ b/test_regress/t/t_trace_dumpvars_dyn.cpp @@ -6,11 +6,12 @@ // any use, without warranty, 2022 by Wilson Snyder. // SPDX-License-Identifier: CC0-1.0 -#include #include #include #include +#include + #include VM_PREFIX_INCLUDE unsigned long long main_time = 0; diff --git a/test_regress/t/t_trace_public_func.cpp b/test_regress/t/t_trace_public_func.cpp index b17540c2d..bd31c9a6f 100644 --- a/test_regress/t/t_trace_public_func.cpp +++ b/test_regress/t/t_trace_public_func.cpp @@ -6,10 +6,11 @@ // any use, without warranty, 2008 by Wilson Snyder. 
// SPDX-License-Identifier: CC0-1.0 -#include #include #include +#include + // clang-format off #include VM_PREFIX_INCLUDE #ifdef T_TRACE_PUBLIC_FUNC_VLT diff --git a/test_regress/t/t_var_overwidth_bad.cpp b/test_regress/t/t_var_overwidth_bad.cpp index 2aa7d801a..d82237617 100644 --- a/test_regress/t/t_var_overwidth_bad.cpp +++ b/test_regress/t/t_var_overwidth_bad.cpp @@ -9,9 +9,10 @@ // //************************************************************************* -#include "Vt_var_overwidth_bad.h" #include "verilated.h" +#include "Vt_var_overwidth_bad.h" + //====================================================================== double main_time; diff --git a/test_regress/t/t_vpi_cb_iter.cpp b/test_regress/t/t_vpi_cb_iter.cpp index 8258385e3..544e1bc62 100644 --- a/test_regress/t/t_vpi_cb_iter.cpp +++ b/test_regress/t/t_vpi_cb_iter.cpp @@ -9,22 +9,23 @@ // //************************************************************************* -#include "Vt_vpi_cb_iter.h" #include "verilated.h" #include "verilated_vpi.h" -#include +#include "Vt_vpi_cb_iter.h" +#include "vpi_user.h" + #include +#include #include #include #include +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" #include "TestSimulator.h" #include "TestVpi.h" -#include "vpi_user.h" - int errors = 0; TestVpiHandle vh_value_cb; diff --git a/test_regress/t/t_vpi_cbs_called.cpp b/test_regress/t/t_vpi_cbs_called.cpp index c3e39db12..a2371716a 100644 --- a/test_regress/t/t_vpi_cbs_called.cpp +++ b/test_regress/t/t_vpi_cbs_called.cpp @@ -9,21 +9,22 @@ // //************************************************************************* -#include "Vt_vpi_cbs_called.h" #include "verilated.h" #include "verilated_vpi.h" -#include +#include "Vt_vpi_cbs_called.h" +#include "vpi_user.h" + #include +#include #include #include #include +// These require the above. 
Comment prevents clang-format moving them #include "TestSimulator.h" #include "TestVpi.h" -#include "vpi_user.h" - const std::vector cbs_to_test{cbReadWriteSynch, cbReadOnlySynch, cbNextSimTime, cbStartOfSimulation, cbEndOfSimulation, cbValueChange}; diff --git a/test_regress/t/t_vpi_finish_c.cpp b/test_regress/t/t_vpi_finish_c.cpp index 714bbbe1a..27563051d 100644 --- a/test_regress/t/t_vpi_finish_c.cpp +++ b/test_regress/t/t_vpi_finish_c.cpp @@ -9,11 +9,12 @@ // //************************************************************************* -#include -#include #include "svdpi.h" #include "vpi_user.h" +#include +#include + //====================================================================== extern "C" { diff --git a/test_regress/t/t_vpi_get.cpp b/test_regress/t/t_vpi_get.cpp index 20d5d0fd7..fdd93ee01 100644 --- a/test_regress/t/t_vpi_get.cpp +++ b/test_regress/t/t_vpi_get.cpp @@ -12,18 +12,18 @@ #ifdef IS_VPI #include "vpi_user.h" + #include #else -#include "Vt_vpi_get.h" #include "verilated.h" -#include "svdpi.h" - -#include "Vt_vpi_get__Dpi.h" - -#include "verilated_vpi.h" #include "verilated_vcd_c.h" +#include "verilated_vpi.h" + +#include "Vt_vpi_get.h" +#include "Vt_vpi_get__Dpi.h" +#include "svdpi.h" #endif @@ -31,6 +31,7 @@ #include #include +// These require the above. 
Comment prevents clang-format moving them #include "TestSimulator.h" #include "TestVpi.h" diff --git a/test_regress/t/t_vpi_memory.cpp b/test_regress/t/t_vpi_memory.cpp index e7532b9ad..05a34a68d 100644 --- a/test_regress/t/t_vpi_memory.cpp +++ b/test_regress/t/t_vpi_memory.cpp @@ -12,18 +12,18 @@ #ifdef IS_VPI #include "vpi_user.h" + #include #else -#include "Vt_vpi_memory.h" #include "verilated.h" -#include "svdpi.h" - -#include "Vt_vpi_memory__Dpi.h" - -#include "verilated_vpi.h" #include "verilated_vcd_c.h" +#include "verilated_vpi.h" + +#include "Vt_vpi_memory.h" +#include "Vt_vpi_memory__Dpi.h" +#include "svdpi.h" #endif @@ -31,9 +31,10 @@ #include #include +// These require the above. Comment prevents clang-format moving them +#include "TestCheck.h" #include "TestSimulator.h" #include "TestVpi.h" -#include "TestCheck.h" // __FILE__ is too long #define FILENM "t_vpi_memory.cpp" diff --git a/test_regress/t/t_vpi_module.cpp b/test_regress/t/t_vpi_module.cpp index b09fe6ea7..754cc2660 100644 --- a/test_regress/t/t_vpi_module.cpp +++ b/test_regress/t/t_vpi_module.cpp @@ -12,18 +12,18 @@ #ifdef IS_VPI #include "vpi_user.h" + #include #else -#include "Vt_vpi_module.h" #include "verilated.h" -#include "svdpi.h" - -#include "Vt_vpi_module__Dpi.h" - -#include "verilated_vpi.h" #include "verilated_vcd_c.h" +#include "verilated_vpi.h" + +#include "Vt_vpi_module.h" +#include "Vt_vpi_module__Dpi.h" +#include "svdpi.h" #endif @@ -31,6 +31,7 @@ #include #include +// These require the above. 
Comment prevents clang-format moving them #include "TestSimulator.h" #include "TestVpi.h" diff --git a/test_regress/t/t_vpi_param.cpp b/test_regress/t/t_vpi_param.cpp index 8ac956531..78f1deeed 100644 --- a/test_regress/t/t_vpi_param.cpp +++ b/test_regress/t/t_vpi_param.cpp @@ -12,18 +12,18 @@ #ifdef IS_VPI #include "vpi_user.h" + #include #else -#include "Vt_vpi_param.h" #include "verilated.h" -#include "svdpi.h" - -#include "Vt_vpi_param__Dpi.h" - -#include "verilated_vpi.h" #include "verilated_vcd_c.h" +#include "verilated_vpi.h" + +#include "Vt_vpi_param.h" +#include "Vt_vpi_param__Dpi.h" +#include "svdpi.h" #endif @@ -31,6 +31,7 @@ #include #include +// These require the above. Comment prevents clang-format moving them #include "TestSimulator.h" #include "TestVpi.h" diff --git a/test_regress/t/t_vpi_release_dup_bad_c.cpp b/test_regress/t/t_vpi_release_dup_bad_c.cpp index 1ade9eaac..d78f3f329 100644 --- a/test_regress/t/t_vpi_release_dup_bad_c.cpp +++ b/test_regress/t/t_vpi_release_dup_bad_c.cpp @@ -9,9 +9,10 @@ // //************************************************************************* -#include #include "svdpi.h" #include "vpi_user.h" + +#include //#include "verilated.h" #include "Vt_vpi_release_dup_bad__Dpi.h" diff --git a/test_regress/t/t_vpi_stop_bad_c.cpp b/test_regress/t/t_vpi_stop_bad_c.cpp index 12128781c..f66641e1e 100644 --- a/test_regress/t/t_vpi_stop_bad_c.cpp +++ b/test_regress/t/t_vpi_stop_bad_c.cpp @@ -9,11 +9,12 @@ // //************************************************************************* -#include -#include #include "svdpi.h" #include "vpi_user.h" +#include +#include + //====================================================================== extern "C" { diff --git a/test_regress/t/t_vpi_time_cb.cpp b/test_regress/t/t_vpi_time_cb.cpp index 1e59a384d..0665e6a10 100644 --- a/test_regress/t/t_vpi_time_cb.cpp +++ b/test_regress/t/t_vpi_time_cb.cpp @@ -9,19 +9,19 @@ // 
//************************************************************************* -#include "Vt_vpi_time_cb.h" #include "verilated.h" +#include "verilated_vcd_c.h" +#include "verilated_vpi.h" + +#include "Vt_vpi_time_cb.h" +#include "Vt_vpi_time_cb__Dpi.h" #include "svdpi.h" -#include "Vt_vpi_time_cb__Dpi.h" - -#include "verilated_vpi.h" -#include "verilated_vcd_c.h" - -#include "TestCheck.h" - #include +// These require the above. Comment prevents clang-format moving them +#include "TestCheck.h" + //====================================================================== int main(int argc, char** argv, char** env) { diff --git a/test_regress/t/t_vpi_time_cb_c.cpp b/test_regress/t/t_vpi_time_cb_c.cpp index c6a82aae3..f25e08445 100644 --- a/test_regress/t/t_vpi_time_cb_c.cpp +++ b/test_regress/t/t_vpi_time_cb_c.cpp @@ -11,11 +11,12 @@ #include "vpi_user.h" -#include #include +#include #include #include +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" #include "TestSimulator.h" #include "TestVpi.h" diff --git a/test_regress/t/t_vpi_unimpl.cpp b/test_regress/t/t_vpi_unimpl.cpp index e8359ba45..549d66e1e 100644 --- a/test_regress/t/t_vpi_unimpl.cpp +++ b/test_regress/t/t_vpi_unimpl.cpp @@ -9,17 +9,17 @@ // //************************************************************************* -#include "Vt_vpi_unimpl.h" #include "verilated.h" -#include "svdpi.h" - -#include "Vt_vpi_unimpl__Dpi.h" - #include "verilated_vcd_c.h" + +#include "Vt_vpi_unimpl.h" +#include "Vt_vpi_unimpl__Dpi.h" +#include "svdpi.h" // No verilated_vpi.h, make sure can link without it #include +// These require the above. 
Comment prevents clang-format moving them #include "TestVpi.h" // __FILE__ is too long diff --git a/test_regress/t/t_vpi_var.cpp b/test_regress/t/t_vpi_var.cpp index f2c4dfa8f..5666bb3fd 100644 --- a/test_regress/t/t_vpi_var.cpp +++ b/test_regress/t/t_vpi_var.cpp @@ -15,25 +15,26 @@ #else -#include "Vt_vpi_var.h" #include "verilated.h" -#include "svdpi.h" - -#include "Vt_vpi_var__Dpi.h" - -#include "verilated_vpi.h" #include "verilated_vcd_c.h" +#include "verilated_vpi.h" + +#include "Vt_vpi_var.h" +#include "Vt_vpi_var__Dpi.h" +#include "svdpi.h" #endif -#include #include +#include #include #include +// These require the above. Comment prevents clang-format moving them #include "TestSimulator.h" #include "TestVpi.h" +int errors = 0; // __FILE__ is too long #define FILENM "t_vpi_var.cpp" diff --git a/test_regress/t/t_vpi_zero_time_cb.cpp b/test_regress/t/t_vpi_zero_time_cb.cpp index 0427ac226..56eea28ab 100644 --- a/test_regress/t/t_vpi_zero_time_cb.cpp +++ b/test_regress/t/t_vpi_zero_time_cb.cpp @@ -15,23 +15,24 @@ #else -#include "Vt_vpi_zero_time_cb.h" #include "verilated.h" -#include "svdpi.h" -#include - -#include "Vt_vpi_zero_time_cb__Dpi.h" - -#include "verilated_vpi.h" #include "verilated_vcd_c.h" +#include "verilated_vpi.h" + +#include "Vt_vpi_zero_time_cb.h" +#include "Vt_vpi_zero_time_cb__Dpi.h" +#include "svdpi.h" + +#include #endif -#include #include +#include #include #include +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" #include "TestSimulator.h" #include "TestVpi.h" diff --git a/test_regress/t/t_wrapper_context.cpp b/test_regress/t/t_wrapper_context.cpp index 31a9334f0..b802e6787 100644 --- a/test_regress/t/t_wrapper_context.cpp +++ b/test_regress/t/t_wrapper_context.cpp @@ -6,11 +6,13 @@ // SPDX-License-Identifier: CC0-1.0 // +#include +#include + #include #include -#include -#include +// These require the above. 
Comment prevents clang-format moving them #include "TestCheck.h" #include VM_PREFIX_INCLUDE diff --git a/test_regress/t/t_wrapper_legacy.cpp b/test_regress/t/t_wrapper_legacy.cpp index 6b123aed7..860e1a442 100644 --- a/test_regress/t/t_wrapper_legacy.cpp +++ b/test_regress/t/t_wrapper_legacy.cpp @@ -11,11 +11,12 @@ #include VM_PREFIX_INCLUDE -#include #include +#include #include #include +// These require the above. Comment prevents clang-format moving them #include "TestCheck.h" int errors = 0; diff --git a/test_regress/t/t_x_assign.cpp b/test_regress/t/t_x_assign.cpp index 9ab268a4f..85aa51797 100644 --- a/test_regress/t/t_x_assign.cpp +++ b/test_regress/t/t_x_assign.cpp @@ -9,9 +9,9 @@ // //************************************************************************* -#include - #include "verilated.h" + +#include #include VM_PREFIX_INCLUDE double sc_time_stamp() { return 0; } From 122e89ffdef511b06f47aeec3d8191e32408e873 Mon Sep 17 00:00:00 2001 From: Mariusz Glebocki Date: Fri, 5 Aug 2022 20:12:52 +0200 Subject: [PATCH 078/119] Fix V3Number::isMsbXZ(). (#3530) --- src/V3Number.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/V3Number.h b/src/V3Number.h index a574bb191..56dd34512 100644 --- a/src/V3Number.h +++ b/src/V3Number.h @@ -337,7 +337,7 @@ public: bool isAnyX() const; bool isAnyXZ() const; bool isAnyZ() const; - bool isMsbXZ() const { return bitIsXZ(m_width); } + bool isMsbXZ() const { return bitIsXZ(m_width - 1); } uint32_t toUInt() const; int32_t toSInt() const; uint64_t toUQuad() const; From f4fe10844b566bd80785009cb0866798bac32181 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sun, 7 Aug 2022 04:57:59 -0400 Subject: [PATCH 079/119] Tests: Fix t_flag_help.pl (#3532). 
--- test_regress/t/t_flag_help.pl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test_regress/t/t_flag_help.pl b/test_regress/t/t_flag_help.pl index cb38bfb87..93524f9d4 100755 --- a/test_regress/t/t_flag_help.pl +++ b/test_regress/t/t_flag_help.pl @@ -8,6 +8,8 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di # Version 2.0. # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +use File::Basename; + scenarios(dist => 1); # See also t_flag_version.pl @@ -16,27 +18,25 @@ sub check { my $interpreter = shift; my $prog = shift; + my $logfile = "$Self->{obj_dir}/t_help__" . basename($prog) . ".log"; + run(fails => 0, cmd => [$interpreter, $prog, "--help"], - logfile => "$Self->{obj_dir}/t_help.log", + logfile => $logfile, tee => 0, verilator_run => 1, ); - file_grep("$Self->{obj_dir}/t_help.log", qr/DISTRIBUTION/i); + file_grep($logfile, qr/(DISTRIBUTION|usage:)/i); } -foreach my $prog ( - "../bin/verilator", - "../bin/verilator_coverage", - "../bin/verilator_difftree", - "../bin/verilator_gantt", - "../bin/verilator_profcfunc", - ) { - check("perl", $prog); -} +check("perl", "../bin/verilator"); +check("perl", "../bin/verilator_coverage"); check("python3", "../bin/verilator_ccache_report"); +check("python3", "../bin/verilator_difftree"); +check("python3", "../bin/verilator_gantt"); +check("python3", "../bin/verilator_profcfunc"); ok(1); 1; From d20f22beb10110d7078144ede9f00e4b822ad0d4 Mon Sep 17 00:00:00 2001 From: Yutetsu TAKATSUKASA Date: Sun, 7 Aug 2022 21:12:57 +0900 Subject: [PATCH 080/119] Fix tristate logic when reading inout port in a module #3399 (#3523) * Tests: Add a test to reproduce #3399 * Fix #3399. When reading an inout port in a module, it should refer the original inout port, not the generated MODTEMP. 
--- src/V3Tristate.cpp | 10 +++- test_regress/t/t_tri_inout.cpp | 10 ++++ test_regress/t/t_tri_inout.v | 85 +++++++++++++++++++++++++++++++++- 3 files changed, 103 insertions(+), 2 deletions(-) diff --git a/src/V3Tristate.cpp b/src/V3Tristate.cpp index 6f80c2f82..20861faa9 100644 --- a/src/V3Tristate.cpp +++ b/src/V3Tristate.cpp @@ -385,7 +385,15 @@ class TristateVisitor final : public TristateBaseVisitor { return newp; } AstNode* getEnp(AstNode* nodep) { - if (!nodep->user1p()) { + if (nodep->user1p()) { + if (AstVarRef* const refp = VN_CAST(nodep, VarRef)) { + if (refp->varp()->isIO()) { + // When reading a tri-state port, we can always use the value + // because such port will have resolution logic in upper module. + return newAllZerosOrOnes(nodep, true); + } + } + } else { // There's no select being built yet, so add what will become a // constant output enable driver of all 1's nodep->user1p(newAllZerosOrOnes(nodep, true)); diff --git a/test_regress/t/t_tri_inout.cpp b/test_regress/t/t_tri_inout.cpp index b3bd8aa46..874c5f15b 100644 --- a/test_regress/t/t_tri_inout.cpp +++ b/test_regress/t/t_tri_inout.cpp @@ -47,6 +47,16 @@ int main() { } } } + tb->SEL = tb->A = tb->B = 0; + + for (int i = 0; i < 256; ++i) { + tb->clk = 0; + tb->eval(); + tb->clk = 1; + tb->eval(); + if (tb->done) break; + if (i + 1 == 256) pass = false; + } if (pass) { VL_PRINTF("*-* All Finished *-*\n"); diff --git a/test_regress/t/t_tri_inout.v b/test_regress/t/t_tri_inout.v index d6624b04d..d9673beb9 100644 --- a/test_regress/t/t_tri_inout.v +++ b/test_regress/t/t_tri_inout.v @@ -4,10 +4,12 @@ // without warranty, 2008 by Lane Brooks. 
// SPDX-License-Identifier: CC0-1.0 -module top (input A, input B, input SEL, output Y1, output Y2, output Z); +module top (input A, input B, input SEL, input clk, output Y1, output Y2, output Z, output done); io io1(.A(A), .OE( SEL), .Z(Z), .Y(Y1)); pass io2(.A(B), .OE(!SEL), .Z(Z), .Y(Y2)); assign Z = 1'bz; + + pad_checker u_pad_checker(.clk(clk), .done(done)); endmodule module pass (input A, input OE, inout Z, output Y); @@ -27,3 +29,84 @@ module io_noinline (input A, input OE, inout Z, output Y); assign Y = Z; assign Z = 1'bz; endmodule + + +module pad_checker(input wire clk, output wire done); + wire tri_pad; + reg [1:0] ie = '0; + reg [1:0] oe = '0; + reg [1:0] in = '0; + wire out_0, out_1; + + pad u_pad0(.pad(tri_pad), .ie(ie[0]), .oe(oe[0]), .to_pad(in[0]), .from_pad(out_0)); + pad u_pad1(.pad(tri_pad), .ie(ie[1]), .oe(oe[1]), .to_pad(in[1]), .from_pad(out_1)); + + wire bin_pad_in_0, bin_pad_in_1; + wire bin_pad_01, bin_pad_10; + wire bin_pad_en_01, bin_pad_en_10; + wire bin_from_pad_out_0, bin_from_pad_out_1; + wire bin_from_pad_en_0, bin_from_pad_en_1; + + // Expectation model that simulates how Verilator solves tri-state + pad_binary u_pad_bin_0(.pad_in(bin_pad_in_0), + .pad_out(bin_pad_01), + .pad_en(bin_pad_en_01), + .ie(ie[0]), .oe(oe[0]), + .to_pad(in[0]), + .from_pad_out(bin_from_pad_out_0), + .from_pad_en(bin_from_pad_en_0)); + + pad_binary u_pad_bin_1(.pad_in(bin_pad_in_1), + .pad_out(bin_pad_10), + .pad_en(bin_pad_en_10), + .ie(ie[1]), + .oe(oe[1]), + .to_pad(in[1]), + .from_pad_out(bin_from_pad_out_1), + .from_pad_en(bin_from_pad_en_1)); + + assign bin_pad_in_0 = (bin_pad_en_10 & bin_pad_10) | (bin_pad_en_01 & bin_pad_01); + assign bin_pad_in_1 = (bin_pad_en_01 & bin_pad_01) | (bin_pad_en_10 & bin_pad_10); + + + logic done_reg = 0; + assign done = done_reg; + always @(posedge clk) begin + if ({ie, oe, in} == 6'b111111) begin + done_reg <= 1'b1; + end else begin + if (out_0 != bin_from_pad_out_0) begin + $display("ie:%b oe:%b in:%b out0 act:%b 
exp:%b", ie[0], oe[0], in[0], out_0, bin_from_pad_out_0); + $stop; + end + if (out_1 != bin_from_pad_out_1) begin + $display("ie:%b oe:%b in:%b out1 act:%b exp:%b", ie[1], oe[1], in[1], out_1, bin_from_pad_out_1); + $stop; + end + // Let's try all combination + {ie, oe, in} <= {ie, oe, in} + 1; + end + end + +endmodule + +module pad(inout wire pad, input wire ie, input wire oe, input wire to_pad, output wire from_pad); + + assign pad = oe ? to_pad : 1'bz; + assign from_pad = ie ? pad : 1'bz; +endmodule + +module pad_binary(input wire pad_in, + output wire pad_out, + output wire pad_en, + input wire ie, + input wire oe, + input wire to_pad, + output from_pad_out, + output wire from_pad_en); + + assign pad_out = oe & to_pad; + assign pad_en = oe; + assign from_pad_out = ie & ((oe & to_pad) | pad_in); + assign from_pad_en = ie; +endmodule From 2b12fe5773960126ced1135f779917f9a7ada5cb Mon Sep 17 00:00:00 2001 From: Mariusz Glebocki Date: Mon, 8 Aug 2022 14:17:02 +0200 Subject: [PATCH 081/119] Internals: Construct V3Number with correct type instead of changing it manually. 
(#3529) --- src/V3AstNodes.h | 7 ++++--- src/V3Const.cpp | 2 +- src/V3Number.cpp | 12 ++++++++++++ src/V3Number.h | 8 ++++++-- src/V3Param.cpp | 16 +++++++++------- src/V3Simulate.h | 4 +--- 6 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index 81753a419..9657052fb 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -77,10 +77,11 @@ public: , m_num(this, width, value) { initWithNumber(); } - class DtypedValue {}; // for creator type-overload selection - AstConst(FileLine* fl, DtypedValue, AstNodeDType* nodedtypep, uint32_t value) + class DTyped {}; // for creator type-overload selection + // Zero/empty constant with a type matching nodetypep + AstConst(FileLine* fl, DTyped, const AstNodeDType* nodedtypep) : ASTGEN_SUPER_Const(fl) - , m_num(this, nodedtypep->width(), value, nodedtypep->widthSized()) { + , m_num(this, nodedtypep) { initWithNumber(); } class StringToParse {}; // for creator type-overload selection diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 31e9e6360..fc4555304 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -1089,7 +1089,7 @@ private: if (orLIsRedundant && orRIsRedundant) { nodep->replaceWith( - new AstConst(nodep->fileline(), AstConst::DtypedValue(), nodep->dtypep(), 0)); + new AstConst(nodep->fileline(), AstConst::DTyped{}, nodep->dtypep())); VL_DO_DANGLING(nodep->deleteTree(), nodep); return true; } else if (orLIsRedundant) { diff --git a/src/V3Number.cpp b/src/V3Number.cpp index 9ff6bf5b1..214465f70 100644 --- a/src/V3Number.cpp +++ b/src/V3Number.cpp @@ -110,6 +110,18 @@ V3Number::V3Number(VerilogStringLiteral, AstNode* nodep, const string& str) { opCleanThis(true); } +V3Number::V3Number(AstNode* nodep, const AstNodeDType* nodedtypep) { + if (nodedtypep->isString()) { + init(nodep, 0); + setString(""); + } else if (nodedtypep->isDouble()) { + init(nodep, 64); + setDouble(0.0); + } else { + init(nodep, nodedtypep->width(), nodedtypep->widthSized()); + } +} + void 
V3Number::V3NumberCreate(AstNode* nodep, const char* sourcep, FileLine* fl) { init(nodep, 0); m_fileline = fl; diff --git a/src/V3Number.h b/src/V3Number.h index 56dd34512..fe2f11c5d 100644 --- a/src/V3Number.h +++ b/src/V3Number.h @@ -39,6 +39,7 @@ inline bool v3EpsilonEqual(double a, double b) { //============================================================================ class AstNode; +class AstNodeDType; class FileLine; // Holds a few entries of ValueAndX to avoid dynamic allocation in std::vector for less width of @@ -252,6 +253,11 @@ public: opCleanThis(); m_fileline = nump->fileline(); } + V3Number(AstNode* nodep, double value) { + init(nodep, 64); + setDouble(value); + } + V3Number(AstNode* nodep, const AstNodeDType* nodedtypep); private: void V3NumberCreate(AstNode* nodep, const char* sourcep, FileLine* fl); @@ -311,9 +317,7 @@ public: // (use AstConst::isSigned()) bool isDouble() const { return m_double; } // Only if have 64 bit value loaded, and want to indicate it's real - void isDouble(bool flag) { m_double = flag; } bool isString() const { return m_isString; } - void isString(bool flag) { m_isString = flag; } bool isNegative() const { return bitIs1(width() - 1); } bool isNull() const { return m_isNull; } bool isFourState() const; diff --git a/src/V3Param.cpp b/src/V3Param.cpp index 8a2b5a415..62aa66541 100644 --- a/src/V3Param.cpp +++ b/src/V3Param.cpp @@ -186,19 +186,21 @@ public: return pinValuep->num().toString() == hierOptParamp->num().toString(); } - // Bitwidth of hierOptParamp is accurate because V3Width already caluclated in the previous - // run. Bitwidth of pinValuep is before width analysis, so pinValuep is casted to - // hierOptParamp width. 
- V3Number varNum(pinValuep, hierOptParamp->num().width()); if (hierOptParamp->isDouble()) { - varNum.isDouble(true); + double var; if (pinValuep->isDouble()) { - varNum.opAssign(pinValuep->num()); + var = pinValuep->num().toDouble(); } else { // Cast from integer to real + V3Number varNum{pinValuep, 0.0}; varNum.opIToRD(pinValuep->num()); + var = varNum.toDouble(); } - return v3EpsilonEqual(varNum.toDouble(), hierOptParamp->num().toDouble()); + return v3EpsilonEqual(var, hierOptParamp->num().toDouble()); } else { // Now integer type is assumed + // Bitwidth of hierOptParamp is accurate because V3Width already caluclated in the + // previous run. Bitwidth of pinValuep is before width analysis, so pinValuep is casted + // to hierOptParamp width. + V3Number varNum{pinValuep, hierOptParamp->num().width()}; if (pinValuep->isDouble()) { // Need to cast to int // Parameter is actually an integral type, but passed value is floating point. // Conversion from real to integer uses rounding in V3Width.cpp diff --git a/src/V3Simulate.h b/src/V3Simulate.h index e2418e60d..42fadf297 100644 --- a/src/V3Simulate.h +++ b/src/V3Simulate.h @@ -239,13 +239,11 @@ private: } if (allocNewConst) { // Need to allocate new constant - constp = new AstConst{nodep->fileline(), AstConst::DtypedValue{}, nodep->dtypep(), 0}; + constp = new AstConst{nodep->fileline(), AstConst::DTyped{}, nodep->dtypep()}; // Mark as in use, add to free list for later reuse constp->user2(1); freeList.push_back(constp); } - constp->num().isDouble(nodep->isDouble()); - constp->num().isString(nodep->isString()); return constp; } From cbe1b8e2668e1e98970810688052276b5045129d Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Mon, 8 Aug 2022 17:53:45 -0400 Subject: [PATCH 082/119] Fix segfault exporting non-existant package (#3535). 
--- Changes | 1 + src/V3ParseSym.h | 2 +- src/verilog.y | 6 +++--- test_regress/t/t_package_alone_bad.out | 5 +++++ test_regress/t/t_package_alone_bad.pl | 19 +++++++++++++++++++ test_regress/t/t_package_alone_bad.v | 7 +++++++ 6 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 test_regress/t/t_package_alone_bad.out create mode 100755 test_regress/t/t_package_alone_bad.pl create mode 100644 test_regress/t/t_package_alone_bad.v diff --git a/Changes b/Changes index 6cfd4b984..ba347daf3 100644 --- a/Changes +++ b/Changes @@ -18,6 +18,7 @@ Verilator 4.225 devel * Fix table misoptimizing away display (#3488). [Stefan Post] * Fix wrong bit op tree optimization (#3509). [Nathan Graybeal] * Fix incorrect tristate logic (#3399) [shareefj, Vighnesh Iyer] +* Fix segfault exporting non-existant package (#3535). Verilator 4.224 2022-06-19 diff --git a/src/V3ParseSym.h b/src/V3ParseSym.h index 998dac016..3a60bfbea 100644 --- a/src/V3ParseSym.h +++ b/src/V3ParseSym.h @@ -168,7 +168,7 @@ public: "Export package not found"); symCurrentp()->exportFromPackage(&m_syms, symp, id_or_star); } - void exportStarStar(AstNode* packagep) { + void exportStarStar() { // Export *::* from remote packages symCurrentp()->exportStarStar(&m_syms); } diff --git a/src/verilog.y b/src/verilog.y index 79daad950..75f377213 100644 --- a/src/verilog.y +++ b/src/verilog.y @@ -1171,7 +1171,7 @@ package_import_itemObj: // IEEE: part of package_import_item package_export_declaration: // IEEE: package_export_declaration yEXPORT '*' yP_COLONCOLON '*' ';' - { $$ = new AstPackageExportStarStar{$2}; SYMP->exportStarStar($1); } + { $$ = new AstPackageExportStarStar{$2}; SYMP->exportStarStar(); } | yEXPORT package_export_itemList ';' { $$ = $2; } ; @@ -1182,8 +1182,8 @@ package_export_itemList: package_export_item: // ==IEEE: package_export_item idCC yP_COLONCOLON package_import_itemObj - { $$ = new AstPackageExport($3, VN_CAST($1, Package), *$3); - SYMP->exportItem($1,*$3); } + { $$ = new 
AstPackageExport{$3, VN_CAST($1, Package), *$3}; + if ($1) SYMP->exportItem($1, *$3); } ; //********************************************************************** diff --git a/test_regress/t/t_package_alone_bad.out b/test_regress/t/t_package_alone_bad.out new file mode 100644 index 000000000..6144b36ec --- /dev/null +++ b/test_regress/t/t_package_alone_bad.out @@ -0,0 +1,5 @@ +%Error-PKGNODECL: t/t_package_alone_bad.v:7:8: Package/class 'pkg' not found, and needs to be predeclared (IEEE 1800-2017 26.3) + 7 | export pkg::something; + | ^~~ + ... For error description see https://verilator.org/warn/PKGNODECL?v=latest +%Error: Exiting due to diff --git a/test_regress/t/t_package_alone_bad.pl b/test_regress/t/t_package_alone_bad.pl new file mode 100755 index 000000000..27159da5b --- /dev/null +++ b/test_regress/t/t_package_alone_bad.pl @@ -0,0 +1,19 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2019 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(linter => 1); + +lint( + fails => 1, + expect_filename => $Self->{golden_filename}, + ); + +ok(1); +1; diff --git a/test_regress/t/t_package_alone_bad.v b/test_regress/t/t_package_alone_bad.v new file mode 100644 index 000000000..34adc1b82 --- /dev/null +++ b/test_regress/t/t_package_alone_bad.v @@ -0,0 +1,7 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty, 2022 by Wilson Snyder. 
+// SPDX-License-Identifier: CC0-1.0 + +export pkg::something; From 1e2219347e94a712356048452f651f3d9c3b180b Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Thu, 11 Aug 2022 17:41:43 -0400 Subject: [PATCH 083/119] Internals: Cleanup ifdef, move up not under compilver version ifdef --- include/verilatedos.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/include/verilatedos.h b/include/verilatedos.h index d06d5fd99..c89b4c6dc 100644 --- a/include/verilatedos.h +++ b/include/verilatedos.h @@ -218,22 +218,23 @@ // C++-2011 #if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || defined(VL_CPPCHECK) -# ifndef VL_NO_LEGACY -// These are deprecated historical defines. We leave them in case users referenced them. -# define VL_EQ_DELETE = delete -# define vl_unique_ptr std::unique_ptr -# define vl_unordered_map std::unordered_map -# define vl_unordered_set std::unordered_set -# define VL_INCLUDE_UNORDERED_MAP -# define VL_INCLUDE_UNORDERED_SET -# define VL_FINAL final -# define VL_MUTABLE mutable -# define VL_OVERRIDE override -# endif #else # error "Verilator requires a C++11 or newer compiler" #endif +#ifndef VL_NO_LEGACY +// These are deprecated historical defines. We leave them in case users referenced them. 
+# define VL_EQ_DELETE = delete +# define vl_unique_ptr std::unique_ptr +# define vl_unordered_map std::unordered_map +# define vl_unordered_set std::unordered_set +# define VL_INCLUDE_UNORDERED_MAP +# define VL_INCLUDE_UNORDERED_SET +# define VL_FINAL final +# define VL_MUTABLE mutable +# define VL_OVERRIDE override +#endif + //========================================================================= // C++-2017 From b0c475205bfb8a4c6178e1f309f979a6d19d37d1 Mon Sep 17 00:00:00 2001 From: Drew Ranck Date: Fri, 12 Aug 2022 06:51:25 -0400 Subject: [PATCH 084/119] Fix void-cast queue pop_front or pop_back (#3542) (#3364) Fix compile error for queue method usage, if it is the first statement in a block of code, and the return value is not used. Example: > if (foo) > void'(bar.pop_front()); --- src/V3Ast.h | 15 +++ src/V3AstNodes.h | 6 + src/V3Width.cpp | 4 +- test_regress/t/t_void_queue_ops.pl | 21 ++++ test_regress/t/t_void_queue_ops.v | 190 +++++++++++++++++++++++++++++ 5 files changed, 235 insertions(+), 1 deletion(-) create mode 100755 test_regress/t/t_void_queue_ops.pl create mode 100644 test_regress/t/t_void_queue_ops.v diff --git a/src/V3Ast.h b/src/V3Ast.h index c527c71f9..dbc31cdab 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1509,6 +1509,16 @@ public: AstNode* firstAbovep() const { // Returns nullptr when second or later in list return ((backp() && backp()->nextp() != this) ? backp() : nullptr); } + // isFirstInMyListOfStatements(n) -- implemented by child classes: + // AstNodeBlock, AstCaseItem, AstNodeIf, AstNodeFTask, and possibly others. + virtual bool isFirstInMyListOfStatements(AstNode* n) const { return false; } + // isStandaloneBodyStmt == Do we need a ; on generated cpp for this node? 
+ bool isStandaloneBodyStmt() { + return (!firstAbovep() // we're 2nd or later in the list, so yes need ; + + // If we're first in the list, check what backp() thinks of us: + || (backp() && backp()->isFirstInMyListOfStatements(this))); + } uint8_t brokenState() const { return m_brokenState; } void brokenState(uint8_t value) { m_brokenState = value; } @@ -2566,6 +2576,7 @@ public: AstNode* stmtsp() const { return op1p(); } // op1 = List of statements void addStmtsp(AstNode* nodep) { addNOp1p(nodep); } bool unnamed() const { return m_unnamed; } + bool isFirstInMyListOfStatements(AstNode* nodep) const override { return nodep == stmtsp(); } }; class AstNodePreSel VL_NOT_FINAL : public AstNode { @@ -2711,6 +2722,9 @@ public: VBranchPred branchPred() const { return m_branchPred; } void isBoundsCheck(bool flag) { m_isBoundsCheck = flag; } bool isBoundsCheck() const { return m_isBoundsCheck; } + bool isFirstInMyListOfStatements(AstNode* n) const override { + return n == ifsp() || n == elsesp(); + } }; class AstNodeCase VL_NOT_FINAL : public AstNodeStmt { @@ -3237,6 +3251,7 @@ public: bool isVirtual() const { return m_virtual; } void lifetime(const VLifetime& flag) { m_lifetime = flag; } VLifetime lifetime() const { return m_lifetime; } + bool isFirstInMyListOfStatements(AstNode* n) const override { return n == stmtsp(); } }; class AstNodeFTaskRef VL_NOT_FINAL : public AstNodeStmt { diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index 9657052fb..a6ebe193e 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -3586,6 +3586,7 @@ public: void addStmtp(AstNode* nodep) { addOp2p(nodep); } // Special accessors bool isJustOneBodyStmt() const { return bodysp() && !bodysp()->nextp(); } + bool isFirstInMyListOfStatements(AstNode* n) const override { return n == bodysp(); } }; class AstAssign final : public AstNodeAssign { @@ -4009,6 +4010,7 @@ public: void condsp(AstNode* nodep) { setOp1p(nodep); } void addBodysp(AstNode* newp) { addOp2p(newp); } bool isDefault() const { return 
condsp() == nullptr; } + bool isFirstInMyListOfStatements(AstNode* n) const override { return n == bodysp(); } }; class AstSFormatF final : public AstNode { @@ -4694,6 +4696,7 @@ public: virtual bool isGateOptimizable() const override { return false; } virtual int instrCount() const override { return INSTR_COUNT_BRANCH; } virtual bool same(const AstNode* /*samep*/) const override { return true; } + bool isFirstInMyListOfStatements(AstNode* n) const override { return n == bodysp(); } }; class AstRepeat final : public AstNodeStmt { @@ -4711,6 +4714,7 @@ public: } // Not relevant - converted to FOR virtual int instrCount() const override { return INSTR_COUNT_BRANCH; } virtual bool same(const AstNode* /*samep*/) const override { return true; } + bool isFirstInMyListOfStatements(AstNode* n) const override { return n == bodysp(); } }; class AstWait final : public AstNodeStmt { @@ -4722,6 +4726,7 @@ public: } ASTNODE_NODE_FUNCS(Wait) AstNode* bodysp() const { return op3p(); } // op3 = body of loop + bool isFirstInMyListOfStatements(AstNode* n) const override { return n == bodysp(); } }; class AstWhile final : public AstNodeStmt { @@ -4748,6 +4753,7 @@ public: virtual void addBeforeStmt(AstNode* newp, AstNode* belowp) override; // Stop statement searchback here virtual void addNextStmt(AstNode* newp, AstNode* belowp) override; + bool isFirstInMyListOfStatements(AstNode* n) const override { return n == bodysp(); } }; class AstBreak final : public AstNodeStmt { diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 215e282b6..523d208da 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -3090,7 +3090,9 @@ private: newp = new AstCMethodHard(nodep->fileline(), nodep->fromp()->unlinkFrBack(), nodep->name()); newp->dtypeFrom(adtypep->subDTypep()); - if (!nodep->firstAbovep()) newp->makeStatement(); + // Because queue methods pop_front() or pop_back() can be void cast, + // they use makeStatement to check if they need the c++ ";" added. 
+ if (nodep->isStandaloneBodyStmt()) newp->makeStatement(); } else if (nodep->name() == "push_back" || nodep->name() == "push_front") { methodOkArguments(nodep, 1, 1); methodCallLValueRecurse(nodep, nodep->fromp(), VAccess::WRITE); diff --git a/test_regress/t/t_void_queue_ops.pl b/test_regress/t/t_void_queue_ops.pl new file mode 100755 index 000000000..b46d46042 --- /dev/null +++ b/test_regress/t/t_void_queue_ops.pl @@ -0,0 +1,21 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_void_queue_ops.v b/test_regress/t/t_void_queue_ops.v new file mode 100644 index 000000000..f618cb876 --- /dev/null +++ b/test_regress/t/t_void_queue_ops.v @@ -0,0 +1,190 @@ +module t + (/*AUTOARG*/ + // Inputs + clk + ); + + input clk; + + int cyc = 0; + + + // Test for https://github.com/verilator/verilator/issues/3364 + // Make sure all SV queue API is supported and verilator can generate + // compile-able C++ models for it. + + // simple queue + logic [31:0] my_int_queue [$]; + + // On the functions and tasks, the my_int_queue.pop_[front|back]() call will + // have nodep->firstAbovep() != nullptr. Because the pop_front or pop_back is + // the first node on the "list". + // To fix this, V3Width.cpp will not use firstAbovep(), and instead us + // isStandalongStmt() -- which checks if the pop_front or pop_back is + // 2nd or later, or if it's first in the list that it's in a "block" of code. 
+ // For functions/tasks, that is checked with: + // VN_IS(backp(), NodeFTask)=True, so even though + function automatic void f_pop_back__my_int_queue(); + void'(my_int_queue.pop_back()); + endfunction : f_pop_back__my_int_queue + + function automatic void f_pop_front__my_int_queue(); + void'(my_int_queue.pop_front()); + endfunction : f_pop_front__my_int_queue + + task automatic t_pop_back__my_int_queue(); + void'(my_int_queue.pop_back()); + endtask : t_pop_back__my_int_queue + + task automatic t_pop_front__my_int_queue(); + void'(my_int_queue.pop_front()); + endtask : t_pop_front__my_int_queue + + + task automatic do_random_queue_operation(); + bit [7:0] rand_op; + int rand_index; + logic [31:0] item; + + + rand_op = 8'($urandom_range(32, 0)); + case(rand_op) + 8'd0: ; // nop + + // pushes (2x of these) + 8'd1, 8'd2: my_int_queue.push_back($urandom); + 8'd3, 8'd4: my_int_queue.push_front($urandom); + + // delete: + 8'd5: my_int_queue.delete(); + + // insert(index, item): + 8'd6: begin + rand_index = $urandom_range(my_int_queue.size()); + my_int_queue.insert(rand_index, item); + end + + // shuffle + 8'd7: my_int_queue.shuffle(); + + // Various pops for rand_op >= 8: + // pops to var + // V3Width debug -- firstAbovep()=ASSIGN (which I guess does the ; for us + // so we don't need the queue op to + // do it.) + // isStandalongStmt() will ignore ASSIGN, return false (NodeAssign is + // child of AstNodeStmt) + 8'd8: if (my_int_queue.size() > 0) item = my_int_queue.pop_front(); + 8'd9: if (my_int_queue.size() > 0) item = my_int_queue.pop_back(); + + // pops to the void + // V3Width debug -- firstAbovep()=IF + // This is fixed with isStandalongStmt() -- VN_IS(backp(), NodeIf)=True + 8'd10: if (my_int_queue.size() > 0) void'(my_int_queue.pop_front()); + 8'd11: if (my_int_queue.size() > 0) void'(my_int_queue.pop_back()); + + // pop result to the lhs of a condition, and do something with it. 
+ 8'd12: + if (my_int_queue.size() > 0) + // V3Width debug -- firstAbovep()=LTE (good we don't want a ; here) + if (my_int_queue.pop_front() <= 2022) + my_int_queue.push_front(3022); // living in the year 3022. + + // pop result to the rhs of a condition, and do something with it. + 8'd13: + if (my_int_queue.size() > 0) + // V3Width debug -- firstAbovep()=GT (good we don't want a ; here) + if (4022 > my_int_queue.pop_front()) + my_int_queue.push_front(3023); // living in the year 3023. + + // pops to the void after yet another case: + // V3Width debug -- firstAbovep()=CASEITEM (not a nullptr) + // This is fixed with isStandalongStmt() -- VN_IS(backp(), CaseItem)=True + 8'd14: + case (my_int_queue.size() > 0) + 0: ; + 1: void'(my_int_queue.pop_front()); + default: ; + endcase // case (my_int_queue.size() > 0) + + // V3Width debug -- firstAbovep()=CASEITEM (not a nullptr) + // backp()->nextp()=CASEITEM (different one) + // This is fixed with isStandalongStmt() -- VN_IS(backp(), CaseItem)=True + 8'd15: + case (my_int_queue.size() > 0) + 0: ; + 1: void'(my_int_queue.pop_back()); + default; + endcase // case (my_int_queue.size() > 0) + + // pops in a function or task + 8'd16: if (my_int_queue.size() > 0) f_pop_back__my_int_queue(); + 8'd17: if (my_int_queue.size() > 0) f_pop_front__my_int_queue(); + 8'd18: if (my_int_queue.size() > 0) t_pop_back__my_int_queue(); + 8'd19: if (my_int_queue.size() > 0) t_pop_front__my_int_queue(); + + // But what if we put some dummy code before the pop_back() or pop_front(): + 8'd20: begin + if (my_int_queue.size() > 0) begin + ; // dummy line + // V3Width debug -- firstAbovep()=BEGIN (is not nullptr). 
+ // This is fixed with isStandalongStmt() -- VN_IS(backp(), NodeIf)=True + void'(my_int_queue.pop_back()); + end + end + 8'd21: begin + automatic int temp_int = 0; + if (my_int_queue.size() > 0) begin + temp_int = 5; // dummy line + // V3Width debug -- firstAbovep()=nullptr (good) + void'(my_int_queue.pop_back()); + end + end + 8'd22: begin + if (my_int_queue.size() > 0) begin + automatic int some_temp_dummy_int; + some_temp_dummy_int = 42; + // V3Width debug -- firstAbovep()=nullptr (good) + void'(my_int_queue.pop_back()); + end + end + 8'd23: begin + if (my_int_queue.size() > 0) begin + // no dummy here, just a 'begin' helper before it. + // V3Width debug -- firstAbovep()=BEGIN (is not nullptr). + // This is fixed with isStandalongStmt() -- VN_IS(backp(), NodeIf)=True + void'(my_int_queue.pop_back()); + end + end + + // What about an if of something else, followed by a pop_front? + 8'd24: begin + automatic int temp_int = 0; + if (my_int_queue.size() == 0) begin // dummy + temp_int = 1000; + end + void'(my_int_queue.pop_front()); // firstAbovep() should be nullptr here. 
+ end + + + default: ; // nop + endcase // case (rand_op) + + endtask : do_random_queue_operation + + + + always @ (posedge clk) begin : main + cyc <= cyc + 1; + + do_random_queue_operation(); + + if (cyc > 100) begin + $write("*-* All Finished *-*\n"); + $finish(); + end + end + + + +endmodule : t From df5f95a5bd23332d57d8470cdb2a16e4dd50d760 Mon Sep 17 00:00:00 2001 From: Mostafa Gamal Date: Fri, 12 Aug 2022 12:55:07 +0200 Subject: [PATCH 085/119] Fix nested default assignment for struct pattern (#3511) (#3524) --- src/V3Width.cpp | 110 ++++++++++++------ src/verilog.y | 2 - test_regress/t/t_array_list_bad.out | 2 +- .../t/t_structu_dataType_assignment.v | 50 ++++++++ 4 files changed, 125 insertions(+), 39 deletions(-) diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 523d208da..f1067a896 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -209,6 +209,7 @@ private: // TYPES using TableMap = std::map, AstVar*>; using PatVecMap = std::map; + using DTypeMap = std::map; // STATE WidthVP* m_vup = nullptr; // Current node state @@ -3564,11 +3565,9 @@ private: // which member each AstPatMember corresponds to before we can // determine the dtypep for that PatMember's value, and then // width the initial value appropriately. 
- using PatMap = std::map; // Store member: value - using DTypeMap - = std::map; // Store data_type: default_value - PatMap patmap; - DTypeMap dtypemap; + using PatMap = std::map; + PatMap patmap; // Store member: value + DTypeMap dtypemap; // Store data_type: default_value { const AstMemberDType* memp = vdtypep->membersp(); AstPatMember* patp = VN_CAST(nodep->itemsp(), PatMember); @@ -3631,44 +3630,23 @@ private: for (AstMemberDType* memp = vdtypep->membersp(); memp; memp = VN_AS(memp->nextp(), MemberDType)) { const auto it = patmap.find(memp); - AstPatMember* newpatp = nullptr; AstPatMember* patp = nullptr; if (it == patmap.end()) { - const string memp_DType = memp->virtRefDTypep()->prettyDTypeName(); - const auto it2 = dtypemap.find(memp_DType); - if (it2 != dtypemap.end()) { - // default_value for data_type - patp = it2->second; - newpatp = patp->cloneTree(false); - patp = newpatp; - } else if (defaultp) { - // default_value for any unassigned member yet - newpatp = defaultp->cloneTree(false); - patp = newpatp; + // default or deafult_type assignment + if (AstNodeUOrStructDType* const memp_nested_vdtypep + = VN_CAST(memp->virtRefDTypep(), NodeUOrStructDType)) { + newp = nestedvalueConcat_patternUOrStruct(memp_nested_vdtypep, defaultp, newp, + nodep, dtypemap); } else { - if (!VN_IS(vdtypep, UnionDType)) { - nodep->v3error("Assignment pattern missed initializing elements: " - << memp->virtRefDTypep()->prettyDTypeName() << " " - << memp->prettyName()); - } + patp = Defaultpatp_patternUOrStruct(nodep, memp, patp, vdtypep, defaultp, + dtypemap); + newp = valueConcat_patternUOrStruct(patp, newp, memp, nodep); } } else { + // member assignment patp = it->second; + newp = valueConcat_patternUOrStruct(patp, newp, memp, nodep); } - if (patp) { - // Determine initial values - patp->dtypep(memp); - AstNode* const valuep = patternMemberValueIterate(patp); - if (!newp) { - newp = valuep; - } else { - AstConcat* const concatp = new AstConcat(patp->fileline(), newp, valuep); 
- newp = concatp; - newp->dtypeSetLogicSized(concatp->lhsp()->width() + concatp->rhsp()->width(), - nodep->dtypep()->numeric()); - } - } - if (newpatp) VL_DO_DANGLING(pushDeletep(newpatp), newpatp); } if (newp) { nodep->replaceWith(newp); @@ -3677,6 +3655,66 @@ private: } VL_DO_DANGLING(pushDeletep(nodep), nodep); // Deletes defaultp also, if present } + + AstNode* nestedvalueConcat_patternUOrStruct(AstNodeUOrStructDType* memp_vdtypep, + AstPatMember* defaultp, AstNode* newp, + AstPattern* nodep, DTypeMap dtypemap) { + AstPatMember* patp = nullptr; + for (AstMemberDType* memp_nested = memp_vdtypep->membersp(); memp_nested; + memp_nested = VN_AS(memp_nested->nextp(), MemberDType)) { + if (AstNodeUOrStructDType* const memp_multinested_vdtypep + = VN_CAST(memp_nested->virtRefDTypep(), NodeUOrStructDType)) { + // When unpacked struct/union is supported this if will need some additional conditions + newp = nestedvalueConcat_patternUOrStruct(memp_multinested_vdtypep, defaultp, newp, + nodep, dtypemap); + } else { + patp = Defaultpatp_patternUOrStruct(nodep, memp_nested, patp, memp_vdtypep, + defaultp, dtypemap); + newp = valueConcat_patternUOrStruct(patp, newp, memp_nested, nodep); + } + } + return newp; + } + + AstPatMember* Defaultpatp_patternUOrStruct(AstPattern* nodep, AstMemberDType* memp, + AstPatMember* patp, + AstNodeUOrStructDType* memp_vdtypep, + AstPatMember* defaultp, DTypeMap dtypemap) { + const string memp_DType = memp->virtRefDTypep()->prettyDTypeName(); + const auto it = dtypemap.find(memp_DType); + if (it != dtypemap.end()) { + // default_value for data_type + patp = it->second->cloneTree(false); + } else if (defaultp) { + // default_value for any unmatched member yet + patp = defaultp->cloneTree(false); + } else { + if (!VN_IS(memp_vdtypep, UnionDType)) { + nodep->v3error("Assignment pattern missed initializing elements: " + << memp->virtRefDTypep()->prettyDTypeNameQ() << " " + << memp->prettyNameQ()); + } + } + return patp; + } + + AstNode* 
valueConcat_patternUOrStruct(AstPatMember* patp, AstNode* newp, AstMemberDType* memp, + AstPattern* nodep) { + if (patp) { + patp->dtypep(memp); + AstNode* const valuep = patternMemberValueIterate(patp); + if (!newp) { + newp = valuep; + } else { + AstConcat* const concatp = new AstConcat{patp->fileline(), newp, valuep}; + newp = concatp; + newp->dtypeSetLogicSized(concatp->lhsp()->width() + concatp->rhsp()->width(), + nodep->dtypep()->numeric()); + } + } + return newp; + } + void patternArray(AstPattern* nodep, AstNodeArrayDType* arrayDtp, AstPatMember* defaultp) { const VNumRange range = arrayDtp->declRange(); PatVecMap patmap = patVectorMap(nodep, range); diff --git a/src/verilog.y b/src/verilog.y index 75f377213..b53fe528e 100644 --- a/src/verilog.y +++ b/src/verilog.y @@ -3532,8 +3532,6 @@ patternKey: // IEEE: merge structure_pattern_key, array_patt // // id/*member*/ is part of constExpr below //UNSUP constExpr { $$ = $1; } // // IEEE: assignment_pattern_key - //UNSUP simple_type { $1->v3error("Unsupported: '{} with data type as key"); $$ = $1; } - // // simple_type reference looks like constExpr // // Verilator: // // The above expressions cause problems because "foo" may be // // a constant identifier (if array) or a reference to the diff --git a/test_regress/t/t_array_list_bad.out b/test_regress/t/t_array_list_bad.out index 0eeb8a06f..3ce83ac48 100644 --- a/test_regress/t/t_array_list_bad.out +++ b/test_regress/t/t_array_list_bad.out @@ -1,4 +1,4 @@ -%Error: t/t_array_list_bad.v:38:25: Assignment pattern missed initializing elements: logic t3 +%Error: t/t_array_list_bad.v:38:25: Assignment pattern missed initializing elements: 'logic' 't3' : ... 
In instance t 38 | test_out <= '{'0, '0}; | ^~ diff --git a/test_regress/t/t_structu_dataType_assignment.v b/test_regress/t/t_structu_dataType_assignment.v index 2962a6e78..209dc34b6 100644 --- a/test_regress/t/t_structu_dataType_assignment.v +++ b/test_regress/t/t_structu_dataType_assignment.v @@ -34,6 +34,20 @@ module top(); } DEF_struct; + typedef struct { // IEEE 1800-2017 SV CH:10.9.2 + int A; + struct { + int B, C; + struct{ + int D, E; + struct{ + int F; + shortint G; + } FG1; + } DE1; + } BC1; + } HIJ_struct; + // struct ab ab_struct ab; ab_struct abkey[1:0]; @@ -48,6 +62,9 @@ module top(); // struct DEF DEF_struct DEF; + // struct HIJ + HIJ_struct HIJ; + initial begin; // struct ab ab = '{0, 0}; //constant member by position @@ -130,6 +147,39 @@ module top(); if (DEF.BC2.B != 5) $stop; if (DEF.BC2.C != 5) $stop; + DEF = '{default:10}; + if (DEF.A != 10) $stop; + if (DEF.BC1.B != 10) $stop; + if (DEF.BC1.C != 10) $stop; + if (DEF.BC2.B != 10) $stop; + if (DEF.BC2.C != 10) $stop; + + DEF = '{int:10}; + if (DEF.A != 10) $stop; + if (DEF.BC1.B != 10) $stop; + if (DEF.BC1.C != 10) $stop; + if (DEF.BC2.B != 10) $stop; + if (DEF.BC2.C != 10) $stop; + + // struct HIJ + HIJ = '{int:10, default: 5}; + if (HIJ.A != 10) $stop; + if (HIJ.BC1.B != 10) $stop; + if (HIJ.BC1.C != 10) $stop; + if (HIJ.BC1.DE1.D != 10) $stop; + if (HIJ.BC1.DE1.E != 10) $stop; + if (HIJ.BC1.DE1.FG1.F != 10) $stop; + if (HIJ.BC1.DE1.FG1.G != 5) $stop; + + HIJ = '{shortint:10, default: 5}; + if (HIJ.A != 5) $stop; + if (HIJ.BC1.B != 5) $stop; + if (HIJ.BC1.C != 5) $stop; + if (HIJ.BC1.DE1.D != 5) $stop; + if (HIJ.BC1.DE1.E != 5) $stop; + if (HIJ.BC1.DE1.FG1.F != 5) $stop; + if (HIJ.BC1.DE1.FG1.G != 10) $stop; + $write("*-* All Finished *-*\n"); $finish; end From d32e3f042f71bee4955c415e7e10b823fc58b839 Mon Sep 17 00:00:00 2001 From: github action Date: Fri, 12 Aug 2022 10:56:12 +0000 Subject: [PATCH 086/119] Apply 'make format' --- src/V3Width.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/src/V3Width.cpp b/src/V3Width.cpp index f1067a896..91de05a95 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -3664,7 +3664,8 @@ private: memp_nested = VN_AS(memp_nested->nextp(), MemberDType)) { if (AstNodeUOrStructDType* const memp_multinested_vdtypep = VN_CAST(memp_nested->virtRefDTypep(), NodeUOrStructDType)) { - // When unpacked struct/union is supported this if will need some additional conditions + // When unpacked struct/union is supported this if will need some additional + // conditions newp = nestedvalueConcat_patternUOrStruct(memp_multinested_vdtypep, defaultp, newp, nodep, dtypemap); } else { From f435d9624180c6b0d511f427ff41a876e3c7aaea Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Mon, 15 Aug 2022 21:56:09 -0400 Subject: [PATCH 087/119] Fix case statement comparing string literal (#3544). --- Changes | 1 + src/V3Width.cpp | 4 +++- test_regress/t/t_case_string2.pl | 21 +++++++++++++++++++++ test_regress/t/t_case_string2.v | 21 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100755 test_regress/t/t_case_string2.pl create mode 100644 test_regress/t/t_case_string2.v diff --git a/Changes b/Changes index ba347daf3..65ee0fd39 100644 --- a/Changes +++ b/Changes @@ -19,6 +19,7 @@ Verilator 4.225 devel * Fix wrong bit op tree optimization (#3509). [Nathan Graybeal] * Fix incorrect tristate logic (#3399) [shareefj, Vighnesh Iyer] * Fix segfault exporting non-existant package (#3535). +* Fix case statement comparing string literal (#3544). 
[Gustav Svensk] Verilator 4.224 2022-06-19 diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 91de05a95..824d66beb 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -3977,8 +3977,10 @@ private: itemp = VN_AS(itemp->nextp(), CaseItem)) { for (AstNode* condp = itemp->condsp(); condp; condp = condp->nextp()) { if (condp->dtypep() != subDTypep) { - if (condp->dtypep()->isDouble()) { + if (condp->dtypep()->isDouble() || subDTypep->isDouble()) { subDTypep = nodep->findDoubleDType(); + } else if (condp->dtypep()->isString() || subDTypep->isString()) { + subDTypep = nodep->findStringDType(); } else { const int width = std::max(subDTypep->width(), condp->width()); const int mwidth = std::max(subDTypep->widthMin(), condp->widthMin()); diff --git a/test_regress/t/t_case_string2.pl b/test_regress/t/t_case_string2.pl new file mode 100755 index 000000000..1aa73f80a --- /dev/null +++ b/test_regress/t/t_case_string2.pl @@ -0,0 +1,21 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2022 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_case_string2.v b/test_regress/t/t_case_string2.v new file mode 100644 index 000000000..f2722639b --- /dev/null +++ b/test_regress/t/t_case_string2.v @@ -0,0 +1,21 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Wilson Snyder. 
+// SPDX-License-Identifier: CC0-1.0 + +module t; + +function automatic string broken_case(input string some_string); + case(some_string) + "alpha": return "alpha"; + default: return "beta"; + endcase +endfunction + + initial begin + $display(broken_case("gamma")); + $write("*-* All Finished *-*\n"); + $finish; + end +endmodule From 18b9e661c942a33971396cc55990c8009585d25b Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Mon, 15 Aug 2022 22:17:09 -0400 Subject: [PATCH 088/119] Tests: Confirm fixed (#446) --- test_regress/t/t_array_packed_write_read.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/test_regress/t/t_array_packed_write_read.pl b/test_regress/t/t_array_packed_write_read.pl index d0de24f1c..b46d46042 100755 --- a/test_regress/t/t_array_packed_write_read.pl +++ b/test_regress/t/t_array_packed_write_read.pl @@ -9,7 +9,6 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 scenarios(simulator => 1); -$Self->{vlt_all} and unsupported("Verilator unsupported, bug446"); compile( ); From 43abaeb0552ec86c73b99a66e8080d0f6e002909 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Mon, 15 Aug 2022 22:17:17 -0400 Subject: [PATCH 089/119] Tests: Confirm fixed (#485) --- test_regress/t/t_lint_block_redecl_bad.out | 7 +++++++ test_regress/t/t_lint_block_redecl_bad.pl | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 test_regress/t/t_lint_block_redecl_bad.out diff --git a/test_regress/t/t_lint_block_redecl_bad.out b/test_regress/t/t_lint_block_redecl_bad.out new file mode 100644 index 000000000..a3729f293 --- /dev/null +++ b/test_regress/t/t_lint_block_redecl_bad.out @@ -0,0 +1,7 @@ +%Error: t/t_lint_block_redecl_bad.v:21:34: Duplicate declaration of block: 'COMB' + 21 | for(i=0; i<9; i++ ) begin: COMB + | ^~~~ + t/t_lint_block_redecl_bad.v:18:35: ... 
Location of original declaration + 18 | for(i=0; i<10; i++ ) begin: COMB + | ^~~~ +%Error: Exiting due to diff --git a/test_regress/t/t_lint_block_redecl_bad.pl b/test_regress/t/t_lint_block_redecl_bad.pl index 30201602c..dbc9830f6 100755 --- a/test_regress/t/t_lint_block_redecl_bad.pl +++ b/test_regress/t/t_lint_block_redecl_bad.pl @@ -9,7 +9,6 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 scenarios(vlt_all => 1); -$Self->{vlt_all} and unsupported("Verilator unsupported, bug485, false begin due to WHILE conversion blocks duplicate name detection"); lint( fails => 1, From 93272c13fd47e8f60bd1a957e03565748d9f0b2b Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Mon, 15 Aug 2022 22:17:36 -0400 Subject: [PATCH 090/119] Tests: Confirm fixed (#181) --- test_regress/t/t_sv_bus_mux_demux.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/test_regress/t/t_sv_bus_mux_demux.pl b/test_regress/t/t_sv_bus_mux_demux.pl index c31dd4c0c..b46d46042 100755 --- a/test_regress/t/t_sv_bus_mux_demux.pl +++ b/test_regress/t/t_sv_bus_mux_demux.pl @@ -9,7 +9,6 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 scenarios(simulator => 1); -$Self->{vlt_all} and unsupported("Verilator unsupported, bug181"); compile( ); From 0eeb40b975170a4daaeddb19cae7b8576e0c1501 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kozdra Date: Thu, 18 Aug 2022 00:08:43 +0200 Subject: [PATCH 091/119] Fix converting subclasses to string (#3552) --- src/V3Common.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/V3Common.cpp b/src/V3Common.cpp index 0d5ca45f3..ad4ecaf6e 100644 --- a/src/V3Common.cpp +++ b/src/V3Common.cpp @@ -87,10 +87,10 @@ static void makeToStringMiddle(AstClass* nodep) { } } if (nodep->extendsp() && nodep->extendsp()->classp()->user1()) { - string stmt = "out += \""; + string stmt = 
"out += "; if (!comma.empty()) stmt += "\", \"+ "; // comma = ", "; // Nothing further so not needed - stmt += nodep->extendsp()->dtypep()->nameProtect(); + stmt += EmitCBaseVisitor::prefixNameProtect(nodep->extendsp()->dtypep()); stmt += "::to_string_middle();\n"; nodep->user1(true); // So what we extend dumps this funcp->addStmtsp(new AstCStmt{nodep->fileline(), stmt}); @@ -104,13 +104,13 @@ static void makeToStringMiddle(AstClass* nodep) { void V3Common::commonAll() { UINFO(2, __FUNCTION__ << ": " << endl); + // NODE STATE + // Entire netlist: + // AstClass::user1() -> bool. True if class needs to_string dumper + const VNUser1InUse m_inuser1; // Create common contents for each module for (AstNode* nodep = v3Global.rootp()->modulesp(); nodep; nodep = nodep->nextp()) { if (AstClass* const classp = VN_CAST(nodep, Class)) { - // NODE STATE - // Entire netlist: - // AstClass::user1() -> bool. True if class needs to_string dumper - const VNUser1InUse m_inuser1; // Create ToString methods makeVlToString(classp); makeToString(classp); From 951cd73fe094b3fabaaf2b6ab63427fdb202f353 Mon Sep 17 00:00:00 2001 From: Krzysztof Bieganski Date: Thu, 18 Aug 2022 12:33:45 +0200 Subject: [PATCH 092/119] Handle MemberSel in V3EmitV.cpp (#3555) --- src/V3EmitV.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/V3EmitV.cpp b/src/V3EmitV.cpp index 940277a8d..e59ecfa38 100644 --- a/src/V3EmitV.cpp +++ b/src/V3EmitV.cpp @@ -494,6 +494,11 @@ class EmitVBaseVisitor VL_NOT_FINAL : public EmitCBaseVisitor { emitVerilogFormat(nodep, nodep->emitVerilog(), nodep->lhsp(), nodep->rhsp(), nodep->thsp()); } + virtual void visit(AstMemberSel* nodep) override { + iterate(nodep->fromp()); + puts("."); + puts(nodep->prettyName()); + } virtual void visit(AstAttrOf* nodep) override { putfs(nodep, "$_ATTROF("); iterateAndNextConstNull(nodep->fromp()); From db5fdfb0ee871e4cf8a25b8890207b507d5fa7c9 Mon Sep 17 00:00:00 2001 From: Ryszard Rozak Date: Thu, 18 Aug 2022 13:03:05 +0200 Subject: [PATCH 
093/119] Fix === with some tristate constants (#3551). --- docs/CONTRIBUTORS | 1 + src/V3Tristate.cpp | 17 ++++++++++++++++- test_regress/t/t_tri_cond_eqcase_with_1.pl | 21 +++++++++++++++++++++ test_regress/t/t_tri_cond_eqcase_with_1.v | 21 +++++++++++++++++++++ test_regress/t/t_tri_eqcase_input.pl | 21 +++++++++++++++++++++ test_regress/t/t_tri_eqcase_input.v | 20 ++++++++++++++++++++ 6 files changed, 100 insertions(+), 1 deletion(-) create mode 100755 test_regress/t/t_tri_cond_eqcase_with_1.pl create mode 100644 test_regress/t/t_tri_cond_eqcase_with_1.v create mode 100755 test_regress/t/t_tri_eqcase_input.pl create mode 100644 test_regress/t/t_tri_eqcase_input.v diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS index 0b003034b..228787d5e 100644 --- a/docs/CONTRIBUTORS +++ b/docs/CONTRIBUTORS @@ -99,6 +99,7 @@ Rafal Kapuscik Raynard Qiao Richard Myers Rupert Swarbrick +Ryszard Rozak Samuel Riedel Sean Cross Sebastien Van Cauwenberghe diff --git a/src/V3Tristate.cpp b/src/V3Tristate.cpp index 20861faa9..21abe682c 100644 --- a/src/V3Tristate.cpp +++ b/src/V3Tristate.cpp @@ -937,7 +937,7 @@ class TristateVisitor final : public TristateBaseVisitor { iterateChildren(nodep); UINFO(9, dbgState() << nodep << endl); // Constification always moves const to LHS - const AstConst* const constp = VN_CAST(nodep->lhsp(), Const); + AstConst* const constp = VN_CAST(nodep->lhsp(), Const); AstVarRef* const varrefp = VN_CAST(nodep->rhsp(), VarRef); // Input variable if (constp && constp->user1p() && varrefp) { // 3'b1z0 -> ((3'b101 == in__en) && (3'b100 == in)) @@ -960,6 +960,21 @@ class TristateVisitor final : public TristateBaseVisitor { if (debug() >= 9) newp->dumpTree(cout, "-caseeq-new: "); nodep->replaceWith(newp); VL_DO_DANGLING(pushDeletep(nodep), nodep); + } else if (constp && nodep->rhsp()->user1p()) { + FileLine* const fl = nodep->fileline(); + constp->unlinkFrBack(); + AstNode* const rhsp = nodep->rhsp()->unlinkFrBack(); + AstNode* newp = new AstLogAnd{ + fl, new 
AstEq{fl, newAllZerosOrOnes(constp, false), rhsp->user1p()}, + // Keep the caseeq if there are X's present + new AstEqCase{fl, constp, rhsp}}; + if (neq) newp = new AstLogNot{fl, newp}; + rhsp->user1p(nullptr); + UINFO(9, " newceq " << newp << endl); + if (debug() >= 9) nodep->dumpTree(cout, "-caseeq-old: "); + if (debug() >= 9) newp->dumpTree(cout, "-caseeq-new: "); + nodep->replaceWith(newp); + VL_DO_DANGLING(pushDeletep(nodep), nodep); } else { checkUnhandled(nodep); } diff --git a/test_regress/t/t_tri_cond_eqcase_with_1.pl b/test_regress/t/t_tri_cond_eqcase_with_1.pl new file mode 100755 index 000000000..f5e338520 --- /dev/null +++ b/test_regress/t/t_tri_cond_eqcase_with_1.pl @@ -0,0 +1,21 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2022 by Antmicro Ltd. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_tri_cond_eqcase_with_1.v b/test_regress/t/t_tri_cond_eqcase_with_1.v new file mode 100644 index 000000000..552280b98 --- /dev/null +++ b/test_regress/t/t_tri_cond_eqcase_with_1.v @@ -0,0 +1,21 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Antmicro Ltd. +// SPDX-License-Identifier: CC0-1.0 + +module t (/*AUTOARG*/ + // Inputs + clk + ); + input clk; + wire a; + assign a = 1 === (clk ? 
1 : 1'bz); + + always begin + if (!a) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule diff --git a/test_regress/t/t_tri_eqcase_input.pl b/test_regress/t/t_tri_eqcase_input.pl new file mode 100755 index 000000000..f5e338520 --- /dev/null +++ b/test_regress/t/t_tri_eqcase_input.pl @@ -0,0 +1,21 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2022 by Antmicro Ltd. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +compile( + ); + +execute( + check_finished => 1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_tri_eqcase_input.v b/test_regress/t/t_tri_eqcase_input.v new file mode 100644 index 000000000..518ae40e1 --- /dev/null +++ b/test_regress/t/t_tri_eqcase_input.v @@ -0,0 +1,20 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Antmicro Ltd. 
+// SPDX-License-Identifier: CC0-1.0 + +module t (/*AUTOARG*/ + // Inputs + clk + ); + input clk; + wire a = 1'bz === clk; + + always begin + if (a) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule From 33e2acfe6100929b1250727b18686c807ccefcdb Mon Sep 17 00:00:00 2001 From: Krzysztof Bieganski Date: Fri, 19 Aug 2022 13:33:17 +0200 Subject: [PATCH 094/119] Fix `AstNode::forall` return type (#3559) Signed-off-by: Krzysztof Bieganski --- src/V3Ast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/V3Ast.h b/src/V3Ast.h index dbc31cdab..89ae5a35b 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -2043,7 +2043,7 @@ public: // Same as above, but for 'const' nodes template - void forall(std::function p) const { + bool forall(std::function p) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); return predicateImpl(this, p); } From 90d22cbec6cf16c164396af44c5961ef95c4b94f Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Fri, 19 Aug 2022 13:22:06 +0100 Subject: [PATCH 095/119] Fix `AstNode::exists` return type --- src/V3Ast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/V3Ast.h b/src/V3Ast.h index 89ae5a35b..c3243b30d 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -2026,7 +2026,7 @@ public: // Same as above, but for 'const' nodes template - void exists(std::function p) const { + bool exists(std::function p) const { static_assert(checkTypeParameter(), "Invalid type parameter 'T_Node'"); return predicateImpl(this, p); } From b4367947739f033d85d621710723498d6b33b57f Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 3 Aug 2022 13:34:38 +0100 Subject: [PATCH 096/119] Add specialized GraphStreamUnordered GraphStreamUnordered used to be GraphStream>, but a lot of performance improvements can be had by a specialized implementation, so added a highly optimized one. This helps a lot with --debug-partition. 
--- src/V3Graph.h | 10 +++--- src/V3GraphStream.h | 82 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 82 insertions(+), 10 deletions(-) diff --git a/src/V3Graph.h b/src/V3Graph.h index ae59fe4a4..08d5c0938 100644 --- a/src/V3Graph.h +++ b/src/V3Graph.h @@ -57,9 +57,9 @@ public: inline GraphWay() : m_e{FORWARD} {} // cppcheck-suppress noExplicitConstructor - inline GraphWay(en _e) + inline constexpr GraphWay(en _e) : m_e{_e} {} - explicit inline GraphWay(int _e) + explicit inline constexpr GraphWay(int _e) : m_e(static_cast(_e)) {} // Need () or GCC 4.8 false warning operator en() const { return m_e; } const char* ascii() const { @@ -67,9 +67,9 @@ public: return names[m_e]; } // METHODS unique to this class - GraphWay invert() const { return m_e == FORWARD ? REVERSE : FORWARD; } - bool forward() const { return m_e == FORWARD; } - bool reverse() const { return m_e != FORWARD; } + constexpr GraphWay invert() const { return m_e == FORWARD ? REVERSE : FORWARD; } + constexpr bool forward() const { return m_e == FORWARD; } + constexpr bool reverse() const { return m_e != FORWARD; } }; inline bool operator==(const GraphWay& lhs, const GraphWay& rhs) { return lhs.m_e == rhs.m_e; } inline bool operator==(const GraphWay& lhs, GraphWay::en rhs) { return lhs.m_e == rhs; } diff --git a/src/V3GraphStream.h b/src/V3GraphStream.h index 37d68ca31..5c53f0a0f 100644 --- a/src/V3GraphStream.h +++ b/src/V3GraphStream.h @@ -27,6 +27,7 @@ #include #include #include +#include //###################################################################### // GraphStream @@ -225,11 +226,82 @@ private: VL_UNCOPYABLE(GraphStream); }; -//###################################################################### +//================================================================================================= +// GraphStreamUnordered is similar to GraphStream, but iterates un-ordered vertices (those that are +// not ordered by dependencies) in an arbitrary order. 
Iteration order is still deterministic. -// GraphStreamUnordered is GraphStream using a plain pointer compare to -// break ties in the graph order. This WILL return nodes in -// nondeterministic order. -using GraphStreamUnordered = GraphStream>; +class GraphStreamUnordered final { + // MEMBERS + const GraphWay m_way; // Direction of traversal + size_t m_nextIndex = 0; // Which index to return from m_nextVertices next + std::vector m_nextVertices; // List of ready vertices returned next + std::vector m_readyVertices; // List of other ready vertices + +public: + // CONSTRUCTORS + VL_UNCOPYABLE(GraphStreamUnordered); + explicit GraphStreamUnordered(const V3Graph* graphp, GraphWay way = GraphWay::FORWARD) + : m_way{way} { + if (m_way == GraphWay::FORWARD) { + init(graphp); + } else { + init(graphp); + } + } + ~GraphStreamUnordered() = default; + + // METHODS + + // Each call to nextp() returns a unique vertex in the graph, in dependency order. Dependencies + // alone do not specify a total ordering. Un-ordered vertices are returned in an arbitrary but + // deterministic order. 
+ const V3GraphVertex* nextp() { + if (VL_UNLIKELY(m_nextIndex == m_nextVertices.size())) { + if (VL_UNLIKELY(m_readyVertices.empty())) return nullptr; + m_nextIndex = 0; + // Use swap to avoid reallocation + m_nextVertices.swap(m_readyVertices); + m_readyVertices.clear(); + } + const V3GraphVertex* const resultp = m_nextVertices[m_nextIndex++]; + if (m_way == GraphWay::FORWARD) { + return unblock(resultp); + } else { + return unblock(resultp); + } + } + +private: + template // + VL_ATTR_NOINLINE void init(const V3Graph* graphp) { + constexpr GraphWay way{T_Way}; + constexpr GraphWay inv = way.invert(); + // Assign every vertex without an incoming edge to ready, others to waiting + for (V3GraphVertex *vertexp = graphp->verticesBeginp(), *nextp; vertexp; vertexp = nextp) { + nextp = vertexp->verticesNextp(); + uint32_t nDeps = 0; + for (V3GraphEdge* edgep = vertexp->beginp(inv); edgep; edgep = edgep->nextp(inv)) { + ++nDeps; + } + vertexp->color(nDeps); // Using color instead of user, as user might be used by client + if (VL_UNLIKELY(nDeps == 0)) m_nextVertices.push_back(vertexp); + } + } + + template // + VL_ATTR_NOINLINE const V3GraphVertex* unblock(const V3GraphVertex* resultp) { + constexpr GraphWay way{T_Way}; + for (V3GraphEdge *edgep = resultp->beginp(way), *nextp; edgep; edgep = nextp) { + nextp = edgep->nextp(way); + V3GraphVertex* const vertexp = edgep->furtherp(way); +#if VL_DEBUG + UASSERT_OBJ(vertexp->color() != 0, vertexp, "Should not be on waiting list"); +#endif + vertexp->color(vertexp->color() - 1); + if (!vertexp->color()) m_readyVertices.push_back(vertexp); + } + return resultp; // Returning input so we can tail call this method + } +}; #endif // Guard From f8a0389e737c8493a13cd40d327be4365d2813d6 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 9 Aug 2022 16:08:41 +0100 Subject: [PATCH 097/119] Do not use stepCost when gathering sibling merge candidates siblingPairFromRelatives gathers neighbours of a vertex, and sorts them. 
It then takes the N best nodes, and creates sibling merge candidates from them. We now use the unadjusted cost instead of the step cost of the vertices when sorting. This is both faster as we need not do the log-space rounding to compute stepCost, and will also make similar but yet cheaper nodes appear closer to the front as we don't lose precision in rounding, hence they are more likely to be entered as merge candidates. Note that when creating the merge candidate, we still use the stepCost, so its purpose of reducing the propagation of critical path updates is maintained in full. In summary, this should make both Verilator and the generated model very slightly faster, at least in theory, and I have observed minor improvement in places. --- src/V3Partition.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index b410b9bea..5e8cd4d01 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -1540,8 +1540,8 @@ private: if (shortestPrereqs.size() <= 1) return; const auto cmp = [way](const LogicMTask* ap, const LogicMTask* bp) { - const uint32_t aCp = ap->critPathCost(way) + ap->stepCost(); - const uint32_t bCp = bp->critPathCost(way) + bp->stepCost(); + const uint32_t aCp = ap->critPathCost(way) + ap->cost(); + const uint32_t bCp = bp->critPathCost(way) + bp->cost(); if (aCp != bCp) return aCp < bCp; return ap->id() < bp->id(); }; From f0040c7b9a9a60510f16f32903d8d1b23f8e2f0b Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 15 Aug 2022 20:33:20 +0100 Subject: [PATCH 098/119] Remove reliance on pointer comparison in MT scheduling The critical path propagation used to rely on a pointer comparison to break equal scoring critical path updates. Use the corresponding mtask ids instead, which is deterministic across invocations.
--- src/V3Partition.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 5e8cd4d01..f6dd5c05a 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -161,7 +161,7 @@ static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) { // * Client calls PartPropagateCp::go(). Internally, this iteratively // propagates the new CPs wayward through the graph. // -template +template > class PartPropagateCp final : GraphAlg<> { private: // MEMBERS @@ -171,7 +171,7 @@ private: T_CostAccessor* const m_accessp; // Access cost and CPs on V3GraphVertex's. // // confirm we only process each vertex once. const bool m_slowAsserts; // Enable nontrivial asserts - SortByValueMap m_pending; // Pending rescores + SortByValueMap m_pending; // Pending rescores public: // CONSTRUCTORS @@ -361,6 +361,10 @@ public: bool operator()(const LogicMTask* ap, const LogicMTask* bp) const { return ap->id() < bp->id(); } + bool operator()(const V3GraphVertex* ap, const V3GraphVertex* bp) const { + return operator()(static_cast(ap), + static_cast(bp)); + } }; // This adaptor class allows the PartPropagateCp class to be somewhat @@ -1380,10 +1384,10 @@ private: << donorNewCpFwd.propagateCp << endl); LogicMTask::CpCostAccessor cpAccess; - PartPropagateCp forwardPropagator(m_mtasksp, GraphWay::FORWARD, - &cpAccess, m_slowAsserts); - PartPropagateCp reversePropagator(m_mtasksp, GraphWay::REVERSE, - &cpAccess, m_slowAsserts); + PartPropagateCp forwardPropagator( + m_mtasksp, GraphWay::FORWARD, &cpAccess, m_slowAsserts); + PartPropagateCp reversePropagator( + m_mtasksp, GraphWay::REVERSE, &cpAccess, m_slowAsserts); recipientp->setCritPathCost(GraphWay::FORWARD, recipientNewCpFwd.cp); if (recipientNewCpFwd.propagate) { From cd50949a7e14e7e756c2b9b56cf531f45ad660a4 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 15 Aug 2022 20:28:10 +0100 Subject: [PATCH 099/119] Reuse MTaskEdge instances in MT scheduling 
Instead of deleting then re-allocating MTaskEdge instances when merging two MTasks, just redirect the edges of the donor MTask to the recipient MTask. This is both faster as it avoids an allocation and a deletion, together with one update of the sibling maps, and also makes the algorithm more stable due to MergeCandidate IDs being stable and allocated up front for all MTaskEdges, before any SiblingMCs are allocated. Perturbations in output are expected as the IDs used to break ties between merge candidates with equal costs are not updated when redirecting an edge (on purpose). The relinking of only one end of the graph edges also perturbs the order in which they are enumerated, which does change candidate opportunities when the number of edges is larger than PART_SIBLING_EDGE_LIMIT. Confirmed output is identical when IDs are updated and edges are updated to appear in their original order. --- src/V3Graph.cpp | 8 ++++++ src/V3Graph.h | 1 + src/V3Partition.cpp | 65 ++++++++++++++++++++++++++------------------- 3 files changed, 47 insertions(+), 27 deletions(-) diff --git a/src/V3Graph.cpp b/src/V3Graph.cpp index 1c4169e7a..fe0be1cf6 100644 --- a/src/V3Graph.cpp +++ b/src/V3Graph.cpp @@ -182,6 +182,14 @@ V3GraphEdge* V3GraphEdge::relinkFromp(V3GraphVertex* newFromp) { return oldNxt; } +V3GraphEdge* V3GraphEdge::relinkTop(V3GraphVertex* newTop) { + V3GraphEdge* oldNxt = inNextp(); + m_ins.unlink(m_top->m_ins, this); + m_top = newTop; + inPushBack(); + return oldNxt; +} + void V3GraphEdge::unlinkDelete() { // Unlink from side m_outs.unlink(m_fromp->m_outs, this); diff --git a/src/V3Graph.h b/src/V3Graph.h index 08d5c0938..da096ab2f 100644 --- a/src/V3Graph.h +++ b/src/V3Graph.h @@ -320,6 +320,7 @@ public: } void unlinkDelete(); V3GraphEdge* relinkFromp(V3GraphVertex* newFromp); + V3GraphEdge* relinkTop(V3GraphVertex* newTop); // ACCESSORS int weight() const { return m_weight; } void weight(int weight) { m_weight = weight; } diff --git a/src/V3Partition.cpp
b/src/V3Partition.cpp index f6dd5c05a..13aa68b47 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -742,6 +742,7 @@ public: bool removedFromSb() const { return (m_id & REMOVED_MASK) != 0; } void removedFromSb(bool /*removed*/) { m_id |= REMOVED_MASK; } + void clearRemovedFromSb() { m_id &= ~REMOVED_MASK; } bool operator<(const MergeCandidate& other) const { return m_id < other.m_id; } }; @@ -1014,13 +1015,6 @@ static void partCheckCriticalPaths(V3Graph* mtasksp) { } } -// Advance to nextp(way) and delete edge -static V3GraphEdge* partBlastEdgep(GraphWay way, V3GraphEdge* edgep) { - V3GraphEdge* const nextp = edgep->nextp(way); - VL_DO_DANGLING(edgep->unlinkDelete(), edgep); - return nextp; -} - // Merge edges from a LogicMtask. // // This code removes 'hasRelative' edges. When this occurs, mark it in need @@ -1054,31 +1048,48 @@ static V3GraphEdge* partBlastEdgep(GraphWay way, V3GraphEdge* edgep) { // // Another way of stating this: this code ensures that scores of // non-transitive edges only ever increase. -static void partMergeEdgesFrom(V3Graph* mtasksp, LogicMTask* recipientp, LogicMTask* donorp, - V3Scoreboard* sbp) { +static void partRedirectEdgesFrom(LogicMTask* recipientp, LogicMTask* donorp, + V3Scoreboard* sbp) { for (const auto& way : {GraphWay::FORWARD, GraphWay::REVERSE}) { - for (V3GraphEdge* edgep = donorp->beginp(way); edgep; edgep = partBlastEdgep(way, edgep)) { - const MTaskEdge* const tedgep = MTaskEdge::cast(edgep); - if (sbp && !tedgep->removedFromSb()) sbp->removeElem(tedgep); - // Existing edge; mark it in need of a rescore - if (recipientp->hasRelative(way, tedgep->furtherMTaskp(way))) { + for (V3GraphEdge *edgep = donorp->beginp(way), *nextp; edgep; edgep = nextp) { + nextp = edgep->nextp(way); + MTaskEdge* const tedgep = MTaskEdge::cast(edgep); + LogicMTask* const relativep = tedgep->furtherMTaskp(way); + if (recipientp->hasRelative(way, relativep)) { + // An edge already exists between recipient and relative of donor. 
+ // Mark it in need of a rescore if (sbp) { - const MTaskEdge* const existMTaskEdgep = MTaskEdge::cast( - recipientp->findConnectingEdgep(way, tedgep->furtherMTaskp(way))); + if (!tedgep->removedFromSb()) sbp->removeElem(tedgep); + const MTaskEdge* const existMTaskEdgep + = MTaskEdge::cast(recipientp->findConnectingEdgep(way, relativep)); UASSERT(existMTaskEdgep, "findConnectingEdge didn't find edge"); if (!existMTaskEdgep->removedFromSb()) { sbp->hintScoreChanged(existMTaskEdgep); } } + VL_DO_DANGLING(edgep->unlinkDelete(), edgep); } else { - // No existing edge into *this, make one. - const MTaskEdge* newEdgep; + // No existing edge between recipient and relative of donor. + // Redirect the edge from donor<->relative to recipient<->relative. if (way == GraphWay::REVERSE) { - newEdgep = new MTaskEdge(mtasksp, tedgep->fromMTaskp(), recipientp, 1); + tedgep->relinkTop(recipientp); + relativep->removeRelative(GraphWay::FORWARD, donorp); + relativep->addRelative(GraphWay::FORWARD, recipientp); + recipientp->addRelative(GraphWay::REVERSE, relativep); } else { - newEdgep = new MTaskEdge(mtasksp, recipientp, tedgep->toMTaskp(), 1); + tedgep->relinkFromp(recipientp); + relativep->removeRelative(GraphWay::REVERSE, donorp); + relativep->addRelative(GraphWay::REVERSE, recipientp); + recipientp->addRelative(GraphWay::FORWARD, relativep); + } + if (sbp) { + if (tedgep->removedFromSb()) { + tedgep->clearRemovedFromSb(); + sbp->addElem(tedgep); + } else { + sbp->hintScoreChanged(tedgep); + } } - if (sbp) sbp->addElem(newEdgep); } } } @@ -1334,7 +1345,7 @@ private: } // Merge the smaller mtask into the larger mtask. If one of them - // is much larger, this will save time in partMergeEdgesFrom(). + // is much larger, this will save time in partRedirectEdgesFrom(). // Assume the more costly mtask has more edges. // // [TODO: now that we have edge maps, we could count the edges @@ -1414,8 +1425,8 @@ private: // to a bounded number. 
removeSiblingMCsWith(recipientp); - // Merge all edges - partMergeEdgesFrom(m_mtasksp, recipientp, donorp, &m_sb); + // Redirect all edges + partRedirectEdgesFrom(recipientp, donorp, &m_sb); // Delete the donorp mtask from the graph VL_DO_CLEAR(donorp->unlinkDelete(m_mtasksp), donorp = nullptr); @@ -1855,7 +1866,7 @@ private: ++rankIt) { // Find the largest node at this rank, merge into it. (If we // happen to find a huge node, this saves time in - // partMergeEdgesFrom() versus merging into an arbitrary node.) + // partRedirectEdgesFrom() versus merging into an arbitrary node.) LogicMTask* mergedp = nullptr; for (LogicMTaskSet::iterator it = rankIt->second.begin(); it != rankIt->second.end(); ++it) { @@ -1883,8 +1894,8 @@ private: } // Move all vertices from donorp to mergedp mergedp->moveAllVerticesFrom(donorp); - // Move edges from donorp to recipientp - partMergeEdgesFrom(m_mtasksp, mergedp, donorp, nullptr); + // Redirect edges from donorp to recipientp + partRedirectEdgesFrom(mergedp, donorp, nullptr); // Remove donorp from the graph VL_DO_DANGLING(donorp->unlinkDelete(m_mtasksp), donorp); ++m_mergesDone; From 03ac7ad73033b70b53d06748e78ef57a3a65b4ef Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 17 Aug 2022 18:03:17 +0100 Subject: [PATCH 100/119] Make PartPropagateCp specific to the MTask graph While keeping the client code abstract in PartPropagateCp is nice for testing, there is performance to be had removing the abstraction. As this code dominates in scheduling large designs, we eliminate the abstraction and re-work the testing to use the actual LogicMTask and MTaskEdge graph types. No functional change intended. 
--- src/V3Partition.cpp | 390 ++++++++++++++++++++------------------------ 1 file changed, 173 insertions(+), 217 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 13aa68b47..5b1474e91 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -143,212 +143,6 @@ static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) { #endif } -//###################################################################### -// PartPropagateCp - -// Propagate increasing critical path (CP) costs through a graph. -// -// Usage: -// * Client increases the cost and/or CP at a node or small set of nodes -// (often a pair in practice, eg. edge contraction.) -// * Client instances a PartPropagateCp object -// * Client calls PartPropagateCp::cpHasIncreased() one or more times. -// Each call indicates that the inclusive CP of some "seed" vertex -// has increased to a given value. -// * NOTE: PartPropagateCp will neither read nor modify the cost -// or CPs at the seed vertices, it only accesses and modifies -// vertices wayward from the seeds. -// * Client calls PartPropagateCp::go(). Internally, this iteratively -// propagates the new CPs wayward through the graph. -// -template > -class PartPropagateCp final : GraphAlg<> { -private: - // MEMBERS - const GraphWay m_way; // CPs oriented in this direction: either FORWARD - // // from graph-start to current node, or REVERSE - // // from graph-end to current node. - T_CostAccessor* const m_accessp; // Access cost and CPs on V3GraphVertex's. - // // confirm we only process each vertex once. 
- const bool m_slowAsserts; // Enable nontrivial asserts - SortByValueMap m_pending; // Pending rescores - -public: - // CONSTRUCTORS - PartPropagateCp(V3Graph* graphp, GraphWay way, T_CostAccessor* accessp, bool slowAsserts, - V3EdgeFuncP edgeFuncp = &V3GraphEdge::followAlwaysTrue) - : GraphAlg<>{graphp, edgeFuncp} - , m_way{way} - , m_accessp{accessp} - , m_slowAsserts{slowAsserts} {} - - // METHODS - void cpHasIncreased(V3GraphVertex* vxp, uint32_t newInclusiveCp) { - // For *vxp, whose CP-inclusive has just increased to - // newInclusiveCp, iterate to all wayward nodes, update the edges - // of each, and add each to m_pending if its overall CP has grown. - for (V3GraphEdge* edgep = vxp->beginp(m_way); edgep; edgep = edgep->nextp(m_way)) { - if (!m_edgeFuncp(edgep)) continue; - V3GraphVertex* const relativep = edgep->furtherp(m_way); - m_accessp->notifyEdgeCp(relativep, m_way, vxp, newInclusiveCp); - - if (m_accessp->critPathCost(relativep, m_way) < newInclusiveCp) { - // relativep's critPathCost() is out of step with its - // longest !wayward edge. Schedule that to be resolved. - const uint32_t newPendingVal - = newInclusiveCp - m_accessp->critPathCost(relativep, m_way); - const auto pair = m_pending.emplace(relativep, newPendingVal); - if (!pair.second && (newPendingVal > pair.first->second)) { - m_pending.update(pair.first, newPendingVal); - } - } - } - } - - void go() { - // m_pending maps each pending vertex to the amount that it wayward - // CP will grow. - // - // We can iterate over the pending set in reverse order, always - // choosing the nodes with the largest pending CP-growth. - // - // The intuition is: if the original seed node had its CP grow by - // 50, the most any wayward node can possibly grow is also 50. So - // for anything pending to grow by 50, we know we can process it - // once and we won't have to grow its CP again on the current pass. 
- // After we're done with all the grow-by-50s, nothing else will - // grow by 50 again on the current pass, and we can process the - // grow-by-49s and we know we'll only have to process each one - // once. And so on. - // - // This generalizes to multiple seed nodes also. - while (!m_pending.empty()) { - const auto it = m_pending.rbegin(); - V3GraphVertex* const updateMep = it->first; - const uint32_t cpGrowBy = it->second; - m_pending.erase(it); - - // For *updateMep, whose critPathCost was out-of-date with respect - // to its edges, update the critPathCost. - const uint32_t startCp = m_accessp->critPathCost(updateMep, m_way); - const uint32_t newCp = startCp + cpGrowBy; - if (m_slowAsserts) m_accessp->checkNewCpVersusEdges(updateMep, m_way, newCp); - - m_accessp->setCritPathCost(updateMep, m_way, newCp); - cpHasIncreased(updateMep, newCp + m_accessp->cost(updateMep)); - } - } - -private: - VL_DEBUG_FUNC; - VL_UNCOPYABLE(PartPropagateCp); -}; - -class PartPropagateCpSelfTest final { -private: - // MEMBERS - V3Graph m_graph; // A graph - V3GraphVertex* m_vx[50]; // All vertices within the graph - using CpMap = std::unordered_map; - CpMap m_cp; // Vertex-to-CP map - CpMap m_seen; // Set of vertices we've seen - - // CONSTRUCTORS - PartPropagateCpSelfTest() = default; - ~PartPropagateCpSelfTest() = default; - - // METHODS -protected: - friend class PartPropagateCp; - void notifyEdgeCp(V3GraphVertex* /*vxp*/, GraphWay way, V3GraphVertex* throughp, - uint32_t cp) const { - const uint32_t throughCost = critPathCost(throughp, way); - UASSERT_SELFTEST(uint32_t, cp, (1 + throughCost)); - } - -private: - void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const { - // Don't need to check this in the self test; it supports an assert - // that runs in production code. - } - void setCritPathCost(V3GraphVertex* vxp, GraphWay /*way*/, uint32_t cost) { - m_cp[vxp] = cost; - // Confirm that we only set each node's CP once. 
That's an - // important property of PartPropagateCp which allows it to be far - // faster than a recursive algorithm on some graphs. - const auto it = m_seen.find(vxp); - UASSERT_OBJ(it == m_seen.end(), vxp, "Set CP on node twice"); - m_seen[vxp] = cost; - } - uint32_t critPathCost(V3GraphVertex* vxp, GraphWay /*way*/) const { - const auto it = m_cp.find(vxp); - if (it != m_cp.end()) return it->second; - return 0; - } - static uint32_t cost(const V3GraphVertex*) { return 1; } - void partInitCriticalPaths(bool checkOnly) { - // Set up the FORWARD cp's only. This test only looks in one - // direction, it assumes REVERSE is symmetrical and would be - // redundant to test. - GraphStreamUnordered order(&m_graph); - while (const V3GraphVertex* const cvxp = order.nextp()) { - V3GraphVertex* const vxp = const_cast(cvxp); - uint32_t cpCost = 0; - for (V3GraphEdge* edgep = vxp->inBeginp(); edgep; edgep = edgep->inNextp()) { - V3GraphVertex* const parentp = edgep->fromp(); - cpCost = std::max(cpCost, critPathCost(parentp, GraphWay::FORWARD) + 1); - } - if (checkOnly) { - UASSERT_SELFTEST(uint32_t, cpCost, critPathCost(vxp, GraphWay::FORWARD)); - } else { - setCritPathCost(vxp, GraphWay::FORWARD, cpCost); - } - } - } - void go() { - // Generate a pseudo-random graph - std::array rngState - = {{0x12345678ULL, 0x9abcdef0ULL}}; // GCC 3.8.0 wants {{}} - // Create 50 vertices - for (auto& i : m_vx) i = new V3GraphVertex(&m_graph); - // Create 250 edges at random. Edges must go from - // lower-to-higher index vertices, so we get a DAG. 
- for (unsigned i = 0; i < 250; ++i) { - const unsigned idx1 = V3Os::rand64(rngState) % 50; - const unsigned idx2 = V3Os::rand64(rngState) % 50; - if (idx1 > idx2) { - new V3GraphEdge(&m_graph, m_vx[idx2], m_vx[idx1], 1); - } else if (idx2 > idx1) { - new V3GraphEdge(&m_graph, m_vx[idx1], m_vx[idx2], 1); - } - } - - partInitCriticalPaths(false); - - // This SelfTest class is also the T_CostAccessor - PartPropagateCp prop(&m_graph, GraphWay::FORWARD, this, true); - - // Seed the propagator with every input node; - // This should result in the complete graph getting all CP's assigned. - for (const auto& i : m_vx) { - if (!i->inBeginp()) prop.cpHasIncreased(i, 1 /* inclusive CP starts at 1 */); - } - - // Run the propagator. - // * The setCritPathCost() routine checks that each node's CP changes - // at most once. - // * The notifyEdgeCp routine is also self checking. - m_seen.clear(); - prop.go(); - - // Finally, confirm that the entire graph appears to have correct CPs. - partInitCriticalPaths(true); - } - -public: - static void selfTest() { PartPropagateCpSelfTest().go(); } -}; - //###################################################################### // LogicMTask @@ -361,10 +155,6 @@ public: bool operator()(const LogicMTask* ap, const LogicMTask* bp) const { return ap->id() < bp->id(); } - bool operator()(const V3GraphVertex* ap, const V3GraphVertex* bp) const { - return operator()(static_cast(ap), - static_cast(bp)); - } }; // This adaptor class allows the PartPropagateCp class to be somewhat @@ -857,8 +647,8 @@ bool MergeCandidate::mergeWouldCreateCycle() const { : static_cast(this)->mergeWouldCreateCycle(); } -//###################################################################### -// Vertex utility classes +// ###################################################################### +// Vertex utility classes class OrderByPtrId final { PartPtrIdMap m_ids; @@ -1015,6 +805,175 @@ static void partCheckCriticalPaths(V3Graph* mtasksp) { } } +// 
###################################################################### +// PartPropagateCp + +// Propagate increasing critical path (CP) costs through a graph. +// +// Usage: +// * Client increases the cost and/or CP at a node or small set of nodes +// (often a pair in practice, eg. edge contraction.) +// * Client instances a PartPropagateCp object +// * Client calls PartPropagateCp::cpHasIncreased() one or more times. +// Each call indicates that the inclusive CP of some "seed" vertex +// has increased to a given value. +// * NOTE: PartPropagateCp will neither read nor modify the cost +// or CPs at the seed vertices, it only accesses and modifies +// vertices wayward from the seeds. +// * Client calls PartPropagateCp::go(). Internally, this iteratively +// propagates the new CPs wayward through the graph. +// + +class PartPropagateCp final : GraphAlg<> { +private: + // MEMBERS + const GraphWay m_way; // CPs oriented in this direction: either FORWARD + // // from graph-start to current node, or REVERSE + // // from graph-end to current node. + LogicMTask::CpCostAccessor m_access; // Access cost and CPs on V3GraphVertex's. + // // confirm we only process each vertex once. + const bool m_slowAsserts; // Enable nontrivial asserts + // Pending rescores + SortByValueMap m_pending; + + std::set m_seen; // Used only with slow asserts to check mtasks visited only once + +public: + // CONSTRUCTORS + PartPropagateCp(V3Graph* graphp, GraphWay way, bool slowAsserts, + V3EdgeFuncP edgeFuncp = &V3GraphEdge::followAlwaysTrue) + : GraphAlg<>{graphp, edgeFuncp} + , m_way{way} + , m_slowAsserts{slowAsserts} {} + + // METHODS + void cpHasIncreased(V3GraphVertex* vxp, uint32_t newInclusiveCp) { + // For *vxp, whose CP-inclusive has just increased to + // newInclusiveCp, iterate to all wayward nodes, update the edges + // of each, and add each to m_pending if its overall CP has grown. 
+ for (V3GraphEdge* edgep = vxp->beginp(m_way); edgep; edgep = edgep->nextp(m_way)) { + if (!m_edgeFuncp(edgep)) continue; + LogicMTask* const relativep = static_cast(edgep->furtherp(m_way)); + m_access.notifyEdgeCp(relativep, m_way, vxp, newInclusiveCp); + + if (m_access.critPathCost(relativep, m_way) < newInclusiveCp) { + // relativep's critPathCost() is out of step with its + // longest !wayward edge. Schedule that to be resolved. + const uint32_t newPendingVal + = newInclusiveCp - m_access.critPathCost(relativep, m_way); + const auto pair = m_pending.emplace(relativep, newPendingVal); + if (!pair.second && (newPendingVal > pair.first->second)) { + m_pending.update(pair.first, newPendingVal); + } + } + } + } + + void go() { + // m_pending maps each pending vertex to the amount that its wayward + // CP will grow. + // + // We can iterate over the pending set in reverse order, always + // choosing the nodes with the largest pending CP-growth. + // + // The intuition is: if the original seed node had its CP grow by + // 50, the most any wayward node can possibly grow is also 50. So + // for anything pending to grow by 50, we know we can process it + // once and we won't have to grow its CP again on the current pass. + // After we're done with all the grow-by-50s, nothing else will + // grow by 50 again on the current pass, and we can process the + // grow-by-49s and we know we'll only have to process each one + // once. And so on. + // + // This generalizes to multiple seed nodes also. + while (!m_pending.empty()) { + const auto it = m_pending.rbegin(); + LogicMTask* const updateMep = it->first; + const uint32_t cpGrowBy = it->second; + m_pending.erase(it); + + // For *updateMep, whose critPathCost was out-of-date with respect + // to its edges, update the critPathCost.
+ const uint32_t startCp = m_access.critPathCost(updateMep, m_way); + const uint32_t newCp = startCp + cpGrowBy; + if (VL_UNLIKELY(m_slowAsserts)) { + m_access.checkNewCpVersusEdges(updateMep, m_way, newCp); + // Confirm that we only set each node's CP once. That's an + // important property of PartPropagateCp which allows it to be far + // faster than a recursive algorithm on some graphs. + const bool first = m_seen.insert(updateMep).second; + UASSERT_OBJ(first, updateMep, "Set CP on node twice"); + } + m_access.setCritPathCost(updateMep, m_way, newCp); + cpHasIncreased(updateMep, newCp + m_access.cost(updateMep)); + } + } + +private: + VL_DEBUG_FUNC; + VL_UNCOPYABLE(PartPropagateCp); +}; + +class PartPropagateCpSelfTest final { +private: + // MEMBERS + V3Graph m_graph; // A graph + LogicMTask* m_vx[50]; // All vertices within the graph + + // CONSTRUCTORS + PartPropagateCpSelfTest() = default; + ~PartPropagateCpSelfTest() = default; + + void go() { + // Generate a pseudo-random graph + std::array rngState + = {{0x12345678ULL, 0x9abcdef0ULL}}; // GCC 3.8.0 wants {{}} + // Create 50 vertices + for (auto& i : m_vx) { + i = new LogicMTask{&m_graph, nullptr}; + i->setCost(1); + } + // Create 250 edges at random. Edges must go from + // lower-to-higher index vertices, so we get a DAG. 
+ for (unsigned i = 0; i < 250; ++i) { + const unsigned idx1 = V3Os::rand64(rngState) % 50; + const unsigned idx2 = V3Os::rand64(rngState) % 50; + if (idx1 > idx2) { + if (!m_vx[idx2]->hasRelative(GraphWay::FORWARD, m_vx[idx1])) { + new MTaskEdge{&m_graph, m_vx[idx2], m_vx[idx1], 1}; + } + } else if (idx2 > idx1) { + if (!m_vx[idx1]->hasRelative(GraphWay::FORWARD, m_vx[idx2])) { + new MTaskEdge{&m_graph, m_vx[idx1], m_vx[idx2], 1}; + } + } + } + + partInitCriticalPaths(&m_graph); + + // This SelfTest class is also the T_CostAccessor + PartPropagateCp prop(&m_graph, GraphWay::FORWARD, true); + + // Seed the propagator with every input node; + // This should result in the complete graph getting all CP's assigned. + for (const auto& i : m_vx) { + if (!i->inBeginp()) prop.cpHasIncreased(i, 1 /* inclusive CP starts at 1 */); + } + + // Run the propagator. + // * The setCritPathCost() routine checks that each node's CP changes + // at most once. + // * The notifyEdgeCp routine is also self checking. + prop.go(); + + // Finally, confirm that the entire graph appears to have correct CPs. + partCheckCriticalPaths(&m_graph); + } + +public: + static void selfTest() { PartPropagateCpSelfTest().go(); } +}; + // Merge edges from a LogicMtask. // // This code removes 'hasRelative' edges. When this occurs, mark it in need @@ -1394,11 +1353,8 @@ private: << (donorNewCpFwd.propagate ? 
" true " : " false ") << donorNewCpFwd.propagateCp << endl); - LogicMTask::CpCostAccessor cpAccess; - PartPropagateCp forwardPropagator( - m_mtasksp, GraphWay::FORWARD, &cpAccess, m_slowAsserts); - PartPropagateCp reversePropagator( - m_mtasksp, GraphWay::REVERSE, &cpAccess, m_slowAsserts); + PartPropagateCp forwardPropagator(m_mtasksp, GraphWay::FORWARD, m_slowAsserts); + PartPropagateCp reversePropagator(m_mtasksp, GraphWay::REVERSE, m_slowAsserts); recipientp->setCritPathCost(GraphWay::FORWARD, recipientNewCpFwd.cp); if (recipientNewCpFwd.propagate) { From 83475008d94a493112e5ae6bf3b9b2af5a571a21 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sun, 7 Aug 2022 14:11:58 +0100 Subject: [PATCH 101/119] Improve performance of MTask coarsening Various optimizations to speed up MTasks coarsening (which is the long pole in the multi-threaded scheduling of very large designs). The biggest impact ones: - Use efficient hand written Pairing Heaps for implementing priority queues and the scoreboard, instead of the old SortByValueMap. This helps us avoid having to sort a lot of merge candidates that we will never actually consider and helps a lot in performance. - Remove unnecessary associative containers and store data structures (the heap nodes in particular) directly in the object they relate to. This eliminates a huge amount of lookups and helps a lot in performance. - Distribute storage for SiblingMC instances into the LogicMTask instances, and combine with the sibling maps. This again eliminates hash table lookups and makes storage structures smaller. - Remove some now bidirectional edge maps, keep only the forward map. There are also some other smaller optimizations: - Replaced more unnecessary dynamic_casts with static_casts - Templated some functions/classes to reduce the number of static branches in loops. 
- Improves sorting of edges for sibling candidate creation - Various micro-optimizations here and there This speeds up MTask coarsening by 3.8x on a large design, which translates to a 2.5x speedup of the ordering pass in multi-threaded mode. (Combined with the earlier optimizations, ordering is now 3x faster.) Due to the elimination of a lot of the auxiliary data structures, and ensuring a minimal size for the necessary ones, memory consumption of the MTask coarsening is also reduced (measured up to 4.4x reduction though the accuracy of this is low). The algorithm is identical except for minor alterations of the order some candidates are added or removed, this can cause perturbation in the output due to tied scores being broken based on IDs. --- Changes | 1 + src/V3Graph.h | 2 +- src/V3PairingHeap.h | 293 ++++++++++++ src/V3Partition.cpp | 1026 ++++++++++++++++++++++++------------------ src/V3Scoreboard.cpp | 56 ++- src/V3Scoreboard.h | 337 +++++--------- 6 files changed, 1015 insertions(+), 700 deletions(-) create mode 100644 src/V3PairingHeap.h diff --git a/Changes b/Changes index 65ee0fd39..1fa981e3c 100644 --- a/Changes +++ b/Changes @@ -20,6 +20,7 @@ Verilator 4.225 devel * Fix incorrect tristate logic (#3399) [shareefj, Vighnesh Iyer] * Fix segfault exporting non-existant package (#3535). * Fix case statement comparing string literal (#3544). [Gustav Svensk] +* Improve Verilation speed with --threads on large designs. [Geza Lore] Verilator 4.224 2022-06-19 diff --git a/src/V3Graph.h b/src/V3Graph.h index da096ab2f..a18fb5dfc 100644 --- a/src/V3Graph.h +++ b/src/V3Graph.h @@ -67,7 +67,7 @@ public: return names[m_e]; } // METHODS unique to this class - constexpr GraphWay invert() const { return m_e == FORWARD ? 
REVERSE : FORWARD; } + constexpr GraphWay invert() const { return GraphWay{m_e ^ 1}; } constexpr bool forward() const { return m_e == FORWARD; } constexpr bool reverse() const { return m_e != FORWARD; } }; diff --git a/src/V3PairingHeap.h b/src/V3PairingHeap.h new file mode 100644 index 000000000..c1f5f5342 --- /dev/null +++ b/src/V3PairingHeap.h @@ -0,0 +1,293 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// DESCRIPTION: Verilator: Pairing Heap data structure +// +// Code available from: https://verilator.org +// +//************************************************************************* +// +// Copyright 2003-2022 by Wilson Snyder. This program is free software; you +// can redistribute it and/or modify it under the terms of either the GNU +// Lesser General Public License Version 3 or the Perl Artistic License +// Version 2.0. +// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +// +//************************************************************************* + +#ifndef VERILATOR_V3PAIRINGHEAP_H_ +#define VERILATOR_V3PAIRINGHEAP_H_ + +#include "config_build.h" +#include "verilatedos.h" + +#include "V3Error.h" + +//============================================================================= +// Pairing heap (max-heap) with increase key and delete. +// +// While this is written as a generic data structure, its interface and +// implementation is finely tuned for its use by V3Partition, and is critical +// to Verilation performance, so be very careful changing anything or adding any +// new operations that would impact either memory usage, or performance of the +// existing operations. This data structure is fully deterministic, meaning +// the order in which elements with equal keys are retrieved only depends on +// the order of operations performed on the heap.
+//============================================================================= + +template +class PairingHeap final { +public: + struct Node; + + // Just a pointer to a heap Node, but with special accessors to help keep back pointers + // consistent. + struct Link { + Node* m_ptr = nullptr; // The managed pointer + + Link() = default; + VL_UNCOPYABLE(Link); + + // Make the pointer point to the target, and the target's owner pointer to this pointer + VL_ATTR_ALWINLINE void link(Node* targetp) { + m_ptr = targetp; + if (!targetp) return; +#if VL_DEBUG + UASSERT(!targetp->m_ownerpp, "Already linked"); +#endif + targetp->m_ownerpp = &m_ptr; + } + + // Make the pointer point to the target, and the target's owner pointer to this pointer + VL_ATTR_ALWINLINE void linkNonNull(Node* targetp) { + m_ptr = targetp; +#if VL_DEBUG + UASSERT(!targetp->m_ownerpp, "Already linked"); +#endif + targetp->m_ownerpp = &m_ptr; + } + + // Clear the pointer and return its previous value + VL_ATTR_ALWINLINE Node* unlink() { + Node* const result = m_ptr; +#if VL_DEBUG + if (result) { + UASSERT(m_ptr->m_ownerpp == &m_ptr, "Bad back link"); + // Not strictly necessary to clear this, but helps debugging + m_ptr->m_ownerpp = nullptr; + } +#endif + m_ptr = nullptr; + return result; + } + + // Minimal convenience accessors and operators + VL_ATTR_ALWINLINE Node* ptr() const { return m_ptr; } + VL_ATTR_ALWINLINE operator bool() const { return m_ptr; } + VL_ATTR_ALWINLINE bool operator!() const { return !m_ptr; } + VL_ATTR_ALWINLINE Node* operator->() const { return m_ptr; } + VL_ATTR_ALWINLINE Node& operator*() const { return *m_ptr; } + }; + + // A single node in the pairing heap tree + struct Node { + Link m_next; // Next in list of sibling heaps + Link m_kids; // Head of list of child heaps + Node** m_ownerpp = nullptr; // Pointer to the Link pointer pointing to this heap + T_Key m_key; // The key in the heap + + // CONSTRUCTOR + explicit Node() = default; + VL_UNCOPYABLE(Node); + + // METHODS
+ VL_ATTR_ALWINLINE const T_Key& key() const { return m_key; } + VL_ATTR_ALWINLINE bool operator<(const Node& that) const { return m_key < that.m_key; } + VL_ATTR_ALWINLINE bool operator>(const Node& that) const { return that.m_key < m_key; } + + // Make newp take the place of this in the tree + VL_ATTR_ALWINLINE void replaceWith(Node* newp) { + *m_ownerpp = newp; // The owner pointer needs to point to the new node + if (newp) newp->m_ownerpp = m_ownerpp; // The new node needs to point to its owner + m_ownerpp = nullptr; // This node has no owner anymore + } + + // Make newp take the place of this in the tree + VL_ATTR_ALWINLINE void replaceWithNonNull(Node* newp) { + *m_ownerpp = newp; // The owner pointer needs to point to the new node + newp->m_ownerpp = m_ownerpp; // The new node needs to point to its owner + m_ownerpp = nullptr; // This node has no owner anymore + } + }; + +private: + // MEMBERS + + // The root of the heap. Note: We do not reduce lists during insertion/removal etc, unless we + // absolutely have to. This means the root can become a list. This is ok, we will reduce + // lazily when requesting the maximum element. + mutable Link m_root; + + // CONSTRUCTORS + VL_UNCOPYABLE(PairingHeap); + +public: + explicit PairingHeap() = default; + + // METHODS + bool empty() const { return !m_root; } + + // Insert given node into this heap with given key.
+ void insert(Node* nodep, T_Key key) { + // Update key of node + nodep->m_key = key; + insert(nodep); + } + + // Insert given node into this heap with key already set in the node + void insert(Node* nodep) { +#if VL_DEBUG + UASSERT(!nodep->m_ownerpp && !nodep->m_next && !nodep->m_kids, "Already linked"); +#endif + // Just stick it at the front of the root list + nodep->m_next.link(m_root.unlink()); + m_root.linkNonNull(nodep); + } + + // Remove given node only from the heap it is contained in + void remove(Node* nodep) { + if (!nodep->m_next) { + // If the node does not have siblings, replace it with its children (might be empty). + nodep->replaceWith(nodep->m_kids.unlink()); + } else if (!nodep->m_kids) { + // If it has siblings but no children, replace it with the siblings. + nodep->replaceWithNonNull(nodep->m_next.unlink()); + } else { + // If it has both siblings and children, reduce the children and splice that + // reduced heap in place of this node + Node* const reducedKidsp = reduce(nodep->m_kids.unlink()); + reducedKidsp->m_next.linkNonNull(nodep->m_next.unlink()); + nodep->replaceWithNonNull(reducedKidsp); + } + } + + // Returns the largest element in the heap + Node* max() const { + // Heap might be empty + if (!m_root) return nullptr; + // If the root has siblings, reduce them + if (m_root->m_next) m_root.linkNonNull(reduce(m_root.unlink())); + // The root element is the largest + return m_root.ptr(); + } + + // Returns the second-largest element in the heap. + // This is only valid to call if 'max' returned a valid element.
+ Node* secondMax() const { +#if VL_DEBUG + UASSERT(m_root, "'max' would have returned nullptr"); + UASSERT(!m_root->m_next, "'max' would have reduced"); +#endif + // If there are no children, there is no second element + if (!m_root->m_kids) return nullptr; + // If there are multiple children, reduce them + if (m_root->m_kids->m_next) m_root->m_kids.linkNonNull(reduce(m_root->m_kids.unlink())); + // Return the now singular child, which is the second-largest element + return m_root->m_kids.ptr(); + } + + // Increase the key of the given node to the given new value + template + void increaseKey(Node* nodep, T_Update value) { + // Update the key + nodep->m_key.increase(value); + // Increasing the key of the root is easy + if (nodep == m_root.ptr()) return; + // Otherwise we do have a little work to do + if (!nodep->m_kids) { + // If the node has no children, replace it with its siblings (might be null) + nodep->replaceWith(nodep->m_next.unlink()); + } else if (!nodep->m_next) { + // If the node has no siblings, replace it with its children + nodep->replaceWithNonNull(nodep->m_kids.unlink()); + } else { + // The node has both children and siblings.
Splice the first child in the place of the + // node, and extract the rest of the children with the node + Node* const kidsp = nodep->m_kids.unlink(); + nodep->m_kids.link(kidsp->m_next.unlink()); + kidsp->m_next.linkNonNull(nodep->m_next.unlink()); + nodep->replaceWithNonNull(kidsp); + } + // Just stick the increased node at the front of the root list + nodep->m_next.linkNonNull(m_root.unlink()); + m_root.linkNonNull(nodep); + } + +private: + // Meld (merge) two heaps rooted at the given nodes, return the root of the new heap + VL_ATTR_ALWINLINE static Node* merge(Node* ap, Node* bp) { +#if VL_DEBUG + UASSERT(!ap->m_ownerpp && !ap->m_next, "Not root a"); + UASSERT(!bp->m_ownerpp && !bp->m_next, "Not root b"); +#endif + if (*ap > *bp) { // bp goes under ap + bp->m_next.link(ap->m_kids.unlink()); + ap->m_kids.linkNonNull(bp); + return ap; + } else { // ap goes under bp + ap->m_next.link(bp->m_kids.unlink()); + bp->m_kids.linkNonNull(ap); + return bp; + } + } + + // Reduces the list of nodes starting at the given node into a single node that is returned + VL_ATTR_NOINLINE static Node* reduce(Node* nodep) { +#if VL_DEBUG + UASSERT(!nodep->m_ownerpp, "Node is linked"); +#endif + // If there is only one node in the list, then there is nothing to do + if (!nodep->m_next) return nodep; + // The result node + Node* resultp = nullptr; + // Pairwise merge the child nodes + while (nodep) { + // Pop off the first node + Node* const ap = nodep; + // If we have an odd number of nodes, prepend the unpaired one onto the result list + if (!nodep->m_next) { + ap->m_next.link(resultp); + resultp = ap; + break; + } + // Pop off the second node + Node* const bp = nodep->m_next.unlink(); + // Keep hold of the rest of the list + nodep = bp->m_next.unlink(); + // Merge the current pair + Node* const mergedp = merge(ap, bp); + // Prepend the merged pair to the result list + mergedp->m_next.link(resultp); + resultp = mergedp; + } + // Now merge-reduce the merged pairs + while
(resultp->m_next) { + // Pop first two results + Node* const ap = resultp; + Node* const bp = resultp->m_next.unlink(); + // Keep hold of the rest of the list + resultp = bp->m_next.unlink(); + // Merge the current pair + Node* const mergedp = merge(ap, bp); + // Prepend the merged pair to the result list + mergedp->m_next.link(resultp); + resultp = mergedp; + } + // Done + return resultp; + } +}; + +// The PairingHeap itself should be a simple pointer and nothing more +static_assert(sizeof(PairingHeap) == sizeof(PairingHeap::Node*), "Should be a pointer"); + +#endif // Guard diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 5b1474e91..bf537a65c 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -22,23 +22,29 @@ #include "V3Config.h" #include "V3EmitCBase.h" #include "V3File.h" -#include "V3GraphAlg.h" #include "V3GraphStream.h" #include "V3InstrCount.h" #include "V3Os.h" +#include "V3PairingHeap.h" #include "V3PartitionGraph.h" #include "V3Scoreboard.h" #include "V3Stats.h" #include "V3UniqueNames.h" #include +#include #include #include +#include #include +#include +class LogicMTask; +class MTaskEdge; class MergeCandidate; +class SiblingMC; -//###################################################################### +// ###################################################################### // Partitioner tunable settings: // // Before describing these settings, a bit of background: @@ -70,14 +76,14 @@ class MergeCandidate; // skipping the enumeration of some siblings on a few vertices does not // have a large impact on the result of the partitioner. // -// If your vertices are small, the limit (at 25) approaches a no-op. Hence +// If your vertices are small, the limit (at 26) approaches a no-op. Hence // there's basically no cost to applying this limit even when we don't // expect huge vertices. // // If you don't care about partitioner runtime and you want the most // aggressive partition, set the limit very high. 
If you have huge // vertices, leave this as is. -constexpr unsigned PART_SIBLING_EDGE_LIMIT = 25; +constexpr unsigned PART_SIBLING_EDGE_LIMIT = 26; // PART_STEPPED_COST (defined/undef) // @@ -143,10 +149,34 @@ static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) { #endif } -//###################################################################### +//============================================================================= +// We keep MTaskEdge graph edges in a PairingHeap, sorted by score and id + +struct EdgeKey { + // Note: Structure layout chosen to minimize padding in PairingHeap<*>::Node + uint64_t m_id; // Unique ID part of edge score + uint32_t m_score; // Score part of ID + void increase(uint32_t score) { +#if VL_DEBUG + UASSERT(score >= m_score, "Must increase"); +#endif + m_score = score; + } + bool operator<(const EdgeKey& other) const { + // First by Score then by ID + return m_score < other.m_score || (m_score == other.m_score && m_id < other.m_id); + } +}; + +using EdgeHeap = PairingHeap; + +//============================================================================= // LogicMTask class LogicMTask final : public AbstractLogicMTask { + template + friend class PartPropagateCp; + public: // TYPES using VxList = std::list; @@ -157,55 +187,6 @@ public: } }; - // This adaptor class allows the PartPropagateCp class to be somewhat - // independent of the LogicMTask class - // - PartPropagateCp can thus be declared before LogicMTask - // - PartPropagateCp could be reused with graphs of other node types - // in the future, using another Accessor adaptor.
- class CpCostAccessor final { - public: - CpCostAccessor() = default; - ~CpCostAccessor() = default; - // Return cost of this node - uint32_t cost(const V3GraphVertex* vxp) const { - const LogicMTask* const mtaskp = static_cast(vxp); - return mtaskp->stepCost(); - } - // Return stored CP to this node - uint32_t critPathCost(const V3GraphVertex* vxp, GraphWay way) const { - const LogicMTask* const mtaskp = static_cast(vxp); - return mtaskp->critPathCost(way); - } - // Store a new CP to this node - void setCritPathCost(V3GraphVertex* vxp, GraphWay way, uint32_t cost) const { - LogicMTask* const mtaskp = static_cast(vxp); - mtaskp->setCritPathCost(way, cost); - } - // Notify vxp that the wayward CP at the throughp-->vxp edge - // has increased to 'cp'. (vxp is wayward from throughp.) - // This is our cue to update vxp's m_edges[!way][throughp]. - void notifyEdgeCp(V3GraphVertex* vxp, GraphWay way, V3GraphVertex* throuvhVxp, - uint32_t cp) const { - LogicMTask* const updateVxp = static_cast(vxp); - LogicMTask* const lthrouvhVxp = static_cast(throuvhVxp); - EdgeSet& edges = updateVxp->m_edges[way.invert()]; - const auto it = edges.find(lthrouvhVxp); - if (cp > it->second) edges.update(it, cp); - } - // Check that CP matches that of the longest edge wayward of vxp. - void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const { - LogicMTask* const mtaskp = static_cast(vxp); - const EdgeSet& edges = mtaskp->m_edges[way.invert()]; - // This is mtaskp's relative with longest !wayward inclusive CP: - const auto edgeIt = edges.rbegin(); - const uint32_t edgeCp = edgeIt->second; - UASSERT_OBJ(edgeCp == cp, vxp, "CP doesn't match longest wayward edge"); - } - - private: - VL_UNCOPYABLE(CpCostAccessor); - }; - private: // MEMBERS @@ -231,21 +212,21 @@ private: // while searching for a path. uint64_t m_generation = 0; - // Redundant with the V3GraphEdge's, store a map of relatives so we can - // quickly check if we have a given parent or child. 
- // - // 'm_edges[way]' maps a wayward relative to the !way critical path at - // our edge with them. The SortByValueMap supports iterating over - // relatives in longest-to-shortest CP order. We rely on this ordering - // in more than one place. - using EdgeSet = SortByValueMap; - std::array m_edges; + // Store a set of forward relatives so we can quickly check if we have a given child + std::unordered_set m_edgeSet; + // Store the outgoing and incoming edges in a heap sorted by the critical path length + std::array m_edgeHeap; + + // SiblingMC for which storage is owned by this MTask + std::set m_ownSibs; + // SiblingMC for which storage is owned by the opposite MTask + std::set m_farSibps; public: // CONSTRUCTORS LogicMTask(V3Graph* graphp, MTaskMoveVertex* mtmvVxp) : AbstractLogicMTask{graphp} { - for (unsigned int& i : m_critPathCost) i = 0; + for (uint32_t& item : m_critPathCost) item = 0; if (mtmvVxp) { // Else null for test m_vertices.push_back(mtmvVxp); if (const OrderLogicVertex* const olvp = mtmvVxp->logicp()) { @@ -259,6 +240,9 @@ public: } // METHODS + std::set& ownSibs() { return m_ownSibs; }; + std::set& farSibs() { return m_farSibps; }; + void moveAllVerticesFrom(LogicMTask* otherp) { // splice() is constant time m_vertices.splice(m_vertices.end(), otherp->m_vertices); @@ -296,32 +280,37 @@ public: logcost = logcost / 20.0; const uint32_t stepCost = static_cast(exp(logcost)); +#if VL_DEBUG UASSERT_STATIC(stepCost >= cost, "stepped cost error exceeded"); UASSERT_STATIC(stepCost <= ((cost * 11 / 10)), "stepped cost error exceeded"); +#endif return stepCost; #else return cost; #endif } - void addRelative(GraphWay way, LogicMTask* relativep) { - // value is !way cp to this edge - const uint32_t cp = relativep->stepCost() + relativep->critPathCost(way.invert()); - VL_ATTR_UNUSED const bool exits = !m_edges[way].emplace(relativep, cp).second; + template + void addRelativeEdge(MTaskEdge* edgep); + template + void removeRelativeEdge(MTaskEdge* edgep); + + 
void addRelativeMTask(LogicMTask* relativep) { + // Add the relative to connecting edge map + VL_ATTR_UNUSED const bool exits = !m_edgeSet.emplace(relativep).second; #if VL_DEBUG - UASSERT(!exits, "Adding existing edge"); + UASSERT(!exits, "Adding existing relative"); #endif } - void removeRelative(GraphWay way, LogicMTask* relativep) { m_edges[way].erase(relativep); } - bool hasRelative(GraphWay way, LogicMTask* relativep) { return m_edges[way].has(relativep); } - void checkRelativesCp(GraphWay way) const { - for (const auto& edge : vlstd::reverse_view(m_edges[way])) { - const LogicMTask* const relativep = edge.first; - const uint32_t cachedCp = edge.second; - const uint32_t cp = relativep->critPathCost(way.invert()) + relativep->stepCost(); - partCheckCachedScoreVsActual(cachedCp, cp); - } + void removeRelativeMTask(LogicMTask* relativep) { + VL_ATTR_UNUSED const size_t removed = m_edgeSet.erase(relativep); +#if VL_DEBUG + UASSERT(removed, "Relative should have been in set"); +#endif } + bool hasRelativeMTask(LogicMTask* relativep) const { return m_edgeSet.count(relativep); } + + void checkRelativesCp(GraphWay way) const; virtual string name() const override { // Display forward and reverse critical path costs. This gives a quick @@ -334,27 +323,7 @@ public: void setCritPathCost(GraphWay way, uint32_t cost) { m_critPathCost[way] = cost; } uint32_t critPathCost(GraphWay way) const { return m_critPathCost[way]; } - uint32_t critPathCostWithout(GraphWay way, const V3GraphEdge* withoutp) const { - // Compute the critical path cost wayward to this node, without - // considering edge 'withoutp' - UASSERT(this == withoutp->furtherp(way), "In critPathCostWithout(), edge 'withoutp' must " - "further to 'this'"); - - // Iterate through edges until we get a relative other than - // wayEdgeEndp(way, withoutp). This should take 2 iterations max. 
- const EdgeSet& edges = m_edges[way.invert()]; - uint32_t result = 0; - for (const auto& edge : vlstd::reverse_view(edges)) { - if (edge.first != withoutp->furtherp(way.invert())) { - // Use the cached cost. It could be a small overestimate - // due to stepping. This is consistent with critPathCost() - // which also returns the cached cost. - result = edge.second; - break; - } - } - return result; - } + uint32_t critPathCostWithout(GraphWay way, const V3GraphEdge* withoutp) const; private: static bool pathExistsFromInternal(LogicMTask* fromp, LogicMTask* top, @@ -411,65 +380,7 @@ public: return pathExistsFromInternal(fromp, top, excludedEdgep, incGeneration()); } - static void dumpCpFilePrefixed(const V3Graph* graphp, const string& nameComment) { - const string filename = v3Global.debugFilename(nameComment) + ".txt"; - UINFO(1, "Writing " << filename << endl); - const std::unique_ptr ofp{V3File::new_ofstream(filename)}; - std::ostream* const osp = &(*ofp); // &* needed to deref unique_ptr - if (osp->fail()) v3fatalStatic("Can't write " << filename); - - // Find start vertex with longest CP - const LogicMTask* startp = nullptr; - for (const V3GraphVertex* vxp = graphp->verticesBeginp(); vxp; - vxp = vxp->verticesNextp()) { - const LogicMTask* const mtaskp = static_cast(vxp); - if (!startp) { - startp = mtaskp; - continue; - } - if (mtaskp->cost() + mtaskp->critPathCost(GraphWay::REVERSE) - > startp->cost() + startp->critPathCost(GraphWay::REVERSE)) { - startp = mtaskp; - } - } - - // Follow the entire critical path - std::vector path; - uint32_t totalCost = 0; - for (const LogicMTask* nextp = startp; nextp;) { - path.push_back(nextp); - totalCost += nextp->cost(); - - const EdgeSet& children = nextp->m_edges[GraphWay::FORWARD]; - const EdgeSet::const_reverse_iterator it = children.rbegin(); - if (it == children.rend()) { - nextp = nullptr; - } else { - nextp = it->first; - } - } - - *osp << "totalCost = " << totalCost - << " (should match the computed critical path 
cost (CP) for the graph)\n"; - - // Dump - for (const LogicMTask* mtaskp : path) { - *osp << "begin mtask with cost " << mtaskp->cost() << '\n'; - for (VxList::const_iterator lit = mtaskp->vertexListp()->begin(); - lit != mtaskp->vertexListp()->end(); ++lit) { - const OrderLogicVertex* const logicp = (*lit)->logicp(); - if (!logicp) continue; - if (false) { - // Show nodes only - *osp << "> "; - logicp->nodep()->dumpTree(*osp); - } else { - // Show nodes with hierarchical costs - V3InstrCount::count(logicp->nodep(), false, osp); - } - } - } - } + static void dumpCpFilePrefixed(const V3Graph* graphp, const string& nameComment); private: VL_DEBUG_FUNC; // Declare debug() @@ -490,11 +401,20 @@ public: } }; -class SiblingMC; -class MTaskEdge; +struct MergeCandidateKey { + // Note: Structure layout chosen to minimize padding in PairingHeao<*>::Node + uint64_t m_id; // Unique ID part of edge score + uint32_t m_score; // Score part of ID + bool operator<(const MergeCandidateKey& other) const { + // First by Score then by ID, but notice that we want minimums using a max-heap, so reverse + return m_score > other.m_score || (m_score == other.m_score && m_id > other.m_id); + } +}; -// Information associated with scoreboarding an MTask -class MergeCandidate VL_NOT_FINAL { +using MergeCandidateScoreboard = V3Scoreboard; + +// Information associated with scoreboarding a merge candidate +class MergeCandidate VL_NOT_FINAL : public MergeCandidateScoreboard::Node { private: // Only the known subclasses can create or delete one of these friend class SiblingMC; @@ -507,18 +427,17 @@ private: // using another bit of the id to denote the actual subtype. // By using the bottom bits for flags, we can still use < to compare IDs without masking. 
- uint64_t m_id; // <63:2> Serial number for ordering, <1> subtype (SiblingMC), <0> removed - static constexpr uint64_t REMOVED_MASK = 1ULL << 0; - static constexpr uint64_t IS_SIBLING_MASK = 1ULL << 1; - static constexpr uint64_t ID_INCREMENT = 1ULL << 2; + // <63:1> Serial number for ordering, <0> subtype (SiblingMC) + static constexpr uint64_t IS_SIBLING_MASK = 1ULL << 0; + static constexpr uint64_t ID_INCREMENT = 1ULL << 1; - bool isSiblingMC() const { return m_id & IS_SIBLING_MASK; } + bool isSiblingMC() const { return m_key.m_id & IS_SIBLING_MASK; } // CONSTRUCTORS explicit MergeCandidate(bool isSiblingMC) { static uint64_t serial = 0; serial += ID_INCREMENT; // +ID_INCREMENT so doesn't set the special bottom bits - m_id = serial | (isSiblingMC * IS_SIBLING_MASK); + m_key.m_id = serial | (isSiblingMC * IS_SIBLING_MASK); } ~MergeCandidate() = default; @@ -530,35 +449,33 @@ public: const MTaskEdge* toMTaskEdge() const; // Instead of dynamic_cast bool mergeWouldCreateCycle() const; // Instead of virtual method - bool removedFromSb() const { return (m_id & REMOVED_MASK) != 0; } - void removedFromSb(bool /*removed*/) { m_id |= REMOVED_MASK; } - void clearRemovedFromSb() { m_id &= ~REMOVED_MASK; } - bool operator<(const MergeCandidate& other) const { return m_id < other.m_id; } + inline void rescore(); + uint32_t score() const { return m_key.m_score; } + + static MergeCandidate* heapNodeToElem(MergeCandidateScoreboard::Node* nodep) { + return static_cast(nodep); + } }; -static_assert(sizeof(MergeCandidate) == sizeof(uint64_t), "Should not have a vtable"); +static_assert(sizeof(MergeCandidate) == sizeof(MergeCandidateScoreboard::Node), + "Should not have a vtable"); // A pair of associated LogicMTask's that are merge candidates for sibling // contraction class SiblingMC final : public MergeCandidate { private: - LogicMTask* m_ap; - LogicMTask* m_bp; + LogicMTask* const m_ap; + LogicMTask* const m_bp; public: // CONSTRUCTORS SiblingMC() = delete; 
SiblingMC(LogicMTask* ap, LogicMTask* bp) - : MergeCandidate{/* isSiblingMC: */ true} { - // Assign 'ap' and 'bp' in a canonical order, so we can more easily - // compare pairs of SiblingMCs - if (ap->id() > bp->id()) { - m_ap = ap; - m_bp = bp; - } else { - m_ap = bp; - m_bp = ap; - } + : MergeCandidate{/* isSiblingMC: */ true} + , m_ap{ap} + , m_bp{bp} { + // operator< and storage management depends on this + UASSERT(ap->id() > bp->id(), "Should be ordered"); } ~SiblingMC() = default; // METHODS @@ -580,17 +497,23 @@ static_assert(sizeof(SiblingMC) == sizeof(MergeCandidate) + 2 * sizeof(LogicMTas // GraphEdge for the MTask graph class MTaskEdge final : public V3GraphEdge, public MergeCandidate { + friend class LogicMTask; + template + friend class PartPropagateCp; + + // MEMBERS + // This edge can be in 2 EdgeHeaps, one forward and one reverse. We allocate the heap nodes + // directly within the edge as they are always required and this makes association cheap. + EdgeHeap::Node m_edgeHeapNode[GraphWay::NUM_WAYS]; + public: // CONSTRUCTORS MTaskEdge(V3Graph* graphp, LogicMTask* fromp, LogicMTask* top, int weight) : V3GraphEdge{graphp, fromp, top, weight} , MergeCandidate{/* isSiblingMC: */ false} { - fromp->addRelative(GraphWay::FORWARD, top); - top->addRelative(GraphWay::REVERSE, fromp); - } - virtual ~MTaskEdge() override { - fromMTaskp()->removeRelative(GraphWay::FORWARD, toMTaskp()); - toMTaskp()->removeRelative(GraphWay::REVERSE, fromMTaskp()); + fromp->addRelativeMTask(top); + fromp->addRelativeEdge(this); + top->addRelativeEdge(this); } // METHODS LogicMTask* furtherMTaskp(GraphWay way) const { @@ -601,28 +524,135 @@ public: bool mergeWouldCreateCycle() const { return LogicMTask::pathExistsFrom(fromMTaskp(), toMTaskp(), this); } - static MTaskEdge* cast(V3GraphEdge* edgep) { - if (!edgep) return nullptr; - MTaskEdge* const resultp = dynamic_cast(edgep); - UASSERT(resultp, "Failed to cast in MTaskEdge::cast"); - return resultp; - } // Following initial 
assignment of critical paths, clear this MTaskEdge // out of the edge-map for each node and reinsert at a new location // with updated critical path. void resetCriticalPaths() { LogicMTask* const fromp = fromMTaskp(); LogicMTask* const top = toMTaskp(); - fromp->removeRelative(GraphWay::FORWARD, top); - top->removeRelative(GraphWay::REVERSE, fromp); - fromp->addRelative(GraphWay::FORWARD, top); - top->addRelative(GraphWay::REVERSE, fromp); + fromp->removeRelativeEdge(this); + top->removeRelativeEdge(this); + fromp->addRelativeEdge(this); + top->addRelativeEdge(this); + } + + uint32_t cachedCp(GraphWay way) const { return m_edgeHeapNode[way].key().m_score; } + + // Convert from the address of the m_edgeHeapNode[way] in an MTaskEdge back to the MTaskEdge + static const MTaskEdge* toEdge(GraphWay way, const EdgeHeap::Node* nodep) { + // Offset of the node within the MTaskEdge + const size_t offset + = reinterpret_cast(&(reinterpret_cast(0)->m_edgeHeapNode[way])); + return reinterpret_cast(reinterpret_cast(nodep) - offset); } private: VL_UNCOPYABLE(MTaskEdge); }; +template +void LogicMTask::addRelativeEdge(MTaskEdge* edgep) { + constexpr GraphWay way{T_Way}; + constexpr GraphWay inv = way.invert(); + // Add to the edge heap + LogicMTask* const relativep = edgep->furtherMTaskp(way); + // Value is !way cp to this edge + const uint32_t cp = relativep->stepCost() + relativep->critPathCost(inv); + // + m_edgeHeap[way].insert(&edgep->m_edgeHeapNode[way], {relativep->id(), cp}); +} + +template +void LogicMTask::removeRelativeEdge(MTaskEdge* edgep) { + constexpr GraphWay way{T_Way}; + // Remove from the edge heap + m_edgeHeap[way].remove(&edgep->m_edgeHeapNode[way]); +} + +void LogicMTask::checkRelativesCp(GraphWay way) const { + for (V3GraphEdge* edgep = beginp(way); edgep; edgep = edgep->nextp(way)) { + const LogicMTask* const relativep = static_cast(edgep->furtherp(way)); + const uint32_t cachedCp = static_cast(edgep)->cachedCp(way); + const uint32_t cp = 
relativep->critPathCost(way.invert()) + relativep->stepCost(); + partCheckCachedScoreVsActual(cachedCp, cp); + } +} + +uint32_t LogicMTask::critPathCostWithout(GraphWay way, const V3GraphEdge* withoutp) const { + // Compute the critical path cost wayward to this node, without considering edge 'withoutp'. + // We need to look at two edges at most, the critical path if that is not via 'withoutp', + // or the second-worst path, if the critical path is via 'withoutp'. +#if VL_DEBUG + UASSERT(withoutp->furtherp(way) == this, + "In critPathCostWithout(), edge 'withoutp' must further to 'this'"); +#endif + const GraphWay inv = way.invert(); + const EdgeHeap& edgeHeap = m_edgeHeap[inv]; + const EdgeHeap::Node* const maxp = edgeHeap.max(); + if (!maxp) return 0; + if (MTaskEdge::toEdge(inv, maxp) != withoutp) return maxp->key().m_score; + const EdgeHeap::Node* const secp = edgeHeap.secondMax(); + if (!secp) return 0; + return secp->key().m_score; +} + +void LogicMTask::dumpCpFilePrefixed(const V3Graph* graphp, const string& nameComment) { + const string filename = v3Global.debugFilename(nameComment) + ".txt"; + UINFO(1, "Writing " << filename << endl); + const std::unique_ptr ofp{V3File::new_ofstream(filename)}; + std::ostream* const osp = &(*ofp); // &* needed to deref unique_ptr + if (osp->fail()) v3fatalStatic("Can't write " << filename); + + // Find start vertex with longest CP + LogicMTask* startp = nullptr; + for (V3GraphVertex* vxp = graphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + LogicMTask* const mtaskp = static_cast(vxp); + if (!startp) { + startp = mtaskp; + continue; + } + if (mtaskp->cost() + mtaskp->critPathCost(GraphWay::REVERSE) + > startp->cost() + startp->critPathCost(GraphWay::REVERSE)) { + startp = mtaskp; + } + } + + // Follow the entire critical path + std::vector path; + uint32_t totalCost = 0; + for (LogicMTask* nextp = startp; nextp;) { + path.push_back(nextp); + totalCost += nextp->cost(); + + if (EdgeHeap::Node* const maxp = 
nextp->m_edgeHeap[GraphWay::FORWARD].max()) { + nextp = MTaskEdge::toEdge(GraphWay::FORWARD, maxp)->toMTaskp(); + } else { + nextp = nullptr; + } + } + + *osp << "totalCost = " << totalCost + << " (should match the computed critical path cost (CP) for the graph)\n"; + + // Dump + for (const LogicMTask* mtaskp : path) { + *osp << "begin mtask with cost " << mtaskp->cost() << '\n'; + for (VxList::const_iterator lit = mtaskp->vertexListp()->begin(); + lit != mtaskp->vertexListp()->end(); ++lit) { + const OrderLogicVertex* const logicp = (*lit)->logicp(); + if (!logicp) continue; + if (false) { + // Show nodes only + *osp << "> "; + logicp->nodep()->dumpTree(*osp); + } else { + // Show nodes with hierarchical costs + V3InstrCount::count(logicp->nodep(), false, osp); + } + } + } +} + // Instead of dynamic cast SiblingMC* MergeCandidate::toSiblingMC() { return isSiblingMC() ? static_cast(this) : nullptr; @@ -647,6 +677,40 @@ bool MergeCandidate::mergeWouldCreateCycle() const { : static_cast(this)->mergeWouldCreateCycle(); } +static uint32_t siblingScore(const SiblingMC* sibsp) { + const LogicMTask* const ap = sibsp->ap(); + const LogicMTask* const bp = sibsp->bp(); + const uint32_t mergedCpCostFwd + = std::max(ap->critPathCost(GraphWay::FORWARD), bp->critPathCost(GraphWay::FORWARD)); + const uint32_t mergedCpCostRev + = std::max(ap->critPathCost(GraphWay::REVERSE), bp->critPathCost(GraphWay::REVERSE)); + return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost()); +} + +static uint32_t edgeScore(const MTaskEdge* edgep) { + // Score this edge. Lower is better. The score is the new local CP + // length if we merge these mtasks. ("Local" means the longest + // critical path running through the merged node.) 
+ const LogicMTask* const top = static_cast(edgep->top()); + const LogicMTask* const fromp = static_cast(edgep->fromp()); + const uint32_t mergedCpCostFwd = std::max(fromp->critPathCost(GraphWay::FORWARD), + top->critPathCostWithout(GraphWay::FORWARD, edgep)); + const uint32_t mergedCpCostRev = std::max(fromp->critPathCostWithout(GraphWay::REVERSE, edgep), + top->critPathCost(GraphWay::REVERSE)); + return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(fromp->cost() + top->cost()); +} + +void MergeCandidate::rescore() { + if (const SiblingMC* const sibp = toSiblingMC()) { + m_key.m_score = siblingScore(sibp); + } else { + // The '1 +' favors merging a SiblingMC over an otherwise- + // equal-scoring MTaskEdge. The comment on selfTest() talks + // about why. + m_key.m_score = 1 + edgeScore(static_cast(this)); + } +} + // ###################################################################### // Vertex utility classes @@ -813,7 +877,6 @@ static void partCheckCriticalPaths(V3Graph* mtasksp) { // Usage: // * Client increases the cost and/or CP at a node or small set of nodes // (often a pair in practice, eg. edge contraction.) -// * Client instances a PartPropagateCp object // * Client calls PartPropagateCp::cpHasIncreased() one or more times. // Each call indicates that the inclusive CP of some "seed" vertex // has increased to a given value. @@ -823,53 +886,120 @@ static void partCheckCriticalPaths(V3Graph* mtasksp) { // * Client calls PartPropagateCp::go(). Internally, this iteratively // propagates the new CPs wayward through the graph. 
// +template +class PartPropagateCp final { + // TYPES + + // We keep pending vertices in a heap during critical path propagation + struct PendingKey { + LogicMTask* m_mtaskp; // The vertex in the heap + uint32_t m_score; // The score of this entry + void increase(uint32_t score) { +#if VL_DEBUG + UASSERT(score >= m_score, "Must increase"); +#endif + m_score = score; + } + bool operator<(const PendingKey& other) const { + if (m_score != other.m_score) return m_score < other.m_score; + return LogicMTask::CmpLogicMTask{}(m_mtaskp, other.m_mtaskp); + } + }; + + using PendingHeap = PairingHeap; + using PendingHeapNode = typename PendingHeap::Node; -class PartPropagateCp final : GraphAlg<> { -private: // MEMBERS - const GraphWay m_way; // CPs oriented in this direction: either FORWARD - // // from graph-start to current node, or REVERSE - // // from graph-end to current node. - LogicMTask::CpCostAccessor m_access; // Access cost and CPs on V3GraphVertex's. - // // confirm we only process each vertex once. 
- const bool m_slowAsserts; // Enable nontrivial asserts - // Pending rescores - SortByValueMap m_pending; + PendingHeap m_pendingHeap; // Heap of pending rescores + // We allocate this many heap nodes at once + static constexpr size_t ALLOC_CHUNK_SIZE = 128; + PendingHeapNode* m_freep = nullptr; // List of free heap nodes + std::vector> m_allocated; // Allocated heap nodes + + const bool m_slowAsserts; // Enable nontrivial asserts std::set m_seen; // Used only with slow asserts to check mtasks visited only once public: // CONSTRUCTORS - PartPropagateCp(V3Graph* graphp, GraphWay way, bool slowAsserts, - V3EdgeFuncP edgeFuncp = &V3GraphEdge::followAlwaysTrue) - : GraphAlg<>{graphp, edgeFuncp} - , m_way{way} - , m_slowAsserts{slowAsserts} {} + PartPropagateCp(bool slowAsserts) + : m_slowAsserts{slowAsserts} {} // METHODS +private: + // Allocate a HeapNode for the given element + PendingHeapNode* allocNode() { + // If no free nodes available, then make some + if (!m_freep) { + // Allocate in chunks for efficiency + m_allocated.emplace_back(new PendingHeapNode[ALLOC_CHUNK_SIZE]); + // Set up free list pointer + m_freep = m_allocated.back().get(); + // Set up free list chain + for (size_t i = 1; i < ALLOC_CHUNK_SIZE; ++i) { + m_freep[i - 1].m_next.m_ptr = &m_freep[i]; + } + // Clear the next pointer of the last entry + m_freep[ALLOC_CHUNK_SIZE - 1].m_next.m_ptr = nullptr; + } + // Free nodes are available, pick up the first one + PendingHeapNode* const resultp = m_freep; + m_freep = resultp->m_next.m_ptr; + resultp->m_next.m_ptr = nullptr; + return resultp; + } + + // Release a heap node (make it available for future allocation) + void freeNode(PendingHeapNode* nodep) { + // Re-use the existing link pointers and simply prepend it to the free list + nodep->m_next.m_ptr = m_freep; + m_freep = nodep; + } + +public: void cpHasIncreased(V3GraphVertex* vxp, uint32_t newInclusiveCp) { + constexpr GraphWay way{T_Way}; + constexpr GraphWay inv{way.invert()}; + // For *vxp, whose 
CP-inclusive has just increased to // newInclusiveCp, iterate to all wayward nodes, update the edges // of each, and add each to m_pending if its overall CP has grown. - for (V3GraphEdge* edgep = vxp->beginp(m_way); edgep; edgep = edgep->nextp(m_way)) { - if (!m_edgeFuncp(edgep)) continue; - LogicMTask* const relativep = static_cast(edgep->furtherp(m_way)); - m_access.notifyEdgeCp(relativep, m_way, vxp, newInclusiveCp); + for (MTaskEdge *edgep = static_cast(vxp->beginp(way)), *nextp; edgep; + edgep = nextp) { + // Fetch early as likely cache miss + nextp = static_cast(edgep->nextp(way)); - if (m_access.critPathCost(relativep, m_way) < newInclusiveCp) { - // relativep's critPathCost() is out of step with its - // longest !wayward edge. Schedule that to be resolved. - const uint32_t newPendingVal - = newInclusiveCp - m_access.critPathCost(relativep, m_way); - const auto pair = m_pending.emplace(relativep, newPendingVal); - if (!pair.second && (newPendingVal > pair.first->second)) { - m_pending.update(pair.first, newPendingVal); - } + LogicMTask* const relativep = edgep->furtherMTaskp(way); + EdgeHeap::Node& edgeHeapNode = edgep->m_edgeHeapNode[inv]; + if (newInclusiveCp > edgeHeapNode.key().m_score) { + relativep->m_edgeHeap[inv].increaseKey(&edgeHeapNode, newInclusiveCp); } + + const uint32_t critPathCost = relativep->critPathCost(way); + + if (critPathCost >= newInclusiveCp) continue; + + // relativep's critPathCost() is out of step with its longest !wayward edge. + // Schedule that to be resolved. + const uint32_t newVal = newInclusiveCp - critPathCost; + + if (PendingHeapNode* const nodep = static_cast(relativep->userp())) { + // Already in heap. Increase score if needed. 
+ if (newVal > nodep->key().m_score) m_pendingHeap.increaseKey(nodep, newVal); + continue; + } + + // Add to heap + PendingHeapNode* const nodep = allocNode(); + relativep->userp(nodep); + m_pendingHeap.insert(nodep, {relativep, newVal}); } } void go() { + constexpr GraphWay way{T_Way}; + constexpr GraphWay inv{way.invert()}; + // m_pending maps each pending vertex to the amount that it wayward // CP will grow. // @@ -886,27 +1016,34 @@ public: // once. And so on. // // This generalizes to multiple seed nodes also. - while (!m_pending.empty()) { - const auto it = m_pending.rbegin(); - LogicMTask* const updateMep = it->first; - const uint32_t cpGrowBy = it->second; - m_pending.erase(it); - - // For *updateMep, whose critPathCost was out-of-date with respect - // to its edges, update the critPathCost. - const uint32_t startCp = m_access.critPathCost(updateMep, m_way); + while (!m_pendingHeap.empty()) { + // Pop max element from heap + PendingHeapNode* const maxp = m_pendingHeap.max(); + m_pendingHeap.remove(maxp); + // Pick up values + LogicMTask* const mtaskp = maxp->key().m_mtaskp; + const uint32_t cpGrowBy = maxp->key().m_score; + // Free the heap node, we are done with it + freeNode(maxp); + mtaskp->userp(nullptr); + // Update the critPathCost of mtaskp, that was out-of-date with respect to its edges + const uint32_t startCp = mtaskp->critPathCost(way); const uint32_t newCp = startCp + cpGrowBy; if (VL_UNLIKELY(m_slowAsserts)) { - m_access.checkNewCpVersusEdges(updateMep, m_way, newCp); + // Check that CP matches that of the longest edge wayward of vxp. + const uint32_t edgeCp = mtaskp->m_edgeHeap[inv].max()->key().m_score; + UASSERT_OBJ(edgeCp == newCp, mtaskp, "CP doesn't match longest wayward edge"); // Confirm that we only set each node's CP once. That's an // important property of PartPropagateCp which allows it to be far // faster than a recursive algorithm on some graphs. 
- const bool first = m_seen.insert(updateMep).second; - UASSERT_OBJ(first, updateMep, "Set CP on node twice"); + const bool first = m_seen.insert(mtaskp).second; + UASSERT_OBJ(first, mtaskp, "Set CP on node twice"); } - m_access.setCritPathCost(updateMep, m_way, newCp); - cpHasIncreased(updateMep, newCp + m_access.cost(updateMep)); + mtaskp->setCritPathCost(way, newCp); + cpHasIncreased(mtaskp, newCp + mtaskp->stepCost()); } + + if (VL_UNLIKELY(m_slowAsserts)) m_seen.clear(); } private: @@ -939,11 +1076,11 @@ private: const unsigned idx1 = V3Os::rand64(rngState) % 50; const unsigned idx2 = V3Os::rand64(rngState) % 50; if (idx1 > idx2) { - if (!m_vx[idx2]->hasRelative(GraphWay::FORWARD, m_vx[idx1])) { + if (!m_vx[idx2]->hasRelativeMTask(m_vx[idx1])) { new MTaskEdge{&m_graph, m_vx[idx2], m_vx[idx1], 1}; } } else if (idx2 > idx1) { - if (!m_vx[idx1]->hasRelative(GraphWay::FORWARD, m_vx[idx2])) { + if (!m_vx[idx1]->hasRelativeMTask(m_vx[idx2])) { new MTaskEdge{&m_graph, m_vx[idx1], m_vx[idx2], 1}; } } @@ -952,7 +1089,7 @@ private: partInitCriticalPaths(&m_graph); // This SelfTest class is also the T_CostAccessor - PartPropagateCp prop(&m_graph, GraphWay::FORWARD, true); + PartPropagateCp prop(true); // Seed the propagator with every input node; // This should result in the complete graph getting all CP's assigned. @@ -961,9 +1098,6 @@ private: } // Run the propagator. - // * The setCritPathCost() routine checks that each node's CP changes - // at most once. - // * The notifyEdgeCp routine is also self checking. prop.go(); // Finally, confirm that the entire graph appears to have correct CPs. @@ -976,7 +1110,7 @@ public: // Merge edges from a LogicMtask. // -// This code removes 'hasRelative' edges. When this occurs, mark it in need +// This code removes adjacent edges. When this occurs, mark it in need // of a rescore, in case its score has fallen and we need to move it up // toward the front of the scoreboard. 
// @@ -1007,51 +1141,93 @@ public: // // Another way of stating this: this code ensures that scores of // non-transitive edges only ever increase. -static void partRedirectEdgesFrom(LogicMTask* recipientp, LogicMTask* donorp, - V3Scoreboard* sbp) { - for (const auto& way : {GraphWay::FORWARD, GraphWay::REVERSE}) { - for (V3GraphEdge *edgep = donorp->beginp(way), *nextp; edgep; edgep = nextp) { - nextp = edgep->nextp(way); - MTaskEdge* const tedgep = MTaskEdge::cast(edgep); - LogicMTask* const relativep = tedgep->furtherMTaskp(way); - if (recipientp->hasRelative(way, relativep)) { - // An edge already exists between recipient and relative of donor. - // Mark it in need of a rescore - if (sbp) { - if (!tedgep->removedFromSb()) sbp->removeElem(tedgep); - const MTaskEdge* const existMTaskEdgep - = MTaskEdge::cast(recipientp->findConnectingEdgep(way, relativep)); - UASSERT(existMTaskEdgep, "findConnectingEdge didn't find edge"); - if (!existMTaskEdgep->removedFromSb()) { - sbp->hintScoreChanged(existMTaskEdgep); - } - } - VL_DO_DANGLING(edgep->unlinkDelete(), edgep); - } else { - // No existing edge between recipient and relative of donor. - // Redirect the edge from donor<->relative to recipient<->relative. 
- if (way == GraphWay::REVERSE) { - tedgep->relinkTop(recipientp); - relativep->removeRelative(GraphWay::FORWARD, donorp); - relativep->addRelative(GraphWay::FORWARD, recipientp); - recipientp->addRelative(GraphWay::REVERSE, relativep); +static void partRedirectEdgesFrom(V3Graph* graphp, LogicMTask* recipientp, LogicMTask* donorp, + MergeCandidateScoreboard* sbp) { + + // Process outgoing edges + MTaskEdge* outNextp = static_cast(donorp->outBeginp()); + while (outNextp) { + MTaskEdge* const edgep = outNextp; + LogicMTask* const relativep = outNextp->toMTaskp(); + outNextp = static_cast(outNextp->outNextp()); + + relativep->removeRelativeEdge(edgep); + + if (recipientp->hasRelativeMTask(relativep)) { + // An edge already exists between recipient and relative of donor. + // Mark it in need of a rescore + if (sbp) { + if (sbp->contains(edgep)) sbp->remove(edgep); + MTaskEdge* const existMTaskEdgep = static_cast( + recipientp->findConnectingEdgep(GraphWay::FORWARD, relativep)); +#if VL_DEBUG + UASSERT(existMTaskEdgep, "findConnectingEdge didn't find edge"); +#endif + if (sbp->contains(existMTaskEdgep)) sbp->hintScoreChanged(existMTaskEdgep); + } + // Can nuke the edge now + VL_DO_DANGLING(edgep->unlinkDelete(), edgep); + } else { + // No existing edge between recipient and relative of donor. + // Redirect the edge from donor<->relative to recipient<->relative. 
+ donorp->removeRelativeEdge(edgep); + edgep->relinkFromp(recipientp); + recipientp->addRelativeMTask(relativep); + recipientp->addRelativeEdge(edgep); + relativep->addRelativeEdge(edgep); + if (sbp) { + if (!sbp->contains(edgep)) { + sbp->add(edgep); } else { - tedgep->relinkFromp(recipientp); - relativep->removeRelative(GraphWay::REVERSE, donorp); - relativep->addRelative(GraphWay::REVERSE, recipientp); - recipientp->addRelative(GraphWay::FORWARD, relativep); - } - if (sbp) { - if (tedgep->removedFromSb()) { - tedgep->clearRemovedFromSb(); - sbp->addElem(tedgep); - } else { - sbp->hintScoreChanged(tedgep); - } + sbp->hintScoreChanged(edgep); } } } } + + // Process incoming edges + MTaskEdge* inNextp = static_cast(donorp->inBeginp()); + while (inNextp) { + MTaskEdge* const edgep = inNextp; + LogicMTask* const relativep = inNextp->fromMTaskp(); + inNextp = static_cast(inNextp->inNextp()); + + relativep->removeRelativeMTask(donorp); + relativep->removeRelativeEdge(edgep); + + if (relativep->hasRelativeMTask(recipientp)) { + // An edge already exists between recipient and relative of donor. + // Mark it in need of a rescore + if (sbp) { + if (sbp->contains(edgep)) sbp->remove(edgep); + MTaskEdge* const existMTaskEdgep = static_cast( + recipientp->findConnectingEdgep(GraphWay::REVERSE, relativep)); +#if VL_DEBUG + UASSERT(existMTaskEdgep, "findConnectingEdge didn't find edge"); +#endif + if (sbp->contains(existMTaskEdgep)) sbp->hintScoreChanged(existMTaskEdgep); + } + VL_DO_DANGLING(edgep->unlinkDelete(), edgep); + } else { + // No existing edge between recipient and relative of donor. + // Redirect the edge from donor<->relative to recipient<->relative. 
+ donorp->removeRelativeEdge(edgep); + edgep->relinkTop(recipientp); + relativep->addRelativeMTask(recipientp); + relativep->addRelativeEdge(edgep); + recipientp->addRelativeEdge(edgep); + if (sbp) { + if (!sbp->contains(edgep)) { + sbp->add(edgep); + } else { + sbp->hintScoreChanged(edgep); + } + } + } + } + + // Remove donorp from the graph + VL_DO_DANGLING(donorp->unlinkDelete(graphp), donorp); } //###################################################################### @@ -1061,14 +1237,6 @@ static void partRedirectEdgesFrom(LogicMTask* recipientp, LogicMTask* donorp, class PartContraction final { private: // TYPES - - // TODO: might get a little more speed by making this a - // std::unordered_set and defining hash and equal_to functors for the - // SiblingMC: - using SibSet = std::set; - using SibpSet = std::unordered_set; - using MTask2Sibs = std::unordered_map; - // New CP information for mtaskp reflecting an upcoming merge struct NewCp { uint32_t cp; @@ -1082,17 +1250,17 @@ private: uint32_t m_scoreLimitBeforeRescore = 0xffffffff; // Next score rescore at unsigned m_mergesSinceRescore = 0; // Merges since last rescore const bool m_slowAsserts; // Take extra time to validate algorithm - V3Scoreboard m_sb; // Scoreboard - SibSet m_pairs; // Storage for each SiblingMC - MTask2Sibs m_mtask2sibs; // SiblingMC set for each mtask + MergeCandidateScoreboard m_sb; // Scoreboard + + PartPropagateCp m_forwardPropagator{m_slowAsserts}; // Forward propagator + PartPropagateCp m_reversePropagator{m_slowAsserts}; // Reverse propagator public: // CONSTRUCTORS PartContraction(V3Graph* mtasksp, uint32_t scoreLimit, bool slowAsserts) : m_mtasksp{mtasksp} , m_scoreLimit{scoreLimit} - , m_slowAsserts{slowAsserts} - , m_sb{&mergeCandidateScore, slowAsserts} {} + , m_slowAsserts{slowAsserts} {} // METHODS void go() { @@ -1116,17 +1284,18 @@ public: // - Incrementally recompute critical paths near the merged mtask. 
for (V3GraphVertex* itp = m_mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) { + itp->userp(nullptr); // Reset user value. Used by PartPropagateCp. std::unordered_set neighbors; for (V3GraphEdge* edgep = itp->outBeginp(); edgep; edgep = edgep->outNextp()) { - m_sb.addElem(MTaskEdge::cast(edgep)); + m_sb.add(static_cast(edgep)); if (m_slowAsserts) { UASSERT_OBJ(neighbors.find(edgep->top()) == neighbors.end(), itp, "Redundant edge found in input to PartContraction()"); } neighbors.insert(edgep->top()); } - siblingPairFromRelatives(GraphWay::REVERSE, itp, true); - siblingPairFromRelatives(GraphWay::FORWARD, itp, true); + siblingPairFromRelatives(itp); + siblingPairFromRelatives(itp); } doRescore(); // Set initial scores in scoreboard @@ -1134,7 +1303,7 @@ public: while (true) { // This is the best edge to merge, with the lowest // score (shortest local critical path) - MergeCandidate* const mergeCanp = const_cast(m_sb.bestp()); + MergeCandidate* const mergeCanp = m_sb.best(); if (!mergeCanp) { // Scoreboard found no eligible merges. Maybe a rescore // will produce some merge-able pairs? @@ -1149,8 +1318,9 @@ public: UASSERT(!m_sb.needsRescore(mergeCanp), "Need-rescore items should not be returned by bestp"); } - const uint32_t cachedScore = m_sb.cachedScore(mergeCanp); - const uint32_t actualScore = mergeCandidateScore(mergeCanp); + const uint32_t cachedScore = mergeCanp->score(); + mergeCanp->rescore(); + const uint32_t actualScore = mergeCanp->score(); if (actualScore > cachedScore) { // Cached score is out-of-date. @@ -1211,8 +1381,11 @@ public: if (mergeCanp->mergeWouldCreateCycle()) { // Remove this edge from scoreboard so we don't keep // reconsidering it on every loop. 
- m_sb.removeElem(mergeCanp); - mergeCanp->removedFromSb(true); + m_sb.remove(mergeCanp); + if (SiblingMC* const smcp = mergeCanp->toSiblingMC()) { + smcp->bp()->farSibs().erase(smcp); + smcp->ap()->ownSibs().erase(*smcp); // Kills *smcp, so do last + } continue; } @@ -1274,31 +1447,29 @@ private: } void removeSiblingMCsWith(LogicMTask* mtaskp) { - for (SibpSet::iterator it = m_mtask2sibs[mtaskp].begin(); it != m_mtask2sibs[mtaskp].end(); - ++it) { - const SiblingMC* const pairp = *it; - if (!pairp->removedFromSb()) m_sb.removeElem(pairp); - const LogicMTask* const otherp = (pairp->bp() == mtaskp) ? pairp->ap() : pairp->bp(); - size_t erased = m_mtask2sibs[otherp].erase(pairp); - UASSERT_OBJ(erased > 0, otherp, "Expected existing mtask"); - erased = m_pairs.erase(*pairp); - UASSERT_OBJ(erased > 0, mtaskp, "Expected existing mtask"); + for (const SiblingMC& pair : mtaskp->ownSibs()) { + m_sb.remove(const_cast(&pair)); + // Owner is always ap(), remove from the opposite side + pair.bp()->farSibs().erase(&pair); } - const size_t erased = m_mtask2sibs.erase(mtaskp); - UASSERT_OBJ(erased > 0, mtaskp, "Expected existing mtask"); + for (const SiblingMC* const pairp : mtaskp->farSibs()) { + m_sb.remove(const_cast(pairp)); + // Owner is always ap(), remove from the opposite side + pairp->ap()->ownSibs().erase(*pairp); + } + mtaskp->ownSibs().clear(); + mtaskp->farSibs().clear(); } void contract(MergeCandidate* mergeCanp) { LogicMTask* top = nullptr; LogicMTask* fromp = nullptr; MTaskEdge* mergeEdgep = mergeCanp->toMTaskEdge(); - const SiblingMC* mergeSibsp = nullptr; if (mergeEdgep) { top = static_cast(mergeEdgep->top()); fromp = static_cast(mergeEdgep->fromp()); } else { - mergeSibsp = mergeCanp->toSiblingMC(); - UASSERT(mergeSibsp, "Failed to cast mergeCanp to either MTaskEdge or SiblingMC"); + const SiblingMC* mergeSibsp = static_cast(mergeCanp); top = mergeSibsp->ap(); fromp = mergeSibsp->bp(); } @@ -1337,7 +1508,10 @@ private: if (mergeEdgep) { // Remove and free the 
connecting edge. Must do this before // propagating CP's below. - m_sb.removeElem(mergeCanp); + m_sb.remove(mergeCanp); + mergeEdgep->fromMTaskp()->removeRelativeMTask(mergeEdgep->toMTaskp()); + mergeEdgep->fromMTaskp()->removeRelativeEdge(mergeEdgep); + mergeEdgep->toMTaskp()->removeRelativeEdge(mergeEdgep); VL_DO_CLEAR(mergeEdgep->unlinkDelete(), mergeEdgep = nullptr); } @@ -1353,25 +1527,22 @@ private: << (donorNewCpFwd.propagate ? " true " : " false ") << donorNewCpFwd.propagateCp << endl); - PartPropagateCp forwardPropagator(m_mtasksp, GraphWay::FORWARD, m_slowAsserts); - PartPropagateCp reversePropagator(m_mtasksp, GraphWay::REVERSE, m_slowAsserts); - recipientp->setCritPathCost(GraphWay::FORWARD, recipientNewCpFwd.cp); if (recipientNewCpFwd.propagate) { - forwardPropagator.cpHasIncreased(recipientp, recipientNewCpFwd.propagateCp); + m_forwardPropagator.cpHasIncreased(recipientp, recipientNewCpFwd.propagateCp); } recipientp->setCritPathCost(GraphWay::REVERSE, recipientNewCpRev.cp); if (recipientNewCpRev.propagate) { - reversePropagator.cpHasIncreased(recipientp, recipientNewCpRev.propagateCp); + m_reversePropagator.cpHasIncreased(recipientp, recipientNewCpRev.propagateCp); } if (donorNewCpFwd.propagate) { - forwardPropagator.cpHasIncreased(donorp, donorNewCpFwd.propagateCp); + m_forwardPropagator.cpHasIncreased(donorp, donorNewCpFwd.propagateCp); } if (donorNewCpRev.propagate) { - reversePropagator.cpHasIncreased(donorp, donorNewCpRev.propagateCp); + m_reversePropagator.cpHasIncreased(donorp, donorNewCpRev.propagateCp); } - forwardPropagator.go(); - reversePropagator.go(); + m_forwardPropagator.go(); + m_reversePropagator.go(); // Remove all SiblingMCs that include donorp. This Includes the one // we're merging, if we're merging a SiblingMC. @@ -1381,11 +1552,8 @@ private: // to a bounded number. 
removeSiblingMCsWith(recipientp); - // Redirect all edges - partRedirectEdgesFrom(recipientp, donorp, &m_sb); - - // Delete the donorp mtask from the graph - VL_DO_CLEAR(donorp->unlinkDelete(m_mtasksp), donorp = nullptr); + // Redirect all edges, delete donorp + partRedirectEdgesFrom(m_mtasksp, recipientp, donorp, &m_sb); ++m_mergesSinceRescore; @@ -1398,21 +1566,21 @@ private: // - prereqs of recipientp's postreqs // - postreqs of recipientp's prereqs // Note that this depends on the updated critical paths (above). - siblingPairFromRelatives(GraphWay::REVERSE, recipientp, true); - siblingPairFromRelatives(GraphWay::FORWARD, recipientp, true); + siblingPairFromRelatives(recipientp); + siblingPairFromRelatives(recipientp); unsigned edges = 0; for (V3GraphEdge* edgep = recipientp->outBeginp(); edgep; edgep = edgep->outNextp()) { LogicMTask* const postreqp = static_cast(edgep->top()); - siblingPairFromRelatives(GraphWay::REVERSE, postreqp, false); + siblingPairFromRelatives(postreqp); ++edges; - if (edges > PART_SIBLING_EDGE_LIMIT) break; + if (edges >= PART_SIBLING_EDGE_LIMIT) break; } edges = 0; for (V3GraphEdge* edgep = recipientp->inBeginp(); edgep; edgep = edgep->inNextp()) { LogicMTask* const prereqp = static_cast(edgep->fromp()); - siblingPairFromRelatives(GraphWay::FORWARD, prereqp, false); + siblingPairFromRelatives(prereqp); ++edges; - if (edges > PART_SIBLING_EDGE_LIMIT) break; + if (edges >= PART_SIBLING_EDGE_LIMIT) break; } } @@ -1429,111 +1597,86 @@ private: m_scoreLimitBeforeRescore = 0xffffffff; } - static uint32_t mergeCandidateScore(const MergeCandidate* pairp) { - if (const MTaskEdge* const edgep = pairp->toMTaskEdge()) { - // The '1 +' favors merging a SiblingMC over an otherwise- - // equal-scoring MTaskEdge. The comment on selfTest() talks - // about why. 
- return 1 + edgeScore(edgep); - } else { - return siblingScore(pairp->toSiblingMC()); - } - v3fatalSrc("Failed to cast pairp to either MTaskEdge or SiblingMC in mergeCandidateScore"); - return 0; - } - - VL_ATTR_NOINLINE - static uint32_t siblingScore(const SiblingMC* sibsp) { - const LogicMTask* const ap = sibsp->ap(); - const LogicMTask* const bp = sibsp->bp(); - const uint32_t mergedCpCostFwd - = std::max(ap->critPathCost(GraphWay::FORWARD), bp->critPathCost(GraphWay::FORWARD)); - const uint32_t mergedCpCostRev - = std::max(ap->critPathCost(GraphWay::REVERSE), bp->critPathCost(GraphWay::REVERSE)); - return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost()); - } - - VL_ATTR_NOINLINE - static uint32_t edgeScore(const V3GraphEdge* edgep) { - // Score this edge. Lower is better. The score is the new local CP - // length if we merge these mtasks. ("Local" means the longest - // critical path running through the merged node.) - const LogicMTask* const top = static_cast(edgep->top()); - const LogicMTask* const fromp = static_cast(edgep->fromp()); - const uint32_t mergedCpCostFwd - = std::max(fromp->critPathCost(GraphWay::FORWARD), - top->critPathCostWithout(GraphWay::FORWARD, edgep)); - const uint32_t mergedCpCostRev - = std::max(fromp->critPathCostWithout(GraphWay::REVERSE, edgep), - top->critPathCost(GraphWay::REVERSE)); - return mergedCpCostRev + mergedCpCostFwd - + LogicMTask::stepCost(fromp->cost() + top->cost()); - } - void makeSiblingMC(LogicMTask* ap, LogicMTask* bp) { - const SiblingMC newSibs(ap, bp); - const std::pair insertResult = m_pairs.insert(newSibs); - if (insertResult.second) { - const SiblingMC* const newSibsp = &(*insertResult.first); - m_mtask2sibs[ap].insert(newSibsp); - m_mtask2sibs[bp].insert(newSibsp); - m_sb.addElem(newSibsp); + if (ap->id() < bp->id()) std::swap(ap, bp); + // The higher id vertex owns the storage + const auto emplaceResult = ap->ownSibs().emplace(ap, bp); + if (emplaceResult.second) { + 
SiblingMC* const newSibsp = const_cast(&(*emplaceResult.first)); + bp->farSibs().insert(newSibsp); + m_sb.add(newSibsp); } else if (m_slowAsserts) { // It's fine if we already have this SiblingMC, we may have // created it earlier. Just confirm that we have associated data. - UASSERT_OBJ(m_mtask2sibs.find(ap) != m_mtask2sibs.end(), ap, "Sibling not found"); - UASSERT_OBJ(m_mtask2sibs.find(bp) != m_mtask2sibs.end(), bp, "Sibling not found"); bool found = false; - for (SibpSet::iterator it = m_mtask2sibs[ap].begin(); it != m_mtask2sibs[ap].end(); - ++it) { - const SiblingMC* const sibsp = *it; - UASSERT_OBJ(!(!sibsp->removedFromSb() && !m_sb.contains(sibsp)), ap, - "One sibling must be the one we collided with"); - if ((sibsp->ap() == ap && sibsp->bp() == bp) - || (sibsp->bp() == ap && sibsp->ap() == bp)) - found = true; + for (const SiblingMC& sibs : ap->ownSibs()) { + UASSERT_OBJ(sibs.ap() == ap, ap, "Inconsistent SiblingMC"); + UASSERT_OBJ(m_sb.contains(&sibs), ap, "Must be on the scoreboard"); + if (sibs.bp() == bp) found = true; } UASSERT_OBJ(found, ap, "Sibling not found"); } } - void siblingPairFromRelatives(GraphWay way, V3GraphVertex* mtaskp, bool exhaustive) { - std::vector shortestPrereqs; + template + VL_ATTR_NOINLINE void siblingPairFromRelatives(V3GraphVertex* mtaskp) { + constexpr GraphWay way{Way}; + // Need at least 2 edges + if (!mtaskp->beginp(way) || !mtaskp->beginp(way)->nextp(way)) return; - for (V3GraphEdge* edgep = mtaskp->beginp(way); edgep; edgep = edgep->nextp(way)) { - LogicMTask* const prereqp = static_cast(edgep->furtherp(way)); - shortestPrereqs.push_back(prereqp); - // Prevent nodes with huge numbers of edges from massively - // slowing down the partitioner: - if (shortestPrereqs.size() > PART_SIBLING_EDGE_LIMIT) break; + std::array neighbours; + + // This is a hot method, so we want so sort as efficiently as possible. 
We pre-load + // all data (critical path cost and id) required for determining ordering into an aligned + // structure. There is not enough space next to these to keep a whole pointer within 16 + // bytes, so we store an index into the neighbours buffer instead. We can then compare + // and swap these sorting records very efficiently. With this the standard library sorting + // functions are efficient enough and using more optimized methods (e.g.: sorting networks) + // has no measurable benefit. + struct alignas(16) SortingRecord { + uint64_t m_id; + uint32_t m_cp; + uint8_t m_idx; + static_assert(PART_SIBLING_EDGE_LIMIT <= std::numeric_limits::max(), + "m_idx must fit all indices into 'neighbours'"); + bool operator<(const SortingRecord& that) const { + return m_cp < that.m_cp || (m_cp == that.m_cp && m_id < that.m_id); + } + }; + static_assert(sizeof(SortingRecord) <= 16, "How could this be padded to more than 16?"); + + std::array sortRecs; + size_t n = 0; + + // Populate the buffers + for (V3GraphEdge *edgep = mtaskp->beginp(way), *nextp; edgep; edgep = nextp) { + nextp = edgep->nextp(way); // Fetch next first as likely cache miss + LogicMTask* const otherp = static_cast(edgep->furtherp(way)); + neighbours[n] = otherp; + sortRecs[n].m_id = otherp->id(); + sortRecs[n].m_cp = otherp->critPathCost(way) + otherp->cost(); + sortRecs[n].m_idx = n; + ++n; + // Prevent nodes with huge numbers of edges from massively slowing down us down + if (n >= PART_SIBLING_EDGE_LIMIT) break; } - if (shortestPrereqs.size() <= 1) return; - - const auto cmp = [way](const LogicMTask* ap, const LogicMTask* bp) { - const uint32_t aCp = ap->critPathCost(way) + ap->cost(); - const uint32_t bCp = bp->critPathCost(way) + bp->cost(); - if (aCp != bCp) return aCp < bCp; - return ap->id() < bp->id(); - }; - - // Don't make all possible pairs of prereqs when not requested (non-exhaustive). + // Don't make all possible pairs of siblings when not requested (non-exhaustive). 
// Just make a few pairs. constexpr size_t MAX_NONEXHAUSTIVE_PAIRS = 3; - size_t end; // End index of pairs to add to candidates (exclusive) - - if (exhaustive || (shortestPrereqs.size() <= 2 * MAX_NONEXHAUSTIVE_PAIRS)) { - end = shortestPrereqs.size() & ~static_cast(1); // Round down to even - std::sort(shortestPrereqs.begin(), shortestPrereqs.end(), cmp); + if (Exhaustive || n <= 2 * MAX_NONEXHAUSTIVE_PAIRS) { + const size_t end = n & ~static_cast(1); // Round down to even, (we want pairs) + std::sort(sortRecs.begin(), sortRecs.begin() + n); + for (size_t i = 0; i < end; i += 2) { + makeSiblingMC(neighbours[sortRecs[i].m_idx], neighbours[sortRecs[i + 1].m_idx]); + } } else { - end = 2 * MAX_NONEXHAUSTIVE_PAIRS; - std::partial_sort(shortestPrereqs.begin(), shortestPrereqs.begin() + end, - shortestPrereqs.end(), cmp); - } - - for (size_t i = 0; i < end; i += 2) { - makeSiblingMC(shortestPrereqs[i], shortestPrereqs[i + 1]); + constexpr size_t end = 2 * MAX_NONEXHAUSTIVE_PAIRS; + std::partial_sort(sortRecs.begin(), sortRecs.begin() + end, sortRecs.begin() + n); + for (size_t i = 0; i < end; i += 2) { + makeSiblingMC(neighbours[sortRecs[i].m_idx], neighbours[sortRecs[i + 1].m_idx]); + } } } @@ -1850,17 +1993,15 @@ private: } // Move all vertices from donorp to mergedp mergedp->moveAllVerticesFrom(donorp); - // Redirect edges from donorp to recipientp - partRedirectEdgesFrom(mergedp, donorp, nullptr); - // Remove donorp from the graph - VL_DO_DANGLING(donorp->unlinkDelete(m_mtasksp), donorp); + // Redirect edges from donorp to recipientp, delete donorp + partRedirectEdgesFrom(m_mtasksp, mergedp, donorp, nullptr); ++m_mergesDone; } if (lastMergedp) { UASSERT_OBJ(lastMergedp->rank() < mergedp->rank(), mergedp, "Merging must be on lower rank"); - if (!lastMergedp->hasRelative(GraphWay::FORWARD, mergedp)) { + if (!lastMergedp->hasRelativeMTask(mergedp)) { new MTaskEdge(m_mtasksp, lastMergedp, mergedp, 1); } } @@ -2506,9 +2647,8 @@ void V3Partition::setupMTaskDeps(V3Graph* 
mtasksp, const Vx2MTaskMap* vx2mtaskp) UASSERT_OBJ(otherMTaskp != mtaskp, mtaskp, "Would create a cycle edge"); // Don't create redundant edges. - if (mtaskp->hasRelative(GraphWay::FORWARD, otherMTaskp)) { // - continue; - } + if (mtaskp->hasRelativeMTask(otherMTaskp)) continue; + new MTaskEdge(mtasksp, mtaskp, otherMTaskp, 1); } } diff --git a/src/V3Scoreboard.cpp b/src/V3Scoreboard.cpp index 78d466596..d21422a81 100644 --- a/src/V3Scoreboard.cpp +++ b/src/V3Scoreboard.cpp @@ -19,26 +19,42 @@ #include "V3Scoreboard.h" -class ScoreboardTestElem final { +class ScoreboardTestElem; + +struct Key { + // Node: Structure layout chosen to minimize padding in PairingHeao<*>::Node + uint64_t m_id; // Unique ID part of edge score + uint32_t m_score; // Score part of ID + bool operator<(const Key& other) const { + // First by Score then by ID, but notice that we want minimums using a max-heap, so reverse + return m_score > other.m_score || (m_score == other.m_score && m_id > other.m_id); + } +}; + +using Scoreboard = V3Scoreboard; + +class ScoreboardTestElem final : public Scoreboard::Node { public: - // MEMBERS - uint32_t m_score; - uint32_t m_id; + uint32_t m_newScore; // CONSTRUCTORS explicit ScoreboardTestElem(uint32_t score) - : m_score{score} { + : m_newScore{score} { + m_key.m_score = m_newScore; static uint32_t s_serial = 0; - m_id = ++s_serial; + m_key.m_id = ++s_serial; } ScoreboardTestElem() = default; - // METHODS - static uint32_t scoreFn(const ScoreboardTestElem* elp) { return elp->m_score; } - bool operator<(const ScoreboardTestElem& other) const { return m_id < other.m_id; } + uint64_t id() const { return m_key.m_id; } + void rescore() { m_key.m_score = m_newScore; } + uint32_t score() const { return m_key.m_score; } + static ScoreboardTestElem* heapNodeToElem(Scoreboard::Node* nodep) { + return static_cast(nodep); + } }; void V3ScoreboardBase::selfTest() { - V3Scoreboard sb(ScoreboardTestElem::scoreFn, true); + Scoreboard sb; UASSERT(!sb.needsRescore(), 
"SelfTest: Empty sb should not need rescore."); @@ -46,13 +62,13 @@ void V3ScoreboardBase::selfTest() { ScoreboardTestElem e2(20); ScoreboardTestElem e3(30); - sb.addElem(&e1); - sb.addElem(&e2); - sb.addElem(&e3); + sb.add(&e1); + sb.add(&e2); + sb.add(&e3); UASSERT(sb.needsRescore(), "SelfTest: Newly filled sb should need a rescore."); UASSERT(sb.needsRescore(&e1), "SelfTest: Individual newly-added element should need rescore"); - UASSERT(nullptr == sb.bestp(), + UASSERT(nullptr == sb.best(), "SelfTest: Newly filled sb should have nothing eligible for Bestp()"); sb.rescore(); @@ -60,24 +76,22 @@ void V3ScoreboardBase::selfTest() { UASSERT(!sb.needsRescore(), "SelfTest: Newly rescored sb should not need rescore"); UASSERT(!sb.needsRescore(&e1), "SelfTest: Newly rescored sb should not need an element rescored"); - UASSERT(e2.m_score == sb.cachedScore(&e2), - "SelfTest: Cached score should match current score"); - UASSERT(&e1 == sb.bestp(), "SelfTest: Should return element with lowest (best) score"); + UASSERT(&e1 == sb.best(), "SelfTest: Should return element with lowest (best) score"); // Change one element's score sb.hintScoreChanged(&e2); - e2.m_score = 21; + e2.m_newScore = 21; UASSERT(sb.needsRescore(&e2), "SelfTest: Should need rescore on elem after hintScoreChanged"); // Remove an element UASSERT(sb.contains(&e1), "SelfTest: e1 should be there"); - sb.removeElem(&e1); + sb.remove(&e1); UASSERT(!sb.contains(&e1), "SelfTest: e1 should be gone"); UASSERT(sb.contains(&e2), "SelfTest: e2 should be there, despite needing rescore"); // Now e3 should be our best-scoring element, even though // e2 has a better score, since e2 is pending rescore. 
- UASSERT(&e3 == sb.bestp(), "SelfTest: Expect e3 as best element with known score."); + UASSERT(&e3 == sb.best(), "SelfTest: Expect e3 as best element with known score."); sb.rescore(); - UASSERT(&e2 == sb.bestp(), "SelfTest: Expect e2 as best element again after Rescore"); + UASSERT(&e2 == sb.best(), "SelfTest: Expect e2 as best element again after Rescore"); } diff --git a/src/V3Scoreboard.h b/src/V3Scoreboard.h index dc5fce0b0..4bf915431 100644 --- a/src/V3Scoreboard.h +++ b/src/V3Scoreboard.h @@ -1,13 +1,6 @@ // -*- mode: C++; c-file-style: "cc-mode" -*- //************************************************************************* -// DESCRIPTION: Verilator: Scoreboards for thread partitioner -// -// Provides scoreboard classes: -// -// * SortByValueMap -// * V3Scoreboard -// -// See details below +// DESCRIPTION: Verilator: Scoreboard for mtask coarsening // // Code available from: https://verilator.org // @@ -28,248 +21,122 @@ #include "verilatedos.h" #include "V3Error.h" +#include "V3PairingHeap.h" -#include -#include -#include -#include +//=============================================================================================== +// V3Scoreboard is essentially a heap that can be hinted that some elements have changed keys, at +// which points those elements will be deferred as 'unknown' until the next 'rescore' call. We +// largely reuse the implementation of the slightly more generic PairingHeap, but we do rely on the +// internal structure of the PairingHeap so changing that class requires changing this. +// +// For efficiency, the elements themselves must be the heap nodes, by deriving them from +// V3Scoreboard::Node. This also means a single element can only be associated with +// a single scoreboard. -// ###################################################################### -// SortByValueMap - -// A generic key-value map, except iteration is in *value* sorted order. Values need not be unique. 
-// Uses T_KeyCompare to break ties in the sort when values collide. Note: Only const iteration is -// possible, as updating mapped values via iterators is not safe. - -template > -class SortByValueMap final { - // Current implementation is a std::set of key/value pairs, plus a std_unordered_map from keys - // to iterators into the set. This keeps most operations fairly cheap and also has the benefit - // of being able to re-use the std::set iterators. - - // TYPES - - using Pair = std::pair; - - struct PairCmp final { - bool operator()(const Pair& a, const Pair& b) const { - // First compare values - if (a.second != b.second) return a.second < b.second; - // Then compare keys - return T_KeyCompare{}(a.first, b.first); - } - }; - - using PairSet = std::set; - -public: - using const_iterator = typename PairSet::const_iterator; - using const_reverse_iterator = typename PairSet::const_reverse_iterator; - -private: - // MEMBERS - PairSet m_pairs; // The contents of the map, stored directly as key-value pairs - std::unordered_map m_kiMap; // Key to iterator map - - VL_UNCOPYABLE(SortByValueMap); - -public: - // CONSTRUCTORS - SortByValueMap() = default; - - // Only const iteration is possible - const_iterator begin() const { return m_pairs.begin(); } - const_iterator end() const { return m_pairs.end(); } - const_iterator cbegin() const { m_pairs.cbegin(); } - const_iterator cend() const { return m_pairs.cend(); } - const_reverse_iterator rbegin() const { return m_pairs.rbegin(); } - const_reverse_iterator rend() const { return m_pairs.rend(); } - const_reverse_iterator crbegin() const { return m_pairs.crbegin(); } - const_reverse_iterator crend() const { return m_pairs.crend(); } - - const_iterator find(const T_Key& key) const { - const auto kiIt = m_kiMap.find(key); - if (kiIt == m_kiMap.end()) return cend(); - return kiIt->second; - } - size_t erase(const T_Key& key) { - const auto kiIt = m_kiMap.find(key); - if (kiIt == m_kiMap.end()) return 0; - 
m_pairs.erase(kiIt->second); - m_kiMap.erase(kiIt); - return 1; - } - void erase(const_iterator it) { - m_kiMap.erase(it->first); - m_pairs.erase(it); - } - void erase(const_reverse_iterator rit) { - m_kiMap.erase(rit->first); - m_pairs.erase(std::next(rit).base()); - } - bool has(const T_Key& key) const { return m_kiMap.count(key); } - bool empty() const { return m_pairs.empty(); } - // Returns const reference. - const T_Value& at(const T_Key& key) const { return m_kiMap.at(key)->second; } - // Note this returns const_iterator - template - std::pair emplace(const T_Key& key, Args&&... args) { - const auto kiEmp = m_kiMap.emplace(key, end()); - if (kiEmp.second) { - const auto result = m_pairs.emplace(key, std::forward(args)...); -#if VL_DEBUG - UASSERT(result.second, "Should not be in set yet"); -#endif - kiEmp.first->second = result.first; - return result; - } - return {kiEmp.first->second, false}; - } - // Invalidates iterators - void update(const_iterator it, T_Value value) { - const auto kiIt = m_kiMap.find(it->first); - m_pairs.erase(it); - kiIt->second = m_pairs.emplace(kiIt->first, value).first; - } -}; - -//###################################################################### - -/// V3Scoreboard takes a set of Elem*'s, each having some score. -/// Scores are assigned by a user-supplied scoring function. -/// -/// At any time, the V3Scoreboard can return the elem with the "best" score -/// among those elements whose scores are known. -/// -/// The best score is the _lowest_ score. This makes sense in contexts -/// where scores represent costs. -/// -/// The Scoreboard supports mutating element scores efficiently. The client -/// must hint to the V3Scoreboard when an element's score may have -/// changed. When it receives this hint, the V3Scoreboard will move the -/// element into the set of elements whose scores are unknown.
Later the -/// client can tell V3Scoreboard to re-sort the list, which it does -/// incrementally, by re-scoring all elements whose scores are unknown, and -/// then moving these back into the score-sorted map. This is efficient -/// when the subset of elements whose scores change is much smaller than -/// the full set size. - -template > +template class V3Scoreboard final { -private: // TYPES - class CmpElems final { - public: - bool operator()(const T_Elem* const& ap, const T_Elem* const& bp) const { - const T_ElemCompare cmp; - return cmp.operator()(*ap, *bp); - } - }; - using SortedMap = SortByValueMap; - using UserScoreFnp = T_Score (*)(const T_Elem*); + using Heap = PairingHeap; + +public: + using Node = typename Heap::Node; + +private: + using Link = typename Heap::Link; + + // Note: T_Elem is incomplete here, so we cannot assert 'std::is_base_of::value' // MEMBERS - // Below uses set<> not an unordered_set<>. unordered_set::clear() and - // construction results in a 491KB clear operation to zero all the - // buckets. Since the set size is generally small, and we iterate the - // set members, set is better performant. - std::set m_unknown; // Elements with unknown scores - SortedMap m_sorted; // Set of elements with known scores - const UserScoreFnp m_scoreFnp; // Scoring function - const bool m_slowAsserts; // Do some asserts that require extra lookups + Heap m_known; // The heap of entries with known scores + Link m_unknown; // List of entries with unknown scores public: // CONSTRUCTORS - explicit V3Scoreboard(UserScoreFnp scoreFnp, bool slowAsserts) - : m_scoreFnp{scoreFnp} - , m_slowAsserts{slowAsserts} {} + explicit V3Scoreboard() = default; ~V3Scoreboard() = default; - // METHODS - - // Add an element to the scoreboard. - // Element begins in needs-rescore state; it won't be returned by - // bestp() until after the next rescore(). 
- void addElem(const T_Elem* elp) { - if (m_slowAsserts) { - UASSERT(!contains(elp), "Adding element to scoreboard that was already in scoreboard"); - } - m_unknown.insert(elp); - } - - // Remove elp from scoreboard. - void removeElem(const T_Elem* elp) { - if (0 == m_sorted.erase(elp)) { - UASSERT(m_unknown.erase(elp), - "Could not find requested elem to remove from scoreboard"); - } - } - - // Returns true if elp is present in the scoreboard, false otherwise. - // - // Note: every other V3Scoreboard routine that takes an T_Elem* has - // undefined behavior if the element is not in the scoreboard. - bool contains(const T_Elem* elp) const { - if (m_unknown.find(elp) != m_unknown.end()) return true; - return (m_sorted.find(elp) != m_sorted.end()); - } - - // Get the best element, with the lowest score (lower is better), among - // elements whose scores are known. Returns nullptr if no elements with - // known scores exist. - // - // Note: This does not automatically rescore. Client must call - // rescore() periodically to ensure all elems in the scoreboard are - // reflected in the result of bestp(). Otherwise, bestp() only - // considers elements that aren't pending rescore. - const T_Elem* bestp() { - const auto it = m_sorted.begin(); - if (VL_UNLIKELY(it == m_sorted.end())) return nullptr; - return it->first; - } - - // Tell the scoreboard that this element's score may have changed. - // - // At the time of this call, the element's score becomes "unknown" - // to the V3Scoreboard. Unknown elements won't be returned by bestp(). - // The element's score will remain unknown until the next rescore(). - // - // The client MUST call this for each element whose score has changed. - // - // The client MAY call this for elements whose score has not changed. - // Doing so incurs some compute cost (to re-sort the element back to - // its original location) and still makes it ineligible to be returned - // by bestp() until the next rescore(). 
- void hintScoreChanged(const T_Elem* elp) { - m_unknown.insert(elp); - m_sorted.erase(elp); - } - - // True if any element's score is unknown to V3Scoreboard. - bool needsRescore() { return !m_unknown.empty(); } - // False if elp's score is known to V3Scoreboard, - // else true if elp's score is unknown until the next rescore(). - bool needsRescore(const T_Elem* elp) { return m_unknown.count(elp); } - // Retrieve the last known score for an element. - T_Score cachedScore(const T_Elem* elp) { return m_sorted.at(elp); } - // For each element whose score is unknown to V3Scoreboard, - // call the client's scoring function to get a new score, - // and sort all elements by their current score. - void rescore() { - for (const T_Elem* elp : m_unknown) { - VL_ATTR_UNUSED const bool exists = !m_sorted.emplace(elp, m_scoreFnp(elp)).second; -#if VL_DEBUG - UASSERT(!exists, "Should not be in both m_unknown and m_sorted"); -#endif - } - m_unknown.clear(); - } - private: VL_UNCOPYABLE(V3Scoreboard); + + // METHODSs + void addUnknown(T_Elem* nodep) { + // Just prepend it to the list of unknown entries + nodep->m_next.link(m_unknown.unlink()); + m_unknown.linkNonNull(nodep); + // We mark nodes on the unknown list by making their child pointer point to themselves + nodep->m_kids.m_ptr = nodep; + } + +public: + // Returns true if the element is present in the scoreboard, false otherwise. Every other + // method that takes a T_Elem* (except for 'add') has undefined behavior if the element is not + // in this scoreboard. Furthermore, this method is only valid if the element can only possibly + // be in this scoreboard. That is: if the element might be in another scoreboard, the behaviour + // of this method is undefined. + static bool contains(const T_Elem* nodep) { return nodep->m_ownerpp; } + + // Add an element to the scoreboard. This will not be returned before the next 'rescore' call. 
+ void add(T_Elem* nodep) { +#if VL_DEBUG + UASSERT(!contains(nodep), "Adding element to scoreboard that was already in a scoreboard"); +#endif + addUnknown(nodep); + } + + // Remove element from scoreboard. + void remove(T_Elem* nodep) { + if (nodep->m_kids.m_ptr == nodep) { + // Node is on the unknown list, replace with next + nodep->replaceWith(nodep->m_next.unlink()); + return; + } + // Node is in the known heap, remove it + m_known.remove(nodep); + } + + // Get the known element with the highest score (as we are using a max-heap), or nullptr if + // there are no elements with known entries. This does not automatically 'rescore'. The client + // must call 'rescore' appropriately to ensure all elements in the scoreboard are reflected in + // the result of this method. + T_Elem* best() const { return T_Elem::heapNodeToElem(m_known.max()); } + + // Tell the scoreboard that this element's score may have changed. At the time of this call, + // the element's score becomes 'unknown' to the scoreboard. Unknown elements will not be + // returned by 'best until the next call to 'rescore'. + void hintScoreChanged(T_Elem* nodep) { + // If it's already in the unknown list, then nothing to do + if (nodep->m_kids.m_ptr == nodep) return; + // Otherwise it was in the heap, remove it + m_known.remove(nodep); + // Prepend it to the unknown list + addUnknown(nodep); + } + + // True if we have elements with unknown score + bool needsRescore() const { return m_unknown; } + + // True if the element's score is unknown, false otherwise. 
+ static bool needsRescore(const T_Elem* nodep) { return nodep->m_kids.m_ptr == nodep; } + + // For each element whose score is unknown, recompute the score and add to the known heap + void rescore() { + // Rescore and insert all unknown elements + for (Node *nodep = m_unknown.unlink(), *nextp; nodep; nodep = nextp) { + // Pick up next + nextp = nodep->m_next.ptr(); + // Reset pointers + nodep->m_next.m_ptr = nullptr; + nodep->m_kids.m_ptr = nullptr; + nodep->m_ownerpp = nullptr; + // Re-compute the score of the element + T_Elem::heapNodeToElem(nodep)->rescore(); + // re-insert into the heap + m_known.insert(nodep); + } + } }; -//###################################################################### +// ###################################################################### namespace V3ScoreboardBase { void selfTest(); From 4d81eb021d57843effe63c6aef6e4b99ddf902f7 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Fri, 19 Aug 2022 18:03:45 +0100 Subject: [PATCH 102/119] Revert "Improve performance of MTask coarsening" This reverts commit 83475008d94a493112e5ae6bf3b9b2af5a571a21. --- Changes | 1 - src/V3Graph.h | 2 +- src/V3PairingHeap.h | 293 ------------ src/V3Partition.cpp | 1026 ++++++++++++++++++------------------------ src/V3Scoreboard.cpp | 56 +-- src/V3Scoreboard.h | 325 +++++++++---- 6 files changed, 694 insertions(+), 1009 deletions(-) delete mode 100644 src/V3PairingHeap.h diff --git a/Changes b/Changes index 1fa981e3c..65ee0fd39 100644 --- a/Changes +++ b/Changes @@ -20,7 +20,6 @@ Verilator 4.225 devel * Fix incorrect tristate logic (#3399) [shareefj, Vighnesh Iyer] * Fix segfault exporting non-existant package (#3535). * Fix case statement comparing string literal (#3544). [Gustav Svensk] -* Improve Verilation speed with --threads on large designs. 
[Geza Lore] Verilator 4.224 2022-06-19 diff --git a/src/V3Graph.h b/src/V3Graph.h index a18fb5dfc..da096ab2f 100644 --- a/src/V3Graph.h +++ b/src/V3Graph.h @@ -67,7 +67,7 @@ public: return names[m_e]; } // METHODS unique to this class - constexpr GraphWay invert() const { return GraphWay{m_e ^ 1}; } + constexpr GraphWay invert() const { return m_e == FORWARD ? REVERSE : FORWARD; } constexpr bool forward() const { return m_e == FORWARD; } constexpr bool reverse() const { return m_e != FORWARD; } }; diff --git a/src/V3PairingHeap.h b/src/V3PairingHeap.h deleted file mode 100644 index c1f5f5342..000000000 --- a/src/V3PairingHeap.h +++ /dev/null @@ -1,293 +0,0 @@ -// -*- mode: C++; c-file-style: "cc-mode" -*- -//************************************************************************* -// DESCRIPTION: Verilator: Pairing Heap data structure -// -// Code available from: https://verilator.org -// -//************************************************************************* -// -// Copyright 2003-2022 by Wilson Snyder. This program is free software; you -// can redistribute it and/or modify it under the terms of either the GNU -// Lesser General Public License Version 3 or the Perl Artistic License -// Version 2.0. -// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 -// -//************************************************************************* - -#ifndef VERILATOR_V3PAIRINGHEAP_H_ -#define VERILATOR_V3PAIRINGHEAP_H_ - -#include "config_build.h" -#include "verilatedos.h" - -#include "V3Error.h" - -//============================================================================= -// Pairing heap (max-heap) with increase key and delete. 
-// -// While this is written as a generic data structure, it's interface and -// implementation is finely tuned for it's use by V3Parm_tition, and is critical -// to verilaton performance, so be very careful changing anything or adding any -// new operations that would impact either memory usage, or performance of the -// existing operations. This data structure is fully deterministic, meaning -// the order in which elements with equal keys are retrieved only depends on -// the order of operations performed on the heap. -//============================================================================= - -template -class PairingHeap final { -public: - struct Node; - - // Just a pointer to a heap Node, but with special accessors to help keep back pointers - // consistent. - struct Link { - Node* m_ptr = nullptr; // The managed pointer - - Link() = default; - VL_UNCOPYABLE(Link); - - // Make the pointer point to the target, and the target's owner pointer to this pointer - VL_ATTR_ALWINLINE void link(Node* targetp) { - m_ptr = targetp; - if (!targetp) return; -#if VL_DEBUG - UASSERT(!targetp->m_ownerpp, "Already linked"); -#endif - targetp->m_ownerpp = &m_ptr; - } - - // Make the pointer point to the target, and the target's owner pointer to this pointer - VL_ATTR_ALWINLINE void linkNonNull(Node* targetp) { - m_ptr = targetp; -#if VL_DEBUG - UASSERT(!targetp->m_ownerpp, "Already linked"); -#endif - targetp->m_ownerpp = &m_ptr; - } - - // Clear the pointer and return it's previous value - VL_ATTR_ALWINLINE Node* unlink() { - Node* const result = m_ptr; -#if VL_DEBUG - if (result) { - UASSERT(m_ptr->m_ownerpp == &m_ptr, "Bad back link"); - // Not strictly necessary to clear this, but helps debugging - m_ptr->m_ownerpp = nullptr; - } -#endif - m_ptr = nullptr; - return result; - } - - // Minimal convenience acessors and operators - VL_ATTR_ALWINLINE Node* ptr() const { return m_ptr; } - VL_ATTR_ALWINLINE operator bool() const { return m_ptr; } - VL_ATTR_ALWINLINE bool 
operator!() const { return !m_ptr; } - VL_ATTR_ALWINLINE Node* operator->() const { return m_ptr; } - VL_ATTR_ALWINLINE Node& operator*() const { return *m_ptr; } - }; - - // A single node in the pairing heap tree - struct Node { - Link m_next; // Next in list of sibling heaps - Link m_kids; // Head of list of child heaps - Node** m_ownerpp = nullptr; // Pointer to the Link pointer pointing to this heap - T_Key m_key; // The key in the heap - - // CONSTRUCTOR - explicit Node() = default; - VL_UNCOPYABLE(Node); - - // METHODS - VL_ATTR_ALWINLINE const T_Key& key() const { return m_key; } - VL_ATTR_ALWINLINE bool operator<(const Node& that) const { return m_key < that.m_key; } - VL_ATTR_ALWINLINE bool operator>(const Node& that) const { return that.m_key < m_key; } - - // Make newp take the place of this in the tree - VL_ATTR_ALWINLINE void replaceWith(Node* newp) { - *m_ownerpp = newp; // The owner pointer needs to point to the new node - if (newp) newp->m_ownerpp = m_ownerpp; // The new node needs to point to its owner - m_ownerpp = nullptr; // This node has no owner anymore - } - - // Make newp take the place of this in the tree - VL_ATTR_ALWINLINE void replaceWithNonNull(Node* newp) { - *m_ownerpp = newp; // The owner pointer needs to point to the new node - newp->m_ownerpp = m_ownerpp; // The new node needs to point to its owner - m_ownerpp = nullptr; // This node has no owner anymore - } - }; - -private: - // MEMBERS - - // The root of the heap. Note: We do not reduce lists during insertion/removal etc, unless we - // absolutely have to. This means the root can become a list. This is ok, we will reduce - // lazily when requesting the minimum element. - mutable Link m_root; - - // CONSTRUCTORS - VL_UNCOPYABLE(PairingHeap); - -public: - explicit PairingHeap() = default; - - // METHODS - bool empty() const { return !m_root; } - - // Insert given node into this heap with given key. 
- void insert(Node* nodep, T_Key key) { - // Update key of node - nodep->m_key = key; - insert(nodep); - } - - // Insert given node into this heap with key already set in the node - void insert(Node* nodep) { -#if VL_DEBUG - UASSERT(!nodep->m_ownerpp && !nodep->m_next && !nodep->m_kids, "Already linked"); -#endif - // Just stick it at the front of the root list - nodep->m_next.link(m_root.unlink()); - m_root.linkNonNull(nodep); - } - - // Remove given node only from the heap it is contained in - void remove(Node* nodep) { - if (!nodep->m_next) { - // If the node does not have siblings, replace it with its children (might be empty). - nodep->replaceWith(nodep->m_kids.unlink()); - } else if (!nodep->m_kids) { - // If it has siblings but no children, replace it with the siblings. - nodep->replaceWithNonNull(nodep->m_next.unlink()); - } else { - // If it has both siblings and children, reduce the children and splice that - // reduced heap in place of this node - Node* const reducedKidsp = reduce(nodep->m_kids.unlink()); - reducedKidsp->m_next.linkNonNull(nodep->m_next.unlink()); - nodep->replaceWithNonNull(reducedKidsp); - } - } - - // Returns the largest element in the heap - Node* max() const { - // Heap might be empty - if (!m_root) return nullptr; - // If the root have siblings reduce them - if (m_root->m_next) m_root.linkNonNull(reduce(m_root.unlink())); - // The root element is the largest - return m_root.ptr(); - } - - // Returns the second-largest element in the heap. - // This is only valid to call if 'max' returned a valid element. 
- Node* secondMax() const { -#if VL_DEBUG - UASSERT(m_root, "'max' would have returned nullptr"); - UASSERT(!m_root->m_next, "'max' would have reduced"); -#endif - // If there are no children, there is no second element - if (!m_root->m_kids) return nullptr; - // If there are multiple children, reduce them - if (m_root->m_kids->m_next) m_root->m_kids.linkNonNull(reduce(m_root->m_kids.unlink())); - // Return the now singular child, which is the second-largest element - return m_root->m_kids.ptr(); - } - - // Increase the key of the given node to the given new value - template - void increaseKey(Node* nodep, T_Update value) { - // Update the key - nodep->m_key.increase(value); - // Increasing the key of the root is easy - if (nodep == m_root.ptr()) return; - // Otherwise we do have a little work to do - if (!nodep->m_kids) { - // If the node has no children, replace it with its siblings (migtht be null) - nodep->replaceWith(nodep->m_next.unlink()); - } else if (!nodep->m_next) { - // If the node has no siblings, replace it with its children - nodep->replaceWithNonNull(nodep->m_kids.unlink()); - } else { - // The node has both children and siblings. 
Splice the first child in the place of the - // node, and extract the rest of the children with the node - Node* const kidsp = nodep->m_kids.unlink(); - nodep->m_kids.link(kidsp->m_next.unlink()); - kidsp->m_next.linkNonNull(nodep->m_next.unlink()); - nodep->replaceWithNonNull(kidsp); - } - // Just stick the increased node a the front of the root list - nodep->m_next.linkNonNull(m_root.unlink()); - m_root.linkNonNull(nodep); - } - -private: - // Meld (merge) two heaps rooted at the given nodes, return the root of the new heap - VL_ATTR_ALWINLINE static Node* merge(Node* ap, Node* bp) { -#if VL_DEBUG - UASSERT(!ap->m_ownerpp && !ap->m_next, "Not root a"); - UASSERT(!bp->m_ownerpp && !bp->m_next, "Not root b"); -#endif - if (*ap > *bp) { // bp goes under ap - bp->m_next.link(ap->m_kids.unlink()); - ap->m_kids.linkNonNull(bp); - return ap; - } else { // ap goes under bp - ap->m_next.link(bp->m_kids.unlink()); - bp->m_kids.linkNonNull(ap); - return bp; - } - } - - // Reduces the list of nodes starting at the given node into a single node that is returned - VL_ATTR_NOINLINE static Node* reduce(Node* nodep) { -#if VL_DEBUG - UASSERT(!nodep->m_ownerpp, "Node is linked"); -#endif - // If there is only one node in the list, then there is nothing to do - if (!nodep->m_next) return nodep; - // The result node - Node* resultp = nullptr; - // Pairwise merge the child nodes - while (nodep) { - // Pop off the first nodes - Node* const ap = nodep; - // If we have an odd number of nodes, prepend the unpaired one onto the result list - if (!nodep->m_next) { - ap->m_next.link(resultp); - resultp = ap; - break; - } - // Pop off the second nodes - Node* const bp = nodep->m_next.unlink(); - // Keep hold of the rest of the list - nodep = bp->m_next.unlink(); - // Merge the current pair - Node* const mergedp = merge(ap, bp); - // Prepend the merged pair to the result list - mergedp->m_next.link(resultp); - resultp = mergedp; - } - // Now merge-reduce the merged pairs - while 
(resultp->m_next) { - // Pop first two results - Node* const ap = resultp; - Node* const bp = resultp->m_next.unlink(); - // Keep hold of the rest of the list - resultp = bp->m_next.unlink(); - // Merge the current pair - Node* const mergedp = merge(ap, bp); - // Prepend the merged pair to the result list - mergedp->m_next.link(resultp); - resultp = mergedp; - } - // Done - return resultp; - } -}; - -// The PairingHeap itself should be a simple pointer and nothing more -static_assert(sizeof(PairingHeap) == sizeof(PairingHeap::Node*), "Should be a pointer"); - -#endif // Guard diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index bf537a65c..5b1474e91 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -22,29 +22,23 @@ #include "V3Config.h" #include "V3EmitCBase.h" #include "V3File.h" +#include "V3GraphAlg.h" #include "V3GraphStream.h" #include "V3InstrCount.h" #include "V3Os.h" -#include "V3PairingHeap.h" #include "V3PartitionGraph.h" #include "V3Scoreboard.h" #include "V3Stats.h" #include "V3UniqueNames.h" #include -#include #include #include -#include #include -#include -class LogicMTask; -class MTaskEdge; class MergeCandidate; -class SiblingMC; -// ###################################################################### +//###################################################################### // Partitioner tunable settings: // // Before describing these settings, a bit of background: @@ -76,14 +70,14 @@ class SiblingMC; // skipping the enumeration of some siblings on a few vertices does not // have a large impact on the result of the partitioner. // -// If your vertices are small, the limit (at 26) approaches a no-op. Hence +// If your vertices are small, the limit (at 25) approaches a no-op. Hence // there's basically no cost to applying this limit even when we don't // expect huge vertices. // // If you don't care about partitioner runtime and you want the most // aggressive partition, set the limit very high. 
If you have huge // vertices, leave this as is. -constexpr unsigned PART_SIBLING_EDGE_LIMIT = 26; +constexpr unsigned PART_SIBLING_EDGE_LIMIT = 25; // PART_STEPPED_COST (defined/undef) // @@ -149,34 +143,10 @@ static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) { #endif } -//============================================================================= -// We keep MTaskEdge graph edges in a PairingHeap, sorted by score and id - -struct EdgeKey { - // Node: Structure layout chosen to minimize padding in PairingHeao<*>::Node - uint64_t m_id; // Unique ID part of edge score - uint32_t m_score; // Score part of ID - void increase(uint32_t score) { -#if VL_DEBUG - UASSERT(score >= m_score, "Must increase"); -#endif - m_score = score; - } - bool operator<(const EdgeKey& other) const { - // First by Score then by ID - return m_score < other.m_score || (m_score == other.m_score && m_id < other.m_id); - } -}; - -using EdgeHeap = PairingHeap; - -//============================================================================= +//###################################################################### // LogicMTask class LogicMTask final : public AbstractLogicMTask { - template - friend class PartPropagateCp; - public: // TYPES using VxList = std::list; @@ -187,6 +157,55 @@ public: } }; + // This adaptor class allows the PartPropagateCp class to be somewhat + // independent of the LogicMTask class + // - PartPropagateCp can thus be declared before LogicMTask + // - PartPropagateCp could be reused with graphs of other node types + // in the future, using another Accessor adaptor. 
+ class CpCostAccessor final { + public: + CpCostAccessor() = default; + ~CpCostAccessor() = default; + // Return cost of this node + uint32_t cost(const V3GraphVertex* vxp) const { + const LogicMTask* const mtaskp = static_cast(vxp); + return mtaskp->stepCost(); + } + // Return stored CP to this node + uint32_t critPathCost(const V3GraphVertex* vxp, GraphWay way) const { + const LogicMTask* const mtaskp = static_cast(vxp); + return mtaskp->critPathCost(way); + } + // Store a new CP to this node + void setCritPathCost(V3GraphVertex* vxp, GraphWay way, uint32_t cost) const { + LogicMTask* const mtaskp = static_cast(vxp); + mtaskp->setCritPathCost(way, cost); + } + // Notify vxp that the wayward CP at the throughp-->vxp edge + // has increased to 'cp'. (vxp is wayward from throughp.) + // This is our cue to update vxp's m_edges[!way][throughp]. + void notifyEdgeCp(V3GraphVertex* vxp, GraphWay way, V3GraphVertex* throuvhVxp, + uint32_t cp) const { + LogicMTask* const updateVxp = static_cast(vxp); + LogicMTask* const lthrouvhVxp = static_cast(throuvhVxp); + EdgeSet& edges = updateVxp->m_edges[way.invert()]; + const auto it = edges.find(lthrouvhVxp); + if (cp > it->second) edges.update(it, cp); + } + // Check that CP matches that of the longest edge wayward of vxp. + void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const { + LogicMTask* const mtaskp = static_cast(vxp); + const EdgeSet& edges = mtaskp->m_edges[way.invert()]; + // This is mtaskp's relative with longest !wayward inclusive CP: + const auto edgeIt = edges.rbegin(); + const uint32_t edgeCp = edgeIt->second; + UASSERT_OBJ(edgeCp == cp, vxp, "CP doesn't match longest wayward edge"); + } + + private: + VL_UNCOPYABLE(CpCostAccessor); + }; + private: // MEMBERS @@ -212,21 +231,21 @@ private: // while searching for a path. 
uint64_t m_generation = 0; - // Store a set of forward relatives so we can quickly check if we have a given child - std::unordered_set m_edgeSet; - // Store the outgoing and incoming edges in a heap sorted by the critical path length - std::array m_edgeHeap; - - // SiblingMC for which storage is owned by this MTask - std::set m_ownSibs; - // SiblingMC for which storage is owned by the opposite MTask - std::set m_farSibps; + // Redundant with the V3GraphEdge's, store a map of relatives so we can + // quickly check if we have a given parent or child. + // + // 'm_edges[way]' maps a wayward relative to the !way critical path at + // our edge with them. The SortByValueMap supports iterating over + // relatives in longest-to-shortest CP order. We rely on this ordering + // in more than one place. + using EdgeSet = SortByValueMap; + std::array m_edges; public: // CONSTRUCTORS LogicMTask(V3Graph* graphp, MTaskMoveVertex* mtmvVxp) : AbstractLogicMTask{graphp} { - for (uint32_t& item : m_critPathCost) item = 0; + for (unsigned int& i : m_critPathCost) i = 0; if (mtmvVxp) { // Else null for test m_vertices.push_back(mtmvVxp); if (const OrderLogicVertex* const olvp = mtmvVxp->logicp()) { @@ -240,9 +259,6 @@ public: } // METHODS - std::set& ownSibs() { return m_ownSibs; }; - std::set& farSibs() { return m_farSibps; }; - void moveAllVerticesFrom(LogicMTask* otherp) { // splice() is constant time m_vertices.splice(m_vertices.end(), otherp->m_vertices); @@ -280,37 +296,32 @@ public: logcost = logcost / 20.0; const uint32_t stepCost = static_cast(exp(logcost)); -#if VL_DEBUG UASSERT_STATIC(stepCost >= cost, "stepped cost error exceeded"); UASSERT_STATIC(stepCost <= ((cost * 11 / 10)), "stepped cost error exceeded"); -#endif return stepCost; #else return cost; #endif } - template - void addRelativeEdge(MTaskEdge* edgep); - template - void removeRelativeEdge(MTaskEdge* edgep); - - void addRelativeMTask(LogicMTask* relativep) { - // Add the relative to connecting edge map - 
VL_ATTR_UNUSED const bool exits = !m_edgeSet.emplace(relativep).second; + void addRelative(GraphWay way, LogicMTask* relativep) { + // value is !way cp to this edge + const uint32_t cp = relativep->stepCost() + relativep->critPathCost(way.invert()); + VL_ATTR_UNUSED const bool exits = !m_edges[way].emplace(relativep, cp).second; #if VL_DEBUG - UASSERT(!exits, "Adding existing relative"); + UASSERT(!exits, "Adding existing edge"); #endif } - void removeRelativeMTask(LogicMTask* relativep) { - VL_ATTR_UNUSED const size_t removed = m_edgeSet.erase(relativep); -#if VL_DEBUG - UASSERT(removed, "Relative should have been in set"); -#endif + void removeRelative(GraphWay way, LogicMTask* relativep) { m_edges[way].erase(relativep); } + bool hasRelative(GraphWay way, LogicMTask* relativep) { return m_edges[way].has(relativep); } + void checkRelativesCp(GraphWay way) const { + for (const auto& edge : vlstd::reverse_view(m_edges[way])) { + const LogicMTask* const relativep = edge.first; + const uint32_t cachedCp = edge.second; + const uint32_t cp = relativep->critPathCost(way.invert()) + relativep->stepCost(); + partCheckCachedScoreVsActual(cachedCp, cp); + } } - bool hasRelativeMTask(LogicMTask* relativep) const { return m_edgeSet.count(relativep); } - - void checkRelativesCp(GraphWay way) const; virtual string name() const override { // Display forward and reverse critical path costs. 
This gives a quick @@ -323,7 +334,27 @@ public: void setCritPathCost(GraphWay way, uint32_t cost) { m_critPathCost[way] = cost; } uint32_t critPathCost(GraphWay way) const { return m_critPathCost[way]; } - uint32_t critPathCostWithout(GraphWay way, const V3GraphEdge* withoutp) const; + uint32_t critPathCostWithout(GraphWay way, const V3GraphEdge* withoutp) const { + // Compute the critical path cost wayward to this node, without + // considering edge 'withoutp' + UASSERT(this == withoutp->furtherp(way), "In critPathCostWithout(), edge 'withoutp' must " + "further to 'this'"); + + // Iterate through edges until we get a relative other than + // wayEdgeEndp(way, withoutp). This should take 2 iterations max. + const EdgeSet& edges = m_edges[way.invert()]; + uint32_t result = 0; + for (const auto& edge : vlstd::reverse_view(edges)) { + if (edge.first != withoutp->furtherp(way.invert())) { + // Use the cached cost. It could be a small overestimate + // due to stepping. This is consistent with critPathCost() + // which also returns the cached cost. 
+ result = edge.second; + break; + } + } + return result; + } private: static bool pathExistsFromInternal(LogicMTask* fromp, LogicMTask* top, @@ -380,7 +411,65 @@ public: return pathExistsFromInternal(fromp, top, excludedEdgep, incGeneration()); } - static void dumpCpFilePrefixed(const V3Graph* graphp, const string& nameComment); + static void dumpCpFilePrefixed(const V3Graph* graphp, const string& nameComment) { + const string filename = v3Global.debugFilename(nameComment) + ".txt"; + UINFO(1, "Writing " << filename << endl); + const std::unique_ptr ofp{V3File::new_ofstream(filename)}; + std::ostream* const osp = &(*ofp); // &* needed to deref unique_ptr + if (osp->fail()) v3fatalStatic("Can't write " << filename); + + // Find start vertex with longest CP + const LogicMTask* startp = nullptr; + for (const V3GraphVertex* vxp = graphp->verticesBeginp(); vxp; + vxp = vxp->verticesNextp()) { + const LogicMTask* const mtaskp = static_cast(vxp); + if (!startp) { + startp = mtaskp; + continue; + } + if (mtaskp->cost() + mtaskp->critPathCost(GraphWay::REVERSE) + > startp->cost() + startp->critPathCost(GraphWay::REVERSE)) { + startp = mtaskp; + } + } + + // Follow the entire critical path + std::vector path; + uint32_t totalCost = 0; + for (const LogicMTask* nextp = startp; nextp;) { + path.push_back(nextp); + totalCost += nextp->cost(); + + const EdgeSet& children = nextp->m_edges[GraphWay::FORWARD]; + const EdgeSet::const_reverse_iterator it = children.rbegin(); + if (it == children.rend()) { + nextp = nullptr; + } else { + nextp = it->first; + } + } + + *osp << "totalCost = " << totalCost + << " (should match the computed critical path cost (CP) for the graph)\n"; + + // Dump + for (const LogicMTask* mtaskp : path) { + *osp << "begin mtask with cost " << mtaskp->cost() << '\n'; + for (VxList::const_iterator lit = mtaskp->vertexListp()->begin(); + lit != mtaskp->vertexListp()->end(); ++lit) { + const OrderLogicVertex* const logicp = (*lit)->logicp(); + if (!logicp) 
continue; + if (false) { + // Show nodes only + *osp << "> "; + logicp->nodep()->dumpTree(*osp); + } else { + // Show nodes with hierarchical costs + V3InstrCount::count(logicp->nodep(), false, osp); + } + } + } + } private: VL_DEBUG_FUNC; // Declare debug() @@ -401,20 +490,11 @@ public: } }; -struct MergeCandidateKey { - // Note: Structure layout chosen to minimize padding in PairingHeao<*>::Node - uint64_t m_id; // Unique ID part of edge score - uint32_t m_score; // Score part of ID - bool operator<(const MergeCandidateKey& other) const { - // First by Score then by ID, but notice that we want minimums using a max-heap, so reverse - return m_score > other.m_score || (m_score == other.m_score && m_id > other.m_id); - } -}; +class SiblingMC; +class MTaskEdge; -using MergeCandidateScoreboard = V3Scoreboard; - -// Information associated with scoreboarding a merge candidate -class MergeCandidate VL_NOT_FINAL : public MergeCandidateScoreboard::Node { +// Information associated with scoreboarding an MTask +class MergeCandidate VL_NOT_FINAL { private: // Only the known subclasses can create or delete one of these friend class SiblingMC; @@ -427,17 +507,18 @@ private: // using another bit of the id to denote the actual subtype. // By using the bottom bits for flags, we can still use < to compare IDs without masking. 
- // <63:1> Serial number for ordering, <0> subtype (SiblingMC) - static constexpr uint64_t IS_SIBLING_MASK = 1ULL << 0; - static constexpr uint64_t ID_INCREMENT = 1ULL << 1; + uint64_t m_id; // <63:2> Serial number for ordering, <1> subtype (SiblingMC), <0> removed + static constexpr uint64_t REMOVED_MASK = 1ULL << 0; + static constexpr uint64_t IS_SIBLING_MASK = 1ULL << 1; + static constexpr uint64_t ID_INCREMENT = 1ULL << 2; - bool isSiblingMC() const { return m_key.m_id & IS_SIBLING_MASK; } + bool isSiblingMC() const { return m_id & IS_SIBLING_MASK; } // CONSTRUCTORS explicit MergeCandidate(bool isSiblingMC) { static uint64_t serial = 0; serial += ID_INCREMENT; // +ID_INCREMENT so doesn't set the special bottom bits - m_key.m_id = serial | (isSiblingMC * IS_SIBLING_MASK); + m_id = serial | (isSiblingMC * IS_SIBLING_MASK); } ~MergeCandidate() = default; @@ -449,33 +530,35 @@ public: const MTaskEdge* toMTaskEdge() const; // Instead of dynamic_cast bool mergeWouldCreateCycle() const; // Instead of virtual method - inline void rescore(); - uint32_t score() const { return m_key.m_score; } - - static MergeCandidate* heapNodeToElem(MergeCandidateScoreboard::Node* nodep) { - return static_cast(nodep); - } + bool removedFromSb() const { return (m_id & REMOVED_MASK) != 0; } + void removedFromSb(bool /*removed*/) { m_id |= REMOVED_MASK; } + void clearRemovedFromSb() { m_id &= ~REMOVED_MASK; } + bool operator<(const MergeCandidate& other) const { return m_id < other.m_id; } }; -static_assert(sizeof(MergeCandidate) == sizeof(MergeCandidateScoreboard::Node), - "Should not have a vtable"); +static_assert(sizeof(MergeCandidate) == sizeof(uint64_t), "Should not have a vtable"); // A pair of associated LogicMTask's that are merge candidates for sibling // contraction class SiblingMC final : public MergeCandidate { private: - LogicMTask* const m_ap; - LogicMTask* const m_bp; + LogicMTask* m_ap; + LogicMTask* m_bp; public: // CONSTRUCTORS SiblingMC() = delete; 
SiblingMC(LogicMTask* ap, LogicMTask* bp) - : MergeCandidate{/* isSiblingMC: */ true} - , m_ap{ap} - , m_bp{bp} { - // operator< and storage management depends on this - UASSERT(ap->id() > bp->id(), "Should be ordered"); + : MergeCandidate{/* isSiblingMC: */ true} { + // Assign 'ap' and 'bp' in a canonical order, so we can more easily + // compare pairs of SiblingMCs + if (ap->id() > bp->id()) { + m_ap = ap; + m_bp = bp; + } else { + m_ap = bp; + m_bp = ap; + } } ~SiblingMC() = default; // METHODS @@ -497,23 +580,17 @@ static_assert(sizeof(SiblingMC) == sizeof(MergeCandidate) + 2 * sizeof(LogicMTas // GraphEdge for the MTask graph class MTaskEdge final : public V3GraphEdge, public MergeCandidate { - friend class LogicMTask; - template - friend class PartPropagateCp; - - // MEMBERS - // This edge can be in 2 EdgeHeaps, one forward and one reverse. We allocate the heap nodes - // directly within the edge as they are always required and this makes association cheap. - EdgeHeap::Node m_edgeHeapNode[GraphWay::NUM_WAYS]; - public: // CONSTRUCTORS MTaskEdge(V3Graph* graphp, LogicMTask* fromp, LogicMTask* top, int weight) : V3GraphEdge{graphp, fromp, top, weight} , MergeCandidate{/* isSiblingMC: */ false} { - fromp->addRelativeMTask(top); - fromp->addRelativeEdge(this); - top->addRelativeEdge(this); + fromp->addRelative(GraphWay::FORWARD, top); + top->addRelative(GraphWay::REVERSE, fromp); + } + virtual ~MTaskEdge() override { + fromMTaskp()->removeRelative(GraphWay::FORWARD, toMTaskp()); + toMTaskp()->removeRelative(GraphWay::REVERSE, fromMTaskp()); } // METHODS LogicMTask* furtherMTaskp(GraphWay way) const { @@ -524,135 +601,28 @@ public: bool mergeWouldCreateCycle() const { return LogicMTask::pathExistsFrom(fromMTaskp(), toMTaskp(), this); } + static MTaskEdge* cast(V3GraphEdge* edgep) { + if (!edgep) return nullptr; + MTaskEdge* const resultp = dynamic_cast(edgep); + UASSERT(resultp, "Failed to cast in MTaskEdge::cast"); + return resultp; + } // Following initial 
assignment of critical paths, clear this MTaskEdge // out of the edge-map for each node and reinsert at a new location // with updated critical path. void resetCriticalPaths() { LogicMTask* const fromp = fromMTaskp(); LogicMTask* const top = toMTaskp(); - fromp->removeRelativeEdge(this); - top->removeRelativeEdge(this); - fromp->addRelativeEdge(this); - top->addRelativeEdge(this); - } - - uint32_t cachedCp(GraphWay way) const { return m_edgeHeapNode[way].key().m_score; } - - // Convert from the address of the m_edgeHeapNode[way] in an MTaskEdge back to the MTaskEdge - static const MTaskEdge* toEdge(GraphWay way, const EdgeHeap::Node* nodep) { - // Offset of the node within the MTaskEdge - const size_t offset - = reinterpret_cast(&(reinterpret_cast(0)->m_edgeHeapNode[way])); - return reinterpret_cast(reinterpret_cast(nodep) - offset); + fromp->removeRelative(GraphWay::FORWARD, top); + top->removeRelative(GraphWay::REVERSE, fromp); + fromp->addRelative(GraphWay::FORWARD, top); + top->addRelative(GraphWay::REVERSE, fromp); } private: VL_UNCOPYABLE(MTaskEdge); }; -template -void LogicMTask::addRelativeEdge(MTaskEdge* edgep) { - constexpr GraphWay way{T_Way}; - constexpr GraphWay inv = way.invert(); - // Add to the edge heap - LogicMTask* const relativep = edgep->furtherMTaskp(way); - // Value is !way cp to this edge - const uint32_t cp = relativep->stepCost() + relativep->critPathCost(inv); - // - m_edgeHeap[way].insert(&edgep->m_edgeHeapNode[way], {relativep->id(), cp}); -} - -template -void LogicMTask::removeRelativeEdge(MTaskEdge* edgep) { - constexpr GraphWay way{T_Way}; - // Remove from the edge heap - m_edgeHeap[way].remove(&edgep->m_edgeHeapNode[way]); -} - -void LogicMTask::checkRelativesCp(GraphWay way) const { - for (V3GraphEdge* edgep = beginp(way); edgep; edgep = edgep->nextp(way)) { - const LogicMTask* const relativep = static_cast(edgep->furtherp(way)); - const uint32_t cachedCp = static_cast(edgep)->cachedCp(way); - const uint32_t cp = 
relativep->critPathCost(way.invert()) + relativep->stepCost(); - partCheckCachedScoreVsActual(cachedCp, cp); - } -} - -uint32_t LogicMTask::critPathCostWithout(GraphWay way, const V3GraphEdge* withoutp) const { - // Compute the critical path cost wayward to this node, without considering edge 'withoutp'. - // We need to look at two edges at most, the critical path if that is not via 'withoutp', - // or the second-worst path, if the critical path is via 'withoutp'. -#if VL_DEBUG - UASSERT(withoutp->furtherp(way) == this, - "In critPathCostWithout(), edge 'withoutp' must further to 'this'"); -#endif - const GraphWay inv = way.invert(); - const EdgeHeap& edgeHeap = m_edgeHeap[inv]; - const EdgeHeap::Node* const maxp = edgeHeap.max(); - if (!maxp) return 0; - if (MTaskEdge::toEdge(inv, maxp) != withoutp) return maxp->key().m_score; - const EdgeHeap::Node* const secp = edgeHeap.secondMax(); - if (!secp) return 0; - return secp->key().m_score; -} - -void LogicMTask::dumpCpFilePrefixed(const V3Graph* graphp, const string& nameComment) { - const string filename = v3Global.debugFilename(nameComment) + ".txt"; - UINFO(1, "Writing " << filename << endl); - const std::unique_ptr ofp{V3File::new_ofstream(filename)}; - std::ostream* const osp = &(*ofp); // &* needed to deref unique_ptr - if (osp->fail()) v3fatalStatic("Can't write " << filename); - - // Find start vertex with longest CP - LogicMTask* startp = nullptr; - for (V3GraphVertex* vxp = graphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - LogicMTask* const mtaskp = static_cast(vxp); - if (!startp) { - startp = mtaskp; - continue; - } - if (mtaskp->cost() + mtaskp->critPathCost(GraphWay::REVERSE) - > startp->cost() + startp->critPathCost(GraphWay::REVERSE)) { - startp = mtaskp; - } - } - - // Follow the entire critical path - std::vector path; - uint32_t totalCost = 0; - for (LogicMTask* nextp = startp; nextp;) { - path.push_back(nextp); - totalCost += nextp->cost(); - - if (EdgeHeap::Node* const maxp = 
nextp->m_edgeHeap[GraphWay::FORWARD].max()) { - nextp = MTaskEdge::toEdge(GraphWay::FORWARD, maxp)->toMTaskp(); - } else { - nextp = nullptr; - } - } - - *osp << "totalCost = " << totalCost - << " (should match the computed critical path cost (CP) for the graph)\n"; - - // Dump - for (const LogicMTask* mtaskp : path) { - *osp << "begin mtask with cost " << mtaskp->cost() << '\n'; - for (VxList::const_iterator lit = mtaskp->vertexListp()->begin(); - lit != mtaskp->vertexListp()->end(); ++lit) { - const OrderLogicVertex* const logicp = (*lit)->logicp(); - if (!logicp) continue; - if (false) { - // Show nodes only - *osp << "> "; - logicp->nodep()->dumpTree(*osp); - } else { - // Show nodes with hierarchical costs - V3InstrCount::count(logicp->nodep(), false, osp); - } - } - } -} - // Instead of dynamic cast SiblingMC* MergeCandidate::toSiblingMC() { return isSiblingMC() ? static_cast(this) : nullptr; @@ -677,40 +647,6 @@ bool MergeCandidate::mergeWouldCreateCycle() const { : static_cast(this)->mergeWouldCreateCycle(); } -static uint32_t siblingScore(const SiblingMC* sibsp) { - const LogicMTask* const ap = sibsp->ap(); - const LogicMTask* const bp = sibsp->bp(); - const uint32_t mergedCpCostFwd - = std::max(ap->critPathCost(GraphWay::FORWARD), bp->critPathCost(GraphWay::FORWARD)); - const uint32_t mergedCpCostRev - = std::max(ap->critPathCost(GraphWay::REVERSE), bp->critPathCost(GraphWay::REVERSE)); - return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost()); -} - -static uint32_t edgeScore(const MTaskEdge* edgep) { - // Score this edge. Lower is better. The score is the new local CP - // length if we merge these mtasks. ("Local" means the longest - // critical path running through the merged node.) 
- const LogicMTask* const top = static_cast(edgep->top()); - const LogicMTask* const fromp = static_cast(edgep->fromp()); - const uint32_t mergedCpCostFwd = std::max(fromp->critPathCost(GraphWay::FORWARD), - top->critPathCostWithout(GraphWay::FORWARD, edgep)); - const uint32_t mergedCpCostRev = std::max(fromp->critPathCostWithout(GraphWay::REVERSE, edgep), - top->critPathCost(GraphWay::REVERSE)); - return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(fromp->cost() + top->cost()); -} - -void MergeCandidate::rescore() { - if (const SiblingMC* const sibp = toSiblingMC()) { - m_key.m_score = siblingScore(sibp); - } else { - // The '1 +' favors merging a SiblingMC over an otherwise- - // equal-scoring MTaskEdge. The comment on selfTest() talks - // about why. - m_key.m_score = 1 + edgeScore(static_cast(this)); - } -} - // ###################################################################### // Vertex utility classes @@ -877,6 +813,7 @@ static void partCheckCriticalPaths(V3Graph* mtasksp) { // Usage: // * Client increases the cost and/or CP at a node or small set of nodes // (often a pair in practice, eg. edge contraction.) +// * Client instances a PartPropagateCp object // * Client calls PartPropagateCp::cpHasIncreased() one or more times. // Each call indicates that the inclusive CP of some "seed" vertex // has increased to a given value. @@ -886,120 +823,53 @@ static void partCheckCriticalPaths(V3Graph* mtasksp) { // * Client calls PartPropagateCp::go(). Internally, this iteratively // propagates the new CPs wayward through the graph. 
// -template -class PartPropagateCp final { - // TYPES - - // We keep pending vertices in a heap during critical path propagation - struct PendingKey { - LogicMTask* m_mtaskp; // The vertex in the heap - uint32_t m_score; // The score of this entry - void increase(uint32_t score) { -#if VL_DEBUG - UASSERT(score >= m_score, "Must increase"); -#endif - m_score = score; - } - bool operator<(const PendingKey& other) const { - if (m_score != other.m_score) return m_score < other.m_score; - return LogicMTask::CmpLogicMTask{}(m_mtaskp, other.m_mtaskp); - } - }; - - using PendingHeap = PairingHeap; - using PendingHeapNode = typename PendingHeap::Node; +class PartPropagateCp final : GraphAlg<> { +private: // MEMBERS - PendingHeap m_pendingHeap; // Heap of pending rescores - - // We allocate this many heap nodes at once - static constexpr size_t ALLOC_CHUNK_SIZE = 128; - PendingHeapNode* m_freep = nullptr; // List of free heap nodes - std::vector> m_allocated; // Allocated heap nodes - + const GraphWay m_way; // CPs oriented in this direction: either FORWARD + // // from graph-start to current node, or REVERSE + // // from graph-end to current node. + LogicMTask::CpCostAccessor m_access; // Access cost and CPs on V3GraphVertex's. + // // confirm we only process each vertex once. 
const bool m_slowAsserts; // Enable nontrivial asserts + // Pending rescores + SortByValueMap m_pending; + std::set m_seen; // Used only with slow asserts to check mtasks visited only once public: // CONSTRUCTORS - PartPropagateCp(bool slowAsserts) - : m_slowAsserts{slowAsserts} {} + PartPropagateCp(V3Graph* graphp, GraphWay way, bool slowAsserts, + V3EdgeFuncP edgeFuncp = &V3GraphEdge::followAlwaysTrue) + : GraphAlg<>{graphp, edgeFuncp} + , m_way{way} + , m_slowAsserts{slowAsserts} {} // METHODS -private: - // Allocate a HeapNode for the given element - PendingHeapNode* allocNode() { - // If no free nodes available, then make some - if (!m_freep) { - // Allocate in chunks for efficiency - m_allocated.emplace_back(new PendingHeapNode[ALLOC_CHUNK_SIZE]); - // Set up free list pointer - m_freep = m_allocated.back().get(); - // Set up free list chain - for (size_t i = 1; i < ALLOC_CHUNK_SIZE; ++i) { - m_freep[i - 1].m_next.m_ptr = &m_freep[i]; - } - // Clear the next pointer of the last entry - m_freep[ALLOC_CHUNK_SIZE - 1].m_next.m_ptr = nullptr; - } - // Free nodes are available, pick up the first one - PendingHeapNode* const resultp = m_freep; - m_freep = resultp->m_next.m_ptr; - resultp->m_next.m_ptr = nullptr; - return resultp; - } - - // Release a heap node (make it available for future allocation) - void freeNode(PendingHeapNode* nodep) { - // Re-use the existing link pointers and simply prepend it to the free list - nodep->m_next.m_ptr = m_freep; - m_freep = nodep; - } - -public: void cpHasIncreased(V3GraphVertex* vxp, uint32_t newInclusiveCp) { - constexpr GraphWay way{T_Way}; - constexpr GraphWay inv{way.invert()}; - // For *vxp, whose CP-inclusive has just increased to // newInclusiveCp, iterate to all wayward nodes, update the edges // of each, and add each to m_pending if its overall CP has grown. 
- for (MTaskEdge *edgep = static_cast(vxp->beginp(way)), *nextp; edgep; - edgep = nextp) { - // Fetch early as likely cache miss - nextp = static_cast(edgep->nextp(way)); + for (V3GraphEdge* edgep = vxp->beginp(m_way); edgep; edgep = edgep->nextp(m_way)) { + if (!m_edgeFuncp(edgep)) continue; + LogicMTask* const relativep = static_cast(edgep->furtherp(m_way)); + m_access.notifyEdgeCp(relativep, m_way, vxp, newInclusiveCp); - LogicMTask* const relativep = edgep->furtherMTaskp(way); - EdgeHeap::Node& edgeHeapNode = edgep->m_edgeHeapNode[inv]; - if (newInclusiveCp > edgeHeapNode.key().m_score) { - relativep->m_edgeHeap[inv].increaseKey(&edgeHeapNode, newInclusiveCp); + if (m_access.critPathCost(relativep, m_way) < newInclusiveCp) { + // relativep's critPathCost() is out of step with its + // longest !wayward edge. Schedule that to be resolved. + const uint32_t newPendingVal + = newInclusiveCp - m_access.critPathCost(relativep, m_way); + const auto pair = m_pending.emplace(relativep, newPendingVal); + if (!pair.second && (newPendingVal > pair.first->second)) { + m_pending.update(pair.first, newPendingVal); + } } - - const uint32_t critPathCost = relativep->critPathCost(way); - - if (critPathCost >= newInclusiveCp) continue; - - // relativep's critPathCost() is out of step with its longest !wayward edge. - // Schedule that to be resolved. - const uint32_t newVal = newInclusiveCp - critPathCost; - - if (PendingHeapNode* const nodep = static_cast(relativep->userp())) { - // Already in heap. Increase score if needed. - if (newVal > nodep->key().m_score) m_pendingHeap.increaseKey(nodep, newVal); - continue; - } - - // Add to heap - PendingHeapNode* const nodep = allocNode(); - relativep->userp(nodep); - m_pendingHeap.insert(nodep, {relativep, newVal}); } } void go() { - constexpr GraphWay way{T_Way}; - constexpr GraphWay inv{way.invert()}; - // m_pending maps each pending vertex to the amount that it wayward // CP will grow. // @@ -1016,34 +886,27 @@ public: // once. 
And so on. // // This generalizes to multiple seed nodes also. - while (!m_pendingHeap.empty()) { - // Pop max element from heap - PendingHeapNode* const maxp = m_pendingHeap.max(); - m_pendingHeap.remove(maxp); - // Pick up values - LogicMTask* const mtaskp = maxp->key().m_mtaskp; - const uint32_t cpGrowBy = maxp->key().m_score; - // Free the heap node, we are done with it - freeNode(maxp); - mtaskp->userp(nullptr); - // Update the critPathCost of mtaskp, that was out-of-date with respect to its edges - const uint32_t startCp = mtaskp->critPathCost(way); + while (!m_pending.empty()) { + const auto it = m_pending.rbegin(); + LogicMTask* const updateMep = it->first; + const uint32_t cpGrowBy = it->second; + m_pending.erase(it); + + // For *updateMep, whose critPathCost was out-of-date with respect + // to its edges, update the critPathCost. + const uint32_t startCp = m_access.critPathCost(updateMep, m_way); const uint32_t newCp = startCp + cpGrowBy; if (VL_UNLIKELY(m_slowAsserts)) { - // Check that CP matches that of the longest edge wayward of vxp. - const uint32_t edgeCp = mtaskp->m_edgeHeap[inv].max()->key().m_score; - UASSERT_OBJ(edgeCp == newCp, mtaskp, "CP doesn't match longest wayward edge"); + m_access.checkNewCpVersusEdges(updateMep, m_way, newCp); // Confirm that we only set each node's CP once. That's an // important property of PartPropagateCp which allows it to be far // faster than a recursive algorithm on some graphs. 
- const bool first = m_seen.insert(mtaskp).second; - UASSERT_OBJ(first, mtaskp, "Set CP on node twice"); + const bool first = m_seen.insert(updateMep).second; + UASSERT_OBJ(first, updateMep, "Set CP on node twice"); } - mtaskp->setCritPathCost(way, newCp); - cpHasIncreased(mtaskp, newCp + mtaskp->stepCost()); + m_access.setCritPathCost(updateMep, m_way, newCp); + cpHasIncreased(updateMep, newCp + m_access.cost(updateMep)); } - - if (VL_UNLIKELY(m_slowAsserts)) m_seen.clear(); } private: @@ -1076,11 +939,11 @@ private: const unsigned idx1 = V3Os::rand64(rngState) % 50; const unsigned idx2 = V3Os::rand64(rngState) % 50; if (idx1 > idx2) { - if (!m_vx[idx2]->hasRelativeMTask(m_vx[idx1])) { + if (!m_vx[idx2]->hasRelative(GraphWay::FORWARD, m_vx[idx1])) { new MTaskEdge{&m_graph, m_vx[idx2], m_vx[idx1], 1}; } } else if (idx2 > idx1) { - if (!m_vx[idx1]->hasRelativeMTask(m_vx[idx2])) { + if (!m_vx[idx1]->hasRelative(GraphWay::FORWARD, m_vx[idx2])) { new MTaskEdge{&m_graph, m_vx[idx1], m_vx[idx2], 1}; } } @@ -1089,7 +952,7 @@ private: partInitCriticalPaths(&m_graph); // This SelfTest class is also the T_CostAccessor - PartPropagateCp prop(true); + PartPropagateCp prop(&m_graph, GraphWay::FORWARD, true); // Seed the propagator with every input node; // This should result in the complete graph getting all CP's assigned. @@ -1098,6 +961,9 @@ private: } // Run the propagator. + // * The setCritPathCost() routine checks that each node's CP changes + // at most once. + // * The notifyEdgeCp routine is also self checking. prop.go(); // Finally, confirm that the entire graph appears to have correct CPs. @@ -1110,7 +976,7 @@ public: // Merge edges from a LogicMtask. // -// This code removes adjacent edges. When this occurs, mark it in need +// This code removes 'hasRelative' edges. When this occurs, mark it in need // of a rescore, in case its score has fallen and we need to move it up // toward the front of the scoreboard. 
// @@ -1141,93 +1007,51 @@ public: // // Another way of stating this: this code ensures that scores of // non-transitive edges only ever increase. -static void partRedirectEdgesFrom(V3Graph* graphp, LogicMTask* recipientp, LogicMTask* donorp, - MergeCandidateScoreboard* sbp) { - - // Process outgoing edges - MTaskEdge* outNextp = static_cast(donorp->outBeginp()); - while (outNextp) { - MTaskEdge* const edgep = outNextp; - LogicMTask* const relativep = outNextp->toMTaskp(); - outNextp = static_cast(outNextp->outNextp()); - - relativep->removeRelativeEdge(edgep); - - if (recipientp->hasRelativeMTask(relativep)) { - // An edge already exists between recipient and relative of donor. - // Mark it in need of a rescore - if (sbp) { - if (sbp->contains(edgep)) sbp->remove(edgep); - MTaskEdge* const existMTaskEdgep = static_cast( - recipientp->findConnectingEdgep(GraphWay::FORWARD, relativep)); -#if VL_DEBUG - UASSERT(existMTaskEdgep, "findConnectingEdge didn't find edge"); -#endif - if (sbp->contains(existMTaskEdgep)) sbp->hintScoreChanged(existMTaskEdgep); - } - // Can nuke the edge now - VL_DO_DANGLING(edgep->unlinkDelete(), edgep); - } else { - // No existing edge between recipient and relative of donor. - // Redirect the edge from donor<->relative to recipient<->relative. 
- donorp->removeRelativeEdge(edgep); - edgep->relinkFromp(recipientp); - recipientp->addRelativeMTask(relativep); - recipientp->addRelativeEdge(edgep); - relativep->addRelativeEdge(edgep); - if (sbp) { - if (!sbp->contains(edgep)) { - sbp->add(edgep); +static void partRedirectEdgesFrom(LogicMTask* recipientp, LogicMTask* donorp, + V3Scoreboard* sbp) { + for (const auto& way : {GraphWay::FORWARD, GraphWay::REVERSE}) { + for (V3GraphEdge *edgep = donorp->beginp(way), *nextp; edgep; edgep = nextp) { + nextp = edgep->nextp(way); + MTaskEdge* const tedgep = MTaskEdge::cast(edgep); + LogicMTask* const relativep = tedgep->furtherMTaskp(way); + if (recipientp->hasRelative(way, relativep)) { + // An edge already exists between recipient and relative of donor. + // Mark it in need of a rescore + if (sbp) { + if (!tedgep->removedFromSb()) sbp->removeElem(tedgep); + const MTaskEdge* const existMTaskEdgep + = MTaskEdge::cast(recipientp->findConnectingEdgep(way, relativep)); + UASSERT(existMTaskEdgep, "findConnectingEdge didn't find edge"); + if (!existMTaskEdgep->removedFromSb()) { + sbp->hintScoreChanged(existMTaskEdgep); + } + } + VL_DO_DANGLING(edgep->unlinkDelete(), edgep); + } else { + // No existing edge between recipient and relative of donor. + // Redirect the edge from donor<->relative to recipient<->relative. 
+ if (way == GraphWay::REVERSE) { + tedgep->relinkTop(recipientp); + relativep->removeRelative(GraphWay::FORWARD, donorp); + relativep->addRelative(GraphWay::FORWARD, recipientp); + recipientp->addRelative(GraphWay::REVERSE, relativep); } else { - sbp->hintScoreChanged(edgep); + tedgep->relinkFromp(recipientp); + relativep->removeRelative(GraphWay::REVERSE, donorp); + relativep->addRelative(GraphWay::REVERSE, recipientp); + recipientp->addRelative(GraphWay::FORWARD, relativep); + } + if (sbp) { + if (tedgep->removedFromSb()) { + tedgep->clearRemovedFromSb(); + sbp->addElem(tedgep); + } else { + sbp->hintScoreChanged(tedgep); + } } } } } - - // Process incoming edges - MTaskEdge* inNextp = static_cast(donorp->inBeginp()); - while (inNextp) { - MTaskEdge* const edgep = inNextp; - LogicMTask* const relativep = inNextp->fromMTaskp(); - inNextp = static_cast(inNextp->inNextp()); - - relativep->removeRelativeMTask(donorp); - relativep->removeRelativeEdge(edgep); - - if (relativep->hasRelativeMTask(recipientp)) { - // An edge already exists between recipient and relative of donor. - // Mark it in need of a rescore - if (sbp) { - if (sbp->contains(edgep)) sbp->remove(edgep); - MTaskEdge* const existMTaskEdgep = static_cast( - recipientp->findConnectingEdgep(GraphWay::REVERSE, relativep)); -#if VL_DEBUG - UASSERT(existMTaskEdgep, "findConnectingEdge didn't find edge"); -#endif - if (sbp->contains(existMTaskEdgep)) sbp->hintScoreChanged(existMTaskEdgep); - } - VL_DO_DANGLING(edgep->unlinkDelete(), edgep); - } else { - // No existing edge between recipient and relative of donor. - // Redirect the edge from donor<->relative to recipient<->relative. 
- donorp->removeRelativeEdge(edgep); - edgep->relinkTop(recipientp); - relativep->addRelativeMTask(recipientp); - relativep->addRelativeEdge(edgep); - recipientp->addRelativeEdge(edgep); - if (sbp) { - if (!sbp->contains(edgep)) { - sbp->add(edgep); - } else { - sbp->hintScoreChanged(edgep); - } - } - } - } - - // Remove donorp from the graph - VL_DO_DANGLING(donorp->unlinkDelete(graphp), donorp); } //###################################################################### @@ -1237,6 +1061,14 @@ static void partRedirectEdgesFrom(V3Graph* graphp, LogicMTask* recipientp, Logic class PartContraction final { private: // TYPES + + // TODO: might get a little more speed by making this a + // std::unordered_set and defining hash and equal_to functors for the + // SiblingMC: + using SibSet = std::set; + using SibpSet = std::unordered_set; + using MTask2Sibs = std::unordered_map; + // New CP information for mtaskp reflecting an upcoming merge struct NewCp { uint32_t cp; @@ -1250,17 +1082,17 @@ private: uint32_t m_scoreLimitBeforeRescore = 0xffffffff; // Next score rescore at unsigned m_mergesSinceRescore = 0; // Merges since last rescore const bool m_slowAsserts; // Take extra time to validate algorithm - MergeCandidateScoreboard m_sb; // Scoreboard - - PartPropagateCp m_forwardPropagator{m_slowAsserts}; // Forward propagator - PartPropagateCp m_reversePropagator{m_slowAsserts}; // Reverse propagator + V3Scoreboard m_sb; // Scoreboard + SibSet m_pairs; // Storage for each SiblingMC + MTask2Sibs m_mtask2sibs; // SiblingMC set for each mtask public: // CONSTRUCTORS PartContraction(V3Graph* mtasksp, uint32_t scoreLimit, bool slowAsserts) : m_mtasksp{mtasksp} , m_scoreLimit{scoreLimit} - , m_slowAsserts{slowAsserts} {} + , m_slowAsserts{slowAsserts} + , m_sb{&mergeCandidateScore, slowAsserts} {} // METHODS void go() { @@ -1284,18 +1116,17 @@ public: // - Incrementally recompute critical paths near the merged mtask. 
for (V3GraphVertex* itp = m_mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) { - itp->userp(nullptr); // Reset user value. Used by PartPropagateCp. std::unordered_set neighbors; for (V3GraphEdge* edgep = itp->outBeginp(); edgep; edgep = edgep->outNextp()) { - m_sb.add(static_cast(edgep)); + m_sb.addElem(MTaskEdge::cast(edgep)); if (m_slowAsserts) { UASSERT_OBJ(neighbors.find(edgep->top()) == neighbors.end(), itp, "Redundant edge found in input to PartContraction()"); } neighbors.insert(edgep->top()); } - siblingPairFromRelatives(itp); - siblingPairFromRelatives(itp); + siblingPairFromRelatives(GraphWay::REVERSE, itp, true); + siblingPairFromRelatives(GraphWay::FORWARD, itp, true); } doRescore(); // Set initial scores in scoreboard @@ -1303,7 +1134,7 @@ public: while (true) { // This is the best edge to merge, with the lowest // score (shortest local critical path) - MergeCandidate* const mergeCanp = m_sb.best(); + MergeCandidate* const mergeCanp = const_cast(m_sb.bestp()); if (!mergeCanp) { // Scoreboard found no eligible merges. Maybe a rescore // will produce some merge-able pairs? @@ -1318,9 +1149,8 @@ public: UASSERT(!m_sb.needsRescore(mergeCanp), "Need-rescore items should not be returned by bestp"); } - const uint32_t cachedScore = mergeCanp->score(); - mergeCanp->rescore(); - const uint32_t actualScore = mergeCanp->score(); + const uint32_t cachedScore = m_sb.cachedScore(mergeCanp); + const uint32_t actualScore = mergeCandidateScore(mergeCanp); if (actualScore > cachedScore) { // Cached score is out-of-date. @@ -1381,11 +1211,8 @@ public: if (mergeCanp->mergeWouldCreateCycle()) { // Remove this edge from scoreboard so we don't keep // reconsidering it on every loop. 
- m_sb.remove(mergeCanp); - if (SiblingMC* const smcp = mergeCanp->toSiblingMC()) { - smcp->bp()->farSibs().erase(smcp); - smcp->ap()->ownSibs().erase(*smcp); // Kills *smcp, so do last - } + m_sb.removeElem(mergeCanp); + mergeCanp->removedFromSb(true); continue; } @@ -1447,29 +1274,31 @@ private: } void removeSiblingMCsWith(LogicMTask* mtaskp) { - for (const SiblingMC& pair : mtaskp->ownSibs()) { - m_sb.remove(const_cast(&pair)); - // Owner is always ap(), remove from the opposite side - pair.bp()->farSibs().erase(&pair); + for (SibpSet::iterator it = m_mtask2sibs[mtaskp].begin(); it != m_mtask2sibs[mtaskp].end(); + ++it) { + const SiblingMC* const pairp = *it; + if (!pairp->removedFromSb()) m_sb.removeElem(pairp); + const LogicMTask* const otherp = (pairp->bp() == mtaskp) ? pairp->ap() : pairp->bp(); + size_t erased = m_mtask2sibs[otherp].erase(pairp); + UASSERT_OBJ(erased > 0, otherp, "Expected existing mtask"); + erased = m_pairs.erase(*pairp); + UASSERT_OBJ(erased > 0, mtaskp, "Expected existing mtask"); } - for (const SiblingMC* const pairp : mtaskp->farSibs()) { - m_sb.remove(const_cast(pairp)); - // Owner is always ap(), remove from the opposite side - pairp->ap()->ownSibs().erase(*pairp); - } - mtaskp->ownSibs().clear(); - mtaskp->farSibs().clear(); + const size_t erased = m_mtask2sibs.erase(mtaskp); + UASSERT_OBJ(erased > 0, mtaskp, "Expected existing mtask"); } void contract(MergeCandidate* mergeCanp) { LogicMTask* top = nullptr; LogicMTask* fromp = nullptr; MTaskEdge* mergeEdgep = mergeCanp->toMTaskEdge(); + const SiblingMC* mergeSibsp = nullptr; if (mergeEdgep) { top = static_cast(mergeEdgep->top()); fromp = static_cast(mergeEdgep->fromp()); } else { - const SiblingMC* mergeSibsp = static_cast(mergeCanp); + mergeSibsp = mergeCanp->toSiblingMC(); + UASSERT(mergeSibsp, "Failed to cast mergeCanp to either MTaskEdge or SiblingMC"); top = mergeSibsp->ap(); fromp = mergeSibsp->bp(); } @@ -1508,10 +1337,7 @@ private: if (mergeEdgep) { // Remove and free the 
connecting edge. Must do this before // propagating CP's below. - m_sb.remove(mergeCanp); - mergeEdgep->fromMTaskp()->removeRelativeMTask(mergeEdgep->toMTaskp()); - mergeEdgep->fromMTaskp()->removeRelativeEdge(mergeEdgep); - mergeEdgep->toMTaskp()->removeRelativeEdge(mergeEdgep); + m_sb.removeElem(mergeCanp); VL_DO_CLEAR(mergeEdgep->unlinkDelete(), mergeEdgep = nullptr); } @@ -1527,22 +1353,25 @@ private: << (donorNewCpFwd.propagate ? " true " : " false ") << donorNewCpFwd.propagateCp << endl); + PartPropagateCp forwardPropagator(m_mtasksp, GraphWay::FORWARD, m_slowAsserts); + PartPropagateCp reversePropagator(m_mtasksp, GraphWay::REVERSE, m_slowAsserts); + recipientp->setCritPathCost(GraphWay::FORWARD, recipientNewCpFwd.cp); if (recipientNewCpFwd.propagate) { - m_forwardPropagator.cpHasIncreased(recipientp, recipientNewCpFwd.propagateCp); + forwardPropagator.cpHasIncreased(recipientp, recipientNewCpFwd.propagateCp); } recipientp->setCritPathCost(GraphWay::REVERSE, recipientNewCpRev.cp); if (recipientNewCpRev.propagate) { - m_reversePropagator.cpHasIncreased(recipientp, recipientNewCpRev.propagateCp); + reversePropagator.cpHasIncreased(recipientp, recipientNewCpRev.propagateCp); } if (donorNewCpFwd.propagate) { - m_forwardPropagator.cpHasIncreased(donorp, donorNewCpFwd.propagateCp); + forwardPropagator.cpHasIncreased(donorp, donorNewCpFwd.propagateCp); } if (donorNewCpRev.propagate) { - m_reversePropagator.cpHasIncreased(donorp, donorNewCpRev.propagateCp); + reversePropagator.cpHasIncreased(donorp, donorNewCpRev.propagateCp); } - m_forwardPropagator.go(); - m_reversePropagator.go(); + forwardPropagator.go(); + reversePropagator.go(); // Remove all SiblingMCs that include donorp. This Includes the one // we're merging, if we're merging a SiblingMC. @@ -1552,8 +1381,11 @@ private: // to a bounded number. 
removeSiblingMCsWith(recipientp); - // Redirect all edges, delete donorp - partRedirectEdgesFrom(m_mtasksp, recipientp, donorp, &m_sb); + // Redirect all edges + partRedirectEdgesFrom(recipientp, donorp, &m_sb); + + // Delete the donorp mtask from the graph + VL_DO_CLEAR(donorp->unlinkDelete(m_mtasksp), donorp = nullptr); ++m_mergesSinceRescore; @@ -1566,21 +1398,21 @@ private: // - prereqs of recipientp's postreqs // - postreqs of recipientp's prereqs // Note that this depends on the updated critical paths (above). - siblingPairFromRelatives(recipientp); - siblingPairFromRelatives(recipientp); + siblingPairFromRelatives(GraphWay::REVERSE, recipientp, true); + siblingPairFromRelatives(GraphWay::FORWARD, recipientp, true); unsigned edges = 0; for (V3GraphEdge* edgep = recipientp->outBeginp(); edgep; edgep = edgep->outNextp()) { LogicMTask* const postreqp = static_cast(edgep->top()); - siblingPairFromRelatives(postreqp); + siblingPairFromRelatives(GraphWay::REVERSE, postreqp, false); ++edges; - if (edges >= PART_SIBLING_EDGE_LIMIT) break; + if (edges > PART_SIBLING_EDGE_LIMIT) break; } edges = 0; for (V3GraphEdge* edgep = recipientp->inBeginp(); edgep; edgep = edgep->inNextp()) { LogicMTask* const prereqp = static_cast(edgep->fromp()); - siblingPairFromRelatives(prereqp); + siblingPairFromRelatives(GraphWay::FORWARD, prereqp, false); ++edges; - if (edges >= PART_SIBLING_EDGE_LIMIT) break; + if (edges > PART_SIBLING_EDGE_LIMIT) break; } } @@ -1597,86 +1429,111 @@ private: m_scoreLimitBeforeRescore = 0xffffffff; } + static uint32_t mergeCandidateScore(const MergeCandidate* pairp) { + if (const MTaskEdge* const edgep = pairp->toMTaskEdge()) { + // The '1 +' favors merging a SiblingMC over an otherwise- + // equal-scoring MTaskEdge. The comment on selfTest() talks + // about why. 
+ return 1 + edgeScore(edgep); + } else { + return siblingScore(pairp->toSiblingMC()); + } + v3fatalSrc("Failed to cast pairp to either MTaskEdge or SiblingMC in mergeCandidateScore"); + return 0; + } + + VL_ATTR_NOINLINE + static uint32_t siblingScore(const SiblingMC* sibsp) { + const LogicMTask* const ap = sibsp->ap(); + const LogicMTask* const bp = sibsp->bp(); + const uint32_t mergedCpCostFwd + = std::max(ap->critPathCost(GraphWay::FORWARD), bp->critPathCost(GraphWay::FORWARD)); + const uint32_t mergedCpCostRev + = std::max(ap->critPathCost(GraphWay::REVERSE), bp->critPathCost(GraphWay::REVERSE)); + return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost()); + } + + VL_ATTR_NOINLINE + static uint32_t edgeScore(const V3GraphEdge* edgep) { + // Score this edge. Lower is better. The score is the new local CP + // length if we merge these mtasks. ("Local" means the longest + // critical path running through the merged node.) + const LogicMTask* const top = static_cast(edgep->top()); + const LogicMTask* const fromp = static_cast(edgep->fromp()); + const uint32_t mergedCpCostFwd + = std::max(fromp->critPathCost(GraphWay::FORWARD), + top->critPathCostWithout(GraphWay::FORWARD, edgep)); + const uint32_t mergedCpCostRev + = std::max(fromp->critPathCostWithout(GraphWay::REVERSE, edgep), + top->critPathCost(GraphWay::REVERSE)); + return mergedCpCostRev + mergedCpCostFwd + + LogicMTask::stepCost(fromp->cost() + top->cost()); + } + void makeSiblingMC(LogicMTask* ap, LogicMTask* bp) { - if (ap->id() < bp->id()) std::swap(ap, bp); - // The higher id vertex owns the storage - const auto emplaceResult = ap->ownSibs().emplace(ap, bp); - if (emplaceResult.second) { - SiblingMC* const newSibsp = const_cast(&(*emplaceResult.first)); - bp->farSibs().insert(newSibsp); - m_sb.add(newSibsp); + const SiblingMC newSibs(ap, bp); + const std::pair insertResult = m_pairs.insert(newSibs); + if (insertResult.second) { + const SiblingMC* const newSibsp = 
&(*insertResult.first); + m_mtask2sibs[ap].insert(newSibsp); + m_mtask2sibs[bp].insert(newSibsp); + m_sb.addElem(newSibsp); } else if (m_slowAsserts) { // It's fine if we already have this SiblingMC, we may have // created it earlier. Just confirm that we have associated data. + UASSERT_OBJ(m_mtask2sibs.find(ap) != m_mtask2sibs.end(), ap, "Sibling not found"); + UASSERT_OBJ(m_mtask2sibs.find(bp) != m_mtask2sibs.end(), bp, "Sibling not found"); bool found = false; - for (const SiblingMC& sibs : ap->ownSibs()) { - UASSERT_OBJ(sibs.ap() == ap, ap, "Inconsistent SiblingMC"); - UASSERT_OBJ(m_sb.contains(&sibs), ap, "Must be on the scoreboard"); - if (sibs.bp() == bp) found = true; + for (SibpSet::iterator it = m_mtask2sibs[ap].begin(); it != m_mtask2sibs[ap].end(); + ++it) { + const SiblingMC* const sibsp = *it; + UASSERT_OBJ(!(!sibsp->removedFromSb() && !m_sb.contains(sibsp)), ap, + "One sibling must be the one we collided with"); + if ((sibsp->ap() == ap && sibsp->bp() == bp) + || (sibsp->bp() == ap && sibsp->ap() == bp)) + found = true; } UASSERT_OBJ(found, ap, "Sibling not found"); } } - template - VL_ATTR_NOINLINE void siblingPairFromRelatives(V3GraphVertex* mtaskp) { - constexpr GraphWay way{Way}; - // Need at least 2 edges - if (!mtaskp->beginp(way) || !mtaskp->beginp(way)->nextp(way)) return; + void siblingPairFromRelatives(GraphWay way, V3GraphVertex* mtaskp, bool exhaustive) { + std::vector shortestPrereqs; - std::array neighbours; - - // This is a hot method, so we want so sort as efficiently as possible. We pre-load - // all data (critical path cost and id) required for determining ordering into an aligned - // structure. There is not enough space next to these to keep a whole pointer within 16 - // bytes, so we store an index into the neighbours buffer instead. We can then compare - // and swap these sorting records very efficiently. 
With this the standard library sorting - // functions are efficient enough and using more optimized methods (e.g.: sorting networks) - // has no measurable benefit. - struct alignas(16) SortingRecord { - uint64_t m_id; - uint32_t m_cp; - uint8_t m_idx; - static_assert(PART_SIBLING_EDGE_LIMIT <= std::numeric_limits::max(), - "m_idx must fit all indices into 'neighbours'"); - bool operator<(const SortingRecord& that) const { - return m_cp < that.m_cp || (m_cp == that.m_cp && m_id < that.m_id); - } - }; - static_assert(sizeof(SortingRecord) <= 16, "How could this be padded to more than 16?"); - - std::array sortRecs; - size_t n = 0; - - // Populate the buffers - for (V3GraphEdge *edgep = mtaskp->beginp(way), *nextp; edgep; edgep = nextp) { - nextp = edgep->nextp(way); // Fetch next first as likely cache miss - LogicMTask* const otherp = static_cast(edgep->furtherp(way)); - neighbours[n] = otherp; - sortRecs[n].m_id = otherp->id(); - sortRecs[n].m_cp = otherp->critPathCost(way) + otherp->cost(); - sortRecs[n].m_idx = n; - ++n; - // Prevent nodes with huge numbers of edges from massively slowing down us down - if (n >= PART_SIBLING_EDGE_LIMIT) break; + for (V3GraphEdge* edgep = mtaskp->beginp(way); edgep; edgep = edgep->nextp(way)) { + LogicMTask* const prereqp = static_cast(edgep->furtherp(way)); + shortestPrereqs.push_back(prereqp); + // Prevent nodes with huge numbers of edges from massively + // slowing down the partitioner: + if (shortestPrereqs.size() > PART_SIBLING_EDGE_LIMIT) break; } - // Don't make all possible pairs of siblings when not requested (non-exhaustive). + if (shortestPrereqs.size() <= 1) return; + + const auto cmp = [way](const LogicMTask* ap, const LogicMTask* bp) { + const uint32_t aCp = ap->critPathCost(way) + ap->cost(); + const uint32_t bCp = bp->critPathCost(way) + bp->cost(); + if (aCp != bCp) return aCp < bCp; + return ap->id() < bp->id(); + }; + + // Don't make all possible pairs of prereqs when not requested (non-exhaustive). 
// Just make a few pairs. constexpr size_t MAX_NONEXHAUSTIVE_PAIRS = 3; - if (Exhaustive || n <= 2 * MAX_NONEXHAUSTIVE_PAIRS) { - const size_t end = n & ~static_cast(1); // Round down to even, (we want pairs) - std::sort(sortRecs.begin(), sortRecs.begin() + n); - for (size_t i = 0; i < end; i += 2) { - makeSiblingMC(neighbours[sortRecs[i].m_idx], neighbours[sortRecs[i + 1].m_idx]); - } + size_t end; // End index of pairs to add to candidates (exclusive) + + if (exhaustive || (shortestPrereqs.size() <= 2 * MAX_NONEXHAUSTIVE_PAIRS)) { + end = shortestPrereqs.size() & ~static_cast(1); // Round down to even + std::sort(shortestPrereqs.begin(), shortestPrereqs.end(), cmp); } else { - constexpr size_t end = 2 * MAX_NONEXHAUSTIVE_PAIRS; - std::partial_sort(sortRecs.begin(), sortRecs.begin() + end, sortRecs.begin() + n); - for (size_t i = 0; i < end; i += 2) { - makeSiblingMC(neighbours[sortRecs[i].m_idx], neighbours[sortRecs[i + 1].m_idx]); - } + end = 2 * MAX_NONEXHAUSTIVE_PAIRS; + std::partial_sort(shortestPrereqs.begin(), shortestPrereqs.begin() + end, + shortestPrereqs.end(), cmp); + } + + for (size_t i = 0; i < end; i += 2) { + makeSiblingMC(shortestPrereqs[i], shortestPrereqs[i + 1]); } } @@ -1993,15 +1850,17 @@ private: } // Move all vertices from donorp to mergedp mergedp->moveAllVerticesFrom(donorp); - // Redirect edges from donorp to recipientp, delete donorp - partRedirectEdgesFrom(m_mtasksp, mergedp, donorp, nullptr); + // Redirect edges from donorp to recipientp + partRedirectEdgesFrom(mergedp, donorp, nullptr); + // Remove donorp from the graph + VL_DO_DANGLING(donorp->unlinkDelete(m_mtasksp), donorp); ++m_mergesDone; } if (lastMergedp) { UASSERT_OBJ(lastMergedp->rank() < mergedp->rank(), mergedp, "Merging must be on lower rank"); - if (!lastMergedp->hasRelativeMTask(mergedp)) { + if (!lastMergedp->hasRelative(GraphWay::FORWARD, mergedp)) { new MTaskEdge(m_mtasksp, lastMergedp, mergedp, 1); } } @@ -2647,8 +2506,9 @@ void V3Partition::setupMTaskDeps(V3Graph* 
mtasksp, const Vx2MTaskMap* vx2mtaskp) UASSERT_OBJ(otherMTaskp != mtaskp, mtaskp, "Would create a cycle edge"); // Don't create redundant edges. - if (mtaskp->hasRelativeMTask(otherMTaskp)) continue; - + if (mtaskp->hasRelative(GraphWay::FORWARD, otherMTaskp)) { // + continue; + } new MTaskEdge(mtasksp, mtaskp, otherMTaskp, 1); } } diff --git a/src/V3Scoreboard.cpp b/src/V3Scoreboard.cpp index d21422a81..78d466596 100644 --- a/src/V3Scoreboard.cpp +++ b/src/V3Scoreboard.cpp @@ -19,42 +19,26 @@ #include "V3Scoreboard.h" -class ScoreboardTestElem; - -struct Key { - // Node: Structure layout chosen to minimize padding in PairingHeao<*>::Node - uint64_t m_id; // Unique ID part of edge score - uint32_t m_score; // Score part of ID - bool operator<(const Key& other) const { - // First by Score then by ID, but notice that we want minimums using a max-heap, so reverse - return m_score > other.m_score || (m_score == other.m_score && m_id > other.m_id); - } -}; - -using Scoreboard = V3Scoreboard; - -class ScoreboardTestElem final : public Scoreboard::Node { +class ScoreboardTestElem final { public: - uint32_t m_newScore; + // MEMBERS + uint32_t m_score; + uint32_t m_id; // CONSTRUCTORS explicit ScoreboardTestElem(uint32_t score) - : m_newScore{score} { - m_key.m_score = m_newScore; + : m_score{score} { static uint32_t s_serial = 0; - m_key.m_id = ++s_serial; + m_id = ++s_serial; } ScoreboardTestElem() = default; + // METHODS + static uint32_t scoreFn(const ScoreboardTestElem* elp) { return elp->m_score; } - uint64_t id() const { return m_key.m_id; } - void rescore() { m_key.m_score = m_newScore; } - uint32_t score() const { return m_key.m_score; } - static ScoreboardTestElem* heapNodeToElem(Scoreboard::Node* nodep) { - return static_cast(nodep); - } + bool operator<(const ScoreboardTestElem& other) const { return m_id < other.m_id; } }; void V3ScoreboardBase::selfTest() { - Scoreboard sb; + V3Scoreboard sb(ScoreboardTestElem::scoreFn, true); UASSERT(!sb.needsRescore(), 
"SelfTest: Empty sb should not need rescore."); @@ -62,13 +46,13 @@ void V3ScoreboardBase::selfTest() { ScoreboardTestElem e2(20); ScoreboardTestElem e3(30); - sb.add(&e1); - sb.add(&e2); - sb.add(&e3); + sb.addElem(&e1); + sb.addElem(&e2); + sb.addElem(&e3); UASSERT(sb.needsRescore(), "SelfTest: Newly filled sb should need a rescore."); UASSERT(sb.needsRescore(&e1), "SelfTest: Individual newly-added element should need rescore"); - UASSERT(nullptr == sb.best(), + UASSERT(nullptr == sb.bestp(), "SelfTest: Newly filled sb should have nothing eligible for Bestp()"); sb.rescore(); @@ -76,22 +60,24 @@ void V3ScoreboardBase::selfTest() { UASSERT(!sb.needsRescore(), "SelfTest: Newly rescored sb should not need rescore"); UASSERT(!sb.needsRescore(&e1), "SelfTest: Newly rescored sb should not need an element rescored"); - UASSERT(&e1 == sb.best(), "SelfTest: Should return element with lowest (best) score"); + UASSERT(e2.m_score == sb.cachedScore(&e2), + "SelfTest: Cached score should match current score"); + UASSERT(&e1 == sb.bestp(), "SelfTest: Should return element with lowest (best) score"); // Change one element's score sb.hintScoreChanged(&e2); - e2.m_newScore = 21; + e2.m_score = 21; UASSERT(sb.needsRescore(&e2), "SelfTest: Should need rescore on elem after hintScoreChanged"); // Remove an element UASSERT(sb.contains(&e1), "SelfTest: e1 should be there"); - sb.remove(&e1); + sb.removeElem(&e1); UASSERT(!sb.contains(&e1), "SelfTest: e1 should be gone"); UASSERT(sb.contains(&e2), "SelfTest: e2 should be there, despite needing rescore"); // Now e3 should be our best-scoring element, even though // e2 has a better score, since e2 is pending rescore. 
- UASSERT(&e3 == sb.best(), "SelfTest: Expect e3 as best element with known score."); + UASSERT(&e3 == sb.bestp(), "SelfTest: Expect e3 as best element with known score."); sb.rescore(); - UASSERT(&e2 == sb.best(), "SelfTest: Expect e2 as best element again after Rescore"); + UASSERT(&e2 == sb.bestp(), "SelfTest: Expect e2 as best element again after Rescore"); } diff --git a/src/V3Scoreboard.h b/src/V3Scoreboard.h index 4bf915431..dc5fce0b0 100644 --- a/src/V3Scoreboard.h +++ b/src/V3Scoreboard.h @@ -1,6 +1,13 @@ // -*- mode: C++; c-file-style: "cc-mode" -*- //************************************************************************* -// DESCRIPTION: Verilator: Scoreboard for mtask coarsening +// DESCRIPTION: Verilator: Scoreboards for thread partitioner +// +// Provides scoreboard classes: +// +// * SortByValueMap +// * V3Scoreboard +// +// See details below // // Code available from: https://verilator.org // @@ -21,122 +28,248 @@ #include "verilatedos.h" #include "V3Error.h" -#include "V3PairingHeap.h" -//=============================================================================================== -// V3Scoreboard is essentially a heap that can be hinted that some elements have changed keys, at -// which points those elements will be deferred as 'unknown' until the next 'rescore' call. We -// largely reuse the implementation of the slightly more generic PairingHeap, but we do rely on the -// internal structure of the PairingHeap so changing that class requires changing this. -// -// For efficiency, the elements themselves must be the heap nodes, by deriving them from -// V3Scoreboard::Node. This also means a single element can only be associated with -// a single scoreboard. +#include +#include +#include +#include + +// ###################################################################### +// SortByValueMap + +// A generic key-value map, except iteration is in *value* sorted order. Values need not be unique. 
+// Uses T_KeyCompare to break ties in the sort when values collide. Note: Only const iteration is +// possible, as updating mapped values via iterators is not safe. + +template > +class SortByValueMap final { + // Current implementation is a std::set of key/value pairs, plus a std::unordered_map from keys + // to iterators into the set. This keeps most operations fairly cheap and also has the benefit + // of being able to re-use the std::set iterators. -template -class V3Scoreboard final { // TYPES - using Heap = PairingHeap; + + using Pair = std::pair; + + struct PairCmp final { + bool operator()(const Pair& a, const Pair& b) const { + // First compare values + if (a.second != b.second) return a.second < b.second; + // Then compare keys + return T_KeyCompare{}(a.first, b.first); + } + }; + + using PairSet = std::set; public: - using Node = typename Heap::Node; + using const_iterator = typename PairSet::const_iterator; + using const_reverse_iterator = typename PairSet::const_reverse_iterator; private: - using Link = typename Heap::Link; - - // Note: T_Elem is incomplete here, so we cannot assert 'std::is_base_of::value' - // MEMBERS - Heap m_known; // The heap of entries with known scores - Link m_unknown; // List of entries with unknown scores + PairSet m_pairs; // The contents of the map, stored directly as key-value pairs + std::unordered_map m_kiMap; // Key to iterator map + + VL_UNCOPYABLE(SortByValueMap); public: // CONSTRUCTORS - explicit V3Scoreboard() = default; - ~V3Scoreboard() = default; + SortByValueMap() = default; -private: - VL_UNCOPYABLE(V3Scoreboard); + // Only const iteration is possible + const_iterator begin() const { return m_pairs.begin(); } + const_iterator end() const { return m_pairs.end(); } + const_iterator cbegin() const { m_pairs.cbegin(); } + const_iterator cend() const { return m_pairs.cend(); } + const_reverse_iterator rbegin() const { return m_pairs.rbegin(); } + const_reverse_iterator rend() const { return m_pairs.rend(); } +
const_reverse_iterator crbegin() const { return m_pairs.crbegin(); } + const_reverse_iterator crend() const { return m_pairs.crend(); } - // METHODSs - void addUnknown(T_Elem* nodep) { - // Just prepend it to the list of unknown entries - nodep->m_next.link(m_unknown.unlink()); - m_unknown.linkNonNull(nodep); - // We mark nodes on the unknown list by making their child pointer point to themselves - nodep->m_kids.m_ptr = nodep; + const_iterator find(const T_Key& key) const { + const auto kiIt = m_kiMap.find(key); + if (kiIt == m_kiMap.end()) return cend(); + return kiIt->second; } - -public: - // Returns true if the element is present in the scoreboard, false otherwise. Every other - // method that takes a T_Elem* (except for 'add') has undefined behavior if the element is not - // in this scoreboard. Furthermore, this method is only valid if the element can only possibly - // be in this scoreboard. That is: if the element might be in another scoreboard, the behaviour - // of this method is undefined. - static bool contains(const T_Elem* nodep) { return nodep->m_ownerpp; } - - // Add an element to the scoreboard. This will not be returned before the next 'rescore' call. - void add(T_Elem* nodep) { + size_t erase(const T_Key& key) { + const auto kiIt = m_kiMap.find(key); + if (kiIt == m_kiMap.end()) return 0; + m_pairs.erase(kiIt->second); + m_kiMap.erase(kiIt); + return 1; + } + void erase(const_iterator it) { + m_kiMap.erase(it->first); + m_pairs.erase(it); + } + void erase(const_reverse_iterator rit) { + m_kiMap.erase(rit->first); + m_pairs.erase(std::next(rit).base()); + } + bool has(const T_Key& key) const { return m_kiMap.count(key); } + bool empty() const { return m_pairs.empty(); } + // Returns const reference. + const T_Value& at(const T_Key& key) const { return m_kiMap.at(key)->second; } + // Note this returns const_iterator + template + std::pair emplace(const T_Key& key, Args&&... 
args) { + const auto kiEmp = m_kiMap.emplace(key, end()); + if (kiEmp.second) { + const auto result = m_pairs.emplace(key, std::forward(args)...); #if VL_DEBUG - UASSERT(!contains(nodep), "Adding element to scoreboard that was already in a scoreboard"); + UASSERT(result.second, "Should not be in set yet"); #endif - addUnknown(nodep); - } - - // Remove element from scoreboard. - void remove(T_Elem* nodep) { - if (nodep->m_kids.m_ptr == nodep) { - // Node is on the unknown list, replace with next - nodep->replaceWith(nodep->m_next.unlink()); - return; + kiEmp.first->second = result.first; + return result; } - // Node is in the known heap, remove it - m_known.remove(nodep); + return {kiEmp.first->second, false}; } - - // Get the known element with the highest score (as we are using a max-heap), or nullptr if - // there are no elements with known entries. This does not automatically 'rescore'. The client - // must call 'rescore' appropriately to ensure all elements in the scoreboard are reflected in - // the result of this method. - T_Elem* best() const { return T_Elem::heapNodeToElem(m_known.max()); } - - // Tell the scoreboard that this element's score may have changed. At the time of this call, - // the element's score becomes 'unknown' to the scoreboard. Unknown elements will not be - // returned by 'best until the next call to 'rescore'. - void hintScoreChanged(T_Elem* nodep) { - // If it's already in the unknown list, then nothing to do - if (nodep->m_kids.m_ptr == nodep) return; - // Otherwise it was in the heap, remove it - m_known.remove(nodep); - // Prepend it to the unknown list - addUnknown(nodep); - } - - // True if we have elements with unknown score - bool needsRescore() const { return m_unknown; } - - // True if the element's score is unknown, false otherwise. 
- static bool needsRescore(const T_Elem* nodep) { return nodep->m_kids.m_ptr == nodep; } - - // For each element whose score is unknown, recompute the score and add to the known heap - void rescore() { - // Rescore and insert all unknown elements - for (Node *nodep = m_unknown.unlink(), *nextp; nodep; nodep = nextp) { - // Pick up next - nextp = nodep->m_next.ptr(); - // Reset pointers - nodep->m_next.m_ptr = nullptr; - nodep->m_kids.m_ptr = nullptr; - nodep->m_ownerpp = nullptr; - // Re-compute the score of the element - T_Elem::heapNodeToElem(nodep)->rescore(); - // re-insert into the heap - m_known.insert(nodep); - } + // Invalidates iterators + void update(const_iterator it, T_Value value) { + const auto kiIt = m_kiMap.find(it->first); + m_pairs.erase(it); + kiIt->second = m_pairs.emplace(kiIt->first, value).first; } }; -// ###################################################################### +//###################################################################### + +/// V3Scoreboard takes a set of Elem*'s, each having some score. +/// Scores are assigned by a user-supplied scoring function. +/// +/// At any time, the V3Scoreboard can return the elem with the "best" score +/// among those elements whose scores are known. +/// +/// The best score is the _lowest_ score. This makes sense in contexts +/// where scores represent costs. +/// +/// The Scoreboard supports mutating element scores efficiently. The client +/// must hint to the V3Scoreboard when an element's score may have +/// changed. When it receives this hint, the V3Scoreboard will move the +/// element into the set of elements whose scores are unknown. Later the +/// client can tell V3Scoreboard to re-sort the list, which it does +/// incrementally, by re-scoring all elements whose scores are unknown, and +/// then moving these back into the score-sorted map. This is efficient +/// when the subset of elements whose scores change is much smaller than +/// the full set size.
+ +template > +class V3Scoreboard final { +private: + // TYPES + class CmpElems final { + public: + bool operator()(const T_Elem* const& ap, const T_Elem* const& bp) const { + const T_ElemCompare cmp; + return cmp.operator()(*ap, *bp); + } + }; + using SortedMap = SortByValueMap; + using UserScoreFnp = T_Score (*)(const T_Elem*); + + // MEMBERS + // Below uses set<> not an unordered_set<>. unordered_set::clear() and + // construction results in a 491KB clear operation to zero all the + // buckets. Since the set size is generally small, and we iterate the + // set members, set is better performant. + std::set m_unknown; // Elements with unknown scores + SortedMap m_sorted; // Set of elements with known scores + const UserScoreFnp m_scoreFnp; // Scoring function + const bool m_slowAsserts; // Do some asserts that require extra lookups + +public: + // CONSTRUCTORS + explicit V3Scoreboard(UserScoreFnp scoreFnp, bool slowAsserts) + : m_scoreFnp{scoreFnp} + , m_slowAsserts{slowAsserts} {} + ~V3Scoreboard() = default; + + // METHODS + + // Add an element to the scoreboard. + // Element begins in needs-rescore state; it won't be returned by + // bestp() until after the next rescore(). + void addElem(const T_Elem* elp) { + if (m_slowAsserts) { + UASSERT(!contains(elp), "Adding element to scoreboard that was already in scoreboard"); + } + m_unknown.insert(elp); + } + + // Remove elp from scoreboard. + void removeElem(const T_Elem* elp) { + if (0 == m_sorted.erase(elp)) { + UASSERT(m_unknown.erase(elp), + "Could not find requested elem to remove from scoreboard"); + } + } + + // Returns true if elp is present in the scoreboard, false otherwise. + // + // Note: every other V3Scoreboard routine that takes an T_Elem* has + // undefined behavior if the element is not in the scoreboard. 
+ bool contains(const T_Elem* elp) const { + if (m_unknown.find(elp) != m_unknown.end()) return true; + return (m_sorted.find(elp) != m_sorted.end()); + } + + // Get the best element, with the lowest score (lower is better), among + // elements whose scores are known. Returns nullptr if no elements with + // known scores exist. + // + // Note: This does not automatically rescore. Client must call + // rescore() periodically to ensure all elems in the scoreboard are + // reflected in the result of bestp(). Otherwise, bestp() only + // considers elements that aren't pending rescore. + const T_Elem* bestp() { + const auto it = m_sorted.begin(); + if (VL_UNLIKELY(it == m_sorted.end())) return nullptr; + return it->first; + } + + // Tell the scoreboard that this element's score may have changed. + // + // At the time of this call, the element's score becomes "unknown" + // to the V3Scoreboard. Unknown elements won't be returned by bestp(). + // The element's score will remain unknown until the next rescore(). + // + // The client MUST call this for each element whose score has changed. + // + // The client MAY call this for elements whose score has not changed. + // Doing so incurs some compute cost (to re-sort the element back to + // its original location) and still makes it ineligible to be returned + // by bestp() until the next rescore(). + void hintScoreChanged(const T_Elem* elp) { + m_unknown.insert(elp); + m_sorted.erase(elp); + } + + // True if any element's score is unknown to V3Scoreboard. + bool needsRescore() { return !m_unknown.empty(); } + // False if elp's score is known to V3Scoreboard, + // else true if elp's score is unknown until the next rescore(). + bool needsRescore(const T_Elem* elp) { return m_unknown.count(elp); } + // Retrieve the last known score for an element. 
+ T_Score cachedScore(const T_Elem* elp) { return m_sorted.at(elp); } + // For each element whose score is unknown to V3Scoreboard, + // call the client's scoring function to get a new score, + // and sort all elements by their current score. + void rescore() { + for (const T_Elem* elp : m_unknown) { + VL_ATTR_UNUSED const bool exists = !m_sorted.emplace(elp, m_scoreFnp(elp)).second; +#if VL_DEBUG + UASSERT(!exists, "Should not be in both m_unknown and m_sorted"); +#endif + } + m_unknown.clear(); + } + +private: + VL_UNCOPYABLE(V3Scoreboard); +}; + +//###################################################################### namespace V3ScoreboardBase { void selfTest(); From 90dc04cf93f09fab1f5284e27e9d457b6566ed64 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 20 Aug 2022 14:01:13 -0400 Subject: [PATCH 103/119] Add --future0 and --future1 options. --- Changes | 1 + docs/guide/exe_verilator.rst | 24 ++++++++++++++++++++++++ src/V3Options.cpp | 15 +++++++++++++++ src/V3Options.h | 6 ++++++ test_regress/t/t_flag_future.pl | 2 +- 5 files changed, 47 insertions(+), 1 deletion(-) diff --git a/Changes b/Changes index 65ee0fd39..752f4c15c 100644 --- a/Changes +++ b/Changes @@ -13,6 +13,7 @@ Verilator 4.225 devel **Minor:** +* Add --future0 and --future1 options. * Fix incorrect bit op tree optimization (#3470). [algrobman] * Fix empty string arguments to display (#3484). [Grulfen] * Fix table misoptimizing away display (#3488). [Stefan Post] diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index 5e1c6282f..0348c4db2 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -491,6 +491,30 @@ Summary: are typically used only when recommended by a maintainer to help debug or work around an issue. +.. option:: -future0