From c52f3349d1e473426115c35ba2edccaf02c4b55a Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 21 Apr 2020 23:49:07 +0100 Subject: [PATCH] Initial implementation of generic multithreaded tracing (#2269) The --trace-threads option can now be used to perform tracing on a thread separate from the main thread when using VCD tracing (with --trace-threads 1). For FST tracing --trace-threads can be 1 or 2, and --trace-fst --trace-threads 1 is the same as what --trace-fst-thread used to be (which is now deprecated). Performance numbers on SweRV EH1 CoreMark, clang 6.0.0, Intel i7-3770 @ 3.40GHz, IO to ramdisk, with numactl set to schedule threads on different physical cores. Relative speedup: --trace -> --trace --trace-threads 1 +22% --trace-fst -> --trace-fst --trace-threads 1 +38% (as --trace-fst-thread) --trace-fst -> --trace-fst --trace-threads 2 +93% Speed relative to --trace with no threaded tracing: --trace 1.00 x --trace --trace-threads 1 0.82 x --trace-fst 1.79 x --trace-fst --trace-threads 1 1.23 x --trace-fst --trace-threads 2 0.87 x This means FST tracing with 2 extra threads is now faster than single threaded VCD tracing, and is on par with threaded VCD tracing. You do pay for it in total compute though as --trace-fst --trace-threads 2 uses about 240% CPU vs 150% for --trace-fst --trace-threads 1, and 155% for --trace --trace-threads 1. Still, for interactive use it should be helpful with large designs. 
--- Changes | 2 + bin/verilator | 50 +++- include/verilated.mk.in | 16 +- include/verilated_fst_c.cpp | 17 +- include/verilated_fst_c.h | 8 +- include/verilated_trace.h | 197 +++++++++++- include/verilated_trace_imp.cpp | 281 +++++++++++++++++- include/verilated_vcd_c.cpp | 6 + include/verilated_vcd_c.h | 2 +- src/V3EmitC.cpp | 4 +- src/V3EmitMk.cpp | 21 +- src/V3Options.cpp | 14 +- src/V3Options.h | 11 +- test_regress/driver.pl | 6 +- test_regress/t/t_trace_array_fst_threads_1.pl | 27 ++ test_regress/t/t_trace_array_fst_threads_2.pl | 27 ++ test_regress/t/t_trace_array_threads_1.pl | 26 ++ ...ad.pl => t_trace_complex_fst_threads_1.pl} | 2 +- .../t/t_trace_complex_fst_threads_2.pl | 27 ++ test_regress/t/t_trace_complex_old_api.pl | 2 +- test_regress/t/t_trace_complex_threads_1.pl | 37 +++ test_regress/t/t_trace_two_dumpfst_cc.pl | 4 +- test_regress/t/t_trace_two_hdrfst_cc.pl | 4 +- test_regress/t/t_trace_two_portfst_cc.pl | 4 +- test_regress/t/t_verilated_all.pl | 2 + 25 files changed, 735 insertions(+), 62 deletions(-) create mode 100755 test_regress/t/t_trace_array_fst_threads_1.pl create mode 100755 test_regress/t/t_trace_array_fst_threads_2.pl create mode 100755 test_regress/t/t_trace_array_threads_1.pl rename test_regress/t/{t_trace_complex_fst_thread.pl => t_trace_complex_fst_threads_1.pl} (91%) create mode 100755 test_regress/t/t_trace_complex_fst_threads_2.pl create mode 100755 test_regress/t/t_trace_complex_threads_1.pl diff --git a/Changes b/Changes index cefd3e574..483d49ecd 100644 --- a/Changes +++ b/Changes @@ -20,6 +20,8 @@ The contributors that suggested a given feature are shown in []. Thanks! ** Fix DPI import/export to be standard compliant, #2236. [Geza Lore] +** Add --trace-threads for general multithreaded tracing, #2269. [Geza Lore] + *** Add --flatten for use with --xml-only, #2270. [James Hanlon] **** Support $ferror, and $fflush without arguments, #1638. 
diff --git a/bin/verilator b/bin/verilator index 3fa55b771..7d0b2c5c0 100755 --- a/bin/verilator +++ b/bin/verilator @@ -377,14 +377,14 @@ detailed descriptions in L for more information. --timescale-override Overrides all timescales --top-module Name of top level input module --trace Enable waveform creation - --trace-depth Depth of tracing --trace-coverage Enable tracing of coverage + --trace-depth Depth of tracing --trace-fst Enable FST waveform creation - --trace-fst-thread Enable FST threaded waveform creation --trace-max-array Maximum bit width for tracing --trace-max-width Maximum array depth for tracing --trace-params Enable tracing of parameters --trace-structs Enable tracing structure names + --trace-threads Enable waveform creation on separate threads --trace-underscore Enable tracing of _signals -U Undefine preprocessor define --unroll-count Tune maximum loop iterations @@ -1477,6 +1477,8 @@ need to add these to your Makefile manually. Having tracing compiled in may result in some small performance losses, even when waveforms are not turned on during model execution. +See also C<--trace-threads>. + =item --trace-coverage With --trace and --coverage-*, enable tracing to include a traced signal @@ -1498,14 +1500,8 @@ improve simulation runtime and trace file size. =item --trace-fst -Enable FST waveform tracing in the model. This overrides C<--trace> and -C<--trace-fst-thread>. See also C<--trace-fst-thread>. - -=item --trace-fst-thread - -Enable FST waveform tracing in the model, using a separate thread. This is -typically faster in simulation runtime but slower in total computes than -C<--trace-fst>. This overrides C<--trace> and C<--trace-fst>. +Enable FST waveform tracing in the model. This overrides C<--trace>. +See also C<--trace-threads>. =item --trace-max-array I @@ -1530,6 +1526,15 @@ array fields, rather than a single combined packed bus. 
Due to VCD file format constraints this may result in significantly slower trace times and larger trace files. +=item --trace-threads I<threads> + +Enable waveform tracing using separate threads. This is typically faster in +simulation runtime but uses more total compute. This option is independent of, +and works with, both C<--trace> and C<--trace-fst>. Different trace formats can +take advantage of more trace threads to varying degrees. Currently VCD tracing +can utilize at most --trace-threads 1, and FST tracing can utilize at most +--trace-threads 2. This overrides C<--no-threads>. + =item --trace-underscore Enable tracing of signals that start with an underscore. Normally, these @@ -2773,7 +2778,7 @@ performance. With --threads 1, the generated model is single threaded, however the support libraries are multithread safe. This allows different instantiations of model(s) to potentially each be run under a different -thread. All threading is the responsibility of the user's C++ testbench. +thread. All threading is the responsibility of the user's C++ testbench. With --threads N, where N is at least 2, the generated model will be designed to run in parallel on N threads. The thread calling eval() @@ -2784,9 +2789,6 @@ Verilated model should not livelock nor deadlock, however, you can expect performance to be far worse than it would be with proper ratio of threads and CPU cores. -With --trace-fst-thread, tracing occurs in a separate thread from the main -simulation thread(s). This option is orthogonal to --threads. - The remainder of this section describe behavior with --threads 1 or --threads N (not --no-threads). @@ -2800,12 +2802,28 @@ be done by a "main thread". In most cases the eval thread and main thread are the same thread (i.e. the user's top C++ testbench runs on a single thread), but this is not required. +The --trace-threads option can be used to produce trace dumps using multiple +threads. 
If --trace-threads is set without --threads, then --trace-threads will +imply --threads 1, i.e. the support libraries will be thread safe. + +With --trace-threads 0, trace dumps are produced on the main thread. This again +gives the highest single thread performance. + +With --trace-threads N, where N is at least 1, N additional threads will be +created and managed by the trace files (e.g. VerilatedVcdC or VerilatedFstC), +to generate the trace dump. The main thread will be released to proceed with +execution as soon as possible, though some blocking of the main thread is still +necessary while capturing the trace. Different trace formats can utilize a +varying number of threads. See the --trace-threads option. + When running a multithreaded model, the default Linux task scheduler often works against the model, by assuming threads are short lived, and thus often schedules threads using multiple hyperthreads within the same physical core. For best performance use the C<numactl> program to (when the -threading count fits) select unique physical cores on the same socket. For -example, if a model was Verilated with "--threads 4", we consult +threading count fits) select unique physical cores on the same socket. The +same applies for --trace-threads as well. 
+ +As an example, if a model was Verilated with "--threads 4", we consult egrep 'processor|physical id|core id' /proc/cpuinfo diff --git a/include/verilated.mk.in b/include/verilated.mk.in index 833d185c1..1966bf956 100644 --- a/include/verilated.mk.in +++ b/include/verilated.mk.in @@ -129,14 +129,26 @@ ifneq ($(VM_THREADS),0) endif endif -ifneq ($(VM_TRACE_THREADED),0) - ifneq ($(VM_TRACE_THREADED),) +ifneq ($(VM_TRACE_THREADS),0) + ifneq ($(VM_TRACE_THREADS),) + ifeq ($(findstring -DVL_THREADED,$(CPPFLAGS)),) + $(error VM_TRACE_THREADS requires VM_THREADS) + endif CPPFLAGS += -DVL_TRACE_THREADED VK_C11=1 VK_LIBS_THREADED=1 endif endif + +ifneq ($(VM_TRACE_FST_WRITER_THREAD),0) + ifneq ($(VM_TRACE_FST_WRITER_THREAD),) + CPPFLAGS += -DVL_TRACE_FST_WRITER_THREAD + VK_C11=1 + VK_LIBS_THREADED=1 + endif +endif + ifneq ($(VK_C11),0) ifneq ($(VK_C11),) # Need C++11 at least, so always default to newest diff --git a/include/verilated_fst_c.cpp b/include/verilated_fst_c.cpp index a128639ca..1e1d44434 100644 --- a/include/verilated_fst_c.cpp +++ b/include/verilated_fst_c.cpp @@ -24,7 +24,7 @@ #include "verilated_fst_c.h" // GTKWave configuration -#ifdef VL_TRACE_THREADED +#ifdef VL_TRACE_FST_WRITER_THREAD # define HAVE_LIBPTHREAD # define FST_WRITER_PARALLEL #endif @@ -76,9 +76,10 @@ void VerilatedFst::open(const char* filename) VL_MT_UNSAFE { m_fst = fstWriterCreate(filename, 1); fstWriterSetPackType(m_fst, FST_WR_PT_LZ4); fstWriterSetTimescaleFromString(m_fst, timeResStr().c_str()); // lintok-begin-on-ref -#ifdef VL_TRACE_THREADED +#ifdef VL_TRACE_FST_WRITER_THREAD fstWriterSetParallelMode(m_fst, 1); #endif + m_curScope.clear(); VerilatedTrace::traceInit(); @@ -101,6 +102,18 @@ void VerilatedFst::open(const char* filename) VL_MT_UNSAFE { m_code2symbol.clear(); } +void VerilatedFst::close() { + m_assertOne.check(); + VerilatedTrace::close(); + fstWriterClose(m_fst); + m_fst = NULL; +} + +void VerilatedFst::flush() { + VerilatedTrace::flush(); + 
fstWriterFlushContext(m_fst); +} + void VerilatedFst::emitTimeChange(vluint64_t timeui) { fstWriterEmitTimeChange(m_fst, timeui); } //============================================================================= diff --git a/include/verilated_fst_c.h b/include/verilated_fst_c.h index 9a8c7eb0e..8572a0f5f 100644 --- a/include/verilated_fst_c.h +++ b/include/verilated_fst_c.h @@ -85,13 +85,9 @@ public: /// Open the file; call isOpen() to see if errors void open(const char* filename) VL_MT_UNSAFE; /// Close the file - void close() VL_MT_UNSAFE { - m_assertOne.check(); - fstWriterClose(m_fst); - m_fst = NULL; - } + void close() VL_MT_UNSAFE; /// Flush any remaining data to this file - void flush() VL_MT_UNSAFE { fstWriterFlushContext(m_fst); } + void flush() VL_MT_UNSAFE; /// Is file open? bool isOpen() const { return m_fst != NULL; } diff --git a/include/verilated_trace.h b/include/verilated_trace.h index 8be887734..a611a436c 100644 --- a/include/verilated_trace.h +++ b/include/verilated_trace.h @@ -20,11 +20,99 @@ #ifndef _VERILATED_TRACE_H_ #define _VERILATED_TRACE_H_ 1 +// clang-format off + #include "verilated.h" #include #include +#ifdef VL_TRACE_THREADED +# include +# include +# include +#endif + +// clang-format on + +#ifdef VL_TRACE_THREADED +//============================================================================= +// Threaded tracing + +// A simple synchronized first in first out queue +template class VerilatedThreadQueue { +private: + VerilatedMutex m_mutex; // Protects m_queue + std::condition_variable_any m_cv; + std::deque m_queue VL_GUARDED_BY(m_mutex); + +public: + // Put an element at the back of the queue + void put(T value) { + VerilatedLockGuard lock(m_mutex); + m_queue.push_back(value); + m_cv.notify_one(); + } + + // Put an element at the front of the queue + void put_front(T value) { + VerilatedLockGuard lock(m_mutex); + m_queue.push_front(value); + m_cv.notify_one(); + } + + // Get an element from the front of the queue. 
Blocks if none available + T get() { + VerilatedLockGuard lock(m_mutex); + m_cv.wait(lock, [this]() VL_REQUIRES(m_mutex) { return !m_queue.empty(); }); + assert(!m_queue.empty()); + T value = m_queue.front(); + m_queue.pop_front(); + return value; + } + + // Non blocking get + bool tryGet(T& result) { + VerilatedLockGuard lockGuard(m_mutex); + if (m_queue.empty()) { return false; } + result = m_queue.front(); + m_queue.pop_front(); + return true; + } +}; + +// Commands used by thread tracing. Note that the bottom 8 bits of all these +// values are empty and are used to store small amounts of additional command +// parameters. Anonymous enum in class, as we want it scoped, but we also +// want the automatic conversion to integer types. +class VerilatedTraceCommand { +public: + enum { + CHG_BIT = 0x0000, + CHG_BUS = 0x0100, + CHG_QUAD = 0x0200, + CHG_ARRAY = 0x0300, + CHG_FLOAT = 0x0400, + CHG_DOUBLE = 0x0500, + TIME_CHANGE = 0x8000, + END = 0xf000, // End of buffer + SHUTDOWN = 0xf200 // Shutdown worker thread, also marks end of buffer + }; +}; + +typedef union { + vluint32_t cmd; // Command code + params in bottom 8 bits + vluint32_t* oldp; // Pointer to previous value buffer to consult/update + // Note: These are 64-bit wide, as this union already has a pointer type in it. 
+ vluint64_t params; // Command parameter + // New signal value in various forms + vluint64_t newBits; + float newFloat; + double newDouble; + vluint64_t timeui; +} VerilatedTraceEntry; +#endif + class VerilatedTraceCallInfo; //============================================================================= @@ -43,6 +131,7 @@ private: std::vector m_callbacks; ///< Routines to perform dumping bool m_fullDump; ///< Whether a full dump is required on the next call to 'dump' vluint32_t m_nextCode; ///< Next code number to assign + vluint32_t m_numSignals; ///< Number of distinct signals std::string m_moduleName; ///< Name of module being trace initialized now char m_scopeEscape; double m_timeRes; ///< Time resolution (ns/ms etc) @@ -52,6 +141,40 @@ private: // to access duck-typed functions to avoid a virtual function call. T_Derived* self() { return static_cast(this); } +#ifdef VL_TRACE_THREADED + // Number of total trace buffers that have been allocated + vluint32_t m_numTraceBuffers; + + // Size of trace buffers + size_t m_traceBufferSize; + + // Buffers handed to worker for processing + VerilatedThreadQueue m_buffersToWorker; + // Buffers returned from worker after processing + VerilatedThreadQueue m_buffersFromWorker; + + // Get a new trace buffer that can be populated. 
May block if none available + VerilatedTraceEntry* getTraceBuffer(); + + // Write pointer into current buffer + VerilatedTraceEntry* m_traceBufferWritep; + + // End of trace buffer + VerilatedTraceEntry* m_traceBufferEndp; + + // The worker thread itself + std::unique_ptr m_workerThread; + + // The function executed by the worker thread + void workerThreadMain(); + + // Wait until given buffer is placed in m_buffersFromWorker + void waitForBuffer(const VerilatedTraceEntry* bufferp); + + // Shut down and join worker, if it's running, otherwise do nothing + void shutdownWorker(); +#endif + // CONSTRUCTORS VL_UNCOPYABLE(VerilatedTrace); @@ -62,6 +185,7 @@ protected: VerilatedAssertOneThread m_assertOne; ///< Assert only called from single thread vluint32_t nextCode() const { return m_nextCode; } + vluint32_t numSignals() const { return m_numSignals; } const std::string& moduleName() const { return m_moduleName; } void fullDump(bool value) { m_fullDump = value; } vluint64_t timeLastDump() { return m_timeLastDump; } @@ -80,6 +204,9 @@ protected: /// Character that splits scopes. Note whitespace are ALWAYS escapes. char scopeEscape() { return m_scopeEscape; } + void close(); + void flush(); + //========================================================================= // Virtual functions to be provided by the format specific implementation @@ -151,20 +278,76 @@ public: void fullFloat(vluint32_t* oldp, float newval); void fullDouble(vluint32_t* oldp, double newval); - // Check previous dumped value of signal. If changed, then emit trace entry +#ifdef VL_TRACE_THREADED + // Threaded tracing. 
Just dump everything in the trace buffer inline void chgBit(vluint32_t* oldp, vluint32_t newval) { + m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BIT | newval; + m_traceBufferWritep[1].oldp = oldp; + m_traceBufferWritep += 2; + VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); + } + template inline void chgBus(vluint32_t* oldp, vluint32_t newval) { + m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_BUS | T_Bits; + m_traceBufferWritep[1].oldp = oldp; + m_traceBufferWritep[2].newBits = newval; + m_traceBufferWritep += 3; + VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); + } + inline void chgQuad(vluint32_t* oldp, vluint64_t newval, int bits) { + m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_QUAD | bits; + m_traceBufferWritep[1].oldp = oldp; + m_traceBufferWritep[2].newBits = newval; + m_traceBufferWritep += 3; + VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); + } + inline void chgArray(vluint32_t* oldp, const vluint32_t* newvalp, int bits) { + m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_ARRAY; + m_traceBufferWritep[1].oldp = oldp; + m_traceBufferWritep[2].params = bits; + m_traceBufferWritep += 3; + vluint32_t* const wp = reinterpret_cast(m_traceBufferWritep); + for (int i = 0; i < (bits + 31) / 32; ++i) { wp[i] = newvalp[i]; } + m_traceBufferWritep += (bits + 63) / 64; + VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); + } + inline void chgFloat(vluint32_t* oldp, float newval) { + m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_FLOAT; + m_traceBufferWritep[1].oldp = oldp; + m_traceBufferWritep[2].newFloat = newval; + m_traceBufferWritep += 3; + VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); + } + inline void chgDouble(vluint32_t* oldp, double newval) { + m_traceBufferWritep[0].cmd = VerilatedTraceCommand::CHG_DOUBLE; + m_traceBufferWritep[1].oldp = oldp; + m_traceBufferWritep[2].newDouble = newval; + m_traceBufferWritep += 3; + 
VL_DEBUG_IF(assert(m_traceBufferWritep <= m_traceBufferEndp);); + } + +#define CHG(name) chg##name##Impl +#else +#define CHG(name) chg##name +#endif + + // In non-threaded mode, these are called directly by the trace callbacks, + // and are called chg*. In threaded mode, they are called by the worker + // thread and are called chg*Impl + + // Check previous dumped value of signal. If changed, then emit trace entry + inline void CHG(Bit)(vluint32_t* oldp, vluint32_t newval) { const vluint32_t diff = *oldp ^ newval; if (VL_UNLIKELY(diff)) fullBit(oldp, newval); } - template inline void chgBus(vluint32_t* oldp, vluint32_t newval) { + template inline void CHG(Bus)(vluint32_t* oldp, vluint32_t newval) { const vluint32_t diff = *oldp ^ newval; if (VL_UNLIKELY(diff)) fullBus(oldp, newval); } - inline void chgQuad(vluint32_t* oldp, vluint64_t newval, int bits) { + inline void CHG(Quad)(vluint32_t* oldp, vluint64_t newval, int bits) { const vluint64_t diff = *reinterpret_cast(oldp) ^ newval; if (VL_UNLIKELY(diff)) fullQuad(oldp, newval, bits); } - inline void chgArray(vluint32_t* oldp, const vluint32_t* newvalp, int bits) { + inline void CHG(Array)(vluint32_t* oldp, const vluint32_t* newvalp, int bits) { for (int i = 0; i < (bits + 31) / 32; ++i) { if (VL_UNLIKELY(oldp[i] ^ newvalp[i])) { fullArray(oldp, newvalp, bits); @@ -172,13 +355,15 @@ public: } } } - inline void chgFloat(vluint32_t* oldp, float newval) { + inline void CHG(Float)(vluint32_t* oldp, float newval) { // cppcheck-suppress invalidPointerCast if (VL_UNLIKELY(*reinterpret_cast(oldp) != newval)) fullFloat(oldp, newval); } - inline void chgDouble(vluint32_t* oldp, double newval) { + inline void CHG(Double)(vluint32_t* oldp, double newval) { // cppcheck-suppress invalidPointerCast if (VL_UNLIKELY(*reinterpret_cast(oldp) != newval)) fullDouble(oldp, newval); } + +#undef CHG }; #endif // guard diff --git a/include/verilated_trace_imp.cpp b/include/verilated_trace_imp.cpp index 537b687dd..b5b6fc924 100644 --- 
a/include/verilated_trace_imp.cpp +++ b/include/verilated_trace_imp.cpp @@ -25,6 +25,13 @@ #include "verilated_trace.h" +#if 0 +# include +# define VL_TRACE_THREAD_DEBUG(msg) std::cout << "TRACE THREAD: " << msg << std::endl +#else +# define VL_TRACE_THREAD_DEBUG(msg) +#endif + // clang-format on //============================================================================= @@ -90,6 +97,213 @@ public: // This is in .cpp file so is not widely visible ~VerilatedTraceCallInfo() {} }; +#ifdef VL_TRACE_THREADED +//========================================================================= +// Buffer management + +template <> VerilatedTraceEntry* VerilatedTrace::getTraceBuffer() { + VerilatedTraceEntry* bufferp; + // Some jitter is expected, so some number of alternative trace buffers are + // required, but don't allocate more than 8 buffers. + if (m_numTraceBuffers < 8) { + // Allocate a new buffer if none is available + if (!m_buffersFromWorker.tryGet(bufferp)) { + ++m_numTraceBuffers; + // Note: over allocate a bit so pointer comparison is well defined + // if we overflow only by a small amount + bufferp = new VerilatedTraceEntry[m_traceBufferSize + 16]; + } + } else { + // Block until a buffer becomes available + bufferp = m_buffersFromWorker.get(); + } + return bufferp; +} + +template <> void VerilatedTrace::waitForBuffer(const VerilatedTraceEntry* buffp) { + // Slow path code only called on flush/shutdown, so use a simple algorithm. + // Collect buffers from worker and stash them until we get the one we want. + std::deque stash; + do { stash.push_back(m_buffersFromWorker.get()); } while (stash.back() != buffp); + // Now put them back in the queue, in the original order. 
+ while (!stash.empty()) { + m_buffersFromWorker.put_front(stash.back()); + stash.pop_back(); + } +} + +//========================================================================= +// Worker thread + +template <> void VerilatedTrace::workerThreadMain() { + while (true) { + VerilatedTraceEntry* const bufferp = m_buffersToWorker.get(); + + VL_TRACE_THREAD_DEBUG(""); + VL_TRACE_THREAD_DEBUG("Got buffer"); + + const VerilatedTraceEntry* readp = bufferp; + + vluint32_t cmd; + unsigned bits; + vluint32_t* oldp; + vluint64_t newBits; + + while (true) { + cmd = (readp++)->cmd; + + switch (cmd & ~0xFFU) { + //=== + // CHG_* commands + case VerilatedTraceCommand::CHG_BIT: + VL_TRACE_THREAD_DEBUG("Command CHG_BIT"); + chgBitImpl((readp++)->oldp, cmd & 1); + continue; + case VerilatedTraceCommand::CHG_BUS: + VL_TRACE_THREAD_DEBUG("Command CHG_BUS"); + + oldp = (readp++)->oldp; + newBits = (readp++)->newBits; + + // Bits stored in bottom byte of command + switch (cmd & 0xFFU) { + case 2: chgBusImpl<2>(oldp, newBits); continue; + case 3: chgBusImpl<3>(oldp, newBits); continue; + case 4: chgBusImpl<4>(oldp, newBits); continue; + case 5: chgBusImpl<5>(oldp, newBits); continue; + case 6: chgBusImpl<6>(oldp, newBits); continue; + case 7: chgBusImpl<7>(oldp, newBits); continue; + case 8: chgBusImpl<8>(oldp, newBits); continue; + case 9: chgBusImpl<9>(oldp, newBits); continue; + case 10: chgBusImpl<10>(oldp, newBits); continue; + case 11: chgBusImpl<11>(oldp, newBits); continue; + case 12: chgBusImpl<12>(oldp, newBits); continue; + case 13: chgBusImpl<13>(oldp, newBits); continue; + case 14: chgBusImpl<14>(oldp, newBits); continue; + case 15: chgBusImpl<15>(oldp, newBits); continue; + case 16: chgBusImpl<16>(oldp, newBits); continue; + case 17: chgBusImpl<17>(oldp, newBits); continue; + case 18: chgBusImpl<18>(oldp, newBits); continue; + case 19: chgBusImpl<19>(oldp, newBits); continue; + case 20: chgBusImpl<20>(oldp, newBits); continue; + case 21: chgBusImpl<21>(oldp, newBits); 
continue; + case 22: chgBusImpl<22>(oldp, newBits); continue; + case 23: chgBusImpl<23>(oldp, newBits); continue; + case 24: chgBusImpl<24>(oldp, newBits); continue; + case 25: chgBusImpl<25>(oldp, newBits); continue; + case 26: chgBusImpl<26>(oldp, newBits); continue; + case 27: chgBusImpl<27>(oldp, newBits); continue; + case 28: chgBusImpl<28>(oldp, newBits); continue; + case 29: chgBusImpl<29>(oldp, newBits); continue; + case 30: chgBusImpl<30>(oldp, newBits); continue; + case 31: chgBusImpl<31>(oldp, newBits); continue; + case 32: chgBusImpl<32>(oldp, newBits); continue; + } + VL_FATAL_MT(__FILE__, __LINE__, "", "Bad number of bits in CHG_BUS command"); + break; + case VerilatedTraceCommand::CHG_QUAD: + VL_TRACE_THREAD_DEBUG("Command CHG_QUAD"); + // Bits stored in bottom byte of command + chgQuadImpl(readp[0].oldp, readp[1].newBits, cmd & 0xFF); + readp += 2; + continue; + case VerilatedTraceCommand::CHG_ARRAY: + VL_TRACE_THREAD_DEBUG("Command CHG_CHG_ARRAY"); + oldp = (readp++)->oldp; + bits = (readp++)->params; + chgArrayImpl(oldp, reinterpret_cast(readp), bits); + readp += (bits + 63) / 64; + continue; + case VerilatedTraceCommand::CHG_FLOAT: + VL_TRACE_THREAD_DEBUG("Command CHG_FLOAT"); + chgFloatImpl(readp[0].oldp, readp[1].newFloat); + readp += 2; + continue; + case VerilatedTraceCommand::CHG_DOUBLE: + VL_TRACE_THREAD_DEBUG("Command CHG_DOUBLE"); + chgDoubleImpl(readp[0].oldp, readp[1].newDouble); + readp += 2; + continue; + + //=== + // Rare commands + case VerilatedTraceCommand::TIME_CHANGE: + VL_TRACE_THREAD_DEBUG("Command TIME_CHANGE"); + emitTimeChange((readp++)->timeui); + continue; + + //=== + // Commands ending this buffer + case VerilatedTraceCommand::END: VL_TRACE_THREAD_DEBUG("Command END"); break; + case VerilatedTraceCommand::SHUTDOWN: + VL_TRACE_THREAD_DEBUG("Command SHUTDOWN"); + break; + + //=== + // Unknown command + default: + VL_PRINTF_MT("Trace command: 0x%08x\n", cmd); + VL_FATAL_MT(__FILE__, __LINE__, "", "Unknown trace command"); + 
break; + } + + // The above switch will execute 'continue' when necessary, + // so if we ever reach here, we are done with the buffer. + break; + } + + VL_TRACE_THREAD_DEBUG("Returning buffer"); + + // Return buffer + m_buffersFromWorker.put(bufferp); + + // Shut down if required + if (VL_UNLIKELY(cmd == VerilatedTraceCommand::SHUTDOWN)) return; + } +} + +template <> void VerilatedTrace::shutdownWorker() { + // If the worker thread is not running, done.. + if (!m_workerThread) return; + + // Hand an buffer with a shutdown command to the worker thread + VerilatedTraceEntry* const bufferp = getTraceBuffer(); + bufferp[0].cmd = VerilatedTraceCommand::SHUTDOWN; + m_buffersToWorker.put(bufferp); + // Wait for it to return + waitForBuffer(bufferp); + // Join the thread and delete it + m_workerThread->join(); + m_workerThread.reset(nullptr); +} + +#endif + +//============================================================================= +// Life cycle + +template <> void VerilatedTrace::close() { +#ifdef VL_TRACE_THREADED + shutdownWorker(); + while (m_numTraceBuffers) { + delete[] m_buffersFromWorker.get(); + m_numTraceBuffers--; + } +#endif +} + +template <> void VerilatedTrace::flush() { +#ifdef VL_TRACE_THREADED + // Hand an empty buffer to the worker thread + VerilatedTraceEntry* const bufferp = getTraceBuffer(); + bufferp[0].cmd = VerilatedTraceCommand::END; + m_buffersToWorker.put(bufferp); + // Wait for it to be returned. As the processing is in-order, + // this ensures all previous buffers have been processed. 
+ waitForBuffer(bufferp); +#endif +} + //============================================================================= // VerilatedTrace @@ -99,9 +313,14 @@ VerilatedTrace::VerilatedTrace() , m_timeLastDump(0) , m_fullDump(true) , m_nextCode(0) + , m_numSignals(0) , m_scopeEscape('.') , m_timeRes(1e-9) - , m_timeUnit(1e-9) { + , m_timeUnit(1e-9) +#ifdef VL_TRACE_THREADED + , m_numTraceBuffers(0) +#endif +{ set_time_unit(Verilated::timeunitString()); set_time_resolution(Verilated::timeprecisionString()); } @@ -112,6 +331,9 @@ template <> VerilatedTrace::~VerilatedTrace() { delete m_callbacks.back(); m_callbacks.pop_back(); } +#ifdef VL_TRACE_THREADED + close(); +#endif } //========================================================================= @@ -125,6 +347,7 @@ template <> void VerilatedTrace::traceInit() VL_MT_UNSAFE { // of codes on re-open const vluint32_t expectedCodes = nextCode(); m_nextCode = 1; + m_numSignals = 0; // Call all initialize callbacks, which will call decl* for each signal. for (vluint32_t ent = 0; ent < m_callbacks.size(); ++ent) { @@ -141,6 +364,18 @@ template <> void VerilatedTrace::traceInit() VL_MT_UNSAFE { // Now that we know the number of codes, allocate space for the buffer // holding previous signal values. if (!m_sigs_oldvalp) m_sigs_oldvalp = new vluint32_t[nextCode()]; + +#ifdef VL_TRACE_THREADED + // Compute trace buffer size. we need to be able to store a new value for + // each signal, which is 'nextCode()' entries after the init callbacks + // above have been run, plus up to 3 more words of metadata per signal, + // plus fixed overhead of 1 for a termination flag and 2 for a time stamp + // update. 
+ m_traceBufferSize = nextCode() + numSignals() * 3 + 3; + + // Start the worker thread + m_workerThread.reset(new std::thread(&VerilatedTrace::workerThreadMain, this)); +#endif } template <> @@ -153,6 +388,7 @@ void VerilatedTrace::declCode(vluint32_t code, vluint32_t bits, bo int codesNeeded = (bits + 31) / 32; if (tri) codesNeeded *= 2; m_nextCode = std::max(m_nextCode, code + codesNeeded); + ++m_numSignals; } //========================================================================= @@ -194,23 +430,60 @@ template <> void VerilatedTrace::dump(vluint64_t timeui) { return; } m_timeLastDump = timeui; + Verilated::quiesce(); + + // Call hook for format specific behaviour if (VL_UNLIKELY(m_fullDump)) { if (!preFullDump()) return; + } else { + if (!preChangeDump()) return; + } + +#ifdef VL_TRACE_THREADED + // Get the trace buffer we are about to fill + VerilatedTraceEntry* const bufferp = getTraceBuffer(); + m_traceBufferWritep = bufferp; + m_traceBufferEndp = bufferp + m_traceBufferSize; + + // Currently only incremental dumps run on the worker thread + if (VL_LIKELY(!m_fullDump)) { + // Tell worker to update time point + (m_traceBufferWritep++)->cmd = VerilatedTraceCommand::TIME_CHANGE; + (m_traceBufferWritep++)->timeui = timeui; + } else { + // Update time point emitTimeChange(timeui); + } +#else + // Update time point + emitTimeChange(timeui); +#endif + + // Run the callbacks + if (VL_UNLIKELY(m_fullDump)) { m_fullDump = false; // No more need for next dump to be full - for (vluint32_t ent = 0; ent < m_callbacks.size(); ent++) { + for (vluint32_t ent = 0; ent < m_callbacks.size(); ++ent) { VerilatedTraceCallInfo* cip = m_callbacks[ent]; (cip->m_fullcb)(self(), cip->m_userthis, cip->m_code); } } else { - if (!preChangeDump()) return; - emitTimeChange(timeui); for (vluint32_t ent = 0; ent < m_callbacks.size(); ++ent) { VerilatedTraceCallInfo* cip = m_callbacks[ent]; (cip->m_changecb)(self(), cip->m_userthis, cip->m_code); } } + +#ifdef VL_TRACE_THREADED + // Mark 
end of the trace buffer we just filled + (m_traceBufferWritep++)->cmd = VerilatedTraceCommand::END; + + // Assert no buffer overflow + assert(m_traceBufferWritep - bufferp <= m_traceBufferSize); + + // Pass it to the worker thread + m_buffersToWorker.put(bufferp); +#endif } //============================================================================= diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index af8bb2a15..c5ad6f7bb 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -294,6 +294,7 @@ void VerilatedVcd::close() { // This function is on the flush() call path m_assertOne.check(); if (!isOpen()) return; + VerilatedTrace::close(); if (m_evcd) { printStr("$vcdclose "); printQuad(timeLastDump()); @@ -302,6 +303,11 @@ void VerilatedVcd::close() { closePrev(); } +void VerilatedVcd::flush() { + VerilatedTrace::flush(); + bufferFlush(); +} + void VerilatedVcd::printStr(const char* str) { // Not fast... while (*str) { diff --git a/include/verilated_vcd_c.h b/include/verilated_vcd_c.h index 0ee8084c8..e9838c15b 100644 --- a/include/verilated_vcd_c.h +++ b/include/verilated_vcd_c.h @@ -149,7 +149,7 @@ public: /// Close the file void close() VL_MT_UNSAFE_ONE; /// Flush any remaining data to this file - void flush() VL_MT_UNSAFE_ONE { bufferFlush(); } + void flush() VL_MT_UNSAFE_ONE; /// Is file open? 
bool isOpen() const { return m_isOpen; } diff --git a/src/V3EmitC.cpp b/src/V3EmitC.cpp index c39c8ecd6..89053334c 100644 --- a/src/V3EmitC.cpp +++ b/src/V3EmitC.cpp @@ -3427,7 +3427,7 @@ class EmitCTrace : EmitCStmts { putsQuoted(VIdProtect::protectWordsIf(nodep->showname(), nodep->protect())); if (nodep->isScoped()) puts(",\" \")"); // Direction - if (v3Global.opt.traceFormat().fstFlavor()) { + if (v3Global.opt.traceFormat().fst()) { puts("," + cvtToStr(enumNum)); // fstVarDir if (nodep->declDirection().isInoutish()) { @@ -3503,7 +3503,7 @@ class EmitCTrace : EmitCStmts { int emitTraceDeclDType(AstNodeDType* nodep) { // Return enum number or -1 for none - if (v3Global.opt.traceFormat().fstFlavor()) { + if (v3Global.opt.traceFormat().fst()) { // Skip over refs-to-refs, but stop before final ref so can get data type name // Alternatively back in V3Width we could push enum names from upper typedefs if (AstEnumDType* enump = VN_CAST(nodep->skipRefToEnump(), EnumDType)) { diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index cb4443037..a2710f70a 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -46,8 +46,11 @@ public: of.puts("# See " + v3Global.opt.prefix() + ".mk" + " for the caller.\n"); of.puts("\n### Switches...\n"); - of.puts("# C11 constructs required? 0/1 (from --threads or use of classes)\n"); - of.puts("VM_C11 = " + cvtToStr(v3Global.needC11() || v3Global.opt.threads()) + "\n"); + of.puts("# C11 constructs required? 0/1 (from --threads, " + "--trace-threads or use of classes)\n"); + of.puts("VM_C11 = "); + of.puts(v3Global.needC11() || v3Global.opt.threads() ? "1" : "0"); + of.puts("\n"); of.puts("# Coverage output mode? 0/1 (from --coverage)\n"); of.puts("VM_COVERAGE = "); of.puts(v3Global.opt.coverage() ? "1" : "0"); @@ -60,13 +63,19 @@ public: of.puts("VM_THREADS = "); of.puts(cvtToStr(v3Global.opt.threads())); of.puts("\n"); - of.puts("# Tracing output mode? 0/1 (from --trace)\n"); + of.puts("# Tracing output mode? 
0/1 (from --trace/--trace-fst)\n"); of.puts("VM_TRACE = "); of.puts(v3Global.opt.trace() ? "1" : "0"); of.puts("\n"); - of.puts("# Tracing threaded output mode? 0/1 (from --trace-fst-thread)\n"); - of.puts("VM_TRACE_THREADED = "); - of.puts(v3Global.opt.traceFormat().threaded() ? "1" : "0"); + of.puts("# Tracing threaded output mode? 0/1/N threads (from --trace-threads)\n"); + of.puts("VM_TRACE_THREADS = "); + of.puts(!v3Global.opt.traceThreads() + ? "0" + : cvtToStr(v3Global.opt.traceThreads() - v3Global.opt.traceFormat().fst())); + of.puts("\n"); + of.puts("# Separate FST writer thread? 0/1 (from --trace-fst with --trace-threads > 0)\n"); + of.puts("VM_TRACE_FST_WRITER_THREAD = "); + of.puts(v3Global.opt.traceThreads() && v3Global.opt.traceFormat().fst() ? "1" : "0"); of.puts("\n"); of.puts("\n### Object file lists...\n"); diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 1f0b30f15..c836bffc6 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -631,6 +631,9 @@ void V3Options::notify() { && !v3Global.opt.preprocOnly() // && !v3Global.opt.xmlOnly()); } + + // --trace-threads implies --threads 1 unless explicitly specified + if (traceThreads() && !threads()) { m_threads = 1; } } //###################################################################### @@ -1062,8 +1065,16 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char addLdLibs("-lz"); } else if (!strcmp(sw, "-trace-fst-thread")) { m_trace = true; - m_traceFormat = TraceFormat::FST_THREAD; + m_traceFormat = TraceFormat::FST; addLdLibs("-lz"); + fl->v3warn(DEPRECATED, "Option --trace-fst-thread is deprecated. 
" + "Use --trace-fst with --trace-threads > 0."); + if (m_traceThreads == 0) m_traceThreads = 1; + } else if (!strcmp(sw, "-trace-threads") && (i + 1) < argc) { + shift; + m_trace = true; + m_traceThreads = atoi(argv[i]); + if (m_traceThreads < 0) fl->v3fatal("--trace-threads must be >= 0: " << argv[i]); } else if (!strcmp(sw, "-trace-depth") && (i + 1) < argc) { shift; m_traceDepth = atoi(argv[i]); @@ -1568,6 +1579,7 @@ V3Options::V3Options() { m_traceParams = true; m_traceStructs = false; m_traceUnderscore = false; + m_traceThreads = 0; m_underlineZero = false; m_verilate = true; m_vpi = false; diff --git a/src/V3Options.h b/src/V3Options.h index d8d233112..2e48eda40 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -150,21 +150,20 @@ inline std::ostream& operator<<(std::ostream& os, const VTimescale& rhs) { class TraceFormat { public: - enum en { VCD = 0, FST, FST_THREAD } m_e; + enum en { VCD = 0, FST } m_e; // cppcheck-suppress noExplicitConstructor inline TraceFormat(en _e = VCD) : m_e(_e) {} explicit inline TraceFormat(int _e) : m_e(static_cast<en>(_e)) {} operator en() const { return m_e; } - bool fstFlavor() const { return m_e == FST || m_e == FST_THREAD; } - bool threaded() const { return m_e == FST_THREAD; } + bool fst() const { return m_e == FST; } string classBase() const { - static const char* const names[] = {"VerilatedVcd", "VerilatedFst", "VerilatedFst"}; + static const char* const names[] = {"VerilatedVcd", "VerilatedFst"}; return names[m_e]; } string sourceName() const { - static const char* const names[] = {"verilated_vcd", "verilated_fst", "verilated_fst"}; + static const char* const names[] = {"verilated_vcd", "verilated_fst"}; return names[m_e]; } }; @@ -295,6 +294,7 @@ private: TraceFormat m_traceFormat; // main switch: --trace or --trace-fst int m_traceMaxArray;// main switch: --trace-max-array int m_traceMaxWidth;// main switch: --trace-max-width + int m_traceThreads; // main switch: --trace-threads int m_unrollCount; // main switch: --unroll-count int 
m_unrollStmts; // main switch: --unroll-stmts @@ -487,6 +487,7 @@ public: TraceFormat traceFormat() const { return m_traceFormat; } int traceMaxArray() const { return m_traceMaxArray; } int traceMaxWidth() const { return m_traceMaxWidth; } + int traceThreads() const { return m_traceThreads; } int unrollCount() const { return m_unrollCount; } int unrollStmts() const { return m_unrollStmts; } diff --git a/test_regress/driver.pl b/test_regress/driver.pl index 65bb340f5..d523aac45 100755 --- a/test_regress/driver.pl +++ b/test_regress/driver.pl @@ -833,8 +833,7 @@ sub compile_vlt_flags { @{$param{verilator_flags3}}); $self->{sc} = 1 if ($checkflags =~ /-sc\b/); $self->{trace} = ($opt_trace || $checkflags =~ /-trace\b/ - || $checkflags =~ /-trace-fst\b/ - || $checkflags =~ /-trace-fst-thread\b/); + || $checkflags =~ /-trace-fst\b/); $self->{trace_format} = (($checkflags =~ /-trace-fst/ && 'fst-c') || ($self->{sc} && 'vcd-sc') || (!$self->{sc} && 'vcd-c')); @@ -849,7 +848,8 @@ sub compile_vlt_flags { unshift @verilator_flags, "--trace" if $opt_trace; my $threads = ::calc_threads($Vltmt_threads); unshift @verilator_flags, "--threads $threads" if $param{vltmt} && $checkflags !~ /-threads /; - unshift @verilator_flags, "--trace-fst-thread" if $param{vltmt} && $checkflags =~ /-trace-fst/; + unshift @verilator_flags, "--trace-threads 1" if $param{vltmt} && $checkflags =~ /-trace /; + unshift @verilator_flags, "--trace-threads 2" if $param{vltmt} && $checkflags =~ /-trace-fst /; unshift @verilator_flags, "--debug-partition" if $param{vltmt}; unshift @verilator_flags, "--make gmake" if $param{verilator_make_gmake}; unshift @verilator_flags, "--make cmake" if $param{verilator_make_cmake}; diff --git a/test_regress/t/t_trace_array_fst_threads_1.pl b/test_regress/t/t_trace_array_fst_threads_1.pl new file mode 100755 index 000000000..487c4b474 --- /dev/null +++ b/test_regress/t/t_trace_array_fst_threads_1.pl @@ -0,0 +1,27 @@ +#!/usr/bin/perl +if (!$::Driver) { use FindBin; 
exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003-2009 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(vlt => 1); + +top_filename("t/t_trace_array.v"); +$Self->{golden_filename} = "t/t_trace_array_fst.out"; + +compile( + verilator_flags2 => ['--cc --trace-fst --trace-threads 1 --trace-structs'], + ); + +execute( + check_finished => 1, + ); + +fst_identical($Self->trace_filename, $Self->{golden_filename}); + +ok(1); +1; diff --git a/test_regress/t/t_trace_array_fst_threads_2.pl b/test_regress/t/t_trace_array_fst_threads_2.pl new file mode 100755 index 000000000..170baffe6 --- /dev/null +++ b/test_regress/t/t_trace_array_fst_threads_2.pl @@ -0,0 +1,27 @@ +#!/usr/bin/perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003-2009 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(vlt => 1); + +top_filename("t/t_trace_array.v"); +$Self->{golden_filename} = "t/t_trace_array_fst.out"; + +compile( + verilator_flags2 => ['--cc --trace-fst --trace-threads 2 --trace-structs'], + ); + +execute( + check_finished => 1, + ); + +fst_identical($Self->trace_filename, $Self->{golden_filename}); + +ok(1); +1; diff --git a/test_regress/t/t_trace_array_threads_1.pl b/test_regress/t/t_trace_array_threads_1.pl new file mode 100755 index 000000000..00d9d4789 --- /dev/null +++ b/test_regress/t/t_trace_array_threads_1.pl @@ -0,0 +1,26 @@ +#!/usr/bin/perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003-2009 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(vlt => 1); + +top_filename("t/t_trace_array.v"); + +compile( + verilator_flags2 => ['--cc --trace --trace-threads 1 --trace-structs'], + ); + +execute( + check_finished => 1, + ); + +file_grep("$Self->{obj_dir}/simx.vcd", qr/\$enddefinitions/x); + +ok(1); +1; diff --git a/test_regress/t/t_trace_complex_fst_thread.pl b/test_regress/t/t_trace_complex_fst_threads_1.pl similarity index 91% rename from test_regress/t/t_trace_complex_fst_thread.pl rename to test_regress/t/t_trace_complex_fst_threads_1.pl index 59ffac304..5e9707bf6 100755 --- a/test_regress/t/t_trace_complex_fst_thread.pl +++ b/test_regress/t/t_trace_complex_fst_threads_1.pl @@ -14,7 +14,7 @@ top_filename("t/t_trace_complex.v"); $Self->{golden_filename} = "t/t_trace_complex_fst.out"; compile( - verilator_flags2 => ['--cc --trace-fst-thread'], + verilator_flags2 => ['--cc --trace-fst --trace-threads 1'], ); execute( diff --git a/test_regress/t/t_trace_complex_fst_threads_2.pl b/test_regress/t/t_trace_complex_fst_threads_2.pl new file mode 100755 index 000000000..7cbdebcbb --- /dev/null +++ b/test_regress/t/t_trace_complex_fst_threads_2.pl @@ -0,0 +1,27 @@ +#!/usr/bin/perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003-2009 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +top_filename("t/t_trace_complex.v"); +$Self->{golden_filename} = "t/t_trace_complex_fst.out"; + +compile( + verilator_flags2 => ['--cc --trace-fst --trace-threads 2'], + ); + +execute( + check_finished => 1, + ); + +fst_identical($Self->trace_filename, $Self->{golden_filename}); + +ok(1); +1; diff --git a/test_regress/t/t_trace_complex_old_api.pl b/test_regress/t/t_trace_complex_old_api.pl index 6041f09c2..730aa1e76 100755 --- a/test_regress/t/t_trace_complex_old_api.pl +++ b/test_regress/t/t_trace_complex_old_api.pl @@ -10,7 +10,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di # Same test as t_trace_complex, but exercising the old VCD tracing API -scenarios(simulator => 1); +scenarios(vlt => 1); top_filename("t/t_trace_complex.v"); diff --git a/test_regress/t/t_trace_complex_threads_1.pl b/test_regress/t/t_trace_complex_threads_1.pl new file mode 100755 index 000000000..0999ed679 --- /dev/null +++ b/test_regress/t/t_trace_complex_threads_1.pl @@ -0,0 +1,37 @@ +#!/usr/bin/perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2003-2009 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. 
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(simulator => 1); + +top_filename("t/t_trace_complex.v"); +$Self->{golden_filename} = "t/t_trace_complex.out"; + +compile( + verilator_flags2 => ['--cc --trace --trace-threads 1'] + ); + +execute( + check_finished => 1, + ); + +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_strp /); +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /); +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /); +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /); +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /); +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_arru\(/); +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\(/); +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\(/); +file_grep ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\(/); + +vcd_identical ("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename}); + +ok(1); +1; diff --git a/test_regress/t/t_trace_two_dumpfst_cc.pl b/test_regress/t/t_trace_two_dumpfst_cc.pl index d2288c696..6a1fae915 100755 --- a/test_regress/t/t_trace_two_dumpfst_cc.pl +++ b/test_regress/t/t_trace_two_dumpfst_cc.pl @@ -18,13 +18,13 @@ compile( verilator_make_gmake => 0, top_filename => 't_trace_two_b.v', VM_PREFIX => 'Vt_trace_two_b', - verilator_flags2 => ['--trace-fst-thread -DTEST_FST'], + verilator_flags2 => ['--trace-fst --trace-threads 1 -DTEST_FST'], ); compile( make_main => 0, top_filename => 't_trace_two_a.v', - verilator_flags2 => ['-exe', '--trace-fst-thread', + verilator_flags2 => ['-exe', '--trace-fst --trace-threads 1', '-DTEST_FST', "$Self->{t_dir}/t_trace_two_cc.cpp"], v_flags2 => ['+define+TEST_DUMP'], diff --git a/test_regress/t/t_trace_two_hdrfst_cc.pl b/test_regress/t/t_trace_two_hdrfst_cc.pl index dd4712626..653776ccc 100755 --- a/test_regress/t/t_trace_two_hdrfst_cc.pl +++ b/test_regress/t/t_trace_two_hdrfst_cc.pl @@ -18,14 +18,14 @@ compile( verilator_make_gmake => 0, top_filename => 't_trace_two_b.v', VM_PREFIX => 
'Vt_trace_two_b', - verilator_flags2 => ['--trace-fst-thread'], + verilator_flags2 => ['--trace-fst --trace-threads 1'], ); compile( make_main => 0, top_filename => 't_trace_two_a.v', make_flags => 'CPPFLAGS_ADD="-DTEST_HDR_TRACE=1 -DTEST_FST=1"', - verilator_flags2 => ['-exe', '--trace-fst-thread', + verilator_flags2 => ['-exe', '--trace-fst --trace-threads 1', '-DTEST_FST', "$Self->{t_dir}/t_trace_two_cc.cpp"], ); diff --git a/test_regress/t/t_trace_two_portfst_cc.pl b/test_regress/t/t_trace_two_portfst_cc.pl index acb6176b9..299b3cefb 100755 --- a/test_regress/t/t_trace_two_portfst_cc.pl +++ b/test_regress/t/t_trace_two_portfst_cc.pl @@ -18,13 +18,13 @@ compile( verilator_make_gmake => 0, top_filename => 't_trace_two_b.v', VM_PREFIX => 'Vt_trace_two_b', - verilator_flags2 => ['--trace-fst-thread'], + verilator_flags2 => ['--trace-fst --trace-threads 1'], ); compile( make_main => 0, top_filename => 't_trace_two_a.v', - verilator_flags2 => ['-exe', '--trace-fst-thread', + verilator_flags2 => ['-exe', '--trace-fst --trace-threads 1', '-DTEST_FST', "$Self->{t_dir}/t_trace_two_cc.cpp"], v_flags2 => ['+define+TEST_DUMPPORTS'], diff --git a/test_regress/t/t_verilated_all.pl b/test_regress/t/t_verilated_all.pl index d3b70bac5..0bd51a322 100755 --- a/test_regress/t/t_verilated_all.pl +++ b/test_regress/t/t_verilated_all.pl @@ -19,6 +19,8 @@ compile( "--trace --vpi ", ($Self->cfg_with_threaded ? "--threads 2 $root/include/verilated_threads.cpp" : ""), + ($Self->cfg_with_threaded + ? "--trace-threads 1" : ""), "$root/include/verilated_save.cpp"], );