diff --git a/include/verilated_profiler.cpp b/include/verilated_profiler.cpp index ed25093d1..9f37addf9 100644 --- a/include/verilated_profiler.cpp +++ b/include/verilated_profiler.cpp @@ -99,6 +99,10 @@ void VlExecutionProfiler::configure(const VerilatedContext& context) { } } +void VlExecutionProfiler::startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId) { + profilep->setupThread(threadId); +} + void VlExecutionProfiler::setupThread(uint32_t threadId) { // Reserve some space in the thread-local profiling buffer, in order to try to avoid malloc // while profiling. diff --git a/include/verilated_profiler.h b/include/verilated_profiler.h index d47be4da4..f85c95528 100644 --- a/include/verilated_profiler.h +++ b/include/verilated_profiler.h @@ -23,11 +23,6 @@ #define VERILATOR_VERILATED_PROFILER_H_ #include "verilatedos.h" - -#ifndef VL_PROFILER -#error "verilated_profiler.h/cpp expects VL_PROFILER (from --prof-{exec, pgo}" -#endif - #include "verilated.h" #include @@ -186,6 +181,9 @@ public: void clear() VL_MT_SAFE_EXCLUDES(m_mutex); // Write profiling data into file void dump(const char* filenamep, uint64_t tickEnd) VL_MT_SAFE_EXCLUDES(m_mutex); + + // Called via VlStartWorkerCb in VlWorkerThread::startWorker + static void startWorkerSetup(VlExecutionProfiler* profilep, uint32_t threadId); }; //============================================================================= diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp index 12a579026..dbdd3330a 100644 --- a/include/verilated_threads.cpp +++ b/include/verilated_threads.cpp @@ -24,10 +24,6 @@ #include "verilatedos.h" #include "verilated_threads.h" -#ifdef VL_PROFILER -#include "verilated_profiler.h" -#endif - #include #include #include @@ -52,10 +48,10 @@ VlMTaskVertex::VlMTaskVertex(uint32_t upstreamDepCount) // VlWorkerThread VlWorkerThread::VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp) + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) : m_ready_size{0} , m_exiting{false} - , m_cthread{startWorker, this, threadId, profilerp} + , m_cthread{startWorker, this, threadId, profilerp, startCb} , m_contextp{contextp} {} VlWorkerThread::~VlWorkerThread() { @@ -83,13 +79,9 @@ void VlWorkerThread::workerLoop() { } void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp) { + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { Verilated::threadContextp(workerp->m_contextp); -#ifdef VL_PROFILER - // Note: setupThread is not defined without VL_PROFILER, hence the #ifdef. Still, we might - // not be profiling execution (e.g.: PGO only), so profilerp might still be nullptr. - if (profilerp) profilerp->setupThread(threadId); -#endif + if (VL_UNLIKELY(startCb)) startCb(profilerp, threadId); workerp->workerLoop(); } @@ -97,7 +89,7 @@ void VlWorkerThread::startWorker(VlWorkerThread* workerp, uint32_t threadId, // VlThreadPool VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads, - VlExecutionProfiler* profiler) { + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb) { // --threads N passes nThreads=N-1, as the "main" threads counts as 1 ++nThreads; const unsigned cpus = std::thread::hardware_concurrency(); @@ -111,7 +103,7 @@ VlThreadPool::VlThreadPool(VerilatedContext* contextp, int nThreads, } // Create worker threads for (uint32_t threadId = 1; threadId < nThreads; ++threadId) { - m_workers.push_back(new VlWorkerThread{threadId, contextp, profiler}); + m_workers.push_back(new VlWorkerThread{threadId, contextp, profilerp, startCb}); } } diff --git a/include/verilated_threads.h b/include/verilated_threads.h index 257c87162..805a73d2d 100644 --- a/include/verilated_threads.h +++ b/include/verilated_threads.h @@ -50,6 +50,9 @@ #endif // clang-format on +class VlExecutionProfiler; +class VlThreadPool; + // VlMTaskVertex and VlThreadpool will work with multiple model class types. // Since the type is opaque to VlMTaskVertex and VlThreadPool, represent it // as a void* here. @@ -57,6 +60,9 @@ using VlSelfP = void*; using VlExecFnp = void (*)(VlSelfP, bool); +// VlWorkerThread::startWorker callback, used to hook in VlExecutionProfiler +using VlStartWorkerCb = void (*)(VlExecutionProfiler*, uint32_t threadId); + // Track dependencies for a single MTask. class VlMTaskVertex final { // MEMBERS @@ -129,9 +135,6 @@ public: } }; -class VlExecutionProfiler; -class VlThreadPool; - class VlWorkerThread final { private: // TYPES @@ -171,7 +174,7 @@ private: public: // CONSTRUCTORS explicit VlWorkerThread(uint32_t threadId, VerilatedContext* contextp, - VlExecutionProfiler* profilerp); + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); ~VlWorkerThread(); // METHODS @@ -209,7 +212,7 @@ public: } void workerLoop(); static void startWorker(VlWorkerThread* workerp, uint32_t threadId, - VlExecutionProfiler* profilerp); + VlExecutionProfiler* profilerp, VlStartWorkerCb startCb); }; class VlThreadPool final { @@ -221,7 +224,8 @@ public: // Construct a thread pool with 'nThreads' dedicated threads. The thread // pool will create these threads and make them available to execute tasks // via this->workerp(index)->addTask(...) - VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp); + VlThreadPool(VerilatedContext* contextp, int nThreads, VlExecutionProfiler* profilerp, + VlStartWorkerCb startCb); ~VlThreadPool(); // METHODS diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index f9baa3fd5..28d455fd9 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -677,8 +677,8 @@ void EmitCSyms::emitSymImp() { puts("}\n\n"); // Constructor - puts(symClassName() + "::" + symClassName() + "(VerilatedContext* contextp, const char* namep," - + topClassName() + "* modelp)\n"); + puts(symClassName() + "::" + symClassName() + + "(VerilatedContext* contextp, const char* namep, " + topClassName() + "* modelp)\n"); puts(" : VerilatedSyms{contextp}\n"); puts(" // Setup internal state of the Syms class\n"); puts(" , __Vm_modelp{modelp}\n"); @@ -707,7 +707,10 @@ void EmitCSyms::emitSymImp() { // duration of the eval call. puts(" , __Vm_threadPoolp{new VlThreadPool{_vm_contextp__, " + cvtToStr(v3Global.opt.threads() - 1) + ", " - + (v3Global.opt.profExec() ? "&__Vm_executionProfiler" : "nullptr") + "}}\n"); + + (v3Global.opt.profExec() + ? "&__Vm_executionProfiler, &VlExecutionProfiler::startWorkerSetup" + : "nullptr, nullptr") + + "}}\n"); } puts(" // Setup module instances\n"); diff --git a/src/V3EmitMk.cpp b/src/V3EmitMk.cpp index b748d9553..3fb3907be 100644 --- a/src/V3EmitMk.cpp +++ b/src/V3EmitMk.cpp @@ -197,7 +197,6 @@ public: of.puts("# User CFLAGS (from -CFLAGS on Verilator command line)\n"); of.puts("VM_USER_CFLAGS = \\\n"); if (!v3Global.opt.libCreate().empty()) of.puts("\t-fPIC \\\n"); - if (v3Global.opt.usesProfiler()) of.puts("\t-DVL_PROFILER \\\n"); const V3StringList& cFlags = v3Global.opt.cFlags(); for (const string& i : cFlags) of.puts("\t" + i + " \\\n"); of.puts("\n");