From 75a44e5aa9df8a42aa3ae3dd2c453df33d611298 Mon Sep 17 00:00:00 2001 From: Mariusz Glebocki Date: Tue, 12 Dec 2023 14:10:01 +0100 Subject: [PATCH] Fix deadlocks in error handler (#4672) --- src/V3ThreadPool.h | 49 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/src/V3ThreadPool.h b/src/V3ThreadPool.h index c0d5fff88..8f29ed800 100644 --- a/src/V3ThreadPool.h +++ b/src/V3ThreadPool.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -109,22 +110,59 @@ class V3ThreadPool final { // Indicates whether multithreading has been suspended. // Used for error detection in resumeMultithreading only. You probably should use // m_exclusiveAccess for information whether something should be run in current thread. - bool m_multithreadingSuspended VL_GUARDED_BY(m_mutex) = false; + std::atomic_bool m_multithreadingSuspended{false}; // CONSTRUCTORS V3ThreadPool() = default; ~V3ThreadPool() { - if (!m_mutex.try_lock()) { - if (m_jobsInProgress != 0) { + if (m_multithreadingSuspended) { + // Ideally we shouldn't deal with this and just call the std::abort. However, + // std::exit(0) (which invokes this destructor) is called in multiple places with + // multithreading being suspended. Aborting would change the exit code, what would not + // be acceptable. + resumeMultithreading(); + } + // Unexpected states. Aborting to subtly signal to the outside world that something is + // wrong. This won't make things worse to an user - the program is already terminating at + // this point anyway, most likely as a result of an error. Using if/abort instead of assert + // because assert can be disabled. + if (VL_UNCOVERABLE(m_exclusiveAccess)) { + std::cerr << "%Error: Internal Error: attempted to destroy Thread Pool with active " + "exclusive access mode" + << std::endl; + std::abort(); + } + if (VL_UNCOVERABLE(m_stopRequested)) { + std::cerr << "%Error: Internal Error: attempted to destroy Thread Pool with active " + "stop request" + << std::endl; + std::abort(); + } + + if (VL_UNCOVERABLE(!m_mutex.try_lock())) { + if (VL_UNCOVERABLE(m_jobsInProgress != 0)) { // ThreadPool shouldn't be destroyed when jobs are running and mutex is locked, // something is wrong. Most likely Verilator is exiting as a result of failed - // assert in critical section. Do nothing, let it exit. - return; + // assert in critical section. Just returning is dangerous, as threads and this + // class' members might keep the program hanging endlessly. + std::cerr << "%Error: Internal Error: attempted to destroy Thread Pool with " + "running jobs" + << std::endl; + std::abort(); } + // Probably an error happened in a call to some V3ThreadPool's method. We can't just + // unlock m_mutex and be sure that resize(0) handle this well. + std::cerr << "%Error: Internal Error: attempted to destroy locked Thread Pool" + << std::endl; + std::abort(); } else { V3LockGuard lock{m_mutex, std::adopt_lock_t{}}; m_queue = {}; // make sure queue is empty } + + // This locks mutexes and wakes up threads, so it would be nice to move this to a separate + // method that would be called just before controlled exit. Such method would need to be + // called in quite a few places that call std::exit(0), so leaving it here for now. resize(0); } @@ -232,6 +270,7 @@ private: // True when any thread requested exclusive access bool stopRequested() const VL_MT_SAFE { + if (m_exclusiveAccess) return false; // don't wait if shutdown already requested if (m_shutdown) return false; return m_stopRequested;