Fix deadlocks in error handler (#4672)

2023-12-12 14:10:01 +01:00 · 2023-12-12 14:10:01 +01:00 · 75a44e5aa9
parent a811f2e17d
commit 75a44e5aa9
1 changed files with 44 additions and 5 deletions
--- a/src/V3ThreadPool.h
+++ b/src/V3ThreadPool.h
@ -27,6 +27,7 @@
 #include <condition_variable>
 #include <functional>
 #include <future>
+#include <iostream>
 #include <list>
 #include <mutex>
 #include <queue>
@ -109,22 +110,59 @@ class V3ThreadPool final {
    // Indicates whether multithreading has been suspended.
    // Used for error detection in resumeMultithreading only. You probably should use
    // m_exclusiveAccess for information whether something should be run in current thread.
-    bool m_multithreadingSuspended VL_GUARDED_BY(m_mutex) = false;
+    std::atomic_bool m_multithreadingSuspended{false};

    // CONSTRUCTORS
    V3ThreadPool() = default;
    ~V3ThreadPool() {
-        if (!m_mutex.try_lock()) {
-            if (m_jobsInProgress != 0) {
+        if (m_multithreadingSuspended) {
+            // Ideally we shouldn't deal with this and just call the std::abort. However,
+            // std::exit(0) (which invokes this destructor) is called in multiple places with
+            // multithreading being suspended. Aborting would change the exit code, what would not
+            // be acceptable.
+            resumeMultithreading();
+        }
+        // Unexpected states. Aborting to subtly signal to the outside world that something is
+        // wrong. This won't make things worse to an user - the program is already terminating at
+        // this point anyway, most likely as a result of an error. Using if/abort instead of assert
+        // because assert can be disabled.
+        if (VL_UNCOVERABLE(m_exclusiveAccess)) {
+            std::cerr << "%Error: Internal Error: attempted to destroy Thread Pool with active "
+                         "exclusive access mode"
+                      << std::endl;
+            std::abort();
+        }
+        if (VL_UNCOVERABLE(m_stopRequested)) {
+            std::cerr << "%Error: Internal Error: attempted to destroy Thread Pool with active "
+                         "stop request"
+                      << std::endl;
+            std::abort();
+        }
+
+        if (VL_UNCOVERABLE(!m_mutex.try_lock())) {
+            if (VL_UNCOVERABLE(m_jobsInProgress != 0)) {
                // ThreadPool shouldn't be destroyed when jobs are running and mutex is locked,
                // something is wrong. Most likely Verilator is exiting as a result of failed
-                // assert in critical section. Do nothing, let it exit.
-                return;
+                // assert in critical section. Just returning is dangerous, as threads and this
+                // class' members might keep the program hanging endlessly.
+                std::cerr << "%Error: Internal Error: attempted to destroy Thread Pool with "
+                             "running jobs"
+                          << std::endl;
+                std::abort();
            }
+            // Probably an error happened in a call to some V3ThreadPool's method. We can't just
+            // unlock m_mutex and be sure that resize(0) handle this well.
+            std::cerr << "%Error: Internal Error: attempted to destroy locked Thread Pool"
+                      << std::endl;
+            std::abort();
        } else {
            V3LockGuard lock{m_mutex, std::adopt_lock_t{}};
            m_queue = {};  // make sure queue is empty
        }
+
+        // This locks mutexes and wakes up threads, so it would be nice to move this to a separate
+        // method that would be called just before controlled exit. Such method would need to be
+        // called in quite a few places that call std::exit(0), so leaving it here for now.
        resize(0);
    }

@ -232,6 +270,7 @@ private:

    // True when any thread requested exclusive access
    bool stopRequested() const VL_MT_SAFE {
+        if (m_exclusiveAccess) return false;
        // don't wait if shutdown already requested
        if (m_shutdown) return false;
        return m_stopRequested;