Performance improvement for hierarchical processor

The problem was that pushing the results into the cell hierarchy somehow led to thread stalling, likely due to bbox invalidation and related effects. The solution is to store the results of the operations inside the context structure (along with the propagation information) and push them to the layout later in a single step.
2026-02-18 18:37:54 +01:00 · 2026-02-18 18:37:54 +01:00 · a8119c5a53
parent b685f89639
commit a8119c5a53
3 changed files with 64 additions and 17 deletions
--- a/src/db/db/dbHierProcessor.cc
+++ b/src/db/db/dbHierProcessor.cc
@ -714,7 +714,6 @@ local_processor_result_computation_task<TS, TI, TR>::perform ()
 {
  mp_cell_contexts->compute_results (*mp_contexts, mp_cell, mp_op, m_output_layers, mp_proc);

-  //  erase the contexts we don't need any longer
  {
    tl::MutexLocker locker (& mp_contexts->lock ());

@ -734,7 +733,10 @@ local_processor_result_computation_task<TS, TI, TR>::perform ()
    }
 #endif

-    mp_contexts->context_map ().erase (mp_cell);
+    //  release some memory
+    auto ctx = mp_contexts->context_map ().find (mp_cell);
+    tl_assert (ctx != mp_contexts->context_map ().end ());
+    ctx->second.cleanup ();
  }
 }

@ -881,15 +883,6 @@ void local_processor<TS, TI, TR>::run (local_operation<TS, TI, TR> *op, unsigned
  compute_results (contexts, op, output_layers);
 }

-template <class TS, class TI, class TR>
-void local_processor<TS, TI, TR>::push_results (db::Cell *cell, unsigned int output_layer, const std::unordered_set<TR> &result) const
-{
-  if (! result.empty ()) {
-    tl::MutexLocker locker (&cell->layout ()->lock ());
-    cell->shapes (output_layer).insert (result.begin (), result.end ());
-  }
-}
-
 template <class TS, class TI, class TR>
 void local_processor<TS, TI, TR>::compute_contexts (local_processor_contexts<TS, TI, TR> &contexts, const local_operation<TS, TI, TR> *op, unsigned int subject_layer, const std::vector<unsigned int> &intruder_layers) const
 {
@ -1248,7 +1241,7 @@ local_processor<TS, TI, TR>::compute_results (local_processor_contexts<TS, TI, T
        typename local_processor_contexts<TS, TI, TR>::iterator cpc = contexts.context_map ().find (&mp_subject_layout->cell (*bu));
        if (cpc != contexts.context_map ().end ()) {
          cpc->second.compute_results (contexts, cpc->first, op, output_layers, this);
-          contexts.context_map ().erase (cpc);
+          cpc->second.cleanup ();   //  release some memory
        }

      }
@ -1261,6 +1254,24 @@ local_processor<TS, TI, TR>::compute_results (local_processor_contexts<TS, TI, T
    }

  }
+
+  //  deliver the results
+  {
+    tl::MutexLocker locker (& mp_subject_layout->lock ());
+    for (auto c = contexts.begin (); c != contexts.end (); ++c) {
+
+      db::Cell *cell = c->first;
+      auto r = c->second.result ().begin ();
+      auto rend = c->second.result ().end ();
+
+      for (auto o = output_layers.begin (); r != rend && o != output_layers.end (); ++o, ++r) {
+        if (! r->empty ()) {
+          cell->shapes (*o).insert (r->begin (), r->end ());
+        }
+      }
+
+    }
+  }
 }

 namespace {
--- a/src/db/db/dbHierProcessor.h
+++ b/src/db/db/dbHierProcessor.h
@ -244,9 +244,20 @@ public:
    return m_contexts.end ();
  }

+  const std::vector<std::unordered_set<TR> > &result () const
+  {
+    return m_result;
+  }
+
+  void cleanup ()
+  {
+    m_contexts.clear ();
+  }
+
 private:
  const db::Cell *mp_intruder_cell;
  std::unordered_map<context_key_type, db::local_processor_cell_context<TS, TI, TR> > m_contexts;
+  std::vector<std::unordered_set<TR> > m_result;
 };

 template <class TS, class TI, class TR>
@ -558,7 +569,6 @@ private:
  size_t get_progress () const;
  void compute_contexts (db::local_processor_contexts<TS, TI, TR> &contexts, db::local_processor_cell_context<TS, TI, TR> *parent_context, db::Cell *subject_parent, db::Cell *subject_cell, const db::ICplxTrans &subject_cell_inst, const db::Cell *intruder_cell, const typename local_processor_cell_contexts<TS, TI, TR>::context_key_type &intruders, db::Coord dist) const;
  void issue_compute_contexts (db::local_processor_contexts<TS, TI, TR> &contexts, db::local_processor_cell_context<TS, TI, TR> *parent_context, db::Cell *subject_parent, db::Cell *subject_cell, const db::ICplxTrans &subject_cell_inst, const db::Cell *intruder_cell, typename local_processor_cell_contexts<TS, TI, TR>::context_key_type &intruders, db::Coord dist) const;
-  void push_results (db::Cell *cell, unsigned int output_layer, const std::unordered_set<TR> &result) const;
  void compute_local_cell (const db::local_processor_contexts<TS, TI, TR> &contexts, db::Cell *subject_cell, const db::Cell *intruder_cell, const local_operation<TS, TI, TR> *op, const typename local_processor_cell_contexts<TS, TI, TR>::context_key_type &intruders, std::vector<std::unordered_set<TR> > &result) const;

  bool subject_cell_is_breakout (db::cell_index_type ci) const
--- a/src/db/db/dbHierProcessor2.cc
+++ b/src/db/db/dbHierProcessor2.cc
@ -446,7 +446,12 @@ local_processor_cell_contexts<TS, TI, TR>::compute_results (const local_processo
        proc->compute_local_cell (contexts, cell, mp_intruder_cell, op, *c->first, res);
      }

-      if (common.empty ()) {
+      bool common_empty = true;
+      for (auto c = common.begin (); common_empty && c != common.end (); ++c) {
+        common_empty = c->empty ();
+      }
+
+      if (common_empty) {

        CRONOLOGY_COMPUTE_BRACKET(event_propagate)
        for (std::vector<unsigned int>::const_iterator o = output_layers.begin (); o != output_layers.end (); ++o) {
@ -519,9 +524,30 @@ local_processor_cell_contexts<TS, TI, TR>::compute_results (const local_processo

  }

-  for (std::vector<unsigned int>::const_iterator o = output_layers.begin (); o != output_layers.end (); ++o) {
-    size_t oi = o - output_layers.begin ();
-    proc->push_results (cell, *o, common[oi]);
+  //  store the results
+
+  bool common_empty = true;
+  for (auto c = common.begin (); common_empty && c != common.end (); ++c) {
+    common_empty = c->empty ();
+  }
+
+  if (! common_empty) {
+    if (m_result.empty ()) {
+      m_result.swap (common);
+    } else {
+      if (m_result.size () < common.size ()) {
+        m_result.resize (common.size ());
+      }
+      auto t = m_result.begin ();
+      for (auto s = common.begin (); s != common.end (); ++s, ++t) {
+        if (t->empty ()) {
+          t->swap (*s);
+        } else {
+          t->insert (s->begin (), s->end ());
+          s->clear ();
+        }
+      }
+    }
  }
 }