Performance improvement for hierarchical processor

The problem was that pushing the results into the cell hierarchy
somehow led to thread stalling, likely due to bbox invalidation
and related effects. The solution is to store the results of
the operations inside the context structure (along with the
propagation information) and push them to the layout later in
a single step.
This commit is contained in:
Matthias Koefferlein 2026-02-18 18:37:54 +01:00
parent b685f89639
commit a8119c5a53
3 changed files with 64 additions and 17 deletions

View File

@ -714,7 +714,6 @@ local_processor_result_computation_task<TS, TI, TR>::perform ()
{
mp_cell_contexts->compute_results (*mp_contexts, mp_cell, mp_op, m_output_layers, mp_proc);
// erase the contexts we don't need any longer
{
tl::MutexLocker locker (& mp_contexts->lock ());
@ -734,7 +733,10 @@ local_processor_result_computation_task<TS, TI, TR>::perform ()
}
#endif
mp_contexts->context_map ().erase (mp_cell);
// release some memory
auto ctx = mp_contexts->context_map ().find (mp_cell);
tl_assert (ctx != mp_contexts->context_map ().end ());
ctx->second.cleanup ();
}
}
@ -881,15 +883,6 @@ void local_processor<TS, TI, TR>::run (local_operation<TS, TI, TR> *op, unsigned
compute_results (contexts, op, output_layers);
}
/**
 *  @brief Inserts a set of result shapes into one output layer of a cell
 *
 *  @param cell The cell receiving the shapes
 *  @param output_layer The layer index inside "cell" the shapes are inserted into
 *  @param result The shapes to insert
 *
 *  An empty "result" is a no-op: neither the lock is taken nor the shapes
 *  container is touched.
 */
template <class TS, class TI, class TR>
void local_processor<TS, TI, TR>::push_results (db::Cell *cell, unsigned int output_layer, const std::unordered_set<TR> &result) const
{
if (! result.empty ()) {
// the insert is done under the lock of the layout owning the cell
// NOTE(review): presumably the locker releases the lock at the end of this scope - confirm
tl::MutexLocker locker (&cell->layout ()->lock ());
cell->shapes (output_layer).insert (result.begin (), result.end ());
}
}
template <class TS, class TI, class TR>
void local_processor<TS, TI, TR>::compute_contexts (local_processor_contexts<TS, TI, TR> &contexts, const local_operation<TS, TI, TR> *op, unsigned int subject_layer, const std::vector<unsigned int> &intruder_layers) const
{
@ -1248,7 +1241,7 @@ local_processor<TS, TI, TR>::compute_results (local_processor_contexts<TS, TI, T
typename local_processor_contexts<TS, TI, TR>::iterator cpc = contexts.context_map ().find (&mp_subject_layout->cell (*bu));
if (cpc != contexts.context_map ().end ()) {
cpc->second.compute_results (contexts, cpc->first, op, output_layers, this);
contexts.context_map ().erase (cpc);
cpc->second.cleanup (); // release some memory
}
}
@ -1261,6 +1254,24 @@ local_processor<TS, TI, TR>::compute_results (local_processor_contexts<TS, TI, T
}
}
// deliver the results
{
tl::MutexLocker locker (& mp_subject_layout->lock ());
for (auto c = contexts.begin (); c != contexts.end (); ++c) {
db::Cell *cell = c->first;
auto r = c->second.result ().begin ();
auto rend = c->second.result ().end ();
for (auto o = output_layers.begin (); r != rend && o != output_layers.end (); ++o, ++r) {
if (! r->empty ()) {
cell->shapes (*o).insert (r->begin (), r->end ());
}
}
}
}
}
namespace {

View File

@ -244,9 +244,20 @@ public:
return m_contexts.end ();
}
/**
 *  @brief Gets the results stored in this object
 *
 *  Returns a read-only reference to the m_result member - a vector of result sets.
 *  NOTE(review): presumably there is one set per output layer, in output layer
 *  order - confirm against the code filling m_result.
 */
const std::vector<std::unordered_set<TR> > &result () const
{
return m_result;
}
/**
 *  @brief Drops the stored contexts
 *
 *  Only the context map (m_contexts) is cleared. The stored results (m_result)
 *  are not touched by this method and remain accessible through "result ()".
 */
void cleanup ()
{
m_contexts.clear ();
}
private:
const db::Cell *mp_intruder_cell;
std::unordered_map<context_key_type, db::local_processor_cell_context<TS, TI, TR> > m_contexts;
std::vector<std::unordered_set<TR> > m_result;
};
template <class TS, class TI, class TR>
@ -558,7 +569,6 @@ private:
size_t get_progress () const;
void compute_contexts (db::local_processor_contexts<TS, TI, TR> &contexts, db::local_processor_cell_context<TS, TI, TR> *parent_context, db::Cell *subject_parent, db::Cell *subject_cell, const db::ICplxTrans &subject_cell_inst, const db::Cell *intruder_cell, const typename local_processor_cell_contexts<TS, TI, TR>::context_key_type &intruders, db::Coord dist) const;
void issue_compute_contexts (db::local_processor_contexts<TS, TI, TR> &contexts, db::local_processor_cell_context<TS, TI, TR> *parent_context, db::Cell *subject_parent, db::Cell *subject_cell, const db::ICplxTrans &subject_cell_inst, const db::Cell *intruder_cell, typename local_processor_cell_contexts<TS, TI, TR>::context_key_type &intruders, db::Coord dist) const;
void push_results (db::Cell *cell, unsigned int output_layer, const std::unordered_set<TR> &result) const;
void compute_local_cell (const db::local_processor_contexts<TS, TI, TR> &contexts, db::Cell *subject_cell, const db::Cell *intruder_cell, const local_operation<TS, TI, TR> *op, const typename local_processor_cell_contexts<TS, TI, TR>::context_key_type &intruders, std::vector<std::unordered_set<TR> > &result) const;
bool subject_cell_is_breakout (db::cell_index_type ci) const

View File

@ -446,7 +446,12 @@ local_processor_cell_contexts<TS, TI, TR>::compute_results (const local_processo
proc->compute_local_cell (contexts, cell, mp_intruder_cell, op, *c->first, res);
}
if (common.empty ()) {
bool common_empty = true;
for (auto c = common.begin (); common_empty && c != common.end (); ++c) {
common_empty = c->empty ();
}
if (common_empty) {
CRONOLOGY_COMPUTE_BRACKET(event_propagate)
for (std::vector<unsigned int>::const_iterator o = output_layers.begin (); o != output_layers.end (); ++o) {
@ -519,9 +524,30 @@ local_processor_cell_contexts<TS, TI, TR>::compute_results (const local_processo
}
for (std::vector<unsigned int>::const_iterator o = output_layers.begin (); o != output_layers.end (); ++o) {
size_t oi = o - output_layers.begin ();
proc->push_results (cell, *o, common[oi]);
// store the results
bool common_empty = true;
for (auto c = common.begin (); common_empty && c != common.end (); ++c) {
common_empty = c->empty ();
}
if (! common_empty) {
if (m_result.empty ()) {
m_result.swap (common);
} else {
if (m_result.size () < common.size ()) {
m_result.resize (common.size ());
}
auto t = m_result.begin ();
for (auto s = common.begin (); s != common.end (); ++s, ++t) {
if (t->empty ()) {
t->swap (*s);
} else {
t->insert (s->begin (), s->end ());
s->clear ();
}
}
}
}
}