// OpenSTA, Static Timing Analyzer // Copyright (c) 2026, Parallax Software, Inc. // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . // // The origin of this software must not be misrepresented; you must not // claim that you wrote the original software. // // Altered source versions must be plainly marked as such, and must not be // misrepresented as being the original software. // // This notice may not be removed or altered from any source distribution. #include "Bfs.hh" #include #include "Debug.hh" #include "DispatchQueue.hh" #include "Graph.hh" #include "Levelize.hh" #include "Mutex.hh" #include "Network.hh" #include "Report.hh" #include "Sdc.hh" #include "SearchPred.hh" #include "Variables.hh" namespace sta { // Persistent storage for Kahn's algorithm arrays. // Allocated once and reused across visitParallel calls to // avoid repeated allocation of large per-graph arrays. struct BfsIterator::KahnState { // -1 = not in active set, >= 0 = in-degree. std::vector in_degree_init; // Atomic in-degrees for the parallel phase. std::unique_ptr[]> in_degree; size_t in_degree_size = 0; // Vertex IDs touched in the previous call -- reset to -1 before reuse. std::vector prev_ids; void ensureInitSize(size_t needed) { if (in_degree_init.size() < needed) in_degree_init.resize(needed, -1); } void ensureAtomicSize(size_t needed) { if (in_degree_size < needed) { in_degree = std::make_unique[]>(needed); in_degree_size = needed; } } void resetPrevious() { for (VertexId vid : prev_ids) in_degree_init[vid] = -1; prev_ids.clear(); } }; BfsIterator::BfsIterator(BfsIndex bfs_index, Level level_min, Level level_max, SearchPred *search_pred, StaState *sta) : StaState(sta), bfs_index_(bfs_index), level_min_(level_min), level_max_(level_max), search_pred_(search_pred) { init(); } void BfsIterator::init() { first_level_ = level_max_; last_level_ = level_min_; ensureSize(); } void BfsIterator::ensureSize() { if (levelize_->levelized()) { unsigned max_level_1 = levelize_->maxLevel() + 1; if (queue_.size() < max_level_1) queue_.resize(max_level_1); } } void BfsIterator::clear() { Level level = first_level_; while (levelLessOrEqual(level, last_level_)) { VertexSeq &level_vertices = queue_[level]; for (Vertex *vertex : level_vertices) { if (vertex) vertex->setBfsInQueue(bfs_index_, false); } level_vertices.clear(); incrLevel(level); } init(); } void BfsIterator::reportEntries() const { for (Level level = first_level_; levelLessOrEqual(level, last_level_); incrLevel(level)) { const VertexSeq &level_vertices = queue_[level]; if (!level_vertices.empty()) { report_->report("Level {}", level); for (Vertex *vertex : level_vertices) report_->report(" {}", vertex ? vertex->to_string(this) : "NULL"); } } } void BfsIterator::deleteEntries(Level level) { VertexSeq &level_vertices = queue_[level]; for (Vertex *vertex : level_vertices) { if (vertex) vertex->setBfsInQueue(bfs_index_, false); } level_vertices.clear(); } bool BfsIterator::empty() const { return levelLess(last_level_, first_level_); } void BfsIterator::enqueueAdjacentVertices(Vertex *vertex) { enqueueAdjacentVertices(vertex, search_pred_); } void BfsIterator::enqueueAdjacentVertices(Vertex *vertex, const Mode *mode) { enqueueAdjacentVertices(vertex, search_pred_, mode); } int BfsIterator::visit(Level to_level, VertexVisitor *visitor) { int visit_count = 0; while (levelLessOrEqual(first_level_, last_level_) && levelLessOrEqual(first_level_, to_level)) { Level level = first_level_; VertexSeq &level_vertices = queue_[level]; incrLevel(first_level_); // Note that ArrivalVisitor::enqueueRefPinInputDelays may enqueue // vertices at this level so range iteration fails if the vector grows. while (!level_vertices.empty()) { Vertex *vertex = level_vertices.back(); level_vertices.pop_back(); if (vertex) { checkLevel(vertex, level); vertex->setBfsInQueue(bfs_index_, false); visitor->visit(vertex); visit_count++; } } level_vertices.clear(); } return visit_count; } // Recalculate first_level_/last_level_ from remaining queue entries. void BfsIterator::resetLevelBounds() { first_level_ = level_max_; last_level_ = level_min_; for (Level l = 0; l < static_cast(queue_.size()); l++) { if (!queue_[l].empty()) { if (levelLess(l, first_level_)) first_level_ = l; if (levelLess(last_level_, l)) last_level_ = l; } } } int BfsIterator::visitParallel(Level to_level, VertexVisitor *visitor) { size_t thread_count = thread_count_; int visit_count = 0; if (!empty()) { if (thread_count == 1) visit_count = visit(to_level, visitor); else if (!variables_->useKahnsBfs() || !kahn_pred_ || variables_->dynamicLoopBreaking()) { // Original level-based parallel BFS with per-level barriers. // dynamic_loop_breaking enables disabled-loop edges based on // arrival tags that only emerge during propagation. Kahn's // discovery runs before any propagation and cannot see those // tags, so we fall back to the original BFS whenever dynamic // loop breaking is active. std::vector visitors; visitors.reserve(thread_count_); for (int k = 0; k < thread_count_; k++) visitors.push_back(visitor->copy()); while (levelLessOrEqual(first_level_, last_level_) && levelLessOrEqual(first_level_, to_level)) { VertexSeq &level_vertices = queue_[first_level_]; Level level = first_level_; incrLevel(first_level_); if (!level_vertices.empty()) { size_t vertex_count = level_vertices.size(); if (vertex_count < thread_count) { for (Vertex *vertex : level_vertices) { if (vertex) { checkLevel(vertex, level); vertex->setBfsInQueue(bfs_index_, false); visitor->visit(vertex); } } } else { size_t from = 0; size_t chunk_size = vertex_count / thread_count; BfsIndex bfs_index = bfs_index_; for (size_t k = 0; k < thread_count; k++) { size_t to = (k == thread_count - 1) ? vertex_count : from + chunk_size; dispatch_queue_->dispatch([=, this](size_t) { for (size_t i = from; i < to; i++) { Vertex *vertex = level_vertices[i]; if (vertex) { checkLevel(vertex, level); vertex->setBfsInQueue(bfs_index, false); visitors[k]->visit(vertex); } } }); from = to; } dispatch_queue_->finishTasks(); } level_vertices.clear(); visit_count += vertex_count; } } for (VertexVisitor *v : visitors) delete v; } else { // ------------------------------------------------------- // Kahn's algorithm: process vertices as soon as all their // predecessors are done, eliminating per-level barriers. // ------------------------------------------------------- // Lazy-init persistent Kahn state. if (!kahn_state_) kahn_state_ = std::make_unique(); // Vertex IDs can exceed vertexCount() after deletions // (ObjectTable uses block-based IDs). Start with a // reasonable estimate and grow dynamically during discovery. VertexId vertex_count = graph_->vertexCount(); kahn_state_->ensureInitSize(vertex_count + 1); kahn_state_->resetPrevious(); std::vector &in_deg = kahn_state_->in_degree_init; std::vector active_vertices; VertexId max_id = 0; // Collect seed vertices from the level queue. Level saved_first = first_level_; Level saved_last = last_level_; Level level = first_level_; while (levelLessOrEqual(level, last_level_) && levelLessOrEqual(level, to_level)) { for (Vertex *vertex : queue_[level]) { if (vertex) { VertexId vid = graph_->id(vertex); if (vid >= in_deg.size()) in_deg.resize(vid + 128, -1); if (in_deg[vid] == -1) { in_deg[vid] = 0; active_vertices.push_back(vertex); if (vid > max_id) max_id = vid; } } } incrLevel(level); } // BFS discovery -- mirrors enqueueAdjacentVertices logic. size_t disc_idx = 0; while (disc_idx < active_vertices.size()) { Vertex *vertex = active_vertices[disc_idx++]; kahnForEachSuccessor(vertex, kahn_pred_, [&](Vertex *succ) { if (!levelLessOrEqual(succ->level(), to_level)) return; VertexId sid = graph_->id(succ); if (sid >= in_deg.size()) in_deg.resize(sid + 128, -1); if (in_deg[sid] == -1) { in_deg[sid] = 1; active_vertices.push_back(succ); succ->setBfsInQueue(bfs_index_, true); if (sid > max_id) max_id = sid; } else in_deg[sid]++; }); } size_t active_count = active_vertices.size(); debugPrint(debug_, "bfs", 1, "kahns {} active vertices", active_count); if (active_count == 0) { kahn_state_->prev_ids.clear(); level = saved_first; while (levelLessOrEqual(level, saved_last) && levelLessOrEqual(level, to_level)) { queue_[level].clear(); incrLevel(level); } resetLevelBounds(); return 0; } // Size atomic array to cover max discovered ID. kahn_state_->ensureAtomicSize(max_id + 1); std::atomic *in_degree = kahn_state_->in_degree.get(); // Copy active in-degrees to atomic array and record IDs // for cleanup on the next call. kahn_state_->prev_ids.clear(); kahn_state_->prev_ids.reserve(active_count); int initial_ready_count = 0; for (Vertex *v : active_vertices) { VertexId vid = graph_->id(v); in_degree[vid].store(in_deg[vid], std::memory_order_relaxed); kahn_state_->prev_ids.push_back(vid); if (in_deg[vid] == 0) initial_ready_count++; } debugPrint(debug_, "bfs", 1, "kahns {} initial ready", initial_ready_count); // Phase 3: Recursive-dispatch Kahn's traversal. // Each task visits its vertex, decrements successor in-degrees, // and directly dispatches any successor whose in-degree hit zero // back into the DispatchQueue. finishTasks() waits for all work, // including recursively-dispatched tasks. No batch barriers. std::vector visitors; for (size_t k = 0; k < thread_count; k++) visitors.push_back(visitor->copy()); std::atomic total_visited{0}; BfsIndex bfs_index = bfs_index_; SearchPred *pred = kahn_pred_; size_t in_deg_size = in_deg.size(); // Recursive task lambda: self-reference via std::function. // Captures persist on visitParallel's stack until finishTasks // returns. std::function process; process = [&, bfs_index, pred, in_deg_size](Vertex *vertex, size_t tid) { vertex->setBfsInQueue(bfs_index, false); visitors[tid]->visit(vertex); total_visited.fetch_add(1, std::memory_order_relaxed); kahnForEachSuccessor(vertex, pred, [&](Vertex *succ) { VertexId sid = graph_->id(succ); if (sid < in_deg_size && in_deg[sid] >= 0) { int prev = in_degree[sid] .fetch_sub(1, std::memory_order_acq_rel); if (prev == 1) { // Successor is now ready -- dispatch immediately. dispatch_queue_->dispatch([&process, succ](size_t t) { process(succ, t); }); } } }); }; // Seed initial ready vertices into the dispatch queue. for (Vertex *v : active_vertices) { if (in_deg[graph_->id(v)] == 0) { dispatch_queue_->dispatch([&process, v](size_t t) { process(v, t); }); } } dispatch_queue_->finishTasks(); visit_count = total_visited.load(std::memory_order_relaxed); for (VertexVisitor *v : visitors) delete v; // Clear processed levels and update bounds for remaining entries. level = saved_first; while (levelLessOrEqual(level, saved_last) && levelLessOrEqual(level, to_level)) { queue_[level].clear(); incrLevel(level); } resetLevelBounds(); } } return visit_count; } bool BfsIterator::hasNext() { return hasNext(last_level_); } bool BfsIterator::hasNext(Level to_level) { findNext(to_level); return levelLessOrEqual(first_level_, last_level_) && !queue_[first_level_].empty(); } Vertex * BfsIterator::next() { VertexSeq &level_vertices = queue_[first_level_]; Vertex *vertex = level_vertices.back(); level_vertices.pop_back(); vertex->setBfsInQueue(bfs_index_, false); return vertex; } void BfsIterator::findNext(Level to_level) { while (levelLessOrEqual(first_level_, last_level_) && levelLessOrEqual(first_level_, to_level)) { VertexSeq &level_vertices = queue_[first_level_]; // Skip null entries from deleted vertices. while (!level_vertices.empty()) { Vertex *vertex = level_vertices.back(); if (vertex == nullptr) level_vertices.pop_back(); else { checkLevel(vertex, first_level_); return; } } incrLevel(first_level_); } } void BfsIterator::enqueue(Vertex *vertex) { debugPrint(debug_, "bfs", 2, "enqueue {}", vertex->to_string(this)); if (!vertex->bfsInQueue(bfs_index_)) { Level level = vertex->level(); LockGuard lock(queue_lock_); if (!vertex->bfsInQueue(bfs_index_)) { vertex->setBfsInQueue(bfs_index_, true); queue_[level].push_back(vertex); if (levelLess(last_level_, level)) last_level_ = level; if (levelLess(level, first_level_)) first_level_ = level; } } } bool BfsIterator::inQueue(Vertex *vertex) { // checkInQueue(vertex); return vertex->bfsInQueue(bfs_index_); } void BfsIterator::checkInQueue(Vertex *vertex) { Level level = vertex->level(); if (std::cmp_greater(queue_.size(), level)) { for (Vertex *v : queue_[level]) { if (v == vertex) { if (vertex->bfsInQueue(bfs_index_)) return; else debugPrint(debug_, "bfs", 1, "extra {}", vertex->to_string(this)); } } } if (vertex->bfsInQueue(bfs_index_)) debugPrint(debug_, "brs", 1, "missing {}", vertex->to_string(this)); } void BfsIterator::checkLevel(Vertex *vertex, Level level) { if (vertex->level() != level) report_->error(2300, "vertex {} level {} != bfs level {}", vertex->to_string(this), vertex->level(), level); } void BfsIterator::deleteVertexBefore(Vertex *vertex) { remove(vertex); } // Remove by inserting null vertex pointer. void BfsIterator::remove(Vertex *vertex) { // If the iterator has not been inited the queue will be empty. Level level = vertex->level(); if (vertex->bfsInQueue(bfs_index_) && std::cmp_greater(queue_.size(), level)) { debugPrint(debug_, "bfs", 2, "remove {}", vertex->to_string(this)); for (Vertex *&v : queue_[level]) { if (v == vertex) { v = nullptr; vertex->setBfsInQueue(bfs_index_, false); break; } } } } //////////////////////////////////////////////////////////////// BfsFwdIterator::BfsFwdIterator(BfsIndex bfs_index, SearchPred *search_pred, StaState *sta) : BfsIterator(bfs_index, 0, level_max, search_pred, sta) { } // clear() without saving lists to list_free_. BfsFwdIterator::~BfsFwdIterator() { for (Level level = first_level_; level <= last_level_; level++) deleteEntries(level); } void BfsFwdIterator::incrLevel(Level &level) const { level++; } bool BfsFwdIterator::levelLessOrEqual(Level level1, Level level2) const { return level1 <= level2; } bool BfsFwdIterator::levelLess(Level level1, Level level2) const { return level1 < level2; } void BfsFwdIterator::kahnForEachSuccessor(Vertex *vertex, SearchPred *pred, const VertexFn &fn) { if (pred->searchFrom(vertex)) { VertexOutEdgeIterator edge_iter(vertex, graph_); while (edge_iter.hasNext()) { Edge *edge = edge_iter.next(); Vertex *to_vertex = edge->to(graph_); if (pred->searchThru(edge) && pred->searchTo(to_vertex)) fn(to_vertex); } } } void BfsFwdIterator::enqueueAdjacentVertices(Vertex *vertex, SearchPred *search_pred) { if (search_pred->searchFrom(vertex)) { VertexOutEdgeIterator edge_iter(vertex, graph_); while (edge_iter.hasNext()) { Edge *edge = edge_iter.next(); Vertex *to_vertex = edge->to(graph_); if (search_pred->searchThru(edge) && search_pred->searchTo(to_vertex)) enqueue(to_vertex); } } } void BfsFwdIterator::enqueueAdjacentVertices(Vertex *vertex, SearchPred *search_pred, const Mode *mode) { if (search_pred->searchFrom(vertex, mode)) { VertexOutEdgeIterator edge_iter(vertex, graph_); while (edge_iter.hasNext()) { Edge *edge = edge_iter.next(); Vertex *to_vertex = edge->to(graph_); if (search_pred->searchThru(edge, mode) && search_pred->searchTo(to_vertex, mode)) enqueue(to_vertex); } } } //////////////////////////////////////////////////////////////// BfsBkwdIterator::BfsBkwdIterator(BfsIndex bfs_index, SearchPred *search_pred, StaState *sta) : BfsIterator(bfs_index, level_max, 0, search_pred, sta) { } // clear() without saving lists to list_free_. BfsBkwdIterator::~BfsBkwdIterator() { for (Level level = first_level_; level >= last_level_; level--) deleteEntries(level); } void BfsBkwdIterator::incrLevel(Level &level) const { level--; } bool BfsBkwdIterator::levelLessOrEqual(Level level1, Level level2) const { return level1 >= level2; } bool BfsBkwdIterator::levelLess(Level level1, Level level2) const { return level1 > level2; } void BfsBkwdIterator::kahnForEachSuccessor(Vertex *vertex, SearchPred *pred, const VertexFn &fn) { if (pred->searchTo(vertex)) { VertexInEdgeIterator edge_iter(vertex, graph_); while (edge_iter.hasNext()) { Edge *edge = edge_iter.next(); Vertex *from_vertex = edge->from(graph_); if (pred->searchFrom(from_vertex) && pred->searchThru(edge)) fn(from_vertex); } } } void BfsBkwdIterator::enqueueAdjacentVertices(Vertex *vertex, SearchPred *search_pred) { if (search_pred->searchTo(vertex)) { VertexInEdgeIterator edge_iter(vertex, graph_); while (edge_iter.hasNext()) { Edge *edge = edge_iter.next(); Vertex *from_vertex = edge->from(graph_); if (search_pred->searchFrom(from_vertex) && search_pred->searchThru(edge)) enqueue(from_vertex); } } } void BfsBkwdIterator::enqueueAdjacentVertices(Vertex *vertex, SearchPred *search_pred, const Mode *mode) { if (search_pred->searchTo(vertex, mode)) { VertexInEdgeIterator edge_iter(vertex, graph_); while (edge_iter.hasNext()) { Edge *edge = edge_iter.next(); Vertex *from_vertex = edge->from(graph_); if (search_pred->searchFrom(from_vertex, mode) && search_pred->searchThru(edge, mode)) enqueue(from_vertex); } } } } // namespace sta