OpenSTA/search/Bfs.cc

741 lines
21 KiB
C++

// OpenSTA, Static Timing Analyzer
// Copyright (c) 2026, Parallax Software, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software.
//
// Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
//
// This notice may not be removed or altered from any source distribution.
#include "Bfs.hh"
#include <atomic>
#include "Debug.hh"
#include "DispatchQueue.hh"
#include "Graph.hh"
#include "Levelize.hh"
#include "Mutex.hh"
#include "Network.hh"
#include "Report.hh"
#include "Sdc.hh"
#include "SearchPred.hh"
#include "Variables.hh"
namespace sta {
// Scratch storage for the Kahn traversal in visitParallel.
// The arrays are indexed by vertex ID and kept between calls so the
// large per-graph buffers are not reallocated every time.
struct BfsIterator::KahnState
{
  // In-degree per vertex ID gathered during discovery.
  // -1 marks a vertex outside the active set; >= 0 is its in-degree.
  std::vector<int> in_degree_init;
  // Atomic in-degrees used by the parallel phase.
  std::unique_ptr<std::atomic<int>[]> in_degree;
  // Number of elements allocated in in_degree.
  size_t in_degree_size = 0;
  // Vertex IDs written by the previous call; resetPrevious() restores
  // their in_degree_init entries to -1.
  std::vector<VertexId> prev_ids;

  // Grow in_degree_init to at least `needed` entries; new entries are -1.
  void ensureInitSize(size_t needed)
  {
    if (needed > in_degree_init.size())
      in_degree_init.resize(needed, -1);
  }

  // Replace the atomic array when it holds fewer than `needed` entries.
  // The old contents are dropped, not copied.
  void ensureAtomicSize(size_t needed)
  {
    if (needed <= in_degree_size)
      return;
    in_degree = std::make_unique<std::atomic<int>[]>(needed);
    in_degree_size = needed;
  }

  // Mark every vertex recorded in prev_ids as inactive, then forget the list.
  void resetPrevious()
  {
    for (size_t i = 0; i < prev_ids.size(); i++)
      in_degree_init[prev_ids[i]] = -1;
    prev_ids.clear();
  }
};
BfsIterator::BfsIterator(BfsIndex bfs_index,
Level level_min,
Level level_max,
SearchPred *search_pred,
StaState *sta) :
StaState(sta),
bfs_index_(bfs_index),
level_min_(level_min),
level_max_(level_max),
search_pred_(search_pred)
{
init();
}
// Reset the level bounds so that empty() holds (first_level_ at level_max_,
// last_level_ at level_min_) and size the per-level queue if possible.
void
BfsIterator::init()
{
  last_level_ = level_min_;
  first_level_ = level_max_;
  ensureSize();
}
// Make sure queue_ has one slot per level (0 .. maxLevel).
// Does nothing until the graph has been levelized; never shrinks queue_.
void
BfsIterator::ensureSize()
{
  if (!levelize_->levelized())
    return;
  unsigned level_count = levelize_->maxLevel() + 1;
  if (level_count > queue_.size())
    queue_.resize(level_count);
}
void
BfsIterator::clear()
{
Level level = first_level_;
while (levelLessOrEqual(level, last_level_)) {
VertexSeq &level_vertices = queue_[level];
for (Vertex *vertex : level_vertices) {
if (vertex)
vertex->setBfsInQueue(bfs_index_, false);
}
level_vertices.clear();
incrLevel(level);
}
init();
}
void
BfsIterator::reportEntries() const
{
for (Level level = first_level_; levelLessOrEqual(level, last_level_);
incrLevel(level)) {
const VertexSeq &level_vertices = queue_[level];
if (!level_vertices.empty()) {
report_->report("Level {}", level);
for (Vertex *vertex : level_vertices)
report_->report(" {}", vertex ? vertex->to_string(this) : "NULL");
}
}
}
// Empty the queue for one level, clearing the in-queue flag of each
// vertex still stored there.
void
BfsIterator::deleteEntries(Level level)
{
  VertexSeq &entries = queue_[level];
  for (size_t i = 0; i < entries.size(); i++) {
    Vertex *queued = entries[i];
    // Null slots mark vertices already taken out by remove().
    if (queued != nullptr)
      queued->setBfsInQueue(bfs_index_, false);
  }
  entries.clear();
}
// True when the level bounds describe an empty range, i.e. last_level_
// lies before first_level_ in iteration order.
bool
BfsIterator::empty() const
{
  const bool bounds_crossed = levelLess(last_level_, first_level_);
  return bounds_crossed;
}
// Enqueue the vertices adjacent to `vertex` using the iterator's own
// search predicate.
void
BfsIterator::enqueueAdjacentVertices(Vertex *vertex)
{
  SearchPred *default_pred = search_pred_;
  enqueueAdjacentVertices(vertex, default_pred);
}
// Mode-specific variant: enqueue the vertices adjacent to `vertex` using
// the iterator's own search predicate evaluated for `mode`.
void
BfsIterator::enqueueAdjacentVertices(Vertex *vertex,
                                     const Mode *mode)
{
  SearchPred *default_pred = search_pred_;
  enqueueAdjacentVertices(vertex, default_pred, mode);
}
// Serially visit queued vertices level by level, stopping after to_level
// or when the queue runs out. Returns the number of vertices visited
// (null slots are skipped and not counted).
int
BfsIterator::visit(Level to_level,
                   VertexVisitor *visitor)
{
  int visited = 0;
  for (;;) {
    const bool in_range = levelLessOrEqual(first_level_, last_level_)
      && levelLessOrEqual(first_level_, to_level);
    if (!in_range)
      break;
    const Level current = first_level_;
    VertexSeq &pending = queue_[current];
    // Advance before visiting so first_level_ already points past this level.
    incrLevel(first_level_);
    // ArrivalVisitor::enqueueRefPinInputDelays may enqueue vertices at this
    // same level, so pop from the back rather than range-iterating a vector
    // that can grow underneath us.
    while (!pending.empty()) {
      Vertex *vertex = pending.back();
      pending.pop_back();
      if (vertex == nullptr)
        continue;
      checkLevel(vertex, current);
      vertex->setBfsInQueue(bfs_index_, false);
      visitor->visit(vertex);
      visited++;
    }
    pending.clear();
  }
  return visited;
}
// Recalculate first_level_/last_level_ from remaining queue entries.
void
BfsIterator::resetLevelBounds()
{
first_level_ = level_max_;
last_level_ = level_min_;
for (Level l = 0; l < static_cast<Level>(queue_.size()); l++) {
if (!queue_[l].empty()) {
if (levelLess(l, first_level_))
first_level_ = l;
if (levelLess(last_level_, l))
last_level_ = l;
}
}
}
// Visit queued vertices up to and including to_level (in iteration order),
// spreading the work over the dispatch queue when more than one thread is
// configured. Returns the number of vertices visited.
//
// One of three strategies is used when the queue is not empty:
//  1. thread_count_ == 1: defer to the serial visit().
//  2. Kahn traversal unavailable (useKahnsBfs() off, no kahn_pred_, or
//     dynamic loop breaking on): level-by-level traversal where each level
//     is split across threads and finished before the next level starts.
//  3. Otherwise: Kahn's algorithm, dispatching a vertex as soon as all of
//     its discovered predecessors have been visited.
//
// Each worker uses its own visitor->copy(); the copies are deleted before
// returning from strategies 2 and 3.
int
BfsIterator::visitParallel(Level to_level,
VertexVisitor *visitor)
{
size_t thread_count = thread_count_;
int visit_count = 0;
if (!empty()) {
if (thread_count == 1)
visit_count = visit(to_level, visitor);
else if (!variables_->useKahnsBfs()
|| !kahn_pred_
|| variables_->dynamicLoopBreaking()) {
// Original level-based parallel BFS with per-level barriers.
// dynamic_loop_breaking enables disabled-loop edges based on
// arrival tags that only emerge during propagation. Kahn's
// discovery runs before any propagation and cannot see those
// tags, so we fall back to the original BFS whenever dynamic
// loop breaking is active.
// One visitor copy per thread, indexed by chunk number below.
std::vector<VertexVisitor *> visitors;
visitors.reserve(thread_count_);
for (int k = 0; k < thread_count_; k++)
visitors.push_back(visitor->copy());
while (levelLessOrEqual(first_level_, last_level_)
&& levelLessOrEqual(first_level_, to_level)) {
VertexSeq &level_vertices = queue_[first_level_];
Level level = first_level_;
incrLevel(first_level_);
if (!level_vertices.empty()) {
size_t vertex_count = level_vertices.size();
// Fewer vertices than threads: visit them inline with the caller's
// visitor instead of dispatching.
if (vertex_count < thread_count) {
for (Vertex *vertex : level_vertices) {
if (vertex) {
checkLevel(vertex, level);
vertex->setBfsInQueue(bfs_index_, false);
visitor->visit(vertex);
}
}
}
else {
// Split the level into thread_count contiguous chunks [from, to);
// the last chunk also takes the remainder of the division.
size_t from = 0;
size_t chunk_size = vertex_count / thread_count;
BfsIndex bfs_index = bfs_index_;
for (size_t k = 0; k < thread_count; k++) {
size_t to = (k == thread_count - 1)
? vertex_count : from + chunk_size;
// NOTE(review): the [=] capture copies level_vertices and visitors
// into every task (a reference variable captured by copy copies the
// referenced object) -- confirm whether this snapshot is intended.
dispatch_queue_->dispatch([=, this](size_t) {
for (size_t i = from; i < to; i++) {
Vertex *vertex = level_vertices[i];
if (vertex) {
checkLevel(vertex, level);
vertex->setBfsInQueue(bfs_index, false);
visitors[k]->visit(vertex);
}
}
});
from = to;
}
// Finish all chunks of this level before clearing it and moving on.
dispatch_queue_->finishTasks();
}
level_vertices.clear();
// Counts every slot of the level, including null entries.
visit_count += vertex_count;
}
}
for (VertexVisitor *v : visitors)
delete v;
}
else {
// -------------------------------------------------------
// Kahn's algorithm: process vertices as soon as all their
// predecessors are done, eliminating per-level barriers.
// -------------------------------------------------------
// Lazy-init persistent Kahn state.
if (!kahn_state_)
kahn_state_ = std::make_unique<KahnState>();
// Vertex IDs can exceed vertexCount() after deletions
// (ObjectTable uses block-based IDs). Start with a
// reasonable estimate and grow dynamically during discovery.
VertexId vertex_count = graph_->vertexCount();
kahn_state_->ensureInitSize(vertex_count + 1);
// Restore the entries written by the previous call to -1.
kahn_state_->resetPrevious();
std::vector<int> &in_deg = kahn_state_->in_degree_init;
std::vector<Vertex*> active_vertices;
VertexId max_id = 0;
// Collect seed vertices from the level queue.
// Seeds start with in-degree 0; null slots are skipped.
Level saved_first = first_level_;
Level saved_last = last_level_;
Level level = first_level_;
while (levelLessOrEqual(level, last_level_)
&& levelLessOrEqual(level, to_level)) {
for (Vertex *vertex : queue_[level]) {
if (vertex) {
VertexId vid = graph_->id(vertex);
if (vid >= in_deg.size())
in_deg.resize(vid + 128, -1);
if (in_deg[vid] == -1) {
in_deg[vid] = 0;
active_vertices.push_back(vertex);
if (vid > max_id) max_id = vid;
}
}
}
incrLevel(level);
}
// BFS discovery -- mirrors enqueueAdjacentVertices logic.
// active_vertices doubles as the work list: successors appended here are
// expanded in turn. Each qualifying edge adds one to the successor's
// in-degree; successors beyond to_level are ignored.
size_t disc_idx = 0;
while (disc_idx < active_vertices.size()) {
Vertex *vertex = active_vertices[disc_idx++];
kahnForEachSuccessor(vertex, kahn_pred_,
[&](Vertex *succ) {
if (!levelLessOrEqual(succ->level(), to_level))
return;
VertexId sid = graph_->id(succ);
if (sid >= in_deg.size())
in_deg.resize(sid + 128, -1);
if (in_deg[sid] == -1) {
in_deg[sid] = 1;
active_vertices.push_back(succ);
// Flag the vertex as queued even though it is not pushed into queue_.
succ->setBfsInQueue(bfs_index_, true);
if (sid > max_id) max_id = sid;
}
else
in_deg[sid]++;
});
}
size_t active_count = active_vertices.size();
debugPrint(debug_, "bfs", 1, "kahns {} active vertices", active_count);
// Nothing to visit: clear the levels that were scanned and return.
if (active_count == 0) {
kahn_state_->prev_ids.clear();
level = saved_first;
while (levelLessOrEqual(level, saved_last)
&& levelLessOrEqual(level, to_level)) {
queue_[level].clear();
incrLevel(level);
}
resetLevelBounds();
return 0;
}
// Size atomic array to cover max discovered ID.
kahn_state_->ensureAtomicSize(max_id + 1);
std::atomic<int> *in_degree = kahn_state_->in_degree.get();
// Copy active in-degrees to atomic array and record IDs
// for cleanup on the next call.
kahn_state_->prev_ids.clear();
kahn_state_->prev_ids.reserve(active_count);
int initial_ready_count = 0;
for (Vertex *v : active_vertices) {
VertexId vid = graph_->id(v);
in_degree[vid].store(in_deg[vid], std::memory_order_relaxed);
kahn_state_->prev_ids.push_back(vid);
if (in_deg[vid] == 0)
initial_ready_count++;
}
debugPrint(debug_, "bfs", 1, "kahns {} initial ready",
initial_ready_count);
// Phase 3: Recursive-dispatch Kahn's traversal.
// Each task visits its vertex, decrements successor in-degrees,
// and directly dispatches any successor whose in-degree hit zero
// back into the DispatchQueue. finishTasks() waits for all work,
// including recursively-dispatched tasks. No batch barriers.
std::vector<VertexVisitor *> visitors;
for (size_t k = 0; k < thread_count; k++)
visitors.push_back(visitor->copy());
std::atomic<int> total_visited{0};
BfsIndex bfs_index = bfs_index_;
SearchPred *pred = kahn_pred_;
// Snapshot of the in_deg size used to bound-check successor IDs in tasks.
size_t in_deg_size = in_deg.size();
// Recursive task lambda: self-reference via std::function.
// Captures persist on visitParallel's stack until finishTasks
// returns.
std::function<void(Vertex*, size_t)> process;
process = [&, bfs_index, pred, in_deg_size](Vertex *vertex,
size_t tid) {
vertex->setBfsInQueue(bfs_index, false);
// tid selects the visitor copy used for this task.
visitors[tid]->visit(vertex);
total_visited.fetch_add(1, std::memory_order_relaxed);
kahnForEachSuccessor(vertex, pred, [&](Vertex *succ) {
VertexId sid = graph_->id(succ);
// Only successors that were put in the active set (in_deg >= 0) take part.
if (sid < in_deg_size && in_deg[sid] >= 0) {
int prev = in_degree[sid]
.fetch_sub(1, std::memory_order_acq_rel);
// prev == 1 means this decrement brought the in-degree to zero.
if (prev == 1) {
// Successor is now ready -- dispatch immediately.
dispatch_queue_->dispatch([&process, succ](size_t t) {
process(succ, t);
});
}
}
});
};
// Seed initial ready vertices into the dispatch queue.
for (Vertex *v : active_vertices) {
if (in_deg[graph_->id(v)] == 0) {
dispatch_queue_->dispatch([&process, v](size_t t) {
process(v, t);
});
}
}
dispatch_queue_->finishTasks();
visit_count = total_visited.load(std::memory_order_relaxed);
for (VertexVisitor *v : visitors)
delete v;
// Clear processed levels and update bounds for remaining entries.
level = saved_first;
while (levelLessOrEqual(level, saved_last)
&& levelLessOrEqual(level, to_level)) {
queue_[level].clear();
incrLevel(level);
}
resetLevelBounds();
}
}
return visit_count;
}
bool
BfsIterator::hasNext()
{
return hasNext(last_level_);
}
// Advance to the next queued vertex at or before to_level and report
// whether one was found. On success next() returns that vertex.
bool
BfsIterator::hasNext(Level to_level)
{
  findNext(to_level);
  if (!levelLessOrEqual(first_level_, last_level_))
    return false;
  return !queue_[first_level_].empty();
}
// Pop and return the last vertex queued at first_level_, clearing its
// in-queue flag. The entry is dereferenced without a null or empty check.
Vertex *
BfsIterator::next()
{
  VertexSeq &entries = queue_[first_level_];
  Vertex *popped = entries.back();
  entries.pop_back();
  popped->setBfsInQueue(bfs_index_, false);
  return popped;
}
// Move first_level_ forward until the back of its queue holds a real
// vertex, or until the levels up to to_level / last_level_ are exhausted.
// Trailing null entries (left by remove()) are popped along the way.
void
BfsIterator::findNext(Level to_level)
{
  for (; levelLessOrEqual(first_level_, last_level_)
         && levelLessOrEqual(first_level_, to_level);
       incrLevel(first_level_)) {
    VertexSeq &entries = queue_[first_level_];
    while (!entries.empty()) {
      Vertex *candidate = entries.back();
      if (candidate != nullptr) {
        checkLevel(candidate, first_level_);
        // Found one: leave first_level_ pointing at this level.
        return;
      }
      entries.pop_back();
    }
  }
}
// Add `vertex` to the queue for its level unless it is already flagged
// as queued, widening first_level_/last_level_ to include that level.
// The flag is tested once without the lock and again after taking
// queue_lock_, so the queue is only modified under the lock.
void
BfsIterator::enqueue(Vertex *vertex)
{
  debugPrint(debug_, "bfs", 2, "enqueue {}", vertex->to_string(this));
  if (vertex->bfsInQueue(bfs_index_))
    return;
  Level vertex_level = vertex->level();
  LockGuard lock(queue_lock_);
  if (vertex->bfsInQueue(bfs_index_))
    return;
  vertex->setBfsInQueue(bfs_index_, true);
  queue_[vertex_level].push_back(vertex);
  if (levelLess(last_level_, vertex_level))
    last_level_ = vertex_level;
  if (levelLess(vertex_level, first_level_))
    first_level_ = vertex_level;
}
// Report whether `vertex` is flagged as queued for this iterator.
// Only the per-vertex flag is consulted; the queue is not searched.
bool
BfsIterator::inQueue(Vertex *vertex)
{
  // checkInQueue(vertex);
  const bool queued = vertex->bfsInQueue(bfs_index_);
  return queued;
}
// Debug consistency check between a vertex's in-queue flag and the queue
// contents for its level:
//  - "extra":   the vertex is stored in the queue but its flag is clear.
//  - "missing": the flag is set but the vertex was not found in the queue.
// Both diagnostics are emitted under the "bfs" debug category. The
// "missing" message previously used the misspelled category "brs", so it
// was never shown when bfs debugging was enabled.
void
BfsIterator::checkInQueue(Vertex *vertex)
{
  Level level = vertex->level();
  // queue_ may not have been sized yet, so bound-check the level first.
  if (std::cmp_greater(queue_.size(), level)) {
    for (Vertex *v : queue_[level]) {
      if (v == vertex) {
        if (vertex->bfsInQueue(bfs_index_)) {
          // Flag and queue agree.
          return;
        }
        else {
          debugPrint(debug_, "bfs", 1, "extra {}", vertex->to_string(this));
        }
      }
    }
  }
  if (vertex->bfsInQueue(bfs_index_)) {
    debugPrint(debug_, "bfs", 1, "missing {}", vertex->to_string(this));
  }
}
// Verify that `vertex` still has the level of the queue slot it was taken
// from; report error 2300 when the two disagree.
void
BfsIterator::checkLevel(Vertex *vertex,
                        Level level)
{
  if (vertex->level() == level)
    return;
  report_->error(2300, "vertex {} level {} != bfs level {}",
                 vertex->to_string(this), vertex->level(), level);
}
// Called before `vertex` is deleted: take it out of the queue.
void
BfsIterator::deleteVertexBefore(Vertex *vertex)
{
  Vertex *doomed = vertex;
  remove(doomed);
}
// Take `vertex` out of the queue by overwriting its slot with a null
// pointer (the slot itself stays in place) and clearing its in-queue flag.
// Only the first matching slot is touched.
void
BfsIterator::remove(Vertex *vertex)
{
  Level level = vertex->level();
  if (!vertex->bfsInQueue(bfs_index_))
    return;
  // If the iterator has not been inited the queue will be empty.
  if (!std::cmp_greater(queue_.size(), level))
    return;
  debugPrint(debug_, "bfs", 2, "remove {}", vertex->to_string(this));
  VertexSeq &entries = queue_[level];
  for (size_t i = 0; i < entries.size(); i++) {
    if (entries[i] == vertex) {
      entries[i] = nullptr;
      vertex->setBfsInQueue(bfs_index_, false);
      return;
    }
  }
}
////////////////////////////////////////////////////////////////
// Forward iterator: levels run from 0 (level_min) up to level_max.
BfsFwdIterator::BfsFwdIterator(BfsIndex bfs_index,
                               SearchPred *search_pred,
                               StaState *sta) :
  BfsIterator(bfs_index, /* level_min */ 0, /* level_max */ level_max,
              search_pred, sta)
{
}
// Release every level still queued (ascending from first_level_ to
// last_level_), clearing the in-queue flags, without re-running init().
BfsFwdIterator::~BfsFwdIterator()
{
  Level level = first_level_;
  while (level <= last_level_) {
    deleteEntries(level);
    level++;
  }
}
// Step to the next level in forward order (one level higher).
void
BfsFwdIterator::incrLevel(Level &level) const
{
  level += 1;
}
// Forward ordering: level1 comes at or before level2 when it is
// numerically no greater.
bool
BfsFwdIterator::levelLessOrEqual(Level level1,
                                 Level level2) const
{
  return !(level1 > level2);
}
// Forward ordering: level1 comes strictly before level2 when it is
// numerically smaller.
bool
BfsFwdIterator::levelLess(Level level1,
                          Level level2) const
{
  return level2 > level1;
}
// Call fn on each fanout vertex of `vertex` that `pred` allows the search
// to reach: the search must be allowed from `vertex`, through the edge and
// to the fanout vertex. fn is called once per qualifying edge.
void
BfsFwdIterator::kahnForEachSuccessor(Vertex *vertex,
                                     SearchPred *pred,
                                     const VertexFn &fn)
{
  if (!pred->searchFrom(vertex))
    return;
  for (VertexOutEdgeIterator out_edges(vertex, graph_); out_edges.hasNext(); ) {
    Edge *out_edge = out_edges.next();
    Vertex *fanout = out_edge->to(graph_);
    const bool traverse = pred->searchThru(out_edge) && pred->searchTo(fanout);
    if (traverse)
      fn(fanout);
  }
}
// Enqueue each fanout vertex of `vertex` that search_pred allows the
// search to reach (from `vertex`, through the edge, to the fanout vertex).
void
BfsFwdIterator::enqueueAdjacentVertices(Vertex *vertex,
                                        SearchPred *search_pred)
{
  if (!search_pred->searchFrom(vertex))
    return;
  for (VertexOutEdgeIterator out_edges(vertex, graph_); out_edges.hasNext(); ) {
    Edge *out_edge = out_edges.next();
    Vertex *fanout = out_edge->to(graph_);
    const bool traverse = search_pred->searchThru(out_edge)
      && search_pred->searchTo(fanout);
    if (traverse)
      enqueue(fanout);
  }
}
// Mode-specific variant: enqueue each fanout vertex of `vertex` that
// search_pred allows the search to reach when evaluated for `mode`.
void
BfsFwdIterator::enqueueAdjacentVertices(Vertex *vertex,
                                        SearchPred *search_pred,
                                        const Mode *mode)
{
  if (!search_pred->searchFrom(vertex, mode))
    return;
  for (VertexOutEdgeIterator out_edges(vertex, graph_); out_edges.hasNext(); ) {
    Edge *out_edge = out_edges.next();
    Vertex *fanout = out_edge->to(graph_);
    const bool traverse = search_pred->searchThru(out_edge, mode)
      && search_pred->searchTo(fanout, mode);
    if (traverse)
      enqueue(fanout);
  }
}
////////////////////////////////////////////////////////////////
// Backward iterator: the bounds are swapped relative to the forward one,
// so iteration runs from level_max (level_min argument) down to 0.
BfsBkwdIterator::BfsBkwdIterator(BfsIndex bfs_index,
                                 SearchPred *search_pred,
                                 StaState *sta) :
  BfsIterator(bfs_index, /* level_min */ level_max, /* level_max */ 0,
              search_pred, sta)
{
}
// Release every level still queued (descending from first_level_ to
// last_level_), clearing the in-queue flags, without re-running init().
BfsBkwdIterator::~BfsBkwdIterator()
{
  Level level = first_level_;
  while (level >= last_level_) {
    deleteEntries(level);
    level--;
  }
}
// Step to the next level in backward order (one level lower).
void
BfsBkwdIterator::incrLevel(Level &level) const
{
  level -= 1;
}
// Backward ordering: level1 comes at or before level2 when it is
// numerically no smaller.
bool
BfsBkwdIterator::levelLessOrEqual(Level level1,
                                  Level level2) const
{
  return !(level1 < level2);
}
// Backward ordering: level1 comes strictly before level2 when it is
// numerically greater.
bool
BfsBkwdIterator::levelLess(Level level1,
                           Level level2) const
{
  return level2 < level1;
}
// Call fn on each fanin vertex of `vertex` that `pred` allows the search
// to reach: the search must be allowed to `vertex`, from the fanin vertex
// and through the edge. fn is called once per qualifying edge.
void
BfsBkwdIterator::kahnForEachSuccessor(Vertex *vertex,
                                      SearchPred *pred,
                                      const VertexFn &fn)
{
  if (!pred->searchTo(vertex))
    return;
  for (VertexInEdgeIterator in_edges(vertex, graph_); in_edges.hasNext(); ) {
    Edge *in_edge = in_edges.next();
    Vertex *fanin = in_edge->from(graph_);
    const bool traverse = pred->searchFrom(fanin) && pred->searchThru(in_edge);
    if (traverse)
      fn(fanin);
  }
}
// Enqueue each fanin vertex of `vertex` that search_pred allows the search
// to reach (to `vertex`, from the fanin vertex, through the edge).
void
BfsBkwdIterator::enqueueAdjacentVertices(Vertex *vertex,
                                         SearchPred *search_pred)
{
  if (!search_pred->searchTo(vertex))
    return;
  for (VertexInEdgeIterator in_edges(vertex, graph_); in_edges.hasNext(); ) {
    Edge *in_edge = in_edges.next();
    Vertex *fanin = in_edge->from(graph_);
    const bool traverse = search_pred->searchFrom(fanin)
      && search_pred->searchThru(in_edge);
    if (traverse)
      enqueue(fanin);
  }
}
// Mode-specific variant: enqueue each fanin vertex of `vertex` that
// search_pred allows the search to reach when evaluated for `mode`.
void
BfsBkwdIterator::enqueueAdjacentVertices(Vertex *vertex,
                                         SearchPred *search_pred,
                                         const Mode *mode)
{
  if (!search_pred->searchTo(vertex, mode))
    return;
  for (VertexInEdgeIterator in_edges(vertex, graph_); in_edges.hasNext(); ) {
    Edge *in_edge = in_edges.next();
    Vertex *fanin = in_edge->from(graph_);
    const bool traverse = search_pred->searchFrom(fanin, mode)
      && search_pred->searchThru(in_edge, mode);
    if (traverse)
      enqueue(fanin);
  }
}
} // namespace sta