OpenSTA/dcalc/ArnoldiReduce.cc

686 lines
17 KiB
C++

// OpenSTA, Static Timing Analyzer
// Copyright (c) 2026, Parallax Software, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software.
//
// Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
//
// This notice may not be removed or altered from any source distribution.
// (c) 2018 Nefelus, Inc.
//
// Author: W. Scott
#include "ArnoldiReduce.hh"
#include "Debug.hh"
#include "MinMax.hh"
#include "Sdc.hh"
#include "Network.hh"
#include "Units.hh"
#include "Arnoldi.hh"
#include "Format.hh"
#include "parasitics/ConcreteParasiticsPvt.hh"
namespace sta {
rcmodel::rcmodel() :
pinV(nullptr)
{
}
// Release the pin array; pinV starts out as nullptr, which free() accepts.
rcmodel::~rcmodel()
{
  free(pinV);
}
// Returns the stored ctot value as a float.
float
rcmodel::capacitance() const
{
  return static_cast<float>(ctot);
}
// Always yields an empty pin set; both arguments are ignored.
PinSet
rcmodel::unannotatedLoads(const Pin *,
                          const Parasitics *) const
{
  // This should never be called because the rcmodel is not saved in the Parasitics.
  PinSet no_loads;
  return no_loads;
}
// Per-node work record used while loading and traversing the parasitic network.
struct ts_point
{
// Parasitic node stored in this slot; loadWork sets it to nullptr for slots
// that hold no node.
ParasiticNode *node_;
// Number of edges currently stored in eV (a self-loop resistor counts once).
int eN;
// True when the node has a pin (set by loadWork).
bool is_term;
// Offset of this terminal from pterm0; loadWork stores the terminal's pin at
// pinV[tindex]. Only assigned for terminals.
int tindex; // index into termV of corresponding term
// Incident edges; points into the shared ts_eV array carved up by loadWork.
ts_edge **eV;
// Set by makeRcmodelDfs once the point has been reached.
bool visited;
// Edge through which makeRcmodelDfs reached this point (nullptr for the driver).
ts_edge *in_edge;
// Visit order assigned by makeRcmodelDfs; index into ts_pordV / ts_ordV.
int ts;
// Capacitance accumulated for this point by getRC.
double c;
// Resistance value of in_edge's resistor as read by getRC (0 when there is none).
double r;
};
// Work record for one parasitic resistor connecting two ts_points.
struct ts_edge
{
// Resistor this edge was built from (set by loadWork).
ParasiticResistor *resistor_;
// End points. loadWork fills them from the resistor's node1/node2; the
// ts_orient macro may swap the roles so that `from` is the point being expanded.
ts_point *from;
ts_point *to;
};
////////////////////////////////////////////////////////////////
// Extra slots added on top of the required count when allocPoints regrows
// the per-point arrays.
const int ArnoldiReduce::ts_point_count_incr_ = 1024;
// Extra slots added on top of the required count when allocPoints regrows
// the per-edge arrays.
const int ArnoldiReduce::ts_edge_count_incr_ = 1024;
// Set the initial capacities and allocate every work buffer at that size.
// allocPoints() and allocTerms() replace the buffers when a net needs more room.
ArnoldiReduce::ArnoldiReduce(StaState *sta) :
  StaState(sta),
  ts_pointNmax(1024),
  ts_edgeNmax(1024),
  termNmax(256),
  dNmax(8)
{
  // Buffers sized by the point capacity.
  ts_pointV = static_cast<ts_point *>(malloc(ts_pointNmax * sizeof(ts_point)));
  ts_ordV = static_cast<int *>(malloc(ts_pointNmax * sizeof(int)));
  ts_pordV = static_cast<ts_point **>(malloc(ts_pointNmax * sizeof(ts_point *)));
  _u0 = static_cast<double *>(malloc(ts_pointNmax * sizeof(double)));
  _u1 = static_cast<double *>(malloc(ts_pointNmax * sizeof(double)));
  y = static_cast<double *>(malloc(ts_pointNmax * sizeof(double)));
  iv = static_cast<double *>(malloc(ts_pointNmax * sizeof(double)));
  r = static_cast<double *>(malloc(ts_pointNmax * sizeof(double)));
  c = static_cast<double *>(malloc(ts_pointNmax * sizeof(double)));
  par = static_cast<int *>(malloc(ts_pointNmax * sizeof(int)));

  // Buffers sized by the edge capacity; ts_eV holds two slots per edge.
  ts_edgeV = static_cast<ts_edge *>(malloc(ts_edgeNmax * sizeof(ts_edge)));
  ts_stackV = static_cast<ts_edge **>(malloc(ts_edgeNmax * sizeof(ts_edge *)));
  ts_eV = static_cast<ts_edge **>(malloc(2 * ts_edgeNmax * sizeof(ts_edge *)));

  // Buffers sized by the terminal capacity.
  pinV = static_cast<const Pin **>(malloc(termNmax * sizeof(const Pin *)));
  termV = static_cast<int *>(malloc(termNmax * sizeof(int)));
  outV = static_cast<int *>(malloc(termNmax * sizeof(int)));

  // Buffers sized by the maximum order; U0 backs the rows addressed through U.
  d = static_cast<double *>(malloc(dNmax * sizeof(double)));
  e = static_cast<double *>(malloc(dNmax * sizeof(double)));
  U = static_cast<double **>(malloc(dNmax * sizeof(double *)));
  U0 = static_cast<double *>(malloc(dNmax * termNmax * sizeof(double)));
  for (int row = 0; row < dNmax; row++)
    U[row] = U0 + row * termNmax;
}
// Free every work buffer allocated by the constructor or the alloc* helpers.
ArnoldiReduce::~ArnoldiReduce()
{
  // Order-sized buffers.
  free(U0);
  free(U);
  free(e);
  free(d);

  // Terminal-sized buffers.
  free(outV);
  free(termV);
  free(pinV);

  // Edge-sized buffers.
  free(ts_eV);
  free(ts_edgeV);
  free(ts_stackV);

  // Point-sized buffers.
  free(par);
  free(c);
  free(r);
  free(iv);
  free(y);
  free(_u1);
  free(_u0);
  free(ts_pordV);
  free(ts_ordV);
  free(ts_pointV);
}
// Entry point: record the reduction context, load the parasitic network into
// the work arrays and build the reduced model as seen from drvr_pin.
// Returns whatever makeRcmodelDrv() returns (which may be nullptr).
rcmodel *
ArnoldiReduce::reduceToArnoldi(Parasitic *parasitic,
                               const Pin *drvr_pin,
                               float coupling_cap_factor,
                               const RiseFall *rf,
                               const Scene *scene,
                               const MinMax *min_max)
{
  // Network being reduced and the Parasitics object of this scene/min_max.
  parasitic_network_ = reinterpret_cast<ConcreteParasiticNetwork *>(parasitic);
  parasitics_ = scene->parasitics(min_max);

  // Context consulted by the helper methods.
  scene_ = scene;
  min_max_ = min_max;
  rf_ = rf;
  drvr_pin_ = drvr_pin;
  coupling_cap_factor_ = coupling_cap_factor;

  loadWork();
  rcmodel *model = makeRcmodelDrv();
  return model;
}
void
ArnoldiReduce::loadWork()
{
pt_map_.clear();
const ParasiticResistorSeq &resistors = parasitics_->resistors(parasitic_network_);
int resistor_count = resistors.size();
termN = 0;
int subnode_count = 0;
ParasiticNodeSeq nodes = parasitics_->nodes(parasitic_network_);
for (ParasiticNode *node : nodes) {
if (!parasitics_->isExternal(node)) {
const Pin *pin = parasitics_->pin(node);
if (pin)
termN++;
else
subnode_count++;
}
}
ts_pointN = subnode_count + 1 + termN;
ts_edgeN = resistor_count;
allocPoints();
allocTerms(termN);
ts_point *p0 = ts_pointV;
pterm0 = p0 + subnode_count + 1;
ts_point *pend = p0 + ts_pointN;
ts_point *p;
ts_edge *e0 = ts_edgeV;
ts_edge *eend = e0 + ts_edgeN;
ts_edge *e;
int tindex;
for (p = p0; p != pend; p++) {
p->node_ = nullptr;
p->eN = 0;
p->is_term = false;
}
pend = pterm0;
e = e0;
int index = 0;
for (ParasiticNode *node : nodes) {
if (!parasitics_->isExternal(node)) {
const Pin *pin = parasitics_->pin(node);
if (pin) {
p = pend++;
pt_map_[node] = p - p0;
p->node_ = node;
p->eN = 0;
p->is_term = true;
tindex = p - pterm0;
p->tindex = tindex;
pinV[tindex] = pin;
}
else {
pt_map_[node] = index;
p = p0 + index;
p->node_ = node;
p->eN = 0;
p->is_term = false;
index++;
}
}
}
ts_edge **eV = ts_eV;
for (ParasiticResistor *resistor : resistors) {
ts_point *pt1 = findPt(parasitics_->node1(resistor));
ts_point *pt2 = findPt(parasitics_->node2(resistor));
e->from = pt1;
e->to = pt2;
e->resistor_ = resistor;
pt1->eN++;
if (e->from != e->to)
pt2->eN++;
e++;
}
for (p = p0; p != pend; p++) {
if (p->node_) {
p->eV = eV;
eV += p->eN;
p->eN = 0;
}
}
for (e = e0; e != eend; e++) {
e->from->eV[e->from->eN++] = e;
if (e->to != e->from)
e->to->eV[e->to->eN++] = e;
}
}
void
ArnoldiReduce::allocPoints()
{
if (ts_pointN > ts_pointNmax) {
free(par);
free(c);
free(r);
free(iv);
free(y);
free(_u1);
free(_u0);
free(ts_pordV);
free(ts_ordV);
free(ts_pointV);
ts_pointNmax = ts_pointN + ts_point_count_incr_;
ts_pointV = (ts_point *)malloc(ts_pointNmax * sizeof(ts_point));
ts_ordV = (int *)malloc(ts_pointNmax * sizeof(int));
ts_pordV = (ts_point **)malloc(ts_pointNmax * sizeof(ts_point *));
_u0 = (double *)malloc(ts_pointNmax * sizeof(double));
_u1 = (double *)malloc(ts_pointNmax * sizeof(double));
y = (double *)malloc(ts_pointNmax * sizeof(double));
iv = (double *)malloc(ts_pointNmax * sizeof(double));
r = (double *)malloc(ts_pointNmax * sizeof(double));
c = (double *)malloc(ts_pointNmax * sizeof(double));
par = (int *)malloc(ts_pointNmax * sizeof(int));
}
if (ts_edgeN > ts_edgeNmax) {
free(ts_edgeV);
free(ts_eV);
free(ts_stackV);
ts_edgeNmax = ts_edgeN + ts_edge_count_incr_;
ts_edgeV = (ts_edge *)malloc(ts_edgeNmax * sizeof(ts_edge));
ts_stackV = (ts_edge **)malloc(ts_edgeNmax * sizeof(ts_edge *));
ts_eV = (ts_edge **)malloc(2 * ts_edgeNmax * sizeof(ts_edge *));
}
}
void
ArnoldiReduce::allocTerms(int nterms)
{
if (nterms > termNmax) {
free(U0);
free(outV);
free(termV);
free(pinV);
termNmax = nterms + 256;
pinV = (const Pin **)malloc(termNmax * sizeof(const Pin *));
termV = (int *)malloc(termNmax * sizeof(int));
outV = (int *)malloc(termNmax * sizeof(int));
U0 = (double *)malloc(dNmax * termNmax * sizeof(double));
int h;
for (h = 0; h < dNmax; h++)
U[h] = U0 + h * termNmax;
}
}
// Translate a parasitic node into its work point using the index stored in pt_map_.
ts_point *
ArnoldiReduce::findPt(ParasiticNode *node)
{
  ConcreteParasiticNode *key = reinterpret_cast<ConcreteParasiticNode *>(node);
  return ts_pointV + pt_map_[key];
}
// Build the reduced model as seen from the driver pin.
// Steps: locate the driver's point, order the network by DFS from it, collect
// R and C values, order the terminals driver-first, run the reduction and
// package the result.
// Returns nullptr when the driver pin has no parasitic node or when the
// summed capacitance from getRC() is below 1e-22; otherwise a newly allocated
// rcmodel.
rcmodel *
ArnoldiReduce::makeRcmodelDrv()
{
  ParasiticNode *drv_node =
    parasitics_->findParasiticNode(parasitic_network_, drvr_pin_);
  // findPt() looks the node up in pt_map_; a null node was never registered by
  // loadWork(), so the lookup could not yield a meaningful driver point.
  if (drv_node == nullptr)
    return nullptr;

  ts_point *pdrv = findPt(drv_node);
  makeRcmodelDfs(pdrv);
  getRC();
  if (ctot_ < 1e-22) // 1e-10ps
    return nullptr;
  setTerms(pdrv);
  makeRcmodelFromTs();
  return makeRcmodelFromW();
}
// Orient edge `ee` so that it leaves point `pp`: when pp is not already the
// `from` end, the old `from` becomes `to` and pp becomes `from`.
// Wrapped in do { } while (0) so the macro acts as a single statement (safe
// under if/else); arguments are parenthesized but are evaluated more than
// once, so pass side-effect-free expressions.
#define ts_orient(pp, ee) \
  do { \
    if ((ee)->from != (pp)) { \
      (ee)->to = (ee)->from; \
      (ee)->from = (pp); \
    } \
  } while (0)
// Depth-first traversal of the resistor graph, rooted at the driver point.
// Every reached point gets a visit index (ts), is recorded in ts_pordV and
// ts_ordV, and remembers the edge it was reached through in in_edge. Edges are
// re-oriented with ts_orient so that `from` is the point being expanded.
// A debug message is emitted when a resistor loop (other than a one-resistor
// self-loop) is seen.
void
ArnoldiReduce::makeRcmodelDfs(ts_point *pdrv)
{
  bool loop = false;
  int k;
  ts_point *p, *q;
  ts_point *p0 = ts_pointV;
  ts_point *pend = p0 + ts_pointN;
  // The point array comes from malloc and is reused between nets, so reset all
  // per-traversal fields. Points the traversal never reaches then carry a null
  // in_edge and ts 0 instead of indeterminate values that later passes read.
  for (p = p0; p != pend; p++) {
    p->visited = false;
    p->in_edge = nullptr;
    p->ts = 0;
  }

  // The driver is the root of the traversal.
  pdrv->visited = true;
  pdrv->in_edge = nullptr;
  pdrv->ts = 0;
  ts_ordV[0] = pdrv - p0;
  ts_pordV[0] = pdrv;
  ts_ordN = 1;

  // A driver without any incident resistor has nothing to traverse, and
  // eV[0] would not be one of its edges.
  if (pdrv->eN == 0)
    return;

  ts_edge *e;
  ts_edge **stackV = ts_stackV;
  int stackN = 1;
  stackV[0] = e = pdrv->eV[0];
  ts_orient(pdrv, e);
  while (stackN > 0) {
    e = stackV[stackN - 1];
    q = e->to;
    if (q->visited) {
      // if it is a one-rseg self-loop,
      // ignore, and do not even set *loop
      if (e->to != e->from)
        loop = true;
    }
    else {
      // try to descend
      q->visited = true;
      q->ts = ts_ordN++;
      ts_pordV[q->ts] = q;
      ts_ordV[q->ts] = q - p0;
      q->in_edge = e;
      if (q->eN > 1) {
        // first edge of q other than the one we arrived on
        for (k = 0; k < q->eN; k++)
          if (q->eV[k] != e)
            break;
        e = q->eV[k];
        ts_orient(q, e);
        stackV[stackN++] = e;
        continue; // descent
      }
    }
    // try to ascend
    while (--stackN >= 0) {
      e = stackV[stackN];
      p = e->from;
      // find e in p->eV
      for (k = 0; k < p->eN; k++)
        if (p->eV[k] == e)
          break;
      // if (k==p->eN) notice(0,"ERROR, e not found!\n");
      ++k;
      if (k >= p->eN)
        continue;
      e = p->eV[k];
      // check that next sibling is not the incoming edge
      if (stackN > 0 && e == stackV[stackN - 1]) {
        ++k;
        if (k >= p->eN)
          continue;
        e = p->eV[k];
      }
      ts_orient(p, e);
      stackV[stackN++] = e;
      break;
    }
  } // while (stackN)
  if (loop)
    debugPrint(debug_, "arnoldi", 1, "net {} loop", network_->pathName(drvr_pin_));
}
// makeRcmodelGetRC
// Fill in the capacitance (c) and incoming resistance (r) of every work point.
// c starts as node ground cap plus pin capacitance (only when positive) and
// that part is summed into ctot_. Coupling capacitors are then added to the c
// of each non-external end, scaled by the coupling cap factor that was passed
// to reduceToArnoldi().
// NOTE(review): coupling capacitance is not added to ctot_ here, so the
// small-capacitance test that follows getRC() does not see it - confirm intended.
void
ArnoldiReduce::getRC()
{
  ts_point *p0 = ts_pointV;
  ts_point *pend = p0 + ts_pointN;
  ctot_ = 0.0;
  for (ts_point *p = p0; p != pend; p++) {
    p->c = 0.0;
    p->r = 0.0;
    // Slots without a node keep zero R and C.
    if (p->node_ == nullptr)
      continue;
    ParasiticNode *node = p->node_;
    double cap = parasitics_->nodeGndCap(node) + pinCapacitance(node);
    if (cap > 0.0) {
      p->c = cap;
      ctot_ += cap;
    }
    if (p->in_edge && p->in_edge->resistor_)
      p->r = parasitics_->value(p->in_edge->resistor_);
    // Written as a negated range test so that a NaN is reported as well.
    if (!(p->r >= 0.0 && p->r < 100e+3)) { // 0 <= r < 100kohm
      debugPrint(debug_, "arnoldi", 1, "R value {:g} out of range, drvr pin {}",
                 p->r, network_->pathName(drvr_pin_));
    }
  }
  for (ParasiticCapacitor *capacitor : parasitics_->capacitors(parasitic_network_)) {
    // Use the factor supplied by the caller rather than a separately fetched one.
    float cap = parasitics_->value(capacitor) * coupling_cap_factor_;
    ParasiticNode *node1 = parasitics_->node1(capacitor);
    if (!parasitics_->isExternal(node1)) {
      ts_point *pt = findPt(node1);
      pt->c += cap;
    }
    ParasiticNode *node2 = parasitics_->node2(capacitor);
    if (!parasitics_->isExternal(node2)) {
      ts_point *pt = findPt(node2);
      pt->c += cap;
    }
  }
}
// Capacitance contributed by the pin attached to a parasitic node.
// Yields 0 when the node has no pin, or when the pin has neither a liberty
// port nor top-level-port status.
float
ArnoldiReduce::pinCapacitance(ParasiticNode *node)
{
  const Pin *pin = parasitics_->pin(node);
  if (pin == nullptr)
    return 0.0f;

  Port *port = network_->port(pin);
  LibertyPort *lib_port = network_->libertyPort(port);
  const Sdc *sdc = scene_->sdc();
  // A liberty port takes precedence over the top-level-port case.
  if (lib_port)
    return sdc->pinCapacitance(pin, rf_, scene_, min_max_);
  if (network_->isTopLevelPort(pin))
    return sdc->portExtCap(port, rf_, min_max_);
  return 0.0f;
}
// Build the terminal orderings with the driver in position 0.
//   termV: driver-first position -> fixed terminal index (driver's index and
//          index 0 are exchanged, all others map to themselves)
//   outV:  driver-first position -> DFS visit index (ts) of that terminal
void
ArnoldiReduce::setTerms(ts_point *pdrv)
{
  const int drv_term = pdrv->tindex;
  termV[0] = drv_term;
  for (int pos = 1; pos < termN; pos++)
    termV[pos] = (pos == drv_term) ? 0 : pos;

  for (int pos = 0; pos < termN; pos++)
    outV[pos] = pterm0[termV[pos]].ts;
}
// The guts of the arnoldi reducer.
// Works on the points in DFS visit order (ts_pordV) and fills d[], e[] and the
// rows of U for up to `order` iterations. The debug output below calls d/e the
// "tridiagonal reduced matrix". Also sets ctot_, sqc_ and order.
void
ArnoldiReduce::makeRcmodelFromTs()
{
ts_point *p, *p0 = ts_pointV;
int n = ts_ordN;
int nterms = termN;
int i, j, k, h;
// Debug dump of the visited points and of the terminal -> visit-index mapping.
if (debug_->check("arnoldi", 1)) {
for (k = 0; k < ts_ordN; k++) {
p = ts_pordV[k];
debugPrint(debug_, "arnoldi", 1, "T{} P{} c={}", p->ts, p - p0,
units_->capacitanceUnit()->asString(p->c));
if (p->is_term)
debugPrint(debug_, "arnoldi", 1, " term {}", p->tindex);
if (p->in_edge)
debugPrint(debug_, "arnoldi", 1, " from T{} P{} r={}",
p->in_edge->from->ts, p->in_edge->from - p0,
units_->resistanceUnit()->asString(p->r));
}
for (i = 0; i < nterms; i++)
debugPrint(debug_, "arnoldi", 1, "outV[{}] = T{}", i, outV[i]);
}
int max_order = 5;
// u0 / u1 alternate between the two scratch vectors _u0 and _u1.
double *u0, *u1;
u0 = _u0;
u1 = _u1;
double sum, e1;
// The order is limited to max_order and to the number of visited points.
order = max_order;
if (n < order)
order = n;
// Flatten the visited points into arrays indexed by visit order:
// c[j] capacitance, r[j] resistance of the incoming edge, par[j] visit index
// of the point on the other end of the incoming edge. Entry 0 is the root.
par[0] = -1;
r[0] = 0.0;
c[0] = ts_pordV[0]->c;
for (j = 1; j < n; j++) {
p = ts_pordV[j];
c[j] = p->c;
r[j] = p->r;
par[j] = p->in_edge->from->ts;
}
// Total capacitance over the visited points; replaces the ctot_ set earlier.
sum = 0.0;
for (j = 0; j < n; j++)
sum += c[j];
debugPrint(debug_, "arnoldi", 1, "ctot = {}",
units_->capacitanceUnit()->asString(sum));
ctot_ = sum;
sqc_ = sqrt(sum);
// Initial vector: every entry equals 1/sqrt(ctot).
double sqrt_ctot_inv = 1.0 / sqc_;
for (j = 0; j < n; j++)
u0[j] = sqrt_ctot_inv;
for (h = 0; h < order; h++) {
// Row h of U holds the current vector sampled at the terminals.
for (i = 0; i < nterms; i++)
U[h][i] = u0[outV[i]];
// y = R C u0
for (j = 0; j < n; j++) {
iv[j] = 0.0;
}
// Accumulate c*u0 from the last visited point back towards the root,
// adding each point's total into its parent's entry.
for (j = n - 1; j > 0; j--) {
iv[j] += c[j] * u0[j];
iv[par[j]] += iv[j];
}
iv[0] += c[0] * u0[0];
// Walk forward from the root: y of a point is its parent's y plus r * iv.
y[0] = 0.0;
for (j = 1; j < n; j++) {
y[j] = y[par[j]] + r[j] * iv[j];
}
// d[h] = u0 C y
// (j starts at 1 because y[0] is 0)
sum = 0.0;
for (j = 1; j < n; j++) {
sum += u0[j] * c[j] * y[j];
}
d[h] = sum;
// Last requested iteration: no further e[] or vector is needed.
if (h == order - 1)
break;
// Stop early and truncate the order when d[h] becomes negligible.
if (d[h] < 1e-13) { // .1ps
order = h + 1;
break;
}
// y = y - d[h]*u0 - e[h-1]*u1
if (h == 0) {
for (j = 0; j < n; j++)
y[j] -= sum * u0[j];
}
else {
e1 = e[h - 1];
for (j = 0; j < n; j++)
y[j] -= sum * u0[j] + e1 * u1[j];
}
// e[h] = sqrt(y C y)
// u1 = y/e[h]
sum = 0.0;
for (j = 0; j < n; j++) {
sum += c[j] * y[j] * y[j];
}
// Stop early and truncate the order when the remainder is negligible.
if (sum < 1e-30) { // (1e-6ns)^2
order = h + 1;
break;
}
e[h] = sqrt(sum);
sum = 1.0 / e[h];
for (j = 0; j < n; j++)
u1[j] = sum * y[j];
// swap u0, u1
// (chosen from the parity of h rather than by exchanging the pointers)
if (h % 2) {
u0 = _u0;
u1 = _u1;
}
else {
u0 = _u1;
u1 = _u0;
}
}
// Debug report of the resulting d/e values and U rows.
if (debug_->check("arnoldi", 1)) {
report_->report("tridiagonal reduced matrix, drvr pin {}",
network_->pathName(drvr_pin_));
report_->report("order {} n {}", order, n);
for (h = 0; h < order; h++) {
if (h < order - 1)
report_->report(" d[{}] {} e[{}] {}", h,
units_->timeUnit()->asString(d[h]), h,
units_->timeUnit()->asString(e[h]));
else
report_->report(" d[{}] {}", h, units_->timeUnit()->asString(d[h]));
std::string line = sta::format("U[{}]", h);
for (i = 0; i < nterms; i++)
line += sta::format(" {:6.2e}", U[h][i]);
report_->reportLine(line);
}
}
}
// Package the reduction results into a newly allocated rcmodel.
// d, e and the rows of U share one malloc'ed block laid out as
// [ d: order ][ e: order-1 ][ U rows: order * n ]; the U row-pointer array and
// the pin array are separate allocations. Pins are stored driver-first by
// mapping through termV.
rcmodel *
ArnoldiReduce::makeRcmodelFromW()
{
  const int term_count = termN;
  rcmodel *model = new rcmodel();
  model->order = order;
  model->n = term_count;

  if (order > 0) {
    const int e_count = order - 1;
    const int total = order + e_count + order * term_count;
    double *storage = static_cast<double *>(malloc(total * sizeof(double)));
    model->d = storage;
    model->e = (order > 1) ? storage + order : nullptr;

    model->U = static_cast<double **>(malloc(order * sizeof(double *)));
    double *u_base = storage + order + e_count;
    for (int row = 0; row < order; row++)
      model->U[row] = u_base + row * term_count;

    for (int row = 0; row < order; row++) {
      model->d[row] = d[row];
      if (row < e_count)
        model->e[row] = e[row];
      for (int col = 0; col < term_count; col++)
        model->U[row][col] = U[row][col];
    }
  }

  model->pinV = static_cast<const Pin **>(malloc(term_count * sizeof(const Pin *)));
  for (int pos = 0; pos < term_count; pos++)
    model->pinV[pos] = pinV[termV[pos]];

  model->ctot = ctot_;
  model->sqc = sqc_;
  return model;
}
} // namespace sta