arnoldi delay calculator

This commit is contained in:
James Cherry 2018-10-24 14:22:21 -07:00
parent c6a90ec151
commit b952c58f92
8 changed files with 2425 additions and 0 deletions

83
dcalc/Arnoldi.hh Normal file
View File

@ -0,0 +1,83 @@
// OpenSTA, Static Timing Analyzer
// Copyright (c) 2018, Parallax Software, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// (c) 2018 Nefelus, Inc.
//
// Author: W. Scott
#ifndef ARNOLDI_H
#define ARNOLDI_H
#include "ConcreteParasiticsPvt.hh"
namespace sta {
struct delay_work;
class rcmodel;
class GateTableModel;
class Pin;
//
// single-driver arnoldi model
//
class arnoldi1
{
public:
arnoldi1() { order=0; n=0; d=NULL; e=NULL; U=NULL; ctot=0.0; sqc=0.0; }
~arnoldi1();
double elmore(int term_index);
//
// calculate poles/residues for given rdrive
//
void calculate_poles_res(delay_work *D,double rdrive);
public:
int order;
int n; // number of terms, including driver
double *d; // [order]
double *e; // [order-1]
double **U; // [order][n]
double ctot;
double sqc;
};
// This is the rcmodel, without Rd.
// n is the number of terms
// The vectors U[j] are of size n
class rcmodel : public ConcreteParasitic,
public arnoldi1
{
public:
rcmodel();
virtual ~rcmodel();
virtual float capacitance() const;
const Pin **pinV; // [n]
};
struct timing_table
{
GateTableModel *table;
const LibertyCell *cell;
const Pvt *pvt;
float in_slew;
float relcap;
};
} // namespace
#endif

57
dcalc/Arnoldi.txt Normal file
View File

@ -0,0 +1,57 @@
The method is used for simulation with a time-and-voltage-dependent
current source. But it is simpler to describe with a linear driver.
Suppose we are given:
voltage nodes 1,..n initialized to V[j]=1.0
a resistor network described by a conductance matrix G[j,k]
a drive resistance from node 1 to ground, Rdrv
capacitance of the nodes, c[j], also written as a
diagonal matrix C[j,k]
The node voltages will fall to zero with time.
Matrix equation:
GV+CdV/dt = -(V[0]/Rdrv)e0
where e0 is the unit vector (1,0,0,..0).
Let G' be the matrix formed by taking G and adding 1/Rdrv in the
[0,0] position. Then we are solving G'V + CdV/dt = 0.
Let R be the inverse of G'. (In implementation, R may not actually
be formed as a matrix, instead some method of producing RV given V).
The exact solution would diagonalize sqrt(C) R sqrt(C).
The Arnoldi method takes a matrix M and a vector of interest V, and
considers the subspace of vectors near V in terms of the action of M,
that is, the space spanned by V, MV, MMV, etc, for a small number of
powers. We do this here, but instead of finding the part of MV orthogonal
to V, we find the part of RCV that is C-orthogonal of V. We use C
as the metric, so the basis vectors U0, U1, U2 that we generate satisfy
Ui.CUj = (i==j?1:0)
U0 = (1,1,..1)/sqrt(sum C)
representing the initial value of V, V[j] = sqrt(n)U0[j] at t=0.
sum(C) = C[0]+..C[n-1], so U0.C U0 = 1.
Let:
W = R C U0
d0 = U0.C W
W' = W - d0 U0
e0 = sqrt(W'.C W')
U1 = W'/e0
Then: U0.C U0 = U1.C U1 = 1, U0.C U1 = 0, and
R C U0 = d0 U0 + e0 U1
Next step:
W = R C U1
d1 = U1.C W
W' = W - d1 U1 - e0 U0
e1 = sqrt(W'.C W')
U2 = W'/e1
and we have U2.C U2 = 1, U2.C U1 = U2.C U0 = 0, and
RC U1 = e0 U0 + d1 U1 + e1 U2
In this way, RC, which in nonsymmetric in the original basis,
becomes a symmetric tridiagonal matrix in the U0,U1,.. basis.
We stop at, say, U3. The resulting 4x4 tridiagonal matrix is
positive definite, because it is the projection of sqrt(C)R sqrt(C)
to a subspace. So the eigenvalues of this small tridiagonal matrix
are guaranteed to be positive. This is the advantage over AWE.
In the actual implementation, I remember there was some way of
isolating the node 0 where drive resistance is attached, so that the
tridiagonal matrix (d,e) can be recalculated without knowing Rdrv,
and then just the first d0,e0 updated when the Rdrv is known, or
when Rdrv changes in a simulation.

1490
dcalc/ArnoldiDelayCalc.cc Normal file

File diff suppressed because it is too large Load Diff

26
dcalc/ArnoldiDelayCalc.hh Normal file
View File

@ -0,0 +1,26 @@
// OpenSTA, Static Timing Analyzer
// Copyright (c) 2018, Parallax Software, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
#ifndef ARNOLDIDELAYCALC_H
#define ARNOLDIDELAYCALC_H
namespace sta {
ArcDelayCalc *
makeArnoldiDelayCalc(StaState *sta);
} // namespace
#endif

646
dcalc/ArnoldiReduce.cc Normal file
View File

@ -0,0 +1,646 @@
// OpenSTA, Static Timing Analyzer
// Copyright (c) 2018, Parallax Software, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// (c) 2018 Nefelus, Inc.
//
// Author: W. Scott
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include "Machine.hh"
#include "Debug.hh"
#include "Units.hh"
#include "MinMax.hh"
#include "Sdc.hh"
#include "Network.hh"
#include "ArnoldiReduce.hh"
#include "Arnoldi.hh"
#include "ConcreteParasiticsPvt.hh"
namespace sta {
rcmodel::rcmodel() :
pinV(NULL)
{
}
rcmodel::~rcmodel()
{
free(pinV);
}
float
rcmodel::capacitance() const
{
return ctot;
}
struct ts_point
{
ParasiticNode *node_;
int eN;
bool is_term;
int tindex; // index into termV of corresponding term
ts_edge **eV;
bool visited;
ts_edge *in_edge;
int ts;
double c;
double r;
};
struct ts_edge
{
ConcreteParasiticResistor *resistor_;
ts_point *from;
ts_point *to;
};
////////////////////////////////////////////////////////////////
const int ArnoldiReduce::ts_point_count_incr_ = 1024;
const int ArnoldiReduce::ts_edge_count_incr_ = 1024;
ArnoldiReduce::ArnoldiReduce(StaState *sta) :
StaState(sta),
ts_pointNmax(1024),
ts_edgeNmax(1024),
termNmax(256),
dNmax(8)
{
ts_pointV = (ts_point*)malloc(ts_pointNmax*sizeof(ts_point));
ts_ordV = (int*)malloc(ts_pointNmax*sizeof(int));
ts_pordV = (ts_point**)malloc(ts_pointNmax*sizeof(ts_point*));
_u0 = (double*)malloc(ts_pointNmax*sizeof(double));
_u1 = (double*)malloc(ts_pointNmax*sizeof(double));
y = (double*)malloc(ts_pointNmax*sizeof(double));
iv = (double*)malloc(ts_pointNmax*sizeof(double));
r = (double*)malloc(ts_pointNmax*sizeof(double));
c = (double*)malloc(ts_pointNmax*sizeof(double));
par = (int*)malloc(ts_pointNmax*sizeof(int));
ts_edgeV = (ts_edge*)malloc(ts_edgeNmax*sizeof(ts_edge));
ts_stackV = (ts_edge**)malloc(ts_edgeNmax*sizeof(ts_edge*));
ts_eV = (ts_edge**)malloc(2*ts_edgeNmax*sizeof(ts_edge*));
pinV = (const Pin**)malloc(termNmax*sizeof(const Pin*));
termV = (int*)malloc(termNmax*sizeof(int));
outV = (int*)malloc(termNmax*sizeof(int));
d = (double*)malloc(dNmax*sizeof(double));
e = (double*)malloc(dNmax*sizeof(double));
U = (double**)malloc(dNmax*sizeof(double*));
U0 = (double*)malloc(dNmax*termNmax*sizeof(double));
int h;
for (h=0;h<dNmax;h++) U[h] = U0 + h*termNmax;
}
ArnoldiReduce::~ArnoldiReduce()
{
free(U0);
free(U);
free(e);
free(d);
free(outV);
free(termV);
free(pinV);
free(ts_eV);
free(ts_edgeV);
free(ts_stackV);
free(par);
free(c);
free(r);
free(iv);
free(y);
free(_u1);
free(_u0);
free(ts_pordV);
free(ts_ordV);
free(ts_pointV);
}
Parasitic *
ArnoldiReduce::reduceToArnoldi(Parasitic *parasitic,
const Pin *drvr_pin,
float coupling_cap_factor,
const TransRiseFall *tr,
const OperatingConditions *op_cond,
const Corner *corner,
const MinMax *cnst_min_max,
const ParasiticAnalysisPt *ap)
{
parasitic_network_ = reinterpret_cast<ConcreteParasiticNetwork*>(parasitic);
drvr_pin_ = drvr_pin;
coupling_cap_factor_ = coupling_cap_factor;
tr_ = tr;
op_cond_ = op_cond;
corner_ = corner;
cnst_min_max_ = cnst_min_max;
ap_ = ap;
loadWork();
return makeRcmodelDrv();
}
void
ArnoldiReduce::loadWork()
{
pt_map_.clear();
int resistor_count = 0;
ConcreteParasiticDeviceSet devices;
parasitic_network_->devices(devices);
ConcreteParasiticDeviceSet::Iterator device_iter(devices);
while (device_iter.hasNext()) {
ParasiticDevice *device = device_iter.next();
if (parasitics_->isResistor(device))
resistor_count++;
}
termN = parasitic_network_->pinNodes()->size();
int subnode_count = parasitic_network_->subNodes()->size();
ts_pointN = subnode_count + 1 + termN;
ts_edgeN = resistor_count;
allocPoints();
allocTerms(termN);
ts_point *p0 = ts_pointV;
pterm0 = p0 + subnode_count + 1;
ts_point *pend = p0 + ts_pointN;
ts_point *p;
ts_edge *e0 = ts_edgeV;
ts_edge *eend = e0 + ts_edgeN;
ts_edge *e;
int tindex;
for (p = p0; p!=pend; p++) {
p->node_ = NULL;
p->eN = 0;
p->is_term = false;
}
pend = pterm0;
e = e0;
int index = 0;
ConcreteParasiticSubNodeMap::Iterator
sub_node_iter(parasitic_network_->subNodes());
while (sub_node_iter.hasNext()) {
ConcreteParasiticSubNode *node = sub_node_iter.next();
pt_map_[node] = index;
p = p0 + index;
p->node_ = node;
p->eN = 0;
p->is_term = false;
index++;
}
ConcreteParasiticPinNodeMap::Iterator
pin_node_iter(parasitic_network_->pinNodes());
while (pin_node_iter.hasNext()) {
ConcreteParasiticPinNode *node = pin_node_iter.next();
p = pend++;
pt_map_[node] = p - p0;
p->node_ = node;
p->eN = 0;
p->is_term = true;
tindex = p - pterm0;
p->tindex = tindex;
const Pin *pin = parasitics_->connectionPin(node);
pinV[tindex] = pin;
}
ts_edge **eV = ts_eV;
ConcreteParasiticDeviceSet::Iterator device_iter2(devices);
while (device_iter2.hasNext()) {
ParasiticDevice *device = device_iter2.next();
if (parasitics_->isResistor(device)) {
ConcreteParasiticResistor *resistor =
reinterpret_cast<ConcreteParasiticResistor*>(device);
ts_point *pt1 = findPt(resistor->node1());
ts_point *pt2 = findPt(resistor->node2());
e->from = pt1;
e->to = pt2;
e->resistor_ = resistor;
pt1->eN++;
if (e->from != e->to)
pt2->eN++;
e++;
}
}
for (p=p0;p!=pend;p++) {
if (p->node_) {
p->eV = eV;
eV += p->eN;
p->eN = 0;
}
}
for (e=e0;e!=eend;e++) {
e->from->eV[e->from->eN++] = e;
if (e->to != e->from)
e->to->eV[e->to->eN++] = e;
}
}
void
ArnoldiReduce::allocPoints()
{
if (ts_pointN > ts_pointNmax) {
free(par);
free(c);
free(r);
free(iv); free(y); free(_u1); free(_u0);
free(ts_pordV);
free(ts_ordV);
free(ts_pointV);
ts_pointNmax = ts_pointN + ts_point_count_incr_;
ts_pointV = (ts_point*)malloc(ts_pointNmax*sizeof(ts_point));
ts_ordV = (int*)malloc(ts_pointNmax*sizeof(int));
ts_pordV = (ts_point**)malloc(ts_pointNmax*sizeof(ts_point*));
_u0 = (double*)malloc(ts_pointNmax*sizeof(double));
_u1 = (double*)malloc(ts_pointNmax*sizeof(double));
y = (double*)malloc(ts_pointNmax*sizeof(double));
iv = (double*)malloc(ts_pointNmax*sizeof(double));
r = (double*)malloc(ts_pointNmax*sizeof(double));
c = (double*)malloc(ts_pointNmax*sizeof(double));
par = (int*)malloc(ts_pointNmax*sizeof(int));
}
if (ts_edgeN > ts_edgeNmax) {
free(ts_edgeV);
free(ts_eV);
free(ts_stackV);
ts_edgeNmax = ts_edgeN + ts_edge_count_incr_;
ts_edgeV = (ts_edge*)malloc(ts_edgeNmax*sizeof(ts_edge));
ts_stackV = (ts_edge**)malloc(ts_edgeNmax*sizeof(ts_edge*));
ts_eV = (ts_edge**)malloc(2*ts_edgeNmax*sizeof(ts_edge*));
}
}
void
ArnoldiReduce::allocTerms(int nterms)
{
if (nterms > termNmax) {
free(U0);
free(outV);
free(termV);
free(pinV);
termNmax = nterms+256;
pinV = (const Pin**)malloc(termNmax*sizeof(const Pin*));
termV = (int*)malloc(termNmax*sizeof(int));
outV = (int*)malloc(termNmax*sizeof(int));
U0 = (double*)malloc(dNmax*termNmax*sizeof(double));
int h;
for (h=0;h<dNmax;h++) U[h] = U0 + h*termNmax;
}
}
ts_point *
ArnoldiReduce::findPt(ParasiticNode *node)
{
return &ts_pointV[pt_map_[reinterpret_cast<ConcreteParasiticNode*>(node)]];
}
rcmodel *
ArnoldiReduce::makeRcmodelDrv()
{
ParasiticNode *drv_node = parasitics_->findNode(parasitic_network_,
drvr_pin_);
ts_point *pdrv = findPt(drv_node);
makeRcmodelDfs(pdrv);
getRC();
if (ctot_ < 1e-22) // 1e-10ps
return NULL;
setTerms(pdrv);
makeRcmodelFromTs();
rcmodel *mod = makeRcmodelFromW();
return mod;
}
#define ts_orient( pp, ee) \
if (ee->from!=pp) { ee->to = ee->from; ee->from = pp; }
void
ArnoldiReduce::makeRcmodelDfs(ts_point *pdrv)
{
bool loop = false;
int k;
ts_point *p,*q;
ts_point *p0 = ts_pointV;
ts_point *pend = p0 + ts_pointN;
for (p=p0;p!=pend;p++)
p->visited = 0;
ts_edge *e;
ts_edge **stackV = ts_stackV;
int stackN = 1;
stackV[0] = e = pdrv->eV[0];
ts_orient(pdrv,e);
pdrv->visited = 1;
pdrv->in_edge = NULL;
pdrv->ts = 0;
ts_ordV[0] = pdrv-p0;
ts_pordV[0] = pdrv;
ts_ordN = 1;
while (stackN>0) {
e = stackV[stackN-1];
q = e->to;
if (q->visited) {
// if it is a one-rseg self-loop,
// ignore, and do not even set *loop
if (e->to != e->from)
loop = true;
} else {
// try to descend
q->visited = 1;
q->ts = ts_ordN++;
ts_pordV[q->ts] = q;
ts_ordV[q->ts] = q-p0;
q->in_edge = e;
if (q->eN>1) {
for (k=0;k<q->eN;k++) if (q->eV[k] != e) break;
e = q->eV[k];
ts_orient(q,e);
stackV[stackN++] = e;
continue; // descent
}
}
// try to ascend
while (--stackN>=0) {
e = stackV[stackN];
p = e->from;
// find e in p->eV
for (k=0;k<p->eN;k++) if (p->eV[k]==e) break;
// if (k==p->eN) notice(0,"ERROR, e not found!\n");
++k;
if (k>=p->eN) continue;
e = p->eV[k];
// check that next sibling is not the incoming edge
if (stackN>0 && e==stackV[stackN-1]) {
++k;
if (k>=p->eN) continue;
e = p->eV[k];
}
ts_orient(p,e);
stackV[stackN++] = e;
break;
}
} // while (stackN)
if (loop)
debugPrint1(debug_, "arnoldi", 1,
"net %s loop\n",
network_->pathName(drvr_pin_));
}
// makeRcmodelGetRC
void
ArnoldiReduce::getRC()
{
ts_point *p, *p0 = ts_pointV;
ts_point *pend = p0 + ts_pointN;
ctot_ = 0.0;
for (p=p0;p!=pend;p++) {
p->c = 0.0;
p->r = 0.0;
if (p->node_) {
ParasiticNode *node = p->node_;
double cap = parasitics_->nodeGndCap(node, ap_)
+ pinCapacitance(node);
if (cap > 0.0) {
p->c = cap;
ctot_ += cap;
}
else
p->c = 0.0;
if (p->in_edge && p->in_edge->resistor_)
p->r = parasitics_->value(p->in_edge->resistor_, ap_);
if (!(p->r>=0.0 && p->r<100e+3)) { // 0 < r < 100kohm
debugPrint2(debug_, "arnoldi", 1,
"R value %g out of range, drvr pin %s\n",
p->r,
network_->pathName(drvr_pin_));
}
}
}
}
float
ArnoldiReduce::pinCapacitance(ParasiticNode *node)
{
const Pin *pin = parasitics_->connectionPin(node);
float pin_cap = 0.0;
if (pin) {
Port *port = network_->port(pin);
LibertyPort *lib_port = network_->libertyPort(port);
if (lib_port)
pin_cap = sdc_->pinCapacitance(pin,tr_, op_cond_, corner_, cnst_min_max_);
else if (network_->isTopLevelPort(pin))
pin_cap = sdc_->portExtCap(port, tr_, cnst_min_max_);
}
return pin_cap;
}
void
ArnoldiReduce::setTerms(ts_point *pdrv)
{
// termV: from drv-ordered to fixed order
// outV: from drv-ordered to ts_pordV
ts_point *p;
int k,k0;
termV[0] = k0 = pdrv->tindex;
for (k=1;k<termN;k++) {
if (k==k0) termV[k] = 0;
else termV[k] = k;
}
for (k=0;k<termN;k++) {
p = pterm0 + termV[k];
outV[k] = p->ts;
}
}
// The guts of the arnoldi reducer.
void
ArnoldiReduce::makeRcmodelFromTs()
{
ts_point *p, *p0 = ts_pointV;
int n = ts_ordN;
int nterms = termN;
int i,j,k,h;
if (debug_->check("arnoldi", 1)) {
for (k=0;k<ts_ordN;k++) {
p = ts_pordV[k];
debugPrint3(debug_, "arnoldi", 1, "T%d,P%ld c=%s",
p->ts,p-p0,
units_->capacitanceUnit()->asString(p->c));
if (p->is_term)
debug_->print(" term%d",p->tindex);
if (p->in_edge)
debug_->print(" from T%d,P%ld r=%s",
p->in_edge->from->ts,
p->in_edge->from-p0,
units_->resistanceUnit()->asString(p->r));
debug_->print("\n");
}
for (i=0;i<nterms;i++)
debugPrint2(debug_, "arnoldi", 1, "outV[%d] = T%d\n",i,outV[i]);
}
int max_order = 5;
double *u0, *u1;
u0 = _u0; u1 = _u1;
double sum,e1;
order = max_order;
if (n < order)
order = n;
par[0] = -1; r[0] = 0.0;
c[0] = ts_pordV[0]->c;
for (j=1;j<n;j++) {
p = ts_pordV[j];
c[j] = p->c;
r[j] = p->r;
par[j] = p->in_edge->from->ts;
}
sum = 0.0;
for (j=0;j<n;j++) sum += c[j];
debugPrint1(debug_, "arnoldi", 1, "ctot = %s\n",
units_->capacitanceUnit()->asString(sum));
ctot_ = sum;
sqc_ = sqrt(sum);
double sqrt_ctot_inv = 1.0/sqc_;
for (j=0;j<n;j++) u0[j] = sqrt_ctot_inv;
for (h=0;h<order;h++) {
for (i=0;i<nterms;i++) U[h][i] = u0[outV[i]];
// y = R C u0
for (j=0;j<n;j++) {
iv[j] = 0.0;
}
for (j=n-1;j>0;j--) {
iv[j] += c[j]*u0[j];
iv[par[j]] += iv[j];
}
iv[0] += c[0]*u0[0];
y[0] = 0.0;
for (j=1;j<n;j++) {
y[j] = y[par[j]] + r[j]*iv[j];
}
// d[h] = u0 C y
sum = 0.0;
for (j=1;j<n;j++) {
sum += u0[j]*c[j]*y[j];
}
d[h] = sum;
if (h==order-1) break;
if (d[h]<1e-13) { // .1ps
order = h+1;
break;
}
// y = y - d[h]*u0 - e[h-1]*u1
if (h==0) {
for (j=0;j<n;j++) y[j] -= sum*u0[j];
} else {
e1 = e[h-1];
for (j=0;j<n;j++) y[j] -= sum*u0[j] + e1*u1[j];
}
// e[h] = sqrt(y C y)
// u1 = y/e[h]
sum = 0.0;
for (j=0;j<n;j++) {
sum += c[j]*y[j]*y[j];
}
if (sum<1e-30) { // (1e-6ns)^2
order = h+1;
break;
}
e[h] = sqrt(sum);
sum = 1.0/e[h];
for (j=0;j<n;j++) u1[j] = sum*y[j];
// swap u0, u1
if (h%2) {
u0 = _u0; u1 = _u1;
} else {
u0 = _u1; u1 = _u0;
}
}
if (debug_->check("arnoldi", 1)) {
debugPrint1(debug_, "arnoldi", 1,
"tridiagonal reduced matrix, drvr pin %s\n",
network_->pathName(drvr_pin_));
debugPrint2(debug_, "arnoldi", 1, "order %d n %d\n",order,n);
for (h=0;h<order;h++) {
debug_->print("d[%d] %s",
h,
units_->timeUnit()->asString(d[h]));
if (h<order-1)
debug_->print(" e[%d] %s",
h,
units_->timeUnit()->asString(e[h]));
debug_->print("\n");
debug_->print("U[%d]",h);
for (i=0;i<nterms;i++)
debug_->print(" %6.2e",U[h][i]);
debug_->print("\n");
}
}
}
rcmodel *
ArnoldiReduce::makeRcmodelFromW()
{
int j,h;
int n = termN;
rcmodel *mod = new rcmodel();
mod->order = order;
mod->n = n;
if (order>0) {
int totd = order + order - 1 + order*n;
mod->d = (double *)malloc(totd*sizeof(double));
if (order>1) mod->e = mod->d + order;
else mod->e = NULL;
mod->U = (double **)malloc(order*sizeof(double*));
mod->U[0] = mod->d + order + order - 1;
for (h=1;h<order;h++) mod->U[h]=mod->U[0] + h*n;
for (h=0;h<order;h++) {
mod->d[h] = d[h];
if (h<order-1) mod->e[h] = e[h];
for (j=0;j<n;j++)
mod->U[h][j] = U[h][j];
}
}
mod->pinV = (const Pin **)malloc(n*sizeof(const Pin*));
for (j=0;j<n;j++) {
int k = termV[j];
mod->pinV[j] = pinV[k];
}
mod->ctot = ctot_;
mod->sqc = sqc_;
return mod;
}
} // namespace

116
dcalc/ArnoldiReduce.hh Normal file
View File

@ -0,0 +1,116 @@
// OpenSTA, Static Timing Analyzer
// Copyright (c) 2018, Parallax Software, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// (c) 2018 Nefelus, Inc.
//
// Author: W. Scott
#ifndef STA_ARNOLDI_REDUCE_H
#define STA_ARNOLDI_REDUCE_H
#include "Map.hh"
#include "NetworkClass.hh"
#include "ParasiticsClass.hh"
namespace sta {
class ConcreteParasiticNetwork;
class ConcreteParasiticNode;
class rcmodel;
struct ts_edge;
struct ts_point;
typedef Map<ConcreteParasiticNode*, int> ArnolidPtMap;
class ArnoldiReduce : public StaState
{
public:
ArnoldiReduce(StaState *sta);
~ArnoldiReduce();
Parasitic *reduceToArnoldi(Parasitic *parasitic,
const Pin *drvr_pin,
float coupling_cap_factor,
const TransRiseFall *tr,
const OperatingConditions *op_cond,
const Corner *corner,
const MinMax *cnst_min_max,
const ParasiticAnalysisPt *ap);
protected:
void loadWork();
rcmodel *makeRcmodelDrv();
void allocPoints();
void allocTerms(int nterms);
ts_point *findPt(ParasiticNode *node);
void makeRcmodelDfs(ts_point *pdrv);
void getRC();
float pinCapacitance(ParasiticNode *node);
void setTerms(ts_point *pdrv);
void makeRcmodelFromTs();
rcmodel *makeRcmodelFromW();
ConcreteParasiticNetwork *parasitic_network_;
const Pin *drvr_pin_;
float coupling_cap_factor_;
const TransRiseFall *tr_;
const OperatingConditions *op_cond_;
const Corner *corner_;
const MinMax *cnst_min_max_;
const ParasiticAnalysisPt *ap_;
// ParasiticNode -> ts_point index.
ArnolidPtMap pt_map_;
// rcWork
ts_point *ts_pointV;
int ts_pointN;
int ts_pointNmax;
static const int ts_point_count_incr_;
ts_edge *ts_edgeV;
int ts_edgeN;
int ts_edgeNmax;
static const int ts_edge_count_incr_;
ts_edge **ts_eV;
ts_edge **ts_stackV;
int *ts_ordV;
ts_point **ts_pordV;
int ts_ordN;
int termNmax;
int termN;
ts_point *pterm0;
const Pin **pinV; // fixed order, offset from pterm0
int *termV; // from drv-ordered to fixed order
int *outV; // from drv-ordered to ts_pordV
int dNmax;
double *d;
double *e;
double *U0;
double **U;
double ctot_;
double sqc_;
double *_u0, *_u1;
double *y, *iv;
double *c, *r;
int *par;
int order;
};
} // namespace
#endif

View File

@ -21,6 +21,7 @@
#include "LumpedCapDelayCalc.hh" #include "LumpedCapDelayCalc.hh"
#include "SimpleRCDelayCalc.hh" #include "SimpleRCDelayCalc.hh"
#include "DmpDelayCalc.hh" #include "DmpDelayCalc.hh"
#include "ArnoldiDelayCalc.hh"
#include "DelayCalc.hh" #include "DelayCalc.hh"
namespace sta { namespace sta {
@ -37,6 +38,7 @@ registerDelayCalcs()
registerDelayCalc("simple_rc", makeSimpleRCDelayCalc); registerDelayCalc("simple_rc", makeSimpleRCDelayCalc);
registerDelayCalc("dmp_ceff_elmore", makeDmpCeffElmoreDelayCalc); registerDelayCalc("dmp_ceff_elmore", makeDmpCeffElmoreDelayCalc);
registerDelayCalc("dmp_ceff_two_pole", makeDmpCeffTwoPoleDelayCalc); registerDelayCalc("dmp_ceff_two_pole", makeDmpCeffTwoPoleDelayCalc);
registerDelayCalc("arnoldi", makeArnoldiDelayCalc);
} }
void void

View File

@ -18,6 +18,9 @@ lib_LTLIBRARIES = libdcalc.la
include_HEADERS = \ include_HEADERS = \
ArcDelayCalc.hh \ ArcDelayCalc.hh \
Arnoldi.hh \
ArnoldiDelayCalc.hh \
ArnoldiReduce.hh \
DelayCalc.hh \ DelayCalc.hh \
DcalcAnalysisPt.hh \ DcalcAnalysisPt.hh \
DmpCeff.hh \ DmpCeff.hh \
@ -32,6 +35,8 @@ include_HEADERS = \
libdcalc_la_SOURCES = \ libdcalc_la_SOURCES = \
ArcDelayCalc.cc \ ArcDelayCalc.cc \
ArnoldiDelayCalc.cc \
ArnoldiReduce.cc \
DcalcAnalysisPt.cc \ DcalcAnalysisPt.cc \
DelayCalc.cc \ DelayCalc.cc \
DmpCeff.cc \ DmpCeff.cc \