arnoldi delay calculator
This commit is contained in:
parent
c6a90ec151
commit
b952c58f92
|
|
@ -0,0 +1,83 @@
|
|||
// OpenSTA, Static Timing Analyzer
|
||||
// Copyright (c) 2018, Parallax Software, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
// (c) 2018 Nefelus, Inc.
|
||||
//
|
||||
// Author: W. Scott
|
||||
|
||||
#ifndef ARNOLDI_H
|
||||
#define ARNOLDI_H
|
||||
|
||||
#include "ConcreteParasiticsPvt.hh"
|
||||
|
||||
namespace sta {
|
||||
|
||||
struct delay_work;
|
||||
class rcmodel;
|
||||
|
||||
class GateTableModel;
|
||||
class Pin;
|
||||
|
||||
//
|
||||
// single-driver arnoldi model
|
||||
//
|
||||
class arnoldi1
|
||||
{
|
||||
public:
|
||||
arnoldi1() { order=0; n=0; d=NULL; e=NULL; U=NULL; ctot=0.0; sqc=0.0; }
|
||||
~arnoldi1();
|
||||
double elmore(int term_index);
|
||||
|
||||
//
|
||||
// calculate poles/residues for given rdrive
|
||||
//
|
||||
void calculate_poles_res(delay_work *D,double rdrive);
|
||||
|
||||
public:
|
||||
int order;
|
||||
int n; // number of terms, including driver
|
||||
double *d; // [order]
|
||||
double *e; // [order-1]
|
||||
double **U; // [order][n]
|
||||
double ctot;
|
||||
double sqc;
|
||||
};
|
||||
|
||||
// This is the rcmodel, without Rd.
|
||||
// n is the number of terms
|
||||
// The vectors U[j] are of size n
|
||||
class rcmodel : public ConcreteParasitic,
|
||||
public arnoldi1
|
||||
{
|
||||
public:
|
||||
rcmodel();
|
||||
virtual ~rcmodel();
|
||||
virtual float capacitance() const;
|
||||
|
||||
const Pin **pinV; // [n]
|
||||
};
|
||||
|
||||
struct timing_table
|
||||
{
|
||||
GateTableModel *table;
|
||||
const LibertyCell *cell;
|
||||
const Pvt *pvt;
|
||||
float in_slew;
|
||||
float relcap;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
The method is used for simulation with a time-and-voltage-dependent
|
||||
current source. But it is simpler to describe with a linear driver.
|
||||
Suppose we are given:
|
||||
voltage nodes 1,..n initialized to V[j]=1.0
|
||||
a resistor network described by a conductance matrix G[j,k]
|
||||
a drive resistance from node 1 to ground, Rdrv
|
||||
capacitance of the nodes, c[j], also written as a
|
||||
diagonal matrix C[j,k]
|
||||
The node voltages will fall to zero with time.
|
||||
Matrix equation:
|
||||
GV+CdV/dt = -(V[0]/Rdrv)e0
|
||||
where e0 is the unit vector (1,0,0,..0).
|
||||
Let G' be the matrix formed by taking G and adding 1/Rdrv in the
|
||||
[0,0] position. Then we are solving G'V + CdV/dt = 0.
|
||||
Let R be the inverse of G'. (In implementation, R may not actually
|
||||
be formed as a matrix, instead some method of producing RV given V).
|
||||
The exact solution would diagonalize sqrt(C) R sqrt(C).
|
||||
|
||||
The Arnoldi method takes a matrix M and a vector of interest V, and
|
||||
considers the subspace of vectors near V in terms of the action of M,
|
||||
that is, the space spanned by V, MV, MMV, etc, for a small number of
|
||||
powers. We do this here, but instead of finding the part of MV orthogonal
|
||||
to V, we find the part of RCV that is C-orthogonal of V. We use C
|
||||
as the metric, so the basis vectors U0, U1, U2 that we generate satisfy
|
||||
Ui.CUj = (i==j?1:0)
|
||||
|
||||
U0 = (1,1,..1)/sqrt(sum C)
|
||||
representing the initial value of V, V[j] = sqrt(n)U0[j] at t=0.
|
||||
sum(C) = C[0]+..C[n-1], so U0.C U0 = 1.
|
||||
Let:
|
||||
W = R C U0
|
||||
d0 = U0.C W
|
||||
W' = W - d0 U0
|
||||
e0 = sqrt(W'.C W')
|
||||
U1 = W'/e0
|
||||
Then: U0.C U0 = U1.C U1 = 1, U0.C U1 = 0, and
|
||||
R C U0 = d0 U0 + e0 U1
|
||||
Next step:
|
||||
W = R C U1
|
||||
d1 = U1.C W
|
||||
W' = W - d1 U1 - e0 U0
|
||||
e1 = sqrt(W'.C W')
|
||||
U2 = W'/e1
|
||||
and we have U2.C U2 = 1, U2.C U1 = U2.C U0 = 0, and
|
||||
RC U1 = e0 U0 + d1 U1 + e1 U2
|
||||
In this way, RC, which in nonsymmetric in the original basis,
|
||||
becomes a symmetric tridiagonal matrix in the U0,U1,.. basis.
|
||||
We stop at, say, U3. The resulting 4x4 tridiagonal matrix is
|
||||
positive definite, because it is the projection of sqrt(C)R sqrt(C)
|
||||
to a subspace. So the eigenvalues of this small tridiagonal matrix
|
||||
are guaranteed to be positive. This is the advantage over AWE.
|
||||
|
||||
In the actual implementation, I remember there was some way of
|
||||
isolating the node 0 where drive resistance is attached, so that the
|
||||
tridiagonal matrix (d,e) can be recalculated without knowing Rdrv,
|
||||
and then just the first d0,e0 updated when the Rdrv is known, or
|
||||
when Rdrv changes in a simulation.
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,26 @@
|
|||
// OpenSTA, Static Timing Analyzer
|
||||
// Copyright (c) 2018, Parallax Software, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef ARNOLDIDELAYCALC_H
|
||||
#define ARNOLDIDELAYCALC_H
|
||||
|
||||
namespace sta {
|
||||
|
||||
ArcDelayCalc *
|
||||
makeArnoldiDelayCalc(StaState *sta);
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
|
@ -0,0 +1,646 @@
|
|||
// OpenSTA, Static Timing Analyzer
|
||||
// Copyright (c) 2018, Parallax Software, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
// (c) 2018 Nefelus, Inc.
|
||||
//
|
||||
// Author: W. Scott
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include "Machine.hh"
|
||||
#include "Debug.hh"
|
||||
#include "Units.hh"
|
||||
#include "MinMax.hh"
|
||||
#include "Sdc.hh"
|
||||
#include "Network.hh"
|
||||
#include "ArnoldiReduce.hh"
|
||||
#include "Arnoldi.hh"
|
||||
#include "ConcreteParasiticsPvt.hh"
|
||||
|
||||
namespace sta {
|
||||
|
||||
rcmodel::rcmodel() :
|
||||
pinV(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
rcmodel::~rcmodel()
|
||||
{
|
||||
free(pinV);
|
||||
}
|
||||
|
||||
float
|
||||
rcmodel::capacitance() const
|
||||
{
|
||||
return ctot;
|
||||
}
|
||||
|
||||
struct ts_point
|
||||
{
|
||||
ParasiticNode *node_;
|
||||
int eN;
|
||||
bool is_term;
|
||||
int tindex; // index into termV of corresponding term
|
||||
ts_edge **eV;
|
||||
bool visited;
|
||||
ts_edge *in_edge;
|
||||
int ts;
|
||||
double c;
|
||||
double r;
|
||||
};
|
||||
|
||||
struct ts_edge
|
||||
{
|
||||
ConcreteParasiticResistor *resistor_;
|
||||
ts_point *from;
|
||||
ts_point *to;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
const int ArnoldiReduce::ts_point_count_incr_ = 1024;
|
||||
const int ArnoldiReduce::ts_edge_count_incr_ = 1024;
|
||||
|
||||
ArnoldiReduce::ArnoldiReduce(StaState *sta) :
|
||||
StaState(sta),
|
||||
ts_pointNmax(1024),
|
||||
ts_edgeNmax(1024),
|
||||
termNmax(256),
|
||||
dNmax(8)
|
||||
{
|
||||
ts_pointV = (ts_point*)malloc(ts_pointNmax*sizeof(ts_point));
|
||||
ts_ordV = (int*)malloc(ts_pointNmax*sizeof(int));
|
||||
ts_pordV = (ts_point**)malloc(ts_pointNmax*sizeof(ts_point*));
|
||||
_u0 = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
_u1 = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
y = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
iv = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
r = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
c = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
par = (int*)malloc(ts_pointNmax*sizeof(int));
|
||||
|
||||
ts_edgeV = (ts_edge*)malloc(ts_edgeNmax*sizeof(ts_edge));
|
||||
ts_stackV = (ts_edge**)malloc(ts_edgeNmax*sizeof(ts_edge*));
|
||||
ts_eV = (ts_edge**)malloc(2*ts_edgeNmax*sizeof(ts_edge*));
|
||||
|
||||
pinV = (const Pin**)malloc(termNmax*sizeof(const Pin*));
|
||||
termV = (int*)malloc(termNmax*sizeof(int));
|
||||
outV = (int*)malloc(termNmax*sizeof(int));
|
||||
|
||||
d = (double*)malloc(dNmax*sizeof(double));
|
||||
e = (double*)malloc(dNmax*sizeof(double));
|
||||
U = (double**)malloc(dNmax*sizeof(double*));
|
||||
U0 = (double*)malloc(dNmax*termNmax*sizeof(double));
|
||||
int h;
|
||||
for (h=0;h<dNmax;h++) U[h] = U0 + h*termNmax;
|
||||
}
|
||||
|
||||
ArnoldiReduce::~ArnoldiReduce()
|
||||
{
|
||||
free(U0);
|
||||
free(U);
|
||||
free(e);
|
||||
free(d);
|
||||
free(outV);
|
||||
free(termV);
|
||||
free(pinV);
|
||||
free(ts_eV);
|
||||
free(ts_edgeV);
|
||||
free(ts_stackV);
|
||||
free(par);
|
||||
free(c);
|
||||
free(r);
|
||||
free(iv);
|
||||
free(y);
|
||||
free(_u1);
|
||||
free(_u0);
|
||||
free(ts_pordV);
|
||||
free(ts_ordV);
|
||||
free(ts_pointV);
|
||||
}
|
||||
|
||||
Parasitic *
|
||||
ArnoldiReduce::reduceToArnoldi(Parasitic *parasitic,
|
||||
const Pin *drvr_pin,
|
||||
float coupling_cap_factor,
|
||||
const TransRiseFall *tr,
|
||||
const OperatingConditions *op_cond,
|
||||
const Corner *corner,
|
||||
const MinMax *cnst_min_max,
|
||||
const ParasiticAnalysisPt *ap)
|
||||
{
|
||||
parasitic_network_ = reinterpret_cast<ConcreteParasiticNetwork*>(parasitic);
|
||||
drvr_pin_ = drvr_pin;
|
||||
coupling_cap_factor_ = coupling_cap_factor;
|
||||
tr_ = tr;
|
||||
op_cond_ = op_cond;
|
||||
corner_ = corner;
|
||||
cnst_min_max_ = cnst_min_max;
|
||||
ap_ = ap;
|
||||
loadWork();
|
||||
return makeRcmodelDrv();
|
||||
}
|
||||
|
||||
void
|
||||
ArnoldiReduce::loadWork()
|
||||
{
|
||||
pt_map_.clear();
|
||||
|
||||
int resistor_count = 0;
|
||||
ConcreteParasiticDeviceSet devices;
|
||||
parasitic_network_->devices(devices);
|
||||
ConcreteParasiticDeviceSet::Iterator device_iter(devices);
|
||||
while (device_iter.hasNext()) {
|
||||
ParasiticDevice *device = device_iter.next();
|
||||
if (parasitics_->isResistor(device))
|
||||
resistor_count++;
|
||||
}
|
||||
|
||||
termN = parasitic_network_->pinNodes()->size();
|
||||
int subnode_count = parasitic_network_->subNodes()->size();
|
||||
ts_pointN = subnode_count + 1 + termN;
|
||||
ts_edgeN = resistor_count;
|
||||
allocPoints();
|
||||
allocTerms(termN);
|
||||
ts_point *p0 = ts_pointV;
|
||||
pterm0 = p0 + subnode_count + 1;
|
||||
ts_point *pend = p0 + ts_pointN;
|
||||
ts_point *p;
|
||||
ts_edge *e0 = ts_edgeV;
|
||||
ts_edge *eend = e0 + ts_edgeN;
|
||||
|
||||
ts_edge *e;
|
||||
int tindex;
|
||||
for (p = p0; p!=pend; p++) {
|
||||
p->node_ = NULL;
|
||||
p->eN = 0;
|
||||
p->is_term = false;
|
||||
}
|
||||
pend = pterm0;
|
||||
e = e0;
|
||||
int index = 0;
|
||||
ConcreteParasiticSubNodeMap::Iterator
|
||||
sub_node_iter(parasitic_network_->subNodes());
|
||||
while (sub_node_iter.hasNext()) {
|
||||
ConcreteParasiticSubNode *node = sub_node_iter.next();
|
||||
pt_map_[node] = index;
|
||||
p = p0 + index;
|
||||
p->node_ = node;
|
||||
p->eN = 0;
|
||||
p->is_term = false;
|
||||
index++;
|
||||
}
|
||||
|
||||
ConcreteParasiticPinNodeMap::Iterator
|
||||
pin_node_iter(parasitic_network_->pinNodes());
|
||||
while (pin_node_iter.hasNext()) {
|
||||
ConcreteParasiticPinNode *node = pin_node_iter.next();
|
||||
p = pend++;
|
||||
pt_map_[node] = p - p0;
|
||||
p->node_ = node;
|
||||
p->eN = 0;
|
||||
p->is_term = true;
|
||||
tindex = p - pterm0;
|
||||
p->tindex = tindex;
|
||||
const Pin *pin = parasitics_->connectionPin(node);
|
||||
pinV[tindex] = pin;
|
||||
}
|
||||
|
||||
ts_edge **eV = ts_eV;
|
||||
ConcreteParasiticDeviceSet::Iterator device_iter2(devices);
|
||||
while (device_iter2.hasNext()) {
|
||||
ParasiticDevice *device = device_iter2.next();
|
||||
if (parasitics_->isResistor(device)) {
|
||||
ConcreteParasiticResistor *resistor =
|
||||
reinterpret_cast<ConcreteParasiticResistor*>(device);
|
||||
ts_point *pt1 = findPt(resistor->node1());
|
||||
ts_point *pt2 = findPt(resistor->node2());
|
||||
e->from = pt1;
|
||||
e->to = pt2;
|
||||
e->resistor_ = resistor;
|
||||
pt1->eN++;
|
||||
if (e->from != e->to)
|
||||
pt2->eN++;
|
||||
e++;
|
||||
}
|
||||
}
|
||||
|
||||
for (p=p0;p!=pend;p++) {
|
||||
if (p->node_) {
|
||||
p->eV = eV;
|
||||
eV += p->eN;
|
||||
p->eN = 0;
|
||||
}
|
||||
}
|
||||
for (e=e0;e!=eend;e++) {
|
||||
e->from->eV[e->from->eN++] = e;
|
||||
if (e->to != e->from)
|
||||
e->to->eV[e->to->eN++] = e;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ArnoldiReduce::allocPoints()
|
||||
{
|
||||
if (ts_pointN > ts_pointNmax) {
|
||||
free(par);
|
||||
free(c);
|
||||
free(r);
|
||||
free(iv); free(y); free(_u1); free(_u0);
|
||||
free(ts_pordV);
|
||||
free(ts_ordV);
|
||||
free(ts_pointV);
|
||||
ts_pointNmax = ts_pointN + ts_point_count_incr_;
|
||||
ts_pointV = (ts_point*)malloc(ts_pointNmax*sizeof(ts_point));
|
||||
ts_ordV = (int*)malloc(ts_pointNmax*sizeof(int));
|
||||
ts_pordV = (ts_point**)malloc(ts_pointNmax*sizeof(ts_point*));
|
||||
_u0 = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
_u1 = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
y = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
iv = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
r = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
c = (double*)malloc(ts_pointNmax*sizeof(double));
|
||||
par = (int*)malloc(ts_pointNmax*sizeof(int));
|
||||
}
|
||||
if (ts_edgeN > ts_edgeNmax) {
|
||||
free(ts_edgeV);
|
||||
free(ts_eV);
|
||||
free(ts_stackV);
|
||||
ts_edgeNmax = ts_edgeN + ts_edge_count_incr_;
|
||||
ts_edgeV = (ts_edge*)malloc(ts_edgeNmax*sizeof(ts_edge));
|
||||
ts_stackV = (ts_edge**)malloc(ts_edgeNmax*sizeof(ts_edge*));
|
||||
ts_eV = (ts_edge**)malloc(2*ts_edgeNmax*sizeof(ts_edge*));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ArnoldiReduce::allocTerms(int nterms)
|
||||
{
|
||||
if (nterms > termNmax) {
|
||||
free(U0);
|
||||
free(outV);
|
||||
free(termV);
|
||||
free(pinV);
|
||||
termNmax = nterms+256;
|
||||
pinV = (const Pin**)malloc(termNmax*sizeof(const Pin*));
|
||||
termV = (int*)malloc(termNmax*sizeof(int));
|
||||
outV = (int*)malloc(termNmax*sizeof(int));
|
||||
|
||||
U0 = (double*)malloc(dNmax*termNmax*sizeof(double));
|
||||
int h;
|
||||
for (h=0;h<dNmax;h++) U[h] = U0 + h*termNmax;
|
||||
}
|
||||
}
|
||||
|
||||
ts_point *
|
||||
ArnoldiReduce::findPt(ParasiticNode *node)
|
||||
{
|
||||
return &ts_pointV[pt_map_[reinterpret_cast<ConcreteParasiticNode*>(node)]];
|
||||
}
|
||||
|
||||
rcmodel *
|
||||
ArnoldiReduce::makeRcmodelDrv()
|
||||
{
|
||||
ParasiticNode *drv_node = parasitics_->findNode(parasitic_network_,
|
||||
drvr_pin_);
|
||||
ts_point *pdrv = findPt(drv_node);
|
||||
makeRcmodelDfs(pdrv);
|
||||
getRC();
|
||||
if (ctot_ < 1e-22) // 1e-10ps
|
||||
return NULL;
|
||||
setTerms(pdrv);
|
||||
makeRcmodelFromTs();
|
||||
rcmodel *mod = makeRcmodelFromW();
|
||||
return mod;
|
||||
}
|
||||
|
||||
#define ts_orient( pp, ee) \
|
||||
if (ee->from!=pp) { ee->to = ee->from; ee->from = pp; }
|
||||
|
||||
void
|
||||
ArnoldiReduce::makeRcmodelDfs(ts_point *pdrv)
|
||||
{
|
||||
bool loop = false;
|
||||
int k;
|
||||
ts_point *p,*q;
|
||||
ts_point *p0 = ts_pointV;
|
||||
ts_point *pend = p0 + ts_pointN;
|
||||
for (p=p0;p!=pend;p++)
|
||||
p->visited = 0;
|
||||
ts_edge *e;
|
||||
|
||||
ts_edge **stackV = ts_stackV;
|
||||
int stackN = 1;
|
||||
stackV[0] = e = pdrv->eV[0];
|
||||
ts_orient(pdrv,e);
|
||||
pdrv->visited = 1;
|
||||
pdrv->in_edge = NULL;
|
||||
pdrv->ts = 0;
|
||||
ts_ordV[0] = pdrv-p0;
|
||||
ts_pordV[0] = pdrv;
|
||||
ts_ordN = 1;
|
||||
while (stackN>0) {
|
||||
e = stackV[stackN-1];
|
||||
q = e->to;
|
||||
|
||||
if (q->visited) {
|
||||
// if it is a one-rseg self-loop,
|
||||
// ignore, and do not even set *loop
|
||||
if (e->to != e->from)
|
||||
loop = true;
|
||||
} else {
|
||||
// try to descend
|
||||
q->visited = 1;
|
||||
q->ts = ts_ordN++;
|
||||
ts_pordV[q->ts] = q;
|
||||
ts_ordV[q->ts] = q-p0;
|
||||
q->in_edge = e;
|
||||
if (q->eN>1) {
|
||||
for (k=0;k<q->eN;k++) if (q->eV[k] != e) break;
|
||||
e = q->eV[k];
|
||||
ts_orient(q,e);
|
||||
stackV[stackN++] = e;
|
||||
continue; // descent
|
||||
}
|
||||
}
|
||||
// try to ascend
|
||||
while (--stackN>=0) {
|
||||
e = stackV[stackN];
|
||||
p = e->from;
|
||||
// find e in p->eV
|
||||
for (k=0;k<p->eN;k++) if (p->eV[k]==e) break;
|
||||
// if (k==p->eN) notice(0,"ERROR, e not found!\n");
|
||||
++k;
|
||||
if (k>=p->eN) continue;
|
||||
e = p->eV[k];
|
||||
// check that next sibling is not the incoming edge
|
||||
if (stackN>0 && e==stackV[stackN-1]) {
|
||||
++k;
|
||||
if (k>=p->eN) continue;
|
||||
e = p->eV[k];
|
||||
}
|
||||
ts_orient(p,e);
|
||||
stackV[stackN++] = e;
|
||||
break;
|
||||
}
|
||||
|
||||
} // while (stackN)
|
||||
|
||||
if (loop)
|
||||
debugPrint1(debug_, "arnoldi", 1,
|
||||
"net %s loop\n",
|
||||
network_->pathName(drvr_pin_));
|
||||
}
|
||||
|
||||
// makeRcmodelGetRC
|
||||
void
|
||||
ArnoldiReduce::getRC()
|
||||
{
|
||||
ts_point *p, *p0 = ts_pointV;
|
||||
ts_point *pend = p0 + ts_pointN;
|
||||
ctot_ = 0.0;
|
||||
for (p=p0;p!=pend;p++) {
|
||||
p->c = 0.0;
|
||||
p->r = 0.0;
|
||||
if (p->node_) {
|
||||
ParasiticNode *node = p->node_;
|
||||
double cap = parasitics_->nodeGndCap(node, ap_)
|
||||
+ pinCapacitance(node);
|
||||
if (cap > 0.0) {
|
||||
p->c = cap;
|
||||
ctot_ += cap;
|
||||
}
|
||||
else
|
||||
p->c = 0.0;
|
||||
if (p->in_edge && p->in_edge->resistor_)
|
||||
p->r = parasitics_->value(p->in_edge->resistor_, ap_);
|
||||
if (!(p->r>=0.0 && p->r<100e+3)) { // 0 < r < 100kohm
|
||||
debugPrint2(debug_, "arnoldi", 1,
|
||||
"R value %g out of range, drvr pin %s\n",
|
||||
p->r,
|
||||
network_->pathName(drvr_pin_));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float
|
||||
ArnoldiReduce::pinCapacitance(ParasiticNode *node)
|
||||
{
|
||||
const Pin *pin = parasitics_->connectionPin(node);
|
||||
float pin_cap = 0.0;
|
||||
if (pin) {
|
||||
Port *port = network_->port(pin);
|
||||
LibertyPort *lib_port = network_->libertyPort(port);
|
||||
if (lib_port)
|
||||
pin_cap = sdc_->pinCapacitance(pin,tr_, op_cond_, corner_, cnst_min_max_);
|
||||
else if (network_->isTopLevelPort(pin))
|
||||
pin_cap = sdc_->portExtCap(port, tr_, cnst_min_max_);
|
||||
}
|
||||
return pin_cap;
|
||||
}
|
||||
|
||||
void
|
||||
ArnoldiReduce::setTerms(ts_point *pdrv)
|
||||
{
|
||||
// termV: from drv-ordered to fixed order
|
||||
// outV: from drv-ordered to ts_pordV
|
||||
ts_point *p;
|
||||
int k,k0;
|
||||
termV[0] = k0 = pdrv->tindex;
|
||||
for (k=1;k<termN;k++) {
|
||||
if (k==k0) termV[k] = 0;
|
||||
else termV[k] = k;
|
||||
}
|
||||
for (k=0;k<termN;k++) {
|
||||
p = pterm0 + termV[k];
|
||||
outV[k] = p->ts;
|
||||
}
|
||||
}
|
||||
|
||||
// The guts of the arnoldi reducer.
|
||||
void
|
||||
ArnoldiReduce::makeRcmodelFromTs()
|
||||
{
|
||||
ts_point *p, *p0 = ts_pointV;
|
||||
int n = ts_ordN;
|
||||
int nterms = termN;
|
||||
int i,j,k,h;
|
||||
if (debug_->check("arnoldi", 1)) {
|
||||
for (k=0;k<ts_ordN;k++) {
|
||||
p = ts_pordV[k];
|
||||
debugPrint3(debug_, "arnoldi", 1, "T%d,P%ld c=%s",
|
||||
p->ts,p-p0,
|
||||
units_->capacitanceUnit()->asString(p->c));
|
||||
if (p->is_term)
|
||||
debug_->print(" term%d",p->tindex);
|
||||
if (p->in_edge)
|
||||
debug_->print(" from T%d,P%ld r=%s",
|
||||
p->in_edge->from->ts,
|
||||
p->in_edge->from-p0,
|
||||
units_->resistanceUnit()->asString(p->r));
|
||||
debug_->print("\n");
|
||||
}
|
||||
for (i=0;i<nterms;i++)
|
||||
debugPrint2(debug_, "arnoldi", 1, "outV[%d] = T%d\n",i,outV[i]);
|
||||
}
|
||||
|
||||
int max_order = 5;
|
||||
|
||||
double *u0, *u1;
|
||||
u0 = _u0; u1 = _u1;
|
||||
double sum,e1;
|
||||
order = max_order;
|
||||
if (n < order)
|
||||
order = n;
|
||||
|
||||
par[0] = -1; r[0] = 0.0;
|
||||
c[0] = ts_pordV[0]->c;
|
||||
for (j=1;j<n;j++) {
|
||||
p = ts_pordV[j];
|
||||
c[j] = p->c;
|
||||
r[j] = p->r;
|
||||
par[j] = p->in_edge->from->ts;
|
||||
}
|
||||
|
||||
sum = 0.0;
|
||||
for (j=0;j<n;j++) sum += c[j];
|
||||
debugPrint1(debug_, "arnoldi", 1, "ctot = %s\n",
|
||||
units_->capacitanceUnit()->asString(sum));
|
||||
ctot_ = sum;
|
||||
sqc_ = sqrt(sum);
|
||||
double sqrt_ctot_inv = 1.0/sqc_;
|
||||
for (j=0;j<n;j++) u0[j] = sqrt_ctot_inv;
|
||||
for (h=0;h<order;h++) {
|
||||
for (i=0;i<nterms;i++) U[h][i] = u0[outV[i]];
|
||||
|
||||
// y = R C u0
|
||||
for (j=0;j<n;j++) {
|
||||
iv[j] = 0.0;
|
||||
}
|
||||
for (j=n-1;j>0;j--) {
|
||||
iv[j] += c[j]*u0[j];
|
||||
iv[par[j]] += iv[j];
|
||||
}
|
||||
iv[0] += c[0]*u0[0];
|
||||
y[0] = 0.0;
|
||||
for (j=1;j<n;j++) {
|
||||
y[j] = y[par[j]] + r[j]*iv[j];
|
||||
}
|
||||
|
||||
// d[h] = u0 C y
|
||||
sum = 0.0;
|
||||
for (j=1;j<n;j++) {
|
||||
sum += u0[j]*c[j]*y[j];
|
||||
}
|
||||
d[h] = sum;
|
||||
if (h==order-1) break;
|
||||
if (d[h]<1e-13) { // .1ps
|
||||
order = h+1;
|
||||
break;
|
||||
}
|
||||
|
||||
// y = y - d[h]*u0 - e[h-1]*u1
|
||||
if (h==0) {
|
||||
for (j=0;j<n;j++) y[j] -= sum*u0[j];
|
||||
} else {
|
||||
e1 = e[h-1];
|
||||
for (j=0;j<n;j++) y[j] -= sum*u0[j] + e1*u1[j];
|
||||
}
|
||||
|
||||
// e[h] = sqrt(y C y)
|
||||
// u1 = y/e[h]
|
||||
sum = 0.0;
|
||||
for (j=0;j<n;j++) {
|
||||
sum += c[j]*y[j]*y[j];
|
||||
}
|
||||
if (sum<1e-30) { // (1e-6ns)^2
|
||||
order = h+1;
|
||||
break;
|
||||
}
|
||||
e[h] = sqrt(sum);
|
||||
sum = 1.0/e[h];
|
||||
for (j=0;j<n;j++) u1[j] = sum*y[j];
|
||||
|
||||
// swap u0, u1
|
||||
if (h%2) {
|
||||
u0 = _u0; u1 = _u1;
|
||||
} else {
|
||||
u0 = _u1; u1 = _u0;
|
||||
}
|
||||
}
|
||||
|
||||
if (debug_->check("arnoldi", 1)) {
|
||||
debugPrint1(debug_, "arnoldi", 1,
|
||||
"tridiagonal reduced matrix, drvr pin %s\n",
|
||||
network_->pathName(drvr_pin_));
|
||||
debugPrint2(debug_, "arnoldi", 1, "order %d n %d\n",order,n);
|
||||
for (h=0;h<order;h++) {
|
||||
debug_->print("d[%d] %s",
|
||||
h,
|
||||
units_->timeUnit()->asString(d[h]));
|
||||
if (h<order-1)
|
||||
debug_->print(" e[%d] %s",
|
||||
h,
|
||||
units_->timeUnit()->asString(e[h]));
|
||||
debug_->print("\n");
|
||||
debug_->print("U[%d]",h);
|
||||
for (i=0;i<nterms;i++)
|
||||
debug_->print(" %6.2e",U[h][i]);
|
||||
debug_->print("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rcmodel *
|
||||
ArnoldiReduce::makeRcmodelFromW()
|
||||
{
|
||||
int j,h;
|
||||
int n = termN;
|
||||
rcmodel *mod = new rcmodel();
|
||||
mod->order = order;
|
||||
mod->n = n;
|
||||
if (order>0) {
|
||||
int totd = order + order - 1 + order*n;
|
||||
mod->d = (double *)malloc(totd*sizeof(double));
|
||||
if (order>1) mod->e = mod->d + order;
|
||||
else mod->e = NULL;
|
||||
mod->U = (double **)malloc(order*sizeof(double*));
|
||||
mod->U[0] = mod->d + order + order - 1;
|
||||
for (h=1;h<order;h++) mod->U[h]=mod->U[0] + h*n;
|
||||
for (h=0;h<order;h++) {
|
||||
mod->d[h] = d[h];
|
||||
if (h<order-1) mod->e[h] = e[h];
|
||||
for (j=0;j<n;j++)
|
||||
mod->U[h][j] = U[h][j];
|
||||
}
|
||||
}
|
||||
|
||||
mod->pinV = (const Pin **)malloc(n*sizeof(const Pin*));
|
||||
for (j=0;j<n;j++) {
|
||||
int k = termV[j];
|
||||
mod->pinV[j] = pinV[k];
|
||||
}
|
||||
|
||||
mod->ctot = ctot_;
|
||||
mod->sqc = sqc_;
|
||||
return mod;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
// OpenSTA, Static Timing Analyzer
|
||||
// Copyright (c) 2018, Parallax Software, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
// (c) 2018 Nefelus, Inc.
|
||||
//
|
||||
// Author: W. Scott
|
||||
|
||||
#ifndef STA_ARNOLDI_REDUCE_H
|
||||
#define STA_ARNOLDI_REDUCE_H
|
||||
|
||||
#include "Map.hh"
|
||||
#include "NetworkClass.hh"
|
||||
#include "ParasiticsClass.hh"
|
||||
|
||||
namespace sta {
|
||||
|
||||
class ConcreteParasiticNetwork;
|
||||
class ConcreteParasiticNode;
|
||||
|
||||
class rcmodel;
|
||||
struct ts_edge;
|
||||
struct ts_point;
|
||||
|
||||
typedef Map<ConcreteParasiticNode*, int> ArnolidPtMap;
|
||||
|
||||
class ArnoldiReduce : public StaState
|
||||
{
|
||||
public:
|
||||
ArnoldiReduce(StaState *sta);
|
||||
~ArnoldiReduce();
|
||||
Parasitic *reduceToArnoldi(Parasitic *parasitic,
|
||||
const Pin *drvr_pin,
|
||||
float coupling_cap_factor,
|
||||
const TransRiseFall *tr,
|
||||
const OperatingConditions *op_cond,
|
||||
const Corner *corner,
|
||||
const MinMax *cnst_min_max,
|
||||
const ParasiticAnalysisPt *ap);
|
||||
|
||||
protected:
|
||||
void loadWork();
|
||||
rcmodel *makeRcmodelDrv();
|
||||
void allocPoints();
|
||||
void allocTerms(int nterms);
|
||||
ts_point *findPt(ParasiticNode *node);
|
||||
void makeRcmodelDfs(ts_point *pdrv);
|
||||
void getRC();
|
||||
float pinCapacitance(ParasiticNode *node);
|
||||
void setTerms(ts_point *pdrv);
|
||||
void makeRcmodelFromTs();
|
||||
rcmodel *makeRcmodelFromW();
|
||||
|
||||
ConcreteParasiticNetwork *parasitic_network_;
|
||||
const Pin *drvr_pin_;
|
||||
float coupling_cap_factor_;
|
||||
const TransRiseFall *tr_;
|
||||
const OperatingConditions *op_cond_;
|
||||
const Corner *corner_;
|
||||
const MinMax *cnst_min_max_;
|
||||
const ParasiticAnalysisPt *ap_;
|
||||
// ParasiticNode -> ts_point index.
|
||||
ArnolidPtMap pt_map_;
|
||||
|
||||
// rcWork
|
||||
ts_point *ts_pointV;
|
||||
int ts_pointN;
|
||||
int ts_pointNmax;
|
||||
static const int ts_point_count_incr_;
|
||||
ts_edge *ts_edgeV;
|
||||
int ts_edgeN;
|
||||
int ts_edgeNmax;
|
||||
static const int ts_edge_count_incr_;
|
||||
ts_edge **ts_eV;
|
||||
ts_edge **ts_stackV;
|
||||
int *ts_ordV;
|
||||
ts_point **ts_pordV;
|
||||
int ts_ordN;
|
||||
|
||||
int termNmax;
|
||||
int termN;
|
||||
ts_point *pterm0;
|
||||
const Pin **pinV; // fixed order, offset from pterm0
|
||||
int *termV; // from drv-ordered to fixed order
|
||||
int *outV; // from drv-ordered to ts_pordV
|
||||
|
||||
int dNmax;
|
||||
double *d;
|
||||
double *e;
|
||||
double *U0;
|
||||
double **U;
|
||||
|
||||
double ctot_;
|
||||
double sqc_;
|
||||
double *_u0, *_u1;
|
||||
double *y, *iv;
|
||||
double *c, *r;
|
||||
int *par;
|
||||
int order;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
||||
|
|
@ -21,6 +21,7 @@
|
|||
#include "LumpedCapDelayCalc.hh"
|
||||
#include "SimpleRCDelayCalc.hh"
|
||||
#include "DmpDelayCalc.hh"
|
||||
#include "ArnoldiDelayCalc.hh"
|
||||
#include "DelayCalc.hh"
|
||||
|
||||
namespace sta {
|
||||
|
|
@ -37,6 +38,7 @@ registerDelayCalcs()
|
|||
registerDelayCalc("simple_rc", makeSimpleRCDelayCalc);
|
||||
registerDelayCalc("dmp_ceff_elmore", makeDmpCeffElmoreDelayCalc);
|
||||
registerDelayCalc("dmp_ceff_two_pole", makeDmpCeffTwoPoleDelayCalc);
|
||||
registerDelayCalc("arnoldi", makeArnoldiDelayCalc);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -18,6 +18,9 @@ lib_LTLIBRARIES = libdcalc.la
|
|||
|
||||
include_HEADERS = \
|
||||
ArcDelayCalc.hh \
|
||||
Arnoldi.hh \
|
||||
ArnoldiDelayCalc.hh \
|
||||
ArnoldiReduce.hh \
|
||||
DelayCalc.hh \
|
||||
DcalcAnalysisPt.hh \
|
||||
DmpCeff.hh \
|
||||
|
|
@ -32,6 +35,8 @@ include_HEADERS = \
|
|||
|
||||
libdcalc_la_SOURCES = \
|
||||
ArcDelayCalc.cc \
|
||||
ArnoldiDelayCalc.cc \
|
||||
ArnoldiReduce.cc \
|
||||
DcalcAnalysisPt.cc \
|
||||
DelayCalc.cc \
|
||||
DmpCeff.cc \
|
||||
|
|
|
|||
Loading…
Reference in New Issue