diff --git a/dcalc/Arnoldi.hh b/dcalc/Arnoldi.hh
new file mode 100644
index 00000000..00b1b696
--- /dev/null
+++ b/dcalc/Arnoldi.hh
@@ -0,0 +1,83 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+
+// (c) 2018 Nefelus, Inc.
+//
+// Author: W. Scott
+
+#ifndef ARNOLDI_H
+#define ARNOLDI_H
+
+#include "ConcreteParasiticsPvt.hh"
+
+namespace sta {
+
+struct delay_work;
+class rcmodel;
+
+class GateTableModel;
+class Pin;
+
+//
+// single-driver arnoldi model
+//
+class arnoldi1
+{
+public:
+ arnoldi1() { order=0; n=0; d=NULL; e=NULL; U=NULL; ctot=0.0; sqc=0.0; }
+ ~arnoldi1();
+ double elmore(int term_index);
+
+ //
+ // calculate poles/residues for given rdrive
+ //
+ void calculate_poles_res(delay_work *D,double rdrive);
+
+public:
+ int order;
+ int n; // number of terms, including driver
+ double *d; // [order]
+ double *e; // [order-1]
+ double **U; // [order][n]
+ double ctot;
+ double sqc;
+};
+
+// This is the rcmodel, without Rd.
+// n is the number of terms
+// The vectors U[j] are of size n
+class rcmodel : public ConcreteParasitic,
+ public arnoldi1
+{
+public:
+ rcmodel();
+ virtual ~rcmodel();
+ virtual float capacitance() const;
+
+ const Pin **pinV; // [n]
+};
+
+struct timing_table
+{
+ GateTableModel *table;
+ const LibertyCell *cell;
+ const Pvt *pvt;
+ float in_slew;
+ float relcap;
+};
+
+} // namespace
+#endif
diff --git a/dcalc/Arnoldi.txt b/dcalc/Arnoldi.txt
new file mode 100644
index 00000000..63ebad51
--- /dev/null
+++ b/dcalc/Arnoldi.txt
@@ -0,0 +1,57 @@
+The method is used for simulation with a time-and-voltage-dependent
+current source. But it is simpler to describe with a linear driver.
+Suppose we are given:
+ voltage nodes 1,..n initialized to V[j]=1.0
+ a resistor network described by a conductance matrix G[j,k]
+ a drive resistance from node 1 to ground, Rdrv
+ capacitance of the nodes, c[j], also written as a
+ diagonal matrix C[j,k]
+The node voltages will fall to zero with time.
+Matrix equation:
+ GV+CdV/dt = -(V[0]/Rdrv)e0
+where e0 is the unit vector (1,0,0,..0).
+Let G' be the matrix formed by taking G and adding 1/Rdrv in the
+[0,0] position. Then we are solving G'V + CdV/dt = 0.
+Let R be the inverse of G'. (In implementation, R may not actually
+be formed as a matrix, instead some method of producing RV given V).
+The exact solution would diagonalize sqrt(C) R sqrt(C).
+
+The Arnoldi method takes a matrix M and a vector of interest V, and
+considers the subspace of vectors near V in terms of the action of M,
+that is, the space spanned by V, MV, MMV, etc, for a small number of
+powers. We do this here, but instead of finding the part of MV orthogonal
+to V, we find the part of RCV that is C-orthogonal of V. We use C
+as the metric, so the basis vectors U0, U1, U2 that we generate satisfy
+ Ui.CUj = (i==j?1:0)
+
+U0 = (1,1,..1)/sqrt(sum C)
+ representing the initial value of V, V[j] = sqrt(n)U0[j] at t=0.
+ sum(C) = C[0]+..C[n-1], so U0.C U0 = 1.
+Let:
+ W = R C U0
+ d0 = U0.C W
+ W' = W - d0 U0
+ e0 = sqrt(W'.C W')
+ U1 = W'/e0
+Then: U0.C U0 = U1.C U1 = 1, U0.C U1 = 0, and
+ R C U0 = d0 U0 + e0 U1
+Next step:
+ W = R C U1
+ d1 = U1.C W
+ W' = W - d1 U1 - e0 U0
+ e1 = sqrt(W'.C W')
+ U2 = W'/e1
+and we have U2.C U2 = 1, U2.C U1 = U2.C U0 = 0, and
+ RC U1 = e0 U0 + d1 U1 + e1 U2
+In this way, RC, which in nonsymmetric in the original basis,
+becomes a symmetric tridiagonal matrix in the U0,U1,.. basis.
+We stop at, say, U3. The resulting 4x4 tridiagonal matrix is
+positive definite, because it is the projection of sqrt(C)R sqrt(C)
+to a subspace. So the eigenvalues of this small tridiagonal matrix
+are guaranteed to be positive. This is the advantage over AWE.
+
+In the actual implementation, I remember there was some way of
+isolating the node 0 where drive resistance is attached, so that the
+tridiagonal matrix (d,e) can be recalculated without knowing Rdrv,
+and then just the first d0,e0 updated when the Rdrv is known, or
+when Rdrv changes in a simulation.
diff --git a/dcalc/ArnoldiDelayCalc.cc b/dcalc/ArnoldiDelayCalc.cc
new file mode 100644
index 00000000..8c65e2d1
--- /dev/null
+++ b/dcalc/ArnoldiDelayCalc.cc
@@ -0,0 +1,1490 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+
+// (c) 2018 Nefelus, Inc.
+//
+// Author: W. Scott
+
+#include
+#include // abs
+#include "Machine.hh"
+#include "Report.hh"
+#include "Debug.hh"
+#include "Units.hh"
+#include "Liberty.hh"
+#include "TimingModel.hh"
+#include "TimingArc.hh"
+#include "TableModel.hh"
+#include "Network.hh"
+#include "Graph.hh"
+#include "Parasitics.hh"
+#include "Sdc.hh"
+#include "DcalcAnalysisPt.hh"
+#include "DelayCalc.hh"
+#include "ArcDelayCalc.hh"
+#include "RCDelayCalc.hh"
+#include "GraphDelayCalc.hh"
+#include "Arnoldi.hh"
+#include "ArnoldiReduce.hh"
+#include "ArnoldiDelayCalc.hh"
+
+namespace sta {
+
+// wireload8 is n^2
+// do not delete arnoldi parasitics
+// handle rspf parasitics?
+
+// mv static functions to ArnoldiDelayCalc
+// need slew only lookup for
+// ra_delay
+// ra_get_r
+// ra_get_s
+
+using std::abs;
+
+struct delay_work;
+struct delay_c;
+
+////////////////////////////////////////////////////////////////
+
+static delay_work *delay_work_create();
+static void
+delay_work_destroy(delay_work *D);
+static double *
+delay_work_get_residues(delay_work *D,
+ int term_index);
+
+static bool
+tridiagEV(int n,double *d,double *e,double *p,double **v);
+
+//////////////////////////////////////////////////////////////
+
+struct delay_c
+{
+ double slew_derate;
+ double vlo;
+ double vhi;
+ double vlg;
+ double smin;
+ double x1;
+ double y1;
+ double vmid; // falling convention, should be >= 0.5
+};
+
+// workspace for pole-residue -> delay calculations
+// delay_work
+// max order is 32
+struct delay_work
+{
+ double slew_derate;
+ double slew_factor; // (0,1.0] table_slew = slew_factor * full_slew
+ delay_c cV[2];
+ delay_c *c;
+
+ double lo_thresh;
+ double hi_thresh;
+
+ int nmax;
+ double poles[32]; // 1/tau
+ double **resi; // resi[jrec][h] h=0,..order
+ double *v[32];
+ double *w[32];
+ double aa[32];
+};
+
+////////////////////////////////////////////////////////////////
+
+class ArnoldiDelayCalc : public RCDelayCalc
+{
+public:
+ ArnoldiDelayCalc(StaState *sta);
+ virtual ~ArnoldiDelayCalc();
+ virtual ArcDelayCalc *copy();
+ virtual void findParasitic(const Pin *drvr_pin,
+ const TransRiseFall *tr,
+ const DcalcAnalysisPt *dcalc_ap,
+ // Return values.
+ Parasitic *¶sitic,
+ bool &delete_at_finish);
+ virtual void gateDelay(const LibertyCell *drvr_cell,
+ TimingArc *arc,
+ const Slew &in_slew,
+ // Pass in load_cap or drvr_parasitic.
+ float load_cap,
+ Parasitic *drvr_parasitic,
+ float related_out_cap,
+ const Pvt *pvt,
+ const DcalcAnalysisPt *dcalc_ap,
+ // Return values.
+ ArcDelay &gate_delay,
+ Slew &drvr_slew);
+ virtual void loadDelay(const Pin *load_pin,
+ // Return values.
+ ArcDelay &wire_delay,
+ Slew &load_slew);
+ virtual void inputPortDelay(const Pin *port_pin,
+ float in_slew,
+ const TransRiseFall *tr,
+ Parasitic *parasitic,
+ const DcalcAnalysisPt *dcalc_ap);
+ virtual void reportGateDelay(const LibertyCell *drvr_cell,
+ TimingArc *arc,
+ const Slew &in_slew,
+ float load_cap,
+ Parasitic *,
+ float related_out_cap,
+ const Pvt *pvt,
+ const DcalcAnalysisPt *dcalc_ap,
+ int digits,
+ string *result);
+ void delay_work_set_thresholds(delay_work *D,
+ double lo,
+ double hi,
+ bool rising,
+ double derate);
+
+private:
+ void gateDelaySlew(const LibertyCell *drvr_cell,
+ GateTableModel *table_model,
+ const Slew &in_slew,
+ float related_out_cap,
+ const Pvt *pvt,
+ // Return values.
+ ArcDelay &gate_delay,
+ Slew &drvr_slew);
+ void ar1_ceff_delay(delay_work *D,timing_table *tab, arnoldi1 *mod,
+ double *delays, double *slews);
+ double ra_rdelay_1(timing_table *tab,
+ double ctot);
+ double ra_get_r(delay_work *D,
+ timing_table *tab,
+ double rdelay,
+ double ctot);
+ double ra_get_s(delay_work *D,
+ timing_table *tab,
+ double r,
+ double c);
+ void ra_solve_for_s(delay_work *D,
+ double p,
+ double tlohi,
+ double &s);
+ // from poles and residues, solve for t20,t50,t80
+ void pr_solve1(double s,
+ int order,
+ double *p,
+ double *rr,
+ double v1,
+ double *t1);
+ void pr_solve3(double s,
+ int order,
+ double *p,
+ double *rr,
+ double vhi,
+ double *thi,
+ double vmid,
+ double *tmid,
+ double vlo,
+ double *tlo);
+
+ //
+ // routines for linear drive model and ceff
+ //
+ double pr_ceff(double s,
+ double rdrive,
+ int order,
+ double *p,
+ double *rr,
+ double ceff_time);
+ double ra_solve_for_t(double p,
+ double s,
+ double v);
+ void ra_solve_for_pt(double ps,
+ double v,
+ double *pt,
+ double *d);
+ void ra_calc_c(double lo,
+ double hi,
+ double *c_smin,
+ double *c_x1,
+ double *c_y1);
+
+ rcmodel *rcmodel_;
+ int _pinNmax;
+ double *_delayV;
+ double *_slewV;
+ int pin_n_;
+ bool input_port_;
+ ArnoldiReduce *reduce_;
+ delay_work *delay_work_;
+};
+
+ArcDelayCalc *
+makeArnoldiDelayCalc(StaState *sta)
+{
+ return new ArnoldiDelayCalc(sta);
+}
+
+ArnoldiDelayCalc::ArnoldiDelayCalc(StaState *sta) :
+ RCDelayCalc(sta),
+ reduce_(new ArnoldiReduce(sta)),
+ delay_work_(delay_work_create())
+{
+ _pinNmax = 1024;
+ _delayV = (double*)malloc(_pinNmax * sizeof(double));
+ _slewV = (double*)malloc(_pinNmax * sizeof(double));
+}
+
+ArcDelayCalc *
+ArnoldiDelayCalc::copy()
+{
+ return new ArnoldiDelayCalc(this);
+}
+
+ArnoldiDelayCalc::~ArnoldiDelayCalc()
+{
+ delay_work_destroy(delay_work_);
+ free(_delayV);
+ free(_slewV);
+ delete reduce_;
+}
+
+void
+ArnoldiDelayCalc::findParasitic(const Pin *drvr_pin,
+ const TransRiseFall *drvr_tr,
+ const DcalcAnalysisPt *dcalc_ap,
+ // Return values.
+ Parasitic *¶sitic,
+ bool &delete_at_finish)
+{
+ parasitic = NULL;
+ delete_at_finish = false;
+ // set_load has precidence over parasitics.
+ if (!sdc_->drvrPinHasWireCap(drvr_pin)) {
+ const OperatingConditions *op_cond = dcalc_ap->operatingConditions();
+ const Corner *corner = dcalc_ap->corner();
+ const MinMax *cnst_min_max = dcalc_ap->constraintMinMax();
+ const ParasiticAnalysisPt *parasitic_ap = dcalc_ap->parasiticAnalysisPt();
+ Parasitic *parasitic_network =
+ parasitics_->findParasiticNetwork(drvr_pin, parasitic_ap);
+ bool delete_parasitic_network = false;
+ if (parasitic_network == NULL) {
+ Wireload *wireload = sdc_->wireloadDefaulted(cnst_min_max);
+ if (wireload) {
+ float pin_cap, wire_cap, fanout;
+ bool has_wire_cap;
+ graph_delay_calc_->netCaps(drvr_pin, drvr_tr, dcalc_ap,
+ pin_cap, wire_cap, fanout, has_wire_cap);
+ parasitic_network = parasitics_->makeWireloadNetwork(drvr_pin,
+ wireload,
+ fanout,
+ op_cond,
+ parasitic_ap);
+ delete_parasitic_network = true;
+ }
+ }
+ if (parasitic_network) {
+ parasitic = reduce_->reduceToArnoldi(parasitic_network,
+ drvr_pin,
+ parasitic_ap->couplingCapFactor(),
+ drvr_tr, op_cond, corner,
+ cnst_min_max, parasitic_ap);
+ if (delete_parasitic_network) {
+ Net *net = network_->net(drvr_pin);
+ parasitics_->deleteParasiticNetwork(net, parasitic_ap);
+ }
+ delete_at_finish = true;
+ }
+ }
+}
+
+void
+ArnoldiDelayCalc::inputPortDelay(const Pin *drvr_pin,
+ float in_slew,
+ const TransRiseFall *tr,
+ Parasitic *parasitic,
+ const DcalcAnalysisPt *dcalc_ap)
+{
+ RCDelayCalc::inputPortDelay(drvr_pin, in_slew, tr, parasitic, dcalc_ap);
+ rcmodel_ = NULL;
+ _delayV[0] = 0.0;
+ _slewV[0] = in_slew;
+
+ int j;
+ if (parasitic) {
+ rcmodel_ = reinterpret_cast(parasitic);
+ pin_n_ = rcmodel_->n;
+ if (pin_n_ >= _pinNmax) {
+ _pinNmax *= 2;
+ if (pin_n_ >= _pinNmax) _pinNmax += pin_n_;
+ _pinNmax *= 2;
+ _delayV = (double*)realloc(_delayV,_pinNmax * sizeof(double));
+ _slewV = (double*)realloc(_slewV,_pinNmax * sizeof(double));
+ }
+ pin_n_ = 1;
+
+ pin_n_ = rcmodel_->n;
+ double slew_derate = drvr_library_->slewDerateFromLibrary();
+ double lo_thresh = drvr_library_->slewLowerThreshold(drvr_tr_);
+ double hi_thresh = drvr_library_->slewUpperThreshold(drvr_tr_);
+ bool rising = (drvr_tr_ == TransRiseFall::rise());
+ delay_work_set_thresholds(delay_work_, lo_thresh, hi_thresh, rising,
+ slew_derate);
+ delay_c *c = delay_work_->c;
+ double c_log = c->vlg;
+
+ for (j=1;jelmore(j);
+ _delayV[j] = 0.6931472*elmore;
+ _slewV[j] = in_slew + c_log*elmore/slew_derate;
+ }
+ }
+}
+
+void
+ArnoldiDelayCalc::gateDelay(const LibertyCell *drvr_cell,
+ TimingArc *arc,
+ const Slew &in_slew,
+ float load_cap,
+ Parasitic *drvr_parasitic,
+ float related_out_cap,
+ const Pvt *pvt,
+ const DcalcAnalysisPt *dcalc_ap,
+ // Return values.
+ ArcDelay &gate_delay,
+ Slew &drvr_slew)
+{
+ input_port_ = false;
+ drvr_tr_ = arc->toTrans()->asRiseFall();
+ drvr_library_ = drvr_cell->libertyLibrary();
+ drvr_parasitic_ = drvr_parasitic;
+ ConcreteParasitic *drvr_cparasitic =
+ reinterpret_cast(drvr_parasitic);
+ rcmodel_ = dynamic_cast(drvr_cparasitic);
+ GateTimingModel *model = gateModel(arc, dcalc_ap);
+ GateTableModel *table_model = dynamic_cast(model);
+ if (table_model && rcmodel_)
+ gateDelaySlew(drvr_cell, table_model, in_slew,
+ related_out_cap, pvt,
+ gate_delay, drvr_slew);
+ else
+ LumpedCapDelayCalc::gateDelay(drvr_cell, arc, in_slew, load_cap,
+ drvr_parasitic, related_out_cap, pvt,
+ dcalc_ap, gate_delay, drvr_slew);
+ drvr_slew_ = drvr_slew;
+ multi_drvr_slew_factor_ = 1.0F;
+}
+
+void
+ArnoldiDelayCalc::gateDelaySlew(const LibertyCell *drvr_cell,
+ GateTableModel *table_model,
+ const Slew &in_slew,
+ float related_out_cap,
+ const Pvt *pvt,
+ // Return values.
+ ArcDelay &gate_delay,
+ Slew &drvr_slew)
+{
+ pin_n_ = rcmodel_->n;
+ if (pin_n_ >= _pinNmax) {
+ _pinNmax *= 2;
+ if (pin_n_ >= _pinNmax) _pinNmax += pin_n_;
+ _delayV = (double*)realloc(_delayV,_pinNmax * sizeof(double));
+ _slewV = (double*)realloc(_slewV,_pinNmax * sizeof(double));
+ }
+
+ pin_n_ = rcmodel_->n;
+ if (table_model) {
+ double slew_derate = drvr_library_->slewDerateFromLibrary();
+ double lo_thresh = drvr_library_->slewLowerThreshold(drvr_tr_);
+ double hi_thresh = drvr_library_->slewUpperThreshold(drvr_tr_);
+ bool rising = (drvr_tr_ == TransRiseFall::rise());
+ delay_work_set_thresholds(delay_work_, lo_thresh, hi_thresh, rising,
+ slew_derate);
+ if (rcmodel_->order > 0) {
+ timing_table tab;
+ tab.table = table_model;
+ tab.cell = drvr_cell;
+ tab.pvt = pvt;
+ tab.in_slew = in_slew;
+ tab.relcap = related_out_cap;
+ ar1_ceff_delay(delay_work_, &tab, rcmodel_,
+ _delayV, _slewV);
+ }
+ gate_delay = _delayV[0];
+ drvr_slew = _slewV[0];
+ }
+}
+
+void
+ArnoldiDelayCalc::loadDelay(const Pin *load_pin,
+ // Return values.
+ ArcDelay &wire_delay,
+ Slew &load_slew)
+{
+ wire_delay = 0.0;
+ load_slew = drvr_slew_ * multi_drvr_slew_factor_;
+ if (rcmodel_) {
+ // HACK
+ for (int i = 0; i < rcmodel_->n; i++) {
+ if (rcmodel_->pinV[i] == load_pin) {
+ wire_delay = _delayV[i] - _delayV[0];
+ load_slew = _slewV[i] * multi_drvr_slew_factor_;
+ break;
+ }
+ }
+ }
+ thresholdAdjust(load_pin, wire_delay, load_slew);
+}
+
+void
+ArnoldiDelayCalc::reportGateDelay(const LibertyCell *,
+ TimingArc *,
+ const Slew &,
+ float,
+ Parasitic *,
+ float,
+ const Pvt *,
+ const DcalcAnalysisPt *,
+ int,
+ string *)
+{
+}
+
+////////////////////////////////////////////////////////////////
+
+//
+// arnoldi1.cpp
+//
+
+arnoldi1::~arnoldi1()
+{
+ free(d);
+ free(U);
+}
+
+double
+arnoldi1::elmore(int k)
+{
+ if (order==0) return 0.0;
+ if (order==1) return d[0];
+ double sqctot = 1.0/U[0][0];
+ double tau = d[0] + e[0]*U[1][k]*sqctot;
+ return tau;
+}
+
+delay_work *
+delay_work_create()
+{
+ int j;
+ delay_work *D = (delay_work*)malloc(sizeof(delay_work));
+ D->nmax = 256;
+ D->resi = (double**)malloc(D->nmax*sizeof(double*));
+ D->resi[0] = (double*)malloc(D->nmax*32*sizeof(double));
+ for (j=1;jnmax;j++) D->resi[j] = D->resi[0] + j*32;
+ D->v[0] = (double*)malloc(32*32*sizeof(double));
+ for (j=1;j<32;j++) D->v[j] = D->v[0] + j*32;
+ D->w[0] = (double*)malloc(32*D->nmax*sizeof(double));
+ for (j=1;j<32;j++) D->w[j] = D->w[0] + j*D->nmax;
+ D->lo_thresh = 0.0;
+ D->hi_thresh = 0.0;
+ D->slew_derate = 0.0;
+ D->slew_factor = 0.0;
+ for (j=0;j<2;j++) {
+ D->cV[j].vlo = 0.0;
+ D->cV[j].vhi = 0.0;
+ D->cV[j].vlg = 0.0;
+ D->cV[j].smin = 0.0;
+ D->cV[j].x1 = 0.0;
+ D->cV[j].y1 = 0.0;
+ D->cV[j].vmid = 0.0;
+ }
+ D->c = D->cV;
+ return D;
+}
+
+static void
+delay_work_destroy(delay_work *D)
+{
+ free(D->resi[0]);
+ free(D->resi);
+ free(D->v[0]);
+ free(D->w[0]);
+ free(D);
+}
+
+static void
+delay_work_alloc(delay_work *D,int n)
+{
+ if (n<=D->nmax) return;
+ free(D->w[0]);
+ free(D->resi[0]);
+ free(D->resi);
+ D->nmax *= 2;
+ if (n > D->nmax) D->nmax = n;
+ int j;
+ D->resi = (double**)malloc(D->nmax*sizeof(double*));
+ D->resi[0] = (double*)malloc(D->nmax*32*sizeof(double));
+ for (j=1;jnmax;j++) D->resi[j] = D->resi[0] + j*32;
+ D->w[0] = (double*)malloc(32*D->nmax*sizeof(double));
+ for (j=1;j<32;j++) D->w[j] = D->w[0] + j*D->nmax;
+}
+
+void
+ArnoldiDelayCalc::delay_work_set_thresholds(delay_work *D,
+ double lo,
+ double hi,
+ bool rising,
+ double derate)
+{
+ double mid = 0.5; // 0.0:1.0
+ int i = rising?1:0;
+ D->c = D->cV+ i;
+ // WRONG
+ bool changed = (lo != D->c->vlo || hi != D->c->vhi);
+ if (changed) {
+ if (!(lo>0.01 && hi<0.99)) {
+ lo = 0.1;
+ hi = 0.9;
+ derate = 0.8;
+ }
+ D->c->slew_derate = derate;
+ D->c->vlo = lo;
+ D->c->vhi = hi;
+ D->c->vmid = mid;
+ D->c->vlg = log(hi/lo);
+ ra_calc_c(lo,hi,
+ &(D->c->smin), &(D->c->x1),&(D->c->y1));
+ }
+ D->lo_thresh = D->c->vlo;
+ D->hi_thresh = D->c->vhi;
+ D->slew_derate = derate;
+ double measured_swing = D->c->vhi - D->c->vlo;
+ double reported_swing = measured_swing/D->slew_derate;
+ D->slew_factor = reported_swing;
+}
+
+static double *
+delay_work_get_residues(delay_work *D,int term_index)
+{
+ return D->resi[term_index];
+}
+
+//////////////////////////////////////////////////////////////
+//
+// calculate_poles_res
+//
+
+void arnoldi1::calculate_poles_res(delay_work *D,double rdrive)
+{
+ if (n > D->nmax) delay_work_alloc(D,n);
+ double *p = D->poles;
+ double **v = D->v;
+ double **w = D->w;
+ double *aa = D->aa;
+ double **resi = D->resi;
+ int h,j,k;
+ double sum, dsave;
+
+ dsave = d[0];
+ d[0] += rdrive*ctot;
+ if (!tridiagEV(order,d,e,p,v))
+ internalError("arnoldi delay calc failed.\n");
+ d[0] = dsave;
+
+ for (h=0;h32)
+ return false;
+
+ for (i=0;i=1;h--) {
+ iter = 0;
+ while (abs(e[h])>1e-18) { // 1e-6ps
+ m=0;
+ if (m != h) {
+ if (iter++ == 20)
+ return false;
+ g = (d[h-1]-d[h])/(2.0*e[h]);
+ r = sqrt(1.0+g*g); // watch overflow
+ g = d[m]-d[h]+e[h]/(g + (g<0?-r:r));
+ s = c = 1.0;
+ p = 0.0;
+ for (i=m+1;i<=h;i++) {
+ f = s*e[i];
+ b = c*e[i];
+ e[i-1] = r = sqrt(f*f+g*g); // watch
+ if (r == 0.0) {
+ d[i-1] -= p;
+ e[m] = 0.0;
+ break;
+ }
+ s = f/r;
+ c = g/r;
+ g = d[i-1]-p;
+ r = (d[i]-g)*s+2.0*c*b;
+ d[i-1] = g + (p=s*r);
+ g = c*r-b;
+ for (k=0;k p) { k=j; p=d[k]; }
+ if (k != i) {
+ d[k] = d[i];
+ d[i] = p;
+ for (j=0;j= 0.0)
+ || (abs(2.0*f) > abs(dxold*df))) {
+ dxold = dx;
+ dx = 0.5*(xh-xl);
+ if (flast*f >0.0) {
+ // 2 successive bisections in same direction,
+ // accelerate
+ if (f<0.0) dx = 0.9348*(xh-xl);
+ else dx = 0.0625*(xh-xl);
+ }
+ flast = f;
+ rts = xl+dx;
+ if (xl == rts) {
+ return rts;
+ }
+ } else {
+ dxold = dx;
+ dx = f/df;
+ flast = 0.0;
+ temp = rts;
+ rts -= dx;
+ if (temp == rts) {
+ return rts;
+ }
+ }
+ if (abs(dx) < xacc) {
+ return rts;
+ }
+ get_dv(rts,s,order,p,rr,&f,&df); f -= val;
+ if (f<0.0)
+ xl = rts;
+ else
+ xh = rts;
+ }
+ if (abs(f)<1e-6) // 1uV
+ return rts;
+ return 0.5*(xl+xh);
+}
+
+void
+ArnoldiDelayCalc::pr_solve1(double s,
+ int order,
+ double *p,
+ double *rr,
+ double v1,
+ double *t1)
+{
+ double tmin = 0.0,tmax = 0.0,vmin = 0.0,vmax = 0.0;
+ int h, h0 = 0;
+ while (order>1
+ && rr[order-1]<1e-8 // 1e-8V
+ && rr[order-1]>-1e-8)
+ order--;
+ if (rr[0]<0.5) {
+ for (h=1;h0.3 && rr[h]>rr[0]) { h0 = h; break; }
+ }
+ double p0 = p[h0];
+ double ps,vs,ta,va;
+ vs = 0.0;
+ for (h=0;h1.0 && p[order-1]>500.0 && va>v1-0.002))
+ debugPrint0(debug_, "arnoldi", 1, "err, pr_solve1, vav1) {
+ tmin = ta; vmin = va;
+ ta *= 2.0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ }
+ if (va>v1)
+ debugPrint0(debug_, "arnoldi", 1, "err, pr_solve1, va>v1\n");
+ tmax = ta; vmax = va;
+ }
+ } else {
+ // s is irrelevant
+ ta = s; va = vs;
+ while (va >= v1) {
+ tmin = ta;
+ vmin = va;
+ ta += 1.0/p0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ }
+ tmax = ta; vmax = va;
+ }
+ *t1 = solve_t_bracketed(s,order,p,rr,v1,tmin,tmax,vmin,vmax);
+}
+
+void
+ArnoldiDelayCalc::pr_solve3(double s,
+ int order,
+ double *p,
+ double *rr,
+ double vhi,
+ double *thi,
+ double vmid,
+ double *tmid,
+ double vlo,
+ double *tlo)
+{
+ // falling, thi1
+ && rr[order-1]<1e-8 // 1e-8V
+ && rr[order-1]>-1e-8)
+ order--;
+ if (rr[0]<0.5) {
+ for (h=1;h0.3 && rr[h]>rr[0]) { h0 = h; break; }
+ }
+ double p0 = p[h0];
+ if (p0>10e+9) // 1/10ns
+ p0=10e+9;
+ double ps,vs,ta,va;
+ vs = 0.0;
+ for (h=0;hvhi) {
+ tmin2 = tmin5 = ta;
+ vmin2 = vmin5 = va;
+ tmin8 = ta; vmin8 = va;
+ if (va vhi) {
+ tmin2 = tmin5;
+ vmin2 = vmin5;
+ tmax2 = tmax5;
+ vmax2 = tmax5;
+ } else {
+ tmax2 = tmin5;
+ vmax2 = vmin5;
+ ta = vlo*s;
+ pr_get_v(ta,s,order,p,rr,&va);
+ tmin2 = ta; vmin2 = va;
+ }
+ }
+ } else if (vsvlo) {
+ tmin8 = ta; vmin8 = va;
+ ta += 1.0/p0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ }
+ tmax8 = ta; vmax8 = va;
+ ta = vmid*s;
+ pr_get_v(ta,s,order,p,rr,&va);
+ tmin5 = ta; vmin5 = va;
+ if (va>vhi) {
+ tmin2 = ta; vmin2 = va;
+ } else {
+ tmax2 = ta; vmax2 = va;
+ ta = vlo*s;
+ pr_get_v(ta,s,order,p,rr,&va);
+ tmin2 = ta; vmin2 = va;
+ }
+ } else if (vsvmid) {
+ tmin5 = tmin8 = ta; vmin5 = tmin8 = va;
+ ta += 0.7/p0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ }
+ tmax5 = ta; vmax5 = va;
+ if (va < vlo) {
+ tmax8 = ta; vmax8 = va;
+ } else {
+ tmin8 = ta; vmin8 = va;
+ ta += 1.0/p0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ while (va>vlo) {
+ tmin8 = ta; vmin8 = va;
+ ta += 1.0/p0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ }
+ tmax8 = ta; vmax8 = va;
+ }
+ } else {
+ // s is irrelevant
+ ta = s; va = vs;
+ tmin2 = tmin5 = tmin8 = ta;
+ vmin2 = vmin5 = vmin8 = va;
+ while (va > vhi) {
+ tmin2 = tmin5 = tmin8 = ta;
+ vmin2 = vmin5 = vmin8 = va;
+ ta += 1.0/p0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ }
+ tmax2 = ta; vmax2 = va;
+ if (va < vmid) {
+ tmax5 = ta; vmax5 = va;
+ } else while (va > vmid) {
+ tmin5 = tmin8 = ta;
+ vmin5 = vmin8 = va;
+ ta += 1.0/p0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ }
+ tmax5 = ta; vmax5 = va;
+ if (va < vlo) {
+ tmax8 = ta; vmax8 = va;
+ } else while (va > vlo) {
+ tmin8 = ta;
+ vmin8 = va;
+ ta += 1.0/p0;
+ pr_get_v(ta,s,order,p,rr,&va);
+ }
+ tmax8 = ta; vmax8 = va;
+ }
+
+ *thi = solve_t_bracketed(s,order,p,rr,vhi,tmin2,tmax2,vmin2,vmax2);
+ *tmid= solve_t_bracketed(s,order,p,rr,vmid,tmin5,tmax5,vmin5,vmax5);
+ *tlo= solve_t_bracketed(s,order,p,rr,vlo,tmin8,tmax8,vmin8,vmax8);
+}
+
+static double
+calc_integ(double p,double s,double t)
+{
+ // integral of f(t)-vin(t)
+ double ps = p*s;
+ double pt = p*t;
+ double y,ept,eps;
+ if (t<=s) {
+ ept = (pt>40.0)?0.0:exp(-pt);
+ y = ept-1.0+pt;
+ } else {
+ pt = pt-ps;
+ ept = (pt>40.0)?0.0:exp(-pt);
+ eps = (ps>40.0)?0.0:exp(-ps);
+ y = ps - (1.0-eps)*ept;
+ }
+ y /= ps*p;
+ return y;
+}
+
+
+double
+ArnoldiDelayCalc::pr_ceff(double s,
+ double rdrive,
+ int order,
+ double *p,
+ double *rr,
+ double ceff_time)
+{
+ double integi = 0.0;
+ double ceff, v0;
+ int j;
+ for (j=0;j1e-8)
+ debugPrint2(debug, "arnoldi", 1, "y f %g %g\n",y,f);
+ return x;
+}
+
+double
+ArnoldiDelayCalc::ra_solve_for_t(double p,
+ double s,
+ double v)
+{
+ double t;
+ double ps = p*s;
+ if (ps>30.0) {
+ t = (1.0+ps*(1.0-v)) / p;
+ return t;
+ }
+ double eps = exp(ps);
+ if ((1-ps*v)*eps >= 1.0) {
+ t = log((eps-1.0)/(ps*v)) / p;
+ } else {
+ t = ra_hinv((1-v)*ps, debug_)/p;
+ }
+ return t;
+}
+
+void
+ArnoldiDelayCalc::ra_solve_for_pt(double ps,
+ double v,
+ double *pt,
+ double *d)
+{
+ if (ps>30.0) {
+ *pt = 1.0+ps*(1.0-v);
+ *d = 1.0-v;
+ return;
+ }
+ double eps = exp(ps);
+ if ((1-ps*v)*eps >= 1.0) {
+ *pt = log((eps-1.0)/(ps*v));
+ *d = eps/(eps-1.0) - 1.0/ps;
+ } else {
+ *pt = ra_hinv((1-v)*ps, debug_);
+ *d = (1.0-v)/(*pt - (1-v)*ps);
+ }
+}
+
+void
+ArnoldiDelayCalc::ra_calc_c(double vlo,
+ double vhi,
+ double *c_smin,
+ double *c_x1,
+ double *c_y1)
+{
+ double a = log(1.0/vhi);
+ *c_smin = a + ra_hinv((1.0-vhi)/vhi - a, debug_);
+ double b = log(1.0/vlo);
+ double c_s1 = b + ra_hinv((1.0-vlo)/vlo - b, debug_);
+ double a1 = (exp(c_s1)-1.0)/c_s1;
+ double den = log(a1/vlo) - ra_hinv((1.0-vhi)*c_s1, debug_);
+ *c_x1 = (vhi-vlo)/den;
+ *c_y1 = c_s1*(*c_x1);
+}
+
+////////////////////////////////////////////////////////////////
+
+//
+// ceff.cpp
+//
+
+void
+ArnoldiDelayCalc::ra_solve_for_s(delay_work *D,
+ double p,
+ double tlohi,
+ double &s)
+{
+ delay_c *c = D->c;
+ double vhi = c->vhi;
+ double vlo = c->vlo;
+ // s is 0-100
+ // solve f(x,y)=0 with f = x*(ptlo(y/x)-pthi(y/x))-(vhi-vlo)
+ // (x=0,y=1)
+ // (x=x1,y=y1) c->x1,y1
+ // (x=x2,y=y2) x2=(vhi-vlo)/log(vhi/vlo) y2=(c->smin)*x2
+ double x1 = c->x1;
+ double y1 = c->y1;
+ double x2 = (vhi-vlo)/c->vlg;
+ double y2 = (c->smin)*x2;
+ double ptlo,dlo;
+ double pthi,dhi;
+ double f,df,x,y;
+ x = c->vlg/(p*tlohi);
+
+ if (x <= x1) {
+ y = y1 - 0.5*(x-x1);
+ if (y>1.0) y=1.0;
+ } else {
+ y = y1 - (x-x1)*(0.5 + 8*(x-x1));
+ if (y.5e-12) // .5ps
+ debugPrint3(debug_, "arnoldi", 1,
+ "ra_solve_for_s p %g tlohi %s err %s\n",
+ p,
+ units_->timeUnit()->asString(tlohi),
+ units_->timeUnit()->asString(f));
+}
+
+/////////////////////////////////////////////////////////////////////
+// method 0:
+// r = a match to slew to (ctot, limited by cmin,cmax)
+// if r>rdelay, lower r
+// Now at any ceff (limited)
+// If slew(r,0,ceff) is too big
+// s = s_start(r,ceff), not smaller than Smin
+// accept the pessimistic output slew
+// Else
+// solve for s
+
+// Rough translation of ra_get_r(sy_table) used by ar1_ceff_delay.
+double
+ArnoldiDelayCalc::ra_get_r(delay_work *D,
+ timing_table *tab,
+ double rdelay,
+ double ctot)
+{
+ // find the maximum r that allows a solution for s of
+ // (s,r,ctot)-> output_slew
+ // If this maximum is greater than rdelay, use rdelay.
+ delay_c *c = D->c;
+ double slew_derate = c->slew_derate;
+ double c_log = c->vlg;
+ float c1,d1,s1;
+ double tlohi,r;
+ c1 = ctot;
+ tab->table->gateDelay(tab->cell, tab->pvt, tab->in_slew,
+ c1, tab->relcap, d1, s1);
+ tlohi = slew_derate*s1;
+ r = tlohi/(c_log*c1);
+ if (rdelay>0.0 && r > rdelay)
+ r = rdelay;
+ // else printf("from rdelay %g to r %g\n",rdelay,r);
+ return r;
+}
+
+double
+ArnoldiDelayCalc::ra_get_s(delay_work *D,
+ timing_table *tab,
+ double r,
+ double c)
+{
+ delay_c *con = D->c;
+ double slew_derate = con->slew_derate;
+ double c_log = con->vlg;
+ double c_smin = con->smin;
+ double tlohi,smin,s;
+ float d1,s1;
+ tab->table->gateDelay(tab->cell, tab->pvt, tab->in_slew,
+ c, tab->relcap, d1, s1);
+ tlohi = slew_derate*s1;
+ smin = r*c*c_smin; // c_smin = ra_hinv((1-vhi)/vhi-log(vhi)) + log(vhi);
+ if (c_log*r*c >= tlohi) {
+ // printf("hit smin\n");
+ s = smin;
+ } else {
+ s = smin+0.3*tlohi;
+ ra_solve_for_s(D,1.0/(r*c),tlohi,s);
+ }
+ return s;
+}
+
+/////////////////////////////////////////////////////////////////////
+// method 1:
+// determine the drive resistance from change in delay versus ctot
+// find the maximum r that allows a solution for s of
+// (s,r,ctot)-> output_slew
+// If this maximum is greater than rdelay, use rdelay.
+// calculate s,r,mod -> t50_srmod,
+// then t50_srmod+t50_sy-t50_sr
+
+double
+ArnoldiDelayCalc::ra_rdelay_1(timing_table *tab,
+ double ctot)
+{
+ // determine the drive resistance from change in delay versus ctot
+ float c1 = ctot;
+ float c2 = 0.5*c1;
+ if (c1==c2)
+ return 0.0;
+ float d1, s1;
+ tab->table->gateDelay(tab->cell, tab->pvt, tab->in_slew,
+ c1, tab->relcap, d1, s1);
+ float d2, s2;
+ tab->table->gateDelay(tab->cell, tab->pvt, tab->in_slew,
+ c2, tab->relcap, d2, s2);
+ double dt50 = d1-d2;
+ if (dt50 <= 0.0)
+ return 0.0;
+ double rdelay = dt50/(c1-c2);
+ return rdelay;
+}
+
+void
+ArnoldiDelayCalc::ar1_ceff_delay(delay_work *D,
+ timing_table *tab,
+ arnoldi1 *mod,
+ double *delays,
+ double *slews)
+{
+ delay_c *con = D->c;
+ double slew_derate = con->slew_derate;
+ double vhi = con->vhi;
+ double vlo = con->vlo;
+ double ctot = mod->ctot;
+ double ceff,tlohi,t50_sy,r,s,t50_sr,rdelay;
+ float df,sf;
+
+ debugPrint1(debug_, "arnoldi", 1, "\nctot=%s\n",
+ units_->capacitanceUnit()->asString(ctot));
+
+ rdelay = ra_rdelay_1(tab,ctot);
+ if (rdelay == 0.0) {
+ rdelay = 1e+3; // 1kohm
+ }
+ r = rdelay;
+ r = ra_get_r(D,tab,rdelay,ctot);
+ if (! (r>0.0
+ && r<100e+3)) // 100khom
+ rdelay = 1e+3; // 1kohm
+
+ bool bad = (r0.0
+ && s<100e-9)) // 100ns
+ s = 0.5e-9; // .5ns
+
+ if (debug_->check("arnoldi", 1)) {
+ double p = 1.0/(r*ctot);
+ double thix,tlox;
+ if (bad) printf("bad\n");
+ debugPrint2(debug_, "arnoldi", 1, "at r=%s s=%s\n",
+ units_->resistanceUnit()->asString(r),
+ units_->timeUnit()->asString(s));
+ thix = ra_solve_for_t(p,s,vhi);
+ tlox = ra_solve_for_t(p,s,vlo);
+ tab->table->gateDelay(tab->cell, tab->pvt,tab->in_slew,
+ ctot, tab->relcap, df, sf);
+ debugPrint3(debug_, "arnoldi", 1,
+ "table slew (in_slew %s ctot %s) = %s\n",
+ units_->timeUnit()->asString(tab->in_slew),
+ units_->capacitanceUnit()->asString(ctot),
+ units_->timeUnit()->asString(sf));
+ tlohi = slew_derate*sf;
+ debugPrint2(debug_, "arnoldi", 1, "tlohi %s %s\n",
+ units_->timeUnit()->asString(tlohi),
+ units_->timeUnit()->asString(tlox-thix));
+ }
+ ceff = ctot;
+ tab->table->gateDelay(tab->cell, tab->pvt, tab->in_slew,
+ ceff, tab->relcap, df, sf);
+ t50_sy = df;
+ t50_sr = ra_solve_for_t(1.0/(r*ceff),s,0.5);
+
+ // calculate s,r,mod -> t50_srmod,
+ // then t50_srmod+t50_sy-t50_sr
+
+ mod->calculate_poles_res(D,r);
+ double *p = D->poles;
+ double *rr = delay_work_get_residues(D,0);
+ double thi,tlo,t50_srmod;
+ pr_solve1(s,mod->order,p,rr,0.5,&t50_srmod);
+
+ int ceff_it,j;
+ double ceff_time=0.0;
+
+ if (!bad) {
+ for (ceff_it=0;ceff_it<3;ceff_it++) {
+
+ // calculate ceff
+ ceff_time = s;
+ ceff = pr_ceff(s,r,mod->order,p,rr,ceff_time);
+
+ if((ceff-1e-20) < 0.0) { // 1e-8pf
+ debugPrint0(debug_, "arnoldi", 1,
+ "Invalid effective capacitance, using total capacitance\n");
+ ceff = ctot;
+ }
+
+ // new mvs at ceff
+ s = ra_get_s(D,tab,r,ceff);
+ debugPrint1(debug_, "arnoldi", 1, "new mvs s = %s\n",
+ units_->timeUnit()->asString(s));
+ }
+ }
+ debugPrint4(debug_, "arnoldi", 1, "r %s s %s ceff_time %s ceff %s\n",
+ units_->resistanceUnit()->asString(r),
+ units_->timeUnit()->asString(s),
+ units_->timeUnit()->asString(ceff_time),
+ units_->capacitanceUnit()->asString(ceff));
+
+ tab->table->gateDelay(tab->cell, tab->pvt, tab->in_slew, ceff,
+ tab->relcap, df, sf);
+ t50_sy = df;
+ t50_sr = ra_solve_for_t(1.0/(r*ceff),s,0.5);
+ for (j=0;jn;j++) {
+ rr = delay_work_get_residues(D,j);
+ pr_solve3(s,mod->order,p,rr,vhi,&thi,0.5,&t50_srmod,vlo,&tlo);
+ delays[j] = t50_srmod + t50_sy - t50_sr;
+ slews[j] = (tlo-thi)/slew_derate;
+ }
+ debugPrint3(debug_, "arnoldi", 1, "slews[0] %s thi %s tlo %s\n",
+ units_->timeUnit()->asString(slews[0]),
+ units_->timeUnit()->asString(tlo),
+ units_->timeUnit()->asString(thi));
+}
+
+} // namespace
diff --git a/dcalc/ArnoldiDelayCalc.hh b/dcalc/ArnoldiDelayCalc.hh
new file mode 100644
index 00000000..4ff74e6e
--- /dev/null
+++ b/dcalc/ArnoldiDelayCalc.hh
@@ -0,0 +1,26 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+
+#ifndef ARNOLDIDELAYCALC_H
+#define ARNOLDIDELAYCALC_H
+
+namespace sta {
+
+ArcDelayCalc *
+makeArnoldiDelayCalc(StaState *sta);
+
+} // namespace
+#endif
diff --git a/dcalc/ArnoldiReduce.cc b/dcalc/ArnoldiReduce.cc
new file mode 100644
index 00000000..7dc910ef
--- /dev/null
+++ b/dcalc/ArnoldiReduce.cc
@@ -0,0 +1,646 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+
+// (c) 2018 Nefelus, Inc.
+//
+// Author: W. Scott
+
+#include
+#include
+#include
+#include
+#include "Machine.hh"
+#include "Debug.hh"
+#include "Units.hh"
+#include "MinMax.hh"
+#include "Sdc.hh"
+#include "Network.hh"
+#include "ArnoldiReduce.hh"
+#include "Arnoldi.hh"
+#include "ConcreteParasiticsPvt.hh"
+
+namespace sta {
+
+rcmodel::rcmodel() :
+ pinV(NULL)
+{
+}
+
+rcmodel::~rcmodel()
+{
+ free(pinV);
+}
+
+float
+rcmodel::capacitance() const
+{
+ return ctot;
+}
+
+struct ts_point
+{
+ ParasiticNode *node_;
+ int eN;
+ bool is_term;
+ int tindex; // index into termV of corresponding term
+ ts_edge **eV;
+ bool visited;
+ ts_edge *in_edge;
+ int ts;
+ double c;
+ double r;
+};
+
+struct ts_edge
+{
+ ConcreteParasiticResistor *resistor_;
+ ts_point *from;
+ ts_point *to;
+};
+
+////////////////////////////////////////////////////////////////
+
+
+const int ArnoldiReduce::ts_point_count_incr_ = 1024;
+const int ArnoldiReduce::ts_edge_count_incr_ = 1024;
+
+ArnoldiReduce::ArnoldiReduce(StaState *sta) :
+ StaState(sta),
+ ts_pointNmax(1024),
+ ts_edgeNmax(1024),
+ termNmax(256),
+ dNmax(8)
+{
+ ts_pointV = (ts_point*)malloc(ts_pointNmax*sizeof(ts_point));
+ ts_ordV = (int*)malloc(ts_pointNmax*sizeof(int));
+ ts_pordV = (ts_point**)malloc(ts_pointNmax*sizeof(ts_point*));
+ _u0 = (double*)malloc(ts_pointNmax*sizeof(double));
+ _u1 = (double*)malloc(ts_pointNmax*sizeof(double));
+ y = (double*)malloc(ts_pointNmax*sizeof(double));
+ iv = (double*)malloc(ts_pointNmax*sizeof(double));
+ r = (double*)malloc(ts_pointNmax*sizeof(double));
+ c = (double*)malloc(ts_pointNmax*sizeof(double));
+ par = (int*)malloc(ts_pointNmax*sizeof(int));
+
+ ts_edgeV = (ts_edge*)malloc(ts_edgeNmax*sizeof(ts_edge));
+ ts_stackV = (ts_edge**)malloc(ts_edgeNmax*sizeof(ts_edge*));
+ ts_eV = (ts_edge**)malloc(2*ts_edgeNmax*sizeof(ts_edge*));
+
+ pinV = (const Pin**)malloc(termNmax*sizeof(const Pin*));
+ termV = (int*)malloc(termNmax*sizeof(int));
+ outV = (int*)malloc(termNmax*sizeof(int));
+
+ d = (double*)malloc(dNmax*sizeof(double));
+ e = (double*)malloc(dNmax*sizeof(double));
+ U = (double**)malloc(dNmax*sizeof(double*));
+ U0 = (double*)malloc(dNmax*termNmax*sizeof(double));
+ int h;
+ for (h=0;h(parasitic);
+ drvr_pin_ = drvr_pin;
+ coupling_cap_factor_ = coupling_cap_factor;
+ tr_ = tr;
+ op_cond_ = op_cond;
+ corner_ = corner;
+ cnst_min_max_ = cnst_min_max;
+ ap_ = ap;
+ loadWork();
+ return makeRcmodelDrv();
+}
+
+void
+ArnoldiReduce::loadWork()
+{
+ pt_map_.clear();
+
+ int resistor_count = 0;
+ ConcreteParasiticDeviceSet devices;
+ parasitic_network_->devices(devices);
+ ConcreteParasiticDeviceSet::Iterator device_iter(devices);
+ while (device_iter.hasNext()) {
+ ParasiticDevice *device = device_iter.next();
+ if (parasitics_->isResistor(device))
+ resistor_count++;
+ }
+
+ termN = parasitic_network_->pinNodes()->size();
+ int subnode_count = parasitic_network_->subNodes()->size();
+ ts_pointN = subnode_count + 1 + termN;
+ ts_edgeN = resistor_count;
+ allocPoints();
+ allocTerms(termN);
+ ts_point *p0 = ts_pointV;
+ pterm0 = p0 + subnode_count + 1;
+ ts_point *pend = p0 + ts_pointN;
+ ts_point *p;
+ ts_edge *e0 = ts_edgeV;
+ ts_edge *eend = e0 + ts_edgeN;
+
+ ts_edge *e;
+ int tindex;
+ for (p = p0; p!=pend; p++) {
+ p->node_ = NULL;
+ p->eN = 0;
+ p->is_term = false;
+ }
+ pend = pterm0;
+ e = e0;
+ int index = 0;
+ ConcreteParasiticSubNodeMap::Iterator
+ sub_node_iter(parasitic_network_->subNodes());
+ while (sub_node_iter.hasNext()) {
+ ConcreteParasiticSubNode *node = sub_node_iter.next();
+ pt_map_[node] = index;
+ p = p0 + index;
+ p->node_ = node;
+ p->eN = 0;
+ p->is_term = false;
+ index++;
+ }
+
+ ConcreteParasiticPinNodeMap::Iterator
+ pin_node_iter(parasitic_network_->pinNodes());
+ while (pin_node_iter.hasNext()) {
+ ConcreteParasiticPinNode *node = pin_node_iter.next();
+ p = pend++;
+ pt_map_[node] = p - p0;
+ p->node_ = node;
+ p->eN = 0;
+ p->is_term = true;
+ tindex = p - pterm0;
+ p->tindex = tindex;
+ const Pin *pin = parasitics_->connectionPin(node);
+ pinV[tindex] = pin;
+ }
+
+ ts_edge **eV = ts_eV;
+ ConcreteParasiticDeviceSet::Iterator device_iter2(devices);
+ while (device_iter2.hasNext()) {
+ ParasiticDevice *device = device_iter2.next();
+ if (parasitics_->isResistor(device)) {
+ ConcreteParasiticResistor *resistor =
+ reinterpret_cast(device);
+ ts_point *pt1 = findPt(resistor->node1());
+ ts_point *pt2 = findPt(resistor->node2());
+ e->from = pt1;
+ e->to = pt2;
+ e->resistor_ = resistor;
+ pt1->eN++;
+ if (e->from != e->to)
+ pt2->eN++;
+ e++;
+ }
+ }
+
+ for (p=p0;p!=pend;p++) {
+ if (p->node_) {
+ p->eV = eV;
+ eV += p->eN;
+ p->eN = 0;
+ }
+ }
+ for (e=e0;e!=eend;e++) {
+ e->from->eV[e->from->eN++] = e;
+ if (e->to != e->from)
+ e->to->eV[e->to->eN++] = e;
+ }
+}
+
+void
+ArnoldiReduce::allocPoints()
+{
+ if (ts_pointN > ts_pointNmax) {
+ free(par);
+ free(c);
+ free(r);
+ free(iv); free(y); free(_u1); free(_u0);
+ free(ts_pordV);
+ free(ts_ordV);
+ free(ts_pointV);
+ ts_pointNmax = ts_pointN + ts_point_count_incr_;
+ ts_pointV = (ts_point*)malloc(ts_pointNmax*sizeof(ts_point));
+ ts_ordV = (int*)malloc(ts_pointNmax*sizeof(int));
+ ts_pordV = (ts_point**)malloc(ts_pointNmax*sizeof(ts_point*));
+ _u0 = (double*)malloc(ts_pointNmax*sizeof(double));
+ _u1 = (double*)malloc(ts_pointNmax*sizeof(double));
+ y = (double*)malloc(ts_pointNmax*sizeof(double));
+ iv = (double*)malloc(ts_pointNmax*sizeof(double));
+ r = (double*)malloc(ts_pointNmax*sizeof(double));
+ c = (double*)malloc(ts_pointNmax*sizeof(double));
+ par = (int*)malloc(ts_pointNmax*sizeof(int));
+ }
+ if (ts_edgeN > ts_edgeNmax) {
+ free(ts_edgeV);
+ free(ts_eV);
+ free(ts_stackV);
+ ts_edgeNmax = ts_edgeN + ts_edge_count_incr_;
+ ts_edgeV = (ts_edge*)malloc(ts_edgeNmax*sizeof(ts_edge));
+ ts_stackV = (ts_edge**)malloc(ts_edgeNmax*sizeof(ts_edge*));
+ ts_eV = (ts_edge**)malloc(2*ts_edgeNmax*sizeof(ts_edge*));
+ }
+}
+
+void
+ArnoldiReduce::allocTerms(int nterms)
+{
+ if (nterms > termNmax) {
+ free(U0);
+ free(outV);
+ free(termV);
+ free(pinV);
+ termNmax = nterms+256;
+ pinV = (const Pin**)malloc(termNmax*sizeof(const Pin*));
+ termV = (int*)malloc(termNmax*sizeof(int));
+ outV = (int*)malloc(termNmax*sizeof(int));
+
+ U0 = (double*)malloc(dNmax*termNmax*sizeof(double));
+ int h;
+ for (h=0;h(node)]];
+}
+
+rcmodel *
+ArnoldiReduce::makeRcmodelDrv()
+{
+ ParasiticNode *drv_node = parasitics_->findNode(parasitic_network_,
+ drvr_pin_);
+ ts_point *pdrv = findPt(drv_node);
+ makeRcmodelDfs(pdrv);
+ getRC();
+ if (ctot_ < 1e-22) // 1e-10ps
+ return NULL;
+ setTerms(pdrv);
+ makeRcmodelFromTs();
+ rcmodel *mod = makeRcmodelFromW();
+ return mod;
+}
+
+#define ts_orient( pp, ee) \
+ if (ee->from!=pp) { ee->to = ee->from; ee->from = pp; }
+
+void
+ArnoldiReduce::makeRcmodelDfs(ts_point *pdrv)
+{
+ bool loop = false;
+ int k;
+ ts_point *p,*q;
+ ts_point *p0 = ts_pointV;
+ ts_point *pend = p0 + ts_pointN;
+ for (p=p0;p!=pend;p++)
+ p->visited = 0;
+ ts_edge *e;
+
+ ts_edge **stackV = ts_stackV;
+ int stackN = 1;
+ stackV[0] = e = pdrv->eV[0];
+ ts_orient(pdrv,e);
+ pdrv->visited = 1;
+ pdrv->in_edge = NULL;
+ pdrv->ts = 0;
+ ts_ordV[0] = pdrv-p0;
+ ts_pordV[0] = pdrv;
+ ts_ordN = 1;
+ while (stackN>0) {
+ e = stackV[stackN-1];
+ q = e->to;
+
+ if (q->visited) {
+ // if it is a one-rseg self-loop,
+ // ignore, and do not even set *loop
+ if (e->to != e->from)
+ loop = true;
+ } else {
+ // try to descend
+ q->visited = 1;
+ q->ts = ts_ordN++;
+ ts_pordV[q->ts] = q;
+ ts_ordV[q->ts] = q-p0;
+ q->in_edge = e;
+ if (q->eN>1) {
+ for (k=0;keN;k++) if (q->eV[k] != e) break;
+ e = q->eV[k];
+ ts_orient(q,e);
+ stackV[stackN++] = e;
+ continue; // descent
+ }
+ }
+ // try to ascend
+ while (--stackN>=0) {
+ e = stackV[stackN];
+ p = e->from;
+ // find e in p->eV
+ for (k=0;keN;k++) if (p->eV[k]==e) break;
+ // if (k==p->eN) notice(0,"ERROR, e not found!\n");
+ ++k;
+ if (k>=p->eN) continue;
+ e = p->eV[k];
+ // check that next sibling is not the incoming edge
+ if (stackN>0 && e==stackV[stackN-1]) {
+ ++k;
+ if (k>=p->eN) continue;
+ e = p->eV[k];
+ }
+ ts_orient(p,e);
+ stackV[stackN++] = e;
+ break;
+ }
+
+ } // while (stackN)
+
+ if (loop)
+ debugPrint1(debug_, "arnoldi", 1,
+ "net %s loop\n",
+ network_->pathName(drvr_pin_));
+}
+
+// makeRcmodelGetRC
+void
+ArnoldiReduce::getRC()
+{
+ ts_point *p, *p0 = ts_pointV;
+ ts_point *pend = p0 + ts_pointN;
+ ctot_ = 0.0;
+ for (p=p0;p!=pend;p++) {
+ p->c = 0.0;
+ p->r = 0.0;
+ if (p->node_) {
+ ParasiticNode *node = p->node_;
+ double cap = parasitics_->nodeGndCap(node, ap_)
+ + pinCapacitance(node);
+ if (cap > 0.0) {
+ p->c = cap;
+ ctot_ += cap;
+ }
+ else
+ p->c = 0.0;
+ if (p->in_edge && p->in_edge->resistor_)
+ p->r = parasitics_->value(p->in_edge->resistor_, ap_);
+ if (!(p->r>=0.0 && p->r<100e+3)) { // 0 < r < 100kohm
+ debugPrint2(debug_, "arnoldi", 1,
+ "R value %g out of range, drvr pin %s\n",
+ p->r,
+ network_->pathName(drvr_pin_));
+ }
+ }
+ }
+}
+
+float
+ArnoldiReduce::pinCapacitance(ParasiticNode *node)
+{
+ const Pin *pin = parasitics_->connectionPin(node);
+ float pin_cap = 0.0;
+ if (pin) {
+ Port *port = network_->port(pin);
+ LibertyPort *lib_port = network_->libertyPort(port);
+ if (lib_port)
+ pin_cap = sdc_->pinCapacitance(pin,tr_, op_cond_, corner_, cnst_min_max_);
+ else if (network_->isTopLevelPort(pin))
+ pin_cap = sdc_->portExtCap(port, tr_, cnst_min_max_);
+ }
+ return pin_cap;
+}
+
+void
+ArnoldiReduce::setTerms(ts_point *pdrv)
+{
+ // termV: from drv-ordered to fixed order
+ // outV: from drv-ordered to ts_pordV
+ ts_point *p;
+ int k,k0;
+ termV[0] = k0 = pdrv->tindex;
+ for (k=1;kts;
+ }
+}
+
+// The guts of the arnoldi reducer.
+void
+ArnoldiReduce::makeRcmodelFromTs()
+{
+ ts_point *p, *p0 = ts_pointV;
+ int n = ts_ordN;
+ int nterms = termN;
+ int i,j,k,h;
+ if (debug_->check("arnoldi", 1)) {
+ for (k=0;kts,p-p0,
+ units_->capacitanceUnit()->asString(p->c));
+ if (p->is_term)
+ debug_->print(" term%d",p->tindex);
+ if (p->in_edge)
+ debug_->print(" from T%d,P%ld r=%s",
+ p->in_edge->from->ts,
+ p->in_edge->from-p0,
+ units_->resistanceUnit()->asString(p->r));
+ debug_->print("\n");
+ }
+ for (i=0;ic;
+ for (j=1;jc;
+ r[j] = p->r;
+ par[j] = p->in_edge->from->ts;
+ }
+
+ sum = 0.0;
+ for (j=0;jcapacitanceUnit()->asString(sum));
+ ctot_ = sum;
+ sqc_ = sqrt(sum);
+ double sqrt_ctot_inv = 1.0/sqc_;
+ for (j=0;j0;j--) {
+ iv[j] += c[j]*u0[j];
+ iv[par[j]] += iv[j];
+ }
+ iv[0] += c[0]*u0[0];
+ y[0] = 0.0;
+ for (j=1;jcheck("arnoldi", 1)) {
+ debugPrint1(debug_, "arnoldi", 1,
+ "tridiagonal reduced matrix, drvr pin %s\n",
+ network_->pathName(drvr_pin_));
+ debugPrint2(debug_, "arnoldi", 1, "order %d n %d\n",order,n);
+ for (h=0;hprint("d[%d] %s",
+ h,
+ units_->timeUnit()->asString(d[h]));
+ if (hprint(" e[%d] %s",
+ h,
+ units_->timeUnit()->asString(e[h]));
+ debug_->print("\n");
+ debug_->print("U[%d]",h);
+ for (i=0;iprint(" %6.2e",U[h][i]);
+ debug_->print("\n");
+ }
+ }
+}
+
+rcmodel *
+ArnoldiReduce::makeRcmodelFromW()
+{
+ int j,h;
+ int n = termN;
+ rcmodel *mod = new rcmodel();
+ mod->order = order;
+ mod->n = n;
+ if (order>0) {
+ int totd = order + order - 1 + order*n;
+ mod->d = (double *)malloc(totd*sizeof(double));
+ if (order>1) mod->e = mod->d + order;
+ else mod->e = NULL;
+ mod->U = (double **)malloc(order*sizeof(double*));
+ mod->U[0] = mod->d + order + order - 1;
+ for (h=1;hU[h]=mod->U[0] + h*n;
+ for (h=0;hd[h] = d[h];
+ if (he[h] = e[h];
+ for (j=0;jU[h][j] = U[h][j];
+ }
+ }
+
+ mod->pinV = (const Pin **)malloc(n*sizeof(const Pin*));
+ for (j=0;jpinV[j] = pinV[k];
+ }
+
+ mod->ctot = ctot_;
+ mod->sqc = sqc_;
+ return mod;
+}
+
+} // namespace
diff --git a/dcalc/ArnoldiReduce.hh b/dcalc/ArnoldiReduce.hh
new file mode 100644
index 00000000..200bd750
--- /dev/null
+++ b/dcalc/ArnoldiReduce.hh
@@ -0,0 +1,116 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+
+// (c) 2018 Nefelus, Inc.
+//
+// Author: W. Scott
+
+#ifndef STA_ARNOLDI_REDUCE_H
+#define STA_ARNOLDI_REDUCE_H
+
+#include "Map.hh"
+#include "NetworkClass.hh"
+#include "ParasiticsClass.hh"
+
+namespace sta {
+
+class ConcreteParasiticNetwork;
+class ConcreteParasiticNode;
+
+class rcmodel;
+struct ts_edge;
+struct ts_point;
+
+typedef Map ArnolidPtMap;
+
+class ArnoldiReduce : public StaState
+{
+public:
+ ArnoldiReduce(StaState *sta);
+ ~ArnoldiReduce();
+ Parasitic *reduceToArnoldi(Parasitic *parasitic,
+ const Pin *drvr_pin,
+ float coupling_cap_factor,
+ const TransRiseFall *tr,
+ const OperatingConditions *op_cond,
+ const Corner *corner,
+ const MinMax *cnst_min_max,
+ const ParasiticAnalysisPt *ap);
+
+protected:
+ void loadWork();
+ rcmodel *makeRcmodelDrv();
+ void allocPoints();
+ void allocTerms(int nterms);
+ ts_point *findPt(ParasiticNode *node);
+ void makeRcmodelDfs(ts_point *pdrv);
+ void getRC();
+ float pinCapacitance(ParasiticNode *node);
+ void setTerms(ts_point *pdrv);
+ void makeRcmodelFromTs();
+ rcmodel *makeRcmodelFromW();
+
+ ConcreteParasiticNetwork *parasitic_network_;
+ const Pin *drvr_pin_;
+ float coupling_cap_factor_;
+ const TransRiseFall *tr_;
+ const OperatingConditions *op_cond_;
+ const Corner *corner_;
+ const MinMax *cnst_min_max_;
+ const ParasiticAnalysisPt *ap_;
+ // ParasiticNode -> ts_point index.
+ ArnolidPtMap pt_map_;
+
+ // rcWork
+ ts_point *ts_pointV;
+ int ts_pointN;
+ int ts_pointNmax;
+ static const int ts_point_count_incr_;
+ ts_edge *ts_edgeV;
+ int ts_edgeN;
+ int ts_edgeNmax;
+ static const int ts_edge_count_incr_;
+ ts_edge **ts_eV;
+ ts_edge **ts_stackV;
+ int *ts_ordV;
+ ts_point **ts_pordV;
+ int ts_ordN;
+
+ int termNmax;
+ int termN;
+ ts_point *pterm0;
+ const Pin **pinV; // fixed order, offset from pterm0
+ int *termV; // from drv-ordered to fixed order
+ int *outV; // from drv-ordered to ts_pordV
+
+ int dNmax;
+ double *d;
+ double *e;
+ double *U0;
+ double **U;
+
+ double ctot_;
+ double sqc_;
+ double *_u0, *_u1;
+ double *y, *iv;
+ double *c, *r;
+ int *par;
+ int order;
+};
+
+} // namespace
+#endif
+
diff --git a/dcalc/DelayCalc.cc b/dcalc/DelayCalc.cc
index 469e1772..3c6bf26c 100644
--- a/dcalc/DelayCalc.cc
+++ b/dcalc/DelayCalc.cc
@@ -21,6 +21,7 @@
#include "LumpedCapDelayCalc.hh"
#include "SimpleRCDelayCalc.hh"
#include "DmpDelayCalc.hh"
+#include "ArnoldiDelayCalc.hh"
#include "DelayCalc.hh"
namespace sta {
@@ -37,6 +38,7 @@ registerDelayCalcs()
registerDelayCalc("simple_rc", makeSimpleRCDelayCalc);
registerDelayCalc("dmp_ceff_elmore", makeDmpCeffElmoreDelayCalc);
registerDelayCalc("dmp_ceff_two_pole", makeDmpCeffTwoPoleDelayCalc);
+ registerDelayCalc("arnoldi", makeArnoldiDelayCalc);
}
void
diff --git a/dcalc/Makefile.am b/dcalc/Makefile.am
index 35f710d5..00fae2ba 100644
--- a/dcalc/Makefile.am
+++ b/dcalc/Makefile.am
@@ -18,6 +18,9 @@ lib_LTLIBRARIES = libdcalc.la
include_HEADERS = \
ArcDelayCalc.hh \
+ Arnoldi.hh \
+ ArnoldiDelayCalc.hh \
+ ArnoldiReduce.hh \
DelayCalc.hh \
DcalcAnalysisPt.hh \
DmpCeff.hh \
@@ -32,6 +35,8 @@ include_HEADERS = \
libdcalc_la_SOURCES = \
ArcDelayCalc.cc \
+ ArnoldiDelayCalc.cc \
+ ArnoldiReduce.cc \
DcalcAnalysisPt.cc \
DelayCalc.cc \
DmpCeff.cc \