arnoldi delay calculator

2018-10-24 14:22:21 -07:00 · 2018-10-24 14:22:21 -07:00 · b952c58f92
parent c6a90ec151
commit b952c58f92
8 changed files with 2425 additions and 0 deletions
--- a/dcalc/Arnoldi.hh
+++ b/dcalc/Arnoldi.hh
@ -0,0 +1,83 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+// (c) 2018 Nefelus, Inc.
+//
+// Author: W. Scott
+
+#ifndef ARNOLDI_H
+#define ARNOLDI_H
+
+#include "ConcreteParasiticsPvt.hh"
+
+namespace sta {
+
+struct delay_work;
+class rcmodel;
+
+class GateTableModel;
+class Pin;
+
+//
+// single-driver arnoldi model
+//
+class arnoldi1
+{
+public:
+  arnoldi1() { order=0; n=0; d=NULL; e=NULL; U=NULL; ctot=0.0; sqc=0.0; }
+  ~arnoldi1();
+  double elmore(int term_index);
+
+  //
+  // calculate poles/residues for given rdrive
+  //
+  void calculate_poles_res(delay_work *D,double rdrive);
+
+public:
+  int order;
+  int n;     // number of terms, including driver
+  double *d; // [order]
+  double *e; // [order-1]
+  double **U; // [order][n]
+  double ctot;
+  double sqc;
+};
+
+// This is the rcmodel, without Rd.
+// n is the number of terms
+// The vectors U[j] are of size n
+class rcmodel : public ConcreteParasitic,
+		public arnoldi1
+{
+public:
+  rcmodel();
+  virtual ~rcmodel();
+  virtual float capacitance() const;
+
+  const Pin **pinV; // [n]
+};
+
+struct timing_table
+{
+  GateTableModel *table;
+  const LibertyCell *cell;
+  const Pvt *pvt;
+  float in_slew;
+  float relcap;
+};
+
+} // namespace
+#endif
--- a/dcalc/Arnoldi.txt
+++ b/dcalc/Arnoldi.txt
@ -0,0 +1,57 @@
+The method is used for simulation with a time-and-voltage-dependent
+current source.  But it is simpler to describe with a linear driver.
+Suppose we are given:
+  voltage nodes 1,..n initialized to V[j]=1.0
+  a resistor network described by a conductance matrix G[j,k]
+  a drive resistance from node 1 to ground, Rdrv
+  capacitance of the nodes, c[j], also written as a
+  diagonal matrix C[j,k]
+The node voltages will fall to zero with time.
+Matrix equation:
+  GV+CdV/dt = -(V[0]/Rdrv)e0
+where e0 is the unit vector (1,0,0,..0).
+Let G' be the matrix formed by taking G and adding 1/Rdrv in the
+[0,0] position.  Then we are solving  G'V + CdV/dt = 0.
+Let R be the inverse of G'.  (In implementation, R may not actually
+be formed as a matrix, instead some method of producing RV given V).
+The exact solution would diagonalize sqrt(C) R sqrt(C).
+
+The Arnoldi method takes a matrix M and a vector of interest V, and
+considers the subspace of vectors near V in terms of the action of M,
+that is, the space spanned by V, MV, MMV, etc, for a small number of
+powers.  We do this here, but instead of finding the part of MV orthogonal
+to V, we find the part of RCV that is C-orthogonal of V.  We use C
+as the metric, so the basis vectors U0, U1, U2 that we generate satisfy
+  Ui.CUj = (i==j?1:0)
+
+U0 = (1,1,..1)/sqrt(sum C)
+  representing the initial value of V, V[j] = sqrt(n)U0[j] at t=0.
+  sum(C) = C[0]+..C[n-1], so U0.C U0 = 1.
+Let:
+  W = R C U0
+  d0 = U0.C W
+  W' = W - d0 U0
+  e0 = sqrt(W'.C W')
+  U1 = W'/e0
+Then:  U0.C U0 = U1.C U1 = 1, U0.C U1 = 0, and
+  R C U0 = d0 U0 + e0 U1
+Next step:
+  W = R C U1
+  d1 = U1.C W
+  W' = W - d1 U1 - e0 U0
+  e1 = sqrt(W'.C W')
+  U2 = W'/e1
+and we have U2.C U2 = 1, U2.C U1 = U2.C U0 = 0, and
+  RC U1 = e0 U0 + d1 U1 + e1 U2
+In this way, RC, which in nonsymmetric in the original basis,
+becomes a symmetric tridiagonal matrix in the U0,U1,.. basis.
+We stop at, say, U3.  The resulting 4x4 tridiagonal matrix is
+positive definite, because it is the projection of sqrt(C)R sqrt(C)
+to a subspace.  So the eigenvalues of this small tridiagonal matrix
+are guaranteed to be positive.  This is the advantage over AWE.
+
+In the actual implementation, I remember there was some way of
+isolating the node 0 where drive resistance is attached, so that the
+tridiagonal matrix (d,e) can be recalculated without knowing Rdrv,
+and then just the first d0,e0 updated when the Rdrv is known, or 
+when Rdrv changes in a simulation.
--- a/dcalc/ArnoldiDelayCalc.cc
+++ b/dcalc/ArnoldiDelayCalc.cc
--- a/dcalc/ArnoldiDelayCalc.hh
+++ b/dcalc/ArnoldiDelayCalc.hh
@ -0,0 +1,26 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef ARNOLDIDELAYCALC_H
+#define ARNOLDIDELAYCALC_H
+
+namespace sta {
+
+ArcDelayCalc *
+makeArnoldiDelayCalc(StaState *sta);
+
+} // namespace
+#endif
--- a/dcalc/ArnoldiReduce.cc
+++ b/dcalc/ArnoldiReduce.cc
@ -0,0 +1,646 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+// (c) 2018 Nefelus, Inc.
+//
+// Author: W. Scott
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <math.h>
+#include "Machine.hh"
+#include "Debug.hh"
+#include "Units.hh"
+#include "MinMax.hh"
+#include "Sdc.hh"
+#include "Network.hh"
+#include "ArnoldiReduce.hh"
+#include "Arnoldi.hh"
+#include "ConcreteParasiticsPvt.hh"
+
+namespace sta {
+
+rcmodel::rcmodel() :
+  pinV(NULL)
+{
+}
+
+rcmodel::~rcmodel()
+{
+  free(pinV);
+}
+
+float
+rcmodel::capacitance() const
+{
+  return ctot;
+}
+
+struct ts_point
+{
+  ParasiticNode *node_;
+  int eN;
+  bool is_term;
+  int tindex; // index into termV of corresponding term
+  ts_edge **eV;
+  bool visited;
+  ts_edge *in_edge;
+  int ts;
+  double c;
+  double r;
+};
+
+struct ts_edge
+{
+  ConcreteParasiticResistor *resistor_;
+  ts_point *from;
+  ts_point *to;
+};
+
+////////////////////////////////////////////////////////////////
+
+
+const int ArnoldiReduce::ts_point_count_incr_ = 1024;
+const int ArnoldiReduce::ts_edge_count_incr_ = 1024;
+
+ArnoldiReduce::ArnoldiReduce(StaState *sta) :
+  StaState(sta),
+  ts_pointNmax(1024),
+  ts_edgeNmax(1024),
+  termNmax(256),
+  dNmax(8)
+{
+  ts_pointV = (ts_point*)malloc(ts_pointNmax*sizeof(ts_point));
+  ts_ordV = (int*)malloc(ts_pointNmax*sizeof(int));
+  ts_pordV = (ts_point**)malloc(ts_pointNmax*sizeof(ts_point*));
+  _u0 = (double*)malloc(ts_pointNmax*sizeof(double));
+  _u1 = (double*)malloc(ts_pointNmax*sizeof(double));
+  y = (double*)malloc(ts_pointNmax*sizeof(double));
+  iv = (double*)malloc(ts_pointNmax*sizeof(double));
+  r = (double*)malloc(ts_pointNmax*sizeof(double));
+  c = (double*)malloc(ts_pointNmax*sizeof(double));
+  par = (int*)malloc(ts_pointNmax*sizeof(int));
+
+  ts_edgeV = (ts_edge*)malloc(ts_edgeNmax*sizeof(ts_edge));
+  ts_stackV  = (ts_edge**)malloc(ts_edgeNmax*sizeof(ts_edge*));
+  ts_eV      = (ts_edge**)malloc(2*ts_edgeNmax*sizeof(ts_edge*));
+
+  pinV = (const Pin**)malloc(termNmax*sizeof(const Pin*));
+  termV = (int*)malloc(termNmax*sizeof(int));
+  outV = (int*)malloc(termNmax*sizeof(int));
+
+  d = (double*)malloc(dNmax*sizeof(double));
+  e = (double*)malloc(dNmax*sizeof(double));
+  U = (double**)malloc(dNmax*sizeof(double*));
+  U0 = (double*)malloc(dNmax*termNmax*sizeof(double));
+  int h;
+  for (h=0;h<dNmax;h++) U[h] = U0 + h*termNmax;
+}
+
+ArnoldiReduce::~ArnoldiReduce()
+{
+  free(U0);
+  free(U);
+  free(e);
+  free(d);
+  free(outV);
+  free(termV);
+  free(pinV);
+  free(ts_eV);
+  free(ts_edgeV);
+  free(ts_stackV);
+  free(par);
+  free(c);
+  free(r);
+  free(iv);
+  free(y);
+  free(_u1);
+  free(_u0);
+  free(ts_pordV);
+  free(ts_ordV);
+  free(ts_pointV);
+}
+
+Parasitic *
+ArnoldiReduce::reduceToArnoldi(Parasitic *parasitic,
+			       const Pin *drvr_pin,
+			       float coupling_cap_factor,
+			       const TransRiseFall *tr,
+			       const OperatingConditions *op_cond,
+			       const Corner *corner,
+			       const MinMax *cnst_min_max,
+			       const ParasiticAnalysisPt *ap)
+{
+  parasitic_network_ = reinterpret_cast<ConcreteParasiticNetwork*>(parasitic);
+  drvr_pin_ = drvr_pin;
+  coupling_cap_factor_ = coupling_cap_factor;
+  tr_ = tr;
+  op_cond_ = op_cond;
+  corner_ = corner;
+  cnst_min_max_ = cnst_min_max;
+  ap_ = ap;
+  loadWork();
+  return makeRcmodelDrv();
+}
+
+void
+ArnoldiReduce::loadWork()
+{
+  pt_map_.clear();
+
+  int resistor_count = 0;
+  ConcreteParasiticDeviceSet devices;
+  parasitic_network_->devices(devices);
+  ConcreteParasiticDeviceSet::Iterator device_iter(devices);
+  while (device_iter.hasNext()) {
+    ParasiticDevice *device = device_iter.next();
+    if (parasitics_->isResistor(device))
+      resistor_count++;
+  }
+
+  termN = parasitic_network_->pinNodes()->size();
+  int subnode_count = parasitic_network_->subNodes()->size();
+  ts_pointN = subnode_count + 1 + termN;
+  ts_edgeN = resistor_count;
+  allocPoints();
+  allocTerms(termN);
+  ts_point *p0 = ts_pointV;
+  pterm0 = p0 + subnode_count + 1;
+  ts_point *pend = p0 + ts_pointN;
+  ts_point *p;
+  ts_edge *e0 = ts_edgeV;
+  ts_edge *eend = e0 + ts_edgeN;
+
+  ts_edge *e;
+  int tindex;
+  for (p = p0; p!=pend; p++) {
+    p->node_ = NULL;
+    p->eN = 0;
+    p->is_term = false;
+  }
+  pend = pterm0;
+  e = e0;
+  int index = 0;
+  ConcreteParasiticSubNodeMap::Iterator 
+    sub_node_iter(parasitic_network_->subNodes());
+  while (sub_node_iter.hasNext()) {
+    ConcreteParasiticSubNode *node = sub_node_iter.next();
+    pt_map_[node] = index;
+    p = p0 + index;
+    p->node_ = node;
+    p->eN = 0;
+    p->is_term = false;
+    index++;
+  }
+
+  ConcreteParasiticPinNodeMap::Iterator 
+    pin_node_iter(parasitic_network_->pinNodes());
+  while (pin_node_iter.hasNext()) {
+    ConcreteParasiticPinNode *node = pin_node_iter.next();
+    p = pend++;
+    pt_map_[node] = p - p0;
+    p->node_ = node;
+    p->eN = 0;
+    p->is_term = true;
+    tindex = p - pterm0;
+    p->tindex = tindex;
+    const Pin *pin = parasitics_->connectionPin(node);
+    pinV[tindex] = pin;
+  }
+  
+  ts_edge **eV = ts_eV;
+  ConcreteParasiticDeviceSet::Iterator device_iter2(devices);
+  while (device_iter2.hasNext()) {
+    ParasiticDevice *device = device_iter2.next();
+    if (parasitics_->isResistor(device)) {
+      ConcreteParasiticResistor *resistor = 
+	reinterpret_cast<ConcreteParasiticResistor*>(device);
+      ts_point *pt1 = findPt(resistor->node1());
+      ts_point *pt2 = findPt(resistor->node2());
+      e->from = pt1;
+      e->to = pt2;
+      e->resistor_ = resistor;
+      pt1->eN++;
+      if (e->from != e->to)
+	pt2->eN++;
+      e++;
+    }
+  }
+
+  for (p=p0;p!=pend;p++) {
+    if (p->node_) {
+      p->eV = eV;
+      eV += p->eN;
+      p->eN = 0;
+    }
+  }
+  for (e=e0;e!=eend;e++) {
+    e->from->eV[e->from->eN++] = e;
+    if (e->to != e->from)
+      e->to->eV[e->to->eN++] = e;
+  }
+}
+
+void
+ArnoldiReduce::allocPoints()
+{
+  if (ts_pointN > ts_pointNmax) {
+    free(par);
+    free(c);
+    free(r);
+    free(iv); free(y); free(_u1); free(_u0);
+    free(ts_pordV);
+    free(ts_ordV);
+    free(ts_pointV);
+    ts_pointNmax = ts_pointN + ts_point_count_incr_;
+    ts_pointV = (ts_point*)malloc(ts_pointNmax*sizeof(ts_point));
+    ts_ordV = (int*)malloc(ts_pointNmax*sizeof(int));
+    ts_pordV = (ts_point**)malloc(ts_pointNmax*sizeof(ts_point*));
+    _u0 = (double*)malloc(ts_pointNmax*sizeof(double));
+    _u1 = (double*)malloc(ts_pointNmax*sizeof(double));
+    y = (double*)malloc(ts_pointNmax*sizeof(double));
+    iv = (double*)malloc(ts_pointNmax*sizeof(double));
+    r = (double*)malloc(ts_pointNmax*sizeof(double));
+    c = (double*)malloc(ts_pointNmax*sizeof(double));
+    par = (int*)malloc(ts_pointNmax*sizeof(int));
+  }
+  if (ts_edgeN > ts_edgeNmax) {
+    free(ts_edgeV);
+    free(ts_eV);
+    free(ts_stackV);
+    ts_edgeNmax = ts_edgeN + ts_edge_count_incr_;
+    ts_edgeV = (ts_edge*)malloc(ts_edgeNmax*sizeof(ts_edge));
+    ts_stackV  = (ts_edge**)malloc(ts_edgeNmax*sizeof(ts_edge*));
+    ts_eV      = (ts_edge**)malloc(2*ts_edgeNmax*sizeof(ts_edge*));
+  }
+}
+
+void
+ArnoldiReduce::allocTerms(int nterms)
+{
+  if (nterms > termNmax) {
+    free(U0);
+    free(outV);
+    free(termV);
+    free(pinV);
+    termNmax = nterms+256;
+    pinV = (const Pin**)malloc(termNmax*sizeof(const Pin*));
+    termV = (int*)malloc(termNmax*sizeof(int));
+    outV = (int*)malloc(termNmax*sizeof(int));
+
+    U0 = (double*)malloc(dNmax*termNmax*sizeof(double));
+    int h;
+    for (h=0;h<dNmax;h++) U[h] = U0 + h*termNmax;
+  }
+}
+
+ts_point *
+ArnoldiReduce::findPt(ParasiticNode *node)
+{
+  return &ts_pointV[pt_map_[reinterpret_cast<ConcreteParasiticNode*>(node)]];
+}
+
+rcmodel *
+ArnoldiReduce::makeRcmodelDrv()
+{
+  ParasiticNode *drv_node = parasitics_->findNode(parasitic_network_,
+						  drvr_pin_);
+  ts_point *pdrv = findPt(drv_node);
+  makeRcmodelDfs(pdrv);
+  getRC();
+  if (ctot_ < 1e-22) // 1e-10ps
+    return NULL;
+  setTerms(pdrv);
+  makeRcmodelFromTs();
+  rcmodel *mod = makeRcmodelFromW();
+  return mod;
+}
+
+#define ts_orient( pp, ee) \
+  if (ee->from!=pp) { ee->to = ee->from; ee->from = pp; }
+
+void
+ArnoldiReduce::makeRcmodelDfs(ts_point *pdrv)
+{
+  bool loop = false;
+  int k;
+  ts_point *p,*q;
+  ts_point *p0 = ts_pointV;
+  ts_point *pend = p0 + ts_pointN;
+  for (p=p0;p!=pend;p++)
+    p->visited = 0;
+  ts_edge *e;
+
+  ts_edge **stackV = ts_stackV;
+  int stackN = 1;
+  stackV[0] = e = pdrv->eV[0];
+  ts_orient(pdrv,e);
+  pdrv->visited = 1;
+  pdrv->in_edge = NULL;
+  pdrv->ts = 0;
+  ts_ordV[0] = pdrv-p0;
+  ts_pordV[0] = pdrv;
+  ts_ordN = 1;
+  while (stackN>0) {
+    e = stackV[stackN-1];
+    q = e->to;
+
+    if (q->visited) {
+      // if it is a one-rseg self-loop,
+      // ignore, and do not even set *loop
+      if (e->to != e->from)
+        loop = true;
+    } else {
+      // try to descend
+      q->visited = 1;
+      q->ts = ts_ordN++;
+      ts_pordV[q->ts] = q;
+      ts_ordV[q->ts] = q-p0;
+      q->in_edge = e;
+      if (q->eN>1) {
+        for (k=0;k<q->eN;k++) if (q->eV[k] != e) break;
+        e = q->eV[k];
+        ts_orient(q,e);
+        stackV[stackN++] = e;
+        continue; // descent
+      }
+    }
+    // try to ascend
+    while (--stackN>=0) {
+      e = stackV[stackN];
+      p = e->from;
+      // find e in p->eV
+      for (k=0;k<p->eN;k++) if (p->eV[k]==e) break;
+      // if (k==p->eN) notice(0,"ERROR, e not found!\n");
+      ++k;
+      if (k>=p->eN) continue;
+      e = p->eV[k];
+      // check that next sibling is not the incoming edge
+      if (stackN>0 && e==stackV[stackN-1]) {
+          ++k;
+          if (k>=p->eN) continue;
+          e = p->eV[k];
+      }
+      ts_orient(p,e);
+      stackV[stackN++] = e;
+      break;
+    }
+
+  } // while (stackN)
+
+  if (loop)
+    debugPrint1(debug_, "arnoldi", 1,
+		"net %s loop\n",
+		network_->pathName(drvr_pin_));
+}
+
+// makeRcmodelGetRC
+void
+ArnoldiReduce::getRC()
+{
+  ts_point *p, *p0 = ts_pointV;
+  ts_point *pend = p0 + ts_pointN;
+  ctot_ = 0.0;
+  for (p=p0;p!=pend;p++) {
+    p->c = 0.0;
+    p->r = 0.0;
+    if (p->node_) {
+      ParasiticNode *node = p->node_;
+      double cap = parasitics_->nodeGndCap(node, ap_)
+	+ pinCapacitance(node);
+      if (cap > 0.0) {
+	p->c = cap;
+	ctot_ += cap;
+      }
+      else
+	p->c = 0.0;
+      if (p->in_edge && p->in_edge->resistor_)
+        p->r = parasitics_->value(p->in_edge->resistor_, ap_);
+      if (!(p->r>=0.0 && p->r<100e+3)) { // 0 < r < 100kohm
+	debugPrint2(debug_, "arnoldi", 1,
+		    "R value %g out of range, drvr pin %s\n",
+		    p->r,
+		    network_->pathName(drvr_pin_));
+      }
+    }
+  }
+}
+
+float
+ArnoldiReduce::pinCapacitance(ParasiticNode *node)
+{
+  const Pin *pin = parasitics_->connectionPin(node);
+  float pin_cap = 0.0;
+  if (pin) {
+    Port *port = network_->port(pin);
+    LibertyPort *lib_port = network_->libertyPort(port);
+    if (lib_port)
+      pin_cap = sdc_->pinCapacitance(pin,tr_, op_cond_, corner_, cnst_min_max_);
+    else if (network_->isTopLevelPort(pin))
+      pin_cap = sdc_->portExtCap(port, tr_, cnst_min_max_);
+  }
+  return pin_cap;
+}
+
+void
+ArnoldiReduce::setTerms(ts_point *pdrv)
+{
+  // termV: from drv-ordered to fixed order
+  // outV:  from drv-ordered to ts_pordV
+  ts_point *p;
+  int k,k0;
+  termV[0] = k0 = pdrv->tindex;
+  for (k=1;k<termN;k++) {
+    if (k==k0) termV[k] = 0;
+    else termV[k] = k;
+  }
+  for (k=0;k<termN;k++) {
+    p = pterm0 + termV[k];
+    outV[k] = p->ts;
+  }
+}
+
+// The guts of the arnoldi reducer.
+void
+ArnoldiReduce::makeRcmodelFromTs()
+{
+  ts_point *p, *p0 = ts_pointV;
+  int n = ts_ordN;
+  int nterms = termN;
+  int i,j,k,h;
+  if (debug_->check("arnoldi", 1)) {
+    for (k=0;k<ts_ordN;k++) {
+      p = ts_pordV[k];
+      debugPrint3(debug_, "arnoldi", 1, "T%d,P%ld c=%s",
+		  p->ts,p-p0,
+		  units_->capacitanceUnit()->asString(p->c));
+      if (p->is_term)
+	debug_->print(" term%d",p->tindex);
+      if (p->in_edge)
+	debug_->print("  from T%d,P%ld r=%s",
+		      p->in_edge->from->ts,
+		      p->in_edge->from-p0,
+		      units_->resistanceUnit()->asString(p->r));
+      debug_->print("\n");
+    }
+    for (i=0;i<nterms;i++)
+      debugPrint2(debug_, "arnoldi", 1, "outV[%d] = T%d\n",i,outV[i]);
+  }
+
+  int max_order = 5;
+
+  double *u0, *u1;
+  u0 = _u0; u1 = _u1;
+  double sum,e1;
+  order = max_order;
+  if (n < order)
+    order = n;
+
+  par[0] = -1; r[0] = 0.0;
+  c[0] = ts_pordV[0]->c;
+  for (j=1;j<n;j++) {
+    p = ts_pordV[j];
+    c[j] = p->c;
+    r[j] = p->r;
+    par[j] = p->in_edge->from->ts;
+  }
+
+  sum = 0.0;
+  for (j=0;j<n;j++) sum += c[j];
+  debugPrint1(debug_, "arnoldi", 1, "ctot = %s\n",
+	      units_->capacitanceUnit()->asString(sum));
+  ctot_ = sum;
+  sqc_ = sqrt(sum);
+  double sqrt_ctot_inv = 1.0/sqc_;
+  for (j=0;j<n;j++) u0[j] = sqrt_ctot_inv;
+  for (h=0;h<order;h++) {
+    for (i=0;i<nterms;i++) U[h][i] = u0[outV[i]];
+
+    // y = R C u0
+    for (j=0;j<n;j++) {
+      iv[j] = 0.0;
+    }
+    for (j=n-1;j>0;j--) {
+      iv[j] += c[j]*u0[j];
+      iv[par[j]] += iv[j];
+    }
+    iv[0] += c[0]*u0[0];
+    y[0] = 0.0;
+    for (j=1;j<n;j++) {
+      y[j] = y[par[j]] + r[j]*iv[j];
+    }
+
+    // d[h] = u0 C y
+    sum = 0.0;
+    for (j=1;j<n;j++) {
+      sum += u0[j]*c[j]*y[j];
+    }
+    d[h] = sum;
+    if (h==order-1) break;
+    if (d[h]<1e-13) { // .1ps
+       order = h+1;
+       break;
+    }
+
+    // y = y - d[h]*u0 - e[h-1]*u1
+    if (h==0) {
+      for (j=0;j<n;j++) y[j] -= sum*u0[j];
+    } else {
+      e1 = e[h-1];
+      for (j=0;j<n;j++) y[j] -= sum*u0[j] + e1*u1[j];
+    }
+
+    // e[h] = sqrt(y C y)
+    // u1 = y/e[h]
+    sum = 0.0;
+    for (j=0;j<n;j++) {
+      sum += c[j]*y[j]*y[j];
+    }
+    if (sum<1e-30) { // (1e-6ns)^2
+      order = h+1;
+      break;
+    }
+    e[h] = sqrt(sum);
+    sum = 1.0/e[h];
+    for (j=0;j<n;j++) u1[j] = sum*y[j];
+
+    // swap u0, u1
+    if (h%2) {
+      u0 = _u0; u1 = _u1;
+    } else {
+      u0 = _u1; u1 = _u0;
+    }
+  }
+
+  if (debug_->check("arnoldi", 1)) {
+    debugPrint1(debug_, "arnoldi", 1,
+		"tridiagonal reduced matrix, drvr pin %s\n",
+		network_->pathName(drvr_pin_));
+    debugPrint2(debug_, "arnoldi", 1, "order %d n %d\n",order,n);
+    for (h=0;h<order;h++) {
+      debug_->print("d[%d] %s",
+		    h,
+		    units_->timeUnit()->asString(d[h]));
+      if (h<order-1)
+	debug_->print("    e[%d] %s",
+		      h,
+		      units_->timeUnit()->asString(e[h]));
+      debug_->print("\n");
+      debug_->print("U[%d]",h);
+      for (i=0;i<nterms;i++)
+	debug_->print(" %6.2e",U[h][i]);
+      debug_->print("\n");
+    }
+  }
+}
+
+rcmodel *
+ArnoldiReduce::makeRcmodelFromW()
+{
+  int j,h;
+  int n = termN;
+  rcmodel *mod = new rcmodel();
+  mod->order = order;
+  mod->n = n;
+  if (order>0) {
+    int totd = order + order - 1 + order*n;
+    mod->d = (double *)malloc(totd*sizeof(double));
+    if (order>1) mod->e = mod->d + order;
+    else mod->e = NULL;
+    mod->U = (double **)malloc(order*sizeof(double*));
+    mod->U[0] = mod->d + order + order - 1;
+    for (h=1;h<order;h++) mod->U[h]=mod->U[0] + h*n;
+    for (h=0;h<order;h++) {
+      mod->d[h] = d[h];
+      if (h<order-1) mod->e[h] = e[h];
+      for (j=0;j<n;j++)
+        mod->U[h][j] = U[h][j];
+    }
+  }
+
+  mod->pinV = (const Pin **)malloc(n*sizeof(const Pin*));
+  for (j=0;j<n;j++) {
+    int k = termV[j];
+    mod->pinV[j] = pinV[k];
+  }
+
+  mod->ctot = ctot_;
+  mod->sqc = sqc_;
+  return mod;
+}
+
+} // namespace
--- a/dcalc/ArnoldiReduce.hh
+++ b/dcalc/ArnoldiReduce.hh
@ -0,0 +1,116 @@
+// OpenSTA, Static Timing Analyzer
+// Copyright (c) 2018, Parallax Software, Inc.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+// (c) 2018 Nefelus, Inc.
+//
+// Author: W. Scott
+
+#ifndef STA_ARNOLDI_REDUCE_H
+#define STA_ARNOLDI_REDUCE_H
+
+#include "Map.hh"
+#include "NetworkClass.hh"
+#include "ParasiticsClass.hh"
+
+namespace sta {
+
+class ConcreteParasiticNetwork;
+class ConcreteParasiticNode;
+
+class rcmodel;
+struct ts_edge;
+struct ts_point;
+
+typedef Map<ConcreteParasiticNode*, int> ArnolidPtMap;
+
+class ArnoldiReduce : public StaState
+{
+public:
+  ArnoldiReduce(StaState *sta);
+  ~ArnoldiReduce();
+  Parasitic *reduceToArnoldi(Parasitic *parasitic,
+			     const Pin *drvr_pin,
+			     float coupling_cap_factor,
+			     const TransRiseFall *tr,
+			     const OperatingConditions *op_cond,
+			     const Corner *corner,
+			     const MinMax *cnst_min_max,
+			     const ParasiticAnalysisPt *ap);
+
+protected:
+  void loadWork();
+  rcmodel *makeRcmodelDrv();
+  void allocPoints();
+  void allocTerms(int nterms);
+  ts_point *findPt(ParasiticNode *node);
+  void makeRcmodelDfs(ts_point *pdrv);
+  void getRC();
+  float pinCapacitance(ParasiticNode *node);
+  void setTerms(ts_point *pdrv);
+  void makeRcmodelFromTs();
+  rcmodel *makeRcmodelFromW();
+  
+  ConcreteParasiticNetwork *parasitic_network_;
+  const Pin *drvr_pin_;
+  float coupling_cap_factor_;
+  const TransRiseFall *tr_;
+  const OperatingConditions *op_cond_;
+  const Corner *corner_;
+  const MinMax *cnst_min_max_;
+  const ParasiticAnalysisPt *ap_;
+  // ParasiticNode -> ts_point index.
+  ArnolidPtMap pt_map_;
+
+  // rcWork
+  ts_point *ts_pointV;
+  int ts_pointN;
+  int ts_pointNmax;
+  static const int ts_point_count_incr_;
+  ts_edge *ts_edgeV;
+  int ts_edgeN;
+  int ts_edgeNmax;
+  static const int ts_edge_count_incr_;
+  ts_edge **ts_eV;
+  ts_edge **ts_stackV;
+  int *ts_ordV;
+  ts_point **ts_pordV;
+  int ts_ordN;
+
+  int termNmax;
+  int termN;
+  ts_point *pterm0;
+  const Pin **pinV; // fixed order, offset from pterm0
+  int *termV; // from drv-ordered to fixed order
+  int *outV;  // from drv-ordered to ts_pordV
+
+  int dNmax;
+  double *d;
+  double *e;
+  double *U0;
+  double **U;
+
+  double ctot_;
+  double sqc_;
+  double *_u0, *_u1;
+  double *y, *iv;
+  double *c, *r;
+  int    *par;
+  int order;
+};
+
+} // namespace
+#endif
+
--- a/dcalc/DelayCalc.cc
+++ b/dcalc/DelayCalc.cc
@ -21,6 +21,7 @@
 #include "LumpedCapDelayCalc.hh"
 #include "SimpleRCDelayCalc.hh"
 #include "DmpDelayCalc.hh"
+#include "ArnoldiDelayCalc.hh"
 #include "DelayCalc.hh"

 namespace sta {
@ -37,6 +38,7 @@ registerDelayCalcs()
  registerDelayCalc("simple_rc", makeSimpleRCDelayCalc);
  registerDelayCalc("dmp_ceff_elmore", makeDmpCeffElmoreDelayCalc);
  registerDelayCalc("dmp_ceff_two_pole", makeDmpCeffTwoPoleDelayCalc);
+  registerDelayCalc("arnoldi", makeArnoldiDelayCalc);
 }

 void
--- a/dcalc/Makefile.am
+++ b/dcalc/Makefile.am
@ -18,6 +18,9 @@ lib_LTLIBRARIES = libdcalc.la

 include_HEADERS = \
 	ArcDelayCalc.hh \
+	Arnoldi.hh \
+	ArnoldiDelayCalc.hh \
+	ArnoldiReduce.hh \
 	DelayCalc.hh \
 	DcalcAnalysisPt.hh \
 	DmpCeff.hh \
@ -32,6 +35,8 @@ include_HEADERS = \

 libdcalc_la_SOURCES = \
 	ArcDelayCalc.cc \
+	ArnoldiDelayCalc.cc \
+	ArnoldiReduce.cc \
 	DcalcAnalysisPt.cc \
 	DelayCalc.cc \
 	DmpCeff.cc \