OpenSTA/dcalc/ArnoldiDelayCalc.cc

1496 lines
38 KiB
C++

// OpenSTA, Static Timing Analyzer
// Copyright (c) 2025, Parallax Software, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software.
//
// Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
//
// This notice may not be removed or altered from any source distribution.
// (c) 2018 Nefelus, Inc.
//
// Author: W. Scott
#include "ArnoldiDelayCalc.hh"
#include <cstdio>
#include <cmath> // abs
#include "Report.hh"
#include "Debug.hh"
#include "Units.hh"
#include "Liberty.hh"
#include "TimingModel.hh"
#include "TimingArc.hh"
#include "TableModel.hh"
#include "PortDirection.hh"
#include "Network.hh"
#include "Graph.hh"
#include "Parasitics.hh"
#include "Sdc.hh"
#include "DcalcAnalysisPt.hh"
#include "DelayCalc.hh"
#include "ArcDelayCalc.hh"
#include "LumpedCapDelayCalc.hh"
#include "GraphDelayCalc.hh"
#include "Variables.hh"
#include "Arnoldi.hh"
#include "ArnoldiReduce.hh"
namespace sta {
// wireload8 is n^2
// do not delete arnoldi parasitics
// handle rspf parasitics?
// mv static functions to ArnoldiDelayCalc
// need slew only lookup for
// ra_delay
// ra_get_r
// ra_get_s
using std::string;
using std::abs;
using std::vector;
// Forward declarations of the file-local work structures and helpers
// that are defined further down in this file.
struct delay_work;
struct delay_c;
////////////////////////////////////////////////////////////////
// Allocate a delay_work workspace.
static delay_work *delay_work_create();
// Free a workspace and the arrays it points to.
static void
delay_work_destroy(delay_work *D);
// Return the residue row D->resi[term_index].
static double *
delay_work_get_residues(delay_work *D,
int term_index);
// Eigenvalues/eigenvectors of a symmetric tridiagonal matrix.
// Returns false when n > 32 or the iteration limit is hit.
static bool
tridiagEV(int n,double *d,double *e,double *p,double **v);
//////////////////////////////////////////////////////////////
// Threshold dependent constants. delay_work keeps two of these (cV[0], cV[1])
// and delay_work_set_thresholds fills in the one selected by the edge.
struct delay_c
{
double slew_derate; // derate stored by delay_work_set_thresholds
double vlo; // lower slew threshold
double vhi; // upper slew threshold
double vlg; // log(vhi/vlo)
double smin; // computed by ra_calc_c
double x1; // computed by ra_calc_c
double y1; // computed by ra_calc_c
double vmid; // falling convention, should be >= 0.5
};
// workspace for pole-residue -> delay calculations
// delay_work
// max order is 32
// Scratch storage shared by the pole/residue and ceff routines.
// Created by delay_work_create, grown by delay_work_alloc.
struct delay_work
{
double slew_derate; // derate passed to the last delay_work_set_thresholds call
double slew_factor; // (0,1.0] table_slew = slew_factor * full_slew
delay_c cV[2]; // [0] used when !rising, [1] when rising
delay_c *c; // points at the cV entry currently in use
double lo_thresh; // copy of c->vlo
double hi_thresh; // copy of c->vhi
int nmax; // number of rows in resi / columns in each w[] row
double poles[32]; // 1/tau
double **resi; // resi[jrec][h] h=0,..order
double *v[32]; // eigenvector rows written by tridiagEV (32x32 storage)
double *w[32]; // w[h][k], written by calculate_poles_res
double aa[32]; // aa[h], written by calculate_poles_res
};
////////////////////////////////////////////////////////////////
// Delay calculator that reduces the driver's parasitic network to an
// arnoldi model (rcmodel) and derives gate/wire delays and slews from its
// poles and residues. Falls back to LumpedCapDelayCalc when no table model
// or reduced model is available.
class ArnoldiDelayCalc : public LumpedCapDelayCalc
{
public:
  ArnoldiDelayCalc(StaState *sta);
  virtual ~ArnoldiDelayCalc();
  ArcDelayCalc *copy() override;
  const char *name() const override { return "arnoldi"; }
  // Build (and remember in unsaved_parasitics_) the reduced model for drvr_pin.
  Parasitic *findParasitic(const Pin *drvr_pin,
                           const RiseFall *rf,
                           const DcalcAnalysisPt *dcalc_ap) override;
  // Always declines (returns nullptr).
  Parasitic *reduceParasitic(const Parasitic *parasitic_network,
                             const Pin *drvr_pin,
                             const RiseFall *rf,
                             const DcalcAnalysisPt *dcalc_ap) override;
  ArcDcalcResult inputPortDelay(const Pin *port_pin,
                                float in_slew,
                                const RiseFall *rf,
                                const Parasitic *parasitic,
                                const LoadPinIndexMap &load_pin_index_map,
                                const DcalcAnalysisPt *dcalc_ap) override;
  ArcDcalcResult gateDelay(const Pin *drvr_pin,
                           const TimingArc *arc,
                           const Slew &in_slew,
                           // Pass in load_cap or parasitic.
                           float load_cap,
                           const Parasitic *parasitic,
                           const LoadPinIndexMap &load_pin_index_map,
                           const DcalcAnalysisPt *dcalc_ap) override;
  string reportGateDelay(const Pin *drvr_pin,
                         const TimingArc *arc,
                         const Slew &in_slew,
                         float load_cap,
                         const Parasitic *parasitic,
                         const LoadPinIndexMap &load_pin_index_map,
                         const DcalcAnalysisPt *dcalc_ap,
                         int digits) override;
  // Deletes the reduced models created since the previous call.
  void finishDrvrPin() override;
  // Select the rising/falling constant set in D and refresh it when the
  // thresholds differ from the cached ones.
  void delay_work_set_thresholds(delay_work *D,
                                 double lo,
                                 double hi,
                                 bool rising,
                                 double derate);

private:
  ArcDcalcResult gateDelaySlew(const LibertyCell *drvr_cell,
                               const TimingArc *arc,
                               const GateTableModel *table_model,
                               const Slew &in_slew,
                               const LoadPinIndexMap &load_pin_index_map,
                               const Pvt *pvt);
  void ar1_ceff_delay(delay_work *D,
                      timing_table *tab,
                      arnoldi1 *mod,
                      double *delays,
                      double *slews);
  double ra_rdelay_1(timing_table *tab,
                     double ctot);
  double ra_get_r(delay_work *D,
                  timing_table *tab,
                  double rdelay,
                  double ctot);
  double ra_get_s(delay_work *D,
                  timing_table *tab,
                  double r,
                  double c);
  void ra_solve_for_s(delay_work *D,
                      double p,
                      double tlohi,
                      double &s);
  // from poles and residues, solve for t20,t50,t80
  void pr_solve1(double s,
                 int order,
                 double *p,
                 double *rr,
                 double v1,
                 double *t1);
  void pr_solve3(double s,
                 int order,
                 double *p,
                 double *rr,
                 double vhi,
                 double *thi,
                 double vmid,
                 double *tmid,
                 double vlo,
                 double *tlo);
  //
  // routines for linear drive model and ceff
  //
  double pr_ceff(double s,
                 double rdrive,
                 int order,
                 double *p,
                 double *rr,
                 double ceff_time);
  double ra_solve_for_t(double p,
                        double s,
                        double v);
  void ra_solve_for_pt(double ps,
                       double v,
                       double *pt,
                       double *d);
  void ra_calc_c(double lo,
                 double hi,
                 double *c_smin,
                 double *c_x1,
                 double *c_y1);

  // Default member initializers keep every member in a defined state even
  // before the first inputPortDelay/gateDelay call assigns it.
  // Reduced model for the driver currently being evaluated (not owned here;
  // ownership is tracked through unsaved_parasitics_).
  rcmodel *rcmodel_ = nullptr;
  // Capacity (in doubles) of _delayV and _slewV.
  int _pinNmax = 0;
  // Per-terminal delay/slew buffers, grown with realloc.
  double *_delayV = nullptr;
  double *_slewV = nullptr;
  // Terminal count of rcmodel_.
  int pin_n_ = 0;
  ArnoldiReduce *reduce_ = nullptr;
  delay_work *delay_work_ = nullptr;
  // Models made by findParasitic that finishDrvrPin must delete.
  vector<rcmodel*> unsaved_parasitics_;
  // Snapshot of variables_->pocvEnabled() taken in gateDelay.
  bool pocv_enabled_ = false;
};
// Factory: build an arnoldi delay calculator bound to sta.
ArcDelayCalc *
makeArnoldiDelayCalc(StaState *sta)
{
  ArcDelayCalc *calc = new ArnoldiDelayCalc(sta);
  return calc;
}
// Create the reducer and the pole/residue workspace, then allocate the
// per-terminal delay and slew buffers with room for 1024 entries.
ArnoldiDelayCalc::ArnoldiDelayCalc(StaState *sta) :
  LumpedCapDelayCalc(sta),
  reduce_(new ArnoldiReduce(sta)),
  delay_work_(delay_work_create())
{
  _pinNmax = 1024;
  const size_t buffer_bytes = _pinNmax * sizeof(double);
  _delayV = static_cast<double*>(malloc(buffer_bytes));
  _slewV = static_cast<double*>(malloc(buffer_bytes));
}
// Make a fresh calculator that shares this object's StaState.
ArcDelayCalc *
ArnoldiDelayCalc::copy()
{
  ArcDelayCalc *duplicate = new ArnoldiDelayCalc(this);
  return duplicate;
}
// Free the workspace, the delay/slew buffers and the reducer.
ArnoldiDelayCalc::~ArnoldiDelayCalc()
{
  // Reduced models are normally deleted by finishDrvrPin. If the calculator
  // is destroyed before that call, anything still queued would leak, so
  // release it here.
  for (rcmodel *model : unsaved_parasitics_)
    delete model;
  unsaved_parasitics_.clear();
  delay_work_destroy(delay_work_);
  free(_delayV);
  free(_slewV);
  delete reduce_;
}
// Find (or synthesize from a wireload) the parasitic network driven by
// drvr_pin and reduce it to an arnoldi model. Returns nullptr when the net
// has an explicit wire cap, the pin is internal, or no network is available.
Parasitic *
ArnoldiDelayCalc::findParasitic(const Pin *drvr_pin,
                                const RiseFall *drvr_rf,
                                const DcalcAnalysisPt *dcalc_ap)
{
  const Corner *corner = dcalc_ap->corner();
  // set_load net has precedence over parasitics.
  if (sdc_->drvrPinHasWireCap(drvr_pin, corner)
      || network_->direction(drvr_pin)->isInternal())
    return nullptr;

  const ParasiticAnalysisPt *parasitic_ap = dcalc_ap->parasiticAnalysisPt();
  Parasitic *network = parasitics_->findParasiticNetwork(drvr_pin, parasitic_ap);
  const MinMax *min_max = dcalc_ap->constraintMinMax();
  if (network == nullptr) {
    // No extracted network: try to build one from the wireload model.
    Wireload *wireload = sdc_->wireload(min_max);
    if (wireload) {
      float pin_cap, wire_cap, fanout;
      bool has_wire_cap;
      graph_delay_calc_->netCaps(drvr_pin, drvr_rf, dcalc_ap,
                                 pin_cap, wire_cap, fanout, has_wire_cap);
      network = parasitics_->makeWireloadNetwork(drvr_pin, wireload,
                                                 fanout, min_max,
                                                 parasitic_ap);
    }
  }
  if (network == nullptr)
    return nullptr;

  rcmodel *reduced = reduce_->reduceToArnoldi(network, drvr_pin,
                                              parasitic_ap->couplingCapFactor(),
                                              drvr_rf, corner, min_max,
                                              parasitic_ap);
  // Arnoldi parasitics are their own class that are not saved in the parasitic db.
  unsaved_parasitics_.push_back(reduced);
  return reduced;
}
// Reduction on request is not supported by this calculator: all arguments
// are ignored and nullptr is returned.
Parasitic *
ArnoldiDelayCalc::reduceParasitic(const Parasitic *,
const Pin *,
const RiseFall *,
const DcalcAnalysisPt *)
{
// Decline because reduced arnoldi parasitics are not stored in the parasitics db.
return nullptr;
}
void
ArnoldiDelayCalc::finishDrvrPin()
{
for (auto parasitic : unsaved_parasitics_)
delete parasitic;
unsaved_parasitics_.clear();
}
// Wire delay and load slew for an input port driving the net directly.
// With a reduced model, each load j>0 gets delay 0.6931472*elmore(j) and
// slew in_slew + log(vhi/vlo)*elmore(j)/slew_derate; without one the
// result comes from makeResult with zero delay.
ArcDcalcResult
ArnoldiDelayCalc::inputPortDelay(const Pin *,
                                 float in_slew,
                                 const RiseFall *rf,
                                 const Parasitic *parasitic,
                                 const LoadPinIndexMap &load_pin_index_map,
                                 const DcalcAnalysisPt *)
{
  rcmodel_ = nullptr;
  _delayV[0] = 0.0;
  _slewV[0] = in_slew;
  LibertyLibrary *drvr_library = network_->defaultLibertyLibrary();
  if (parasitic == nullptr)
    return makeResult(drvr_library, rf, 0.0, in_slew, load_pin_index_map);

  ArcDcalcResult dcalc_result(load_pin_index_map.size());
  rcmodel_ = reinterpret_cast<rcmodel*>(const_cast<Parasitic*>(parasitic));
  pin_n_ = rcmodel_->n;
  if (pin_n_ >= _pinNmax) {
    // Grow the per-terminal buffers so they can hold pin_n_ entries.
    _pinNmax *= 2;
    if (pin_n_ >= _pinNmax)
      _pinNmax += pin_n_;
    _pinNmax *= 2;
    _delayV = static_cast<double*>(realloc(_delayV, _pinNmax * sizeof(double)));
    _slewV = static_cast<double*>(realloc(_slewV, _pinNmax * sizeof(double)));
  }
  pin_n_ = rcmodel_->n;

  const double slew_derate = drvr_library->slewDerateFromLibrary();
  const double lo_thresh = drvr_library->slewLowerThreshold(rf);
  const double hi_thresh = drvr_library->slewUpperThreshold(rf);
  const bool rising = (rf == RiseFall::rise());
  delay_work_set_thresholds(delay_work_, lo_thresh, hi_thresh, rising, slew_derate);
  const double c_log = delay_work_->c->vlg;

  for (int term = 1; term < pin_n_; term++) {
    const double elmore = rcmodel_->elmore(term);
    const double wire_delay = 0.6931472*elmore;
    const double load_slew = in_slew + c_log*elmore/slew_derate;
    _delayV[term] = wire_delay;
    _slewV[term] = load_slew;
    const Pin *load_pin = rcmodel_->pinV[term];
    auto found = load_pin_index_map.find(load_pin);
    if (found == load_pin_index_map.end())
      continue;
    const size_t load_idx = found->second;
    dcalc_result.setWireDelay(load_idx, wire_delay);
    dcalc_result.setLoadSlew(load_idx, load_slew);
  }
  return dcalc_result;
}
// Gate delay entry point. Uses the arnoldi path when the arc has a gate
// table model and the parasitic is an rcmodel; otherwise defers to
// LumpedCapDelayCalc::gateDelay.
ArcDcalcResult
ArnoldiDelayCalc::gateDelay(const Pin *drvr_pin,
                            const TimingArc *arc,
                            const Slew &in_slew,
                            float load_cap,
                            const Parasitic *parasitic,
                            const LoadPinIndexMap &load_pin_index_map,
                            const DcalcAnalysisPt *dcalc_ap)
{
  const LibertyCell *drvr_cell = arc->from()->libertyCell();
  ConcreteParasitic *concrete =
    reinterpret_cast<ConcreteParasitic*>(const_cast<Parasitic*>(parasitic));
  rcmodel_ = dynamic_cast<rcmodel*>(concrete);
  pocv_enabled_ = variables_->pocvEnabled();
  GateTableModel *table_model = arc->gateTableModel(dcalc_ap);

  const bool use_arnoldi = (table_model && rcmodel_);
  if (!use_arnoldi)
    return LumpedCapDelayCalc::gateDelay(drvr_pin, arc, in_slew, load_cap,
                                         parasitic, load_pin_index_map, dcalc_ap);

  const Pvt *pvt = pinPvt(drvr_pin, dcalc_ap);
  return gateDelaySlew(drvr_cell, arc, table_model, in_slew,
                       load_pin_index_map, pvt);
}
// Arnoldi gate delay: runs ar1_ceff_delay on rcmodel_ (when its order is
// positive) and copies the per-terminal delays/slews from _delayV/_slewV
// into the result for every terminal present in load_pin_index_map.
ArcDcalcResult
ArnoldiDelayCalc::gateDelaySlew(const LibertyCell *drvr_cell,
                                const TimingArc *arc,
                                const GateTableModel *table_model,
                                const Slew &in_slew,
                                const LoadPinIndexMap &load_pin_index_map,
                                const Pvt *pvt)
{
  pin_n_ = rcmodel_->n;
  if (pin_n_ >= _pinNmax) {
    // Grow the per-terminal buffers so they can hold pin_n_ entries.
    _pinNmax *= 2;
    if (pin_n_ >= _pinNmax)
      _pinNmax += pin_n_;
    _delayV = static_cast<double*>(realloc(_delayV, _pinNmax * sizeof(double)));
    _slewV = static_cast<double*>(realloc(_slewV, _pinNmax * sizeof(double)));
  }
  ArcDcalcResult dcalc_result(load_pin_index_map.size());
  pin_n_ = rcmodel_->n;
  const RiseFall *rf = arc->toEdge()->asRiseFall();
  if (!(table_model && rf))
    return dcalc_result;

  const LibertyLibrary *drvr_library = drvr_cell->libertyLibrary();
  const double slew_derate = drvr_library->slewDerateFromLibrary();
  const double lo_thresh = drvr_library->slewLowerThreshold(rf);
  const double hi_thresh = drvr_library->slewUpperThreshold(rf);
  const bool rising = (rf == RiseFall::rise());
  delay_work_set_thresholds(delay_work_, lo_thresh, hi_thresh, rising,
                            slew_derate);
  if (rcmodel_->order > 0) {
    timing_table tab;
    tab.table = table_model;
    tab.cell = drvr_cell;
    tab.pvt = pvt;
    tab.in_slew = delayAsFloat(in_slew);
    ar1_ceff_delay(delay_work_, &tab, rcmodel_,
                   _delayV, _slewV);
  }
  // Entry 0 of the buffers is the driver terminal.
  dcalc_result.setGateDelay(_delayV[0]);
  dcalc_result.setDrvrSlew(_slewV[0]);
  if (rcmodel_) {
    for (int term = 0; term < rcmodel_->n; term++) {
      const Pin *load_pin = rcmodel_->pinV[term];
      auto found = load_pin_index_map.find(load_pin);
      if (found == load_pin_index_map.end())
        continue;
      size_t load_idx = found->second;
      ArcDelay wire_delay = _delayV[term] - _delayV[0];
      Slew load_slew = _slewV[term];
      thresholdAdjust(load_pin, drvr_library, rf, wire_delay, load_slew);
      dcalc_result.setWireDelay(load_idx, wire_delay);
      dcalc_result.setLoadSlew(load_idx, load_slew);
    }
  }
  return dcalc_result;
}
// Report text for a gate delay. This calculator adds nothing of its own:
// every argument is forwarded unchanged to LumpedCapDelayCalc::reportGateDelay.
string
ArnoldiDelayCalc::reportGateDelay(const Pin *drvr_pin,
const TimingArc *arc,
const Slew &in_slew,
float load_cap,
const Parasitic *parasitic,
const LoadPinIndexMap &load_pin_index_map,
const DcalcAnalysisPt *dcalc_ap,
int digits)
{
return LumpedCapDelayCalc::reportGateDelay(drvr_pin, arc, in_slew, load_cap,
parasitic, load_pin_index_map,
dcalc_ap, digits);
}
////////////////////////////////////////////////////////////////
//
// arnoldi1.cpp
//
// Release the d and U arrays with free().
// NOTE(review): e and the rows of U are not freed here; presumably they
// live inside the d / U allocations - confirm against the allocator.
arnoldi1::~arnoldi1()
{
free(d);
free(U);
}
// Elmore time constant for terminal k computed from the reduced model:
// 0 for order 0, d[0] for order 1, otherwise d[0] + e[0]*U[1][k]/U[0][0].
double
arnoldi1::elmore(int k)
{
  if (order == 0)
    return 0.0;
  if (order == 1)
    return d[0];
  const double inv_u00 = 1.0/U[0][0];
  return d[0] + e[0]*U[1][k]*inv_u00;
}
// Allocate a workspace sized for 256 terminals and order 32, with all
// threshold constants zeroed and c pointing at cV[0].
delay_work *
delay_work_create()
{
  delay_work *D = (delay_work*)malloc(sizeof(delay_work));
  D->nmax = 256;

  // resi: nmax rows of 32 doubles carved from one contiguous allocation.
  D->resi = (double**)malloc(D->nmax*sizeof(double*));
  D->resi[0] = (double*)malloc(D->nmax*32*sizeof(double));
  for (int row = 1; row < D->nmax; row++)
    D->resi[row] = D->resi[0] + row*32;

  // v: 32 rows of 32 doubles.
  D->v[0] = (double*)malloc(32*32*sizeof(double));
  for (int row = 1; row < 32; row++)
    D->v[row] = D->v[0] + row*32;

  // w: 32 rows of nmax doubles.
  D->w[0] = (double*)malloc(32*D->nmax*sizeof(double));
  for (int row = 1; row < 32; row++)
    D->w[row] = D->w[0] + row*D->nmax;

  D->lo_thresh = 0.0;
  D->hi_thresh = 0.0;
  D->slew_derate = 0.0;
  D->slew_factor = 0.0;
  for (int edge = 0; edge < 2; edge++) {
    delay_c &con = D->cV[edge];
    con.slew_derate = 0.0;
    con.vlo = 0.0;
    con.vhi = 0.0;
    con.vlg = 0.0;
    con.smin = 0.0;
    con.x1 = 0.0;
    con.y1 = 0.0;
    con.vmid = 0.0;
  }
  D->c = D->cV;
  return D;
}
// Free a workspace. resi, v and w each keep their data in one contiguous
// allocation anchored at index 0, so only row 0 of each is freed.
static void
delay_work_destroy(delay_work *D)
{
free(D->resi[0]);
free(D->resi);
free(D->v[0]);
free(D->w[0]);
free(D);
}
// Make sure the workspace can hold n terminals. The old resi/w contents
// are discarded; capacity becomes max(2*nmax, n).
static void
delay_work_alloc(delay_work *D,int n)
{
  if (n <= D->nmax)
    return;

  free(D->w[0]);
  free(D->resi[0]);
  free(D->resi);

  D->nmax *= 2;
  if (n > D->nmax)
    D->nmax = n;

  D->resi = (double**)malloc(D->nmax*sizeof(double*));
  D->resi[0] = (double*)malloc(D->nmax*32*sizeof(double));
  for (int row = 1; row < D->nmax; row++)
    D->resi[row] = D->resi[0] + row*32;

  D->w[0] = (double*)malloc(32*D->nmax*sizeof(double));
  for (int row = 1; row < 32; row++)
    D->w[row] = D->w[0] + row*D->nmax;
}
// Point D->c at the rising (cV[1]) or falling (cV[0]) constant set and
// recompute it when lo/hi differ from the cached thresholds. Thresholds
// outside (0.01, 0.99) are replaced by 0.1/0.9 with derate 0.8.
void
ArnoldiDelayCalc::delay_work_set_thresholds(delay_work *D,
                                            double lo,
                                            double hi,
                                            bool rising,
                                            double derate)
{
  const double mid = 0.5; // 0.0:1.0
  delay_c *con = D->cV + (rising ? 1 : 0);
  D->c = con;
  // WRONG
  const bool changed = (lo != con->vlo || hi != con->vhi);
  if (changed) {
    const bool usable = (lo>0.01 && hi<0.99);
    if (!usable) {
      lo = 0.1;
      hi = 0.9;
      derate = 0.8;
    }
    con->slew_derate = derate;
    con->vlo = lo;
    con->vhi = hi;
    con->vmid = mid;
    con->vlg = log(hi/lo);
    ra_calc_c(lo, hi, &con->smin, &con->x1, &con->y1);
  }
  D->lo_thresh = con->vlo;
  D->hi_thresh = con->vhi;
  D->slew_derate = derate;
  // Measured (vlo..vhi) swing scaled back to the reported swing.
  const double measured_swing = con->vhi - con->vlo;
  D->slew_factor = measured_swing/D->slew_derate;
}
// Return the residue row for terminal term_index (D->resi[term_index]).
// No bounds check is made on term_index.
static double *
delay_work_get_residues(delay_work *D,int term_index)
{
return D->resi[term_index];
}
//////////////////////////////////////////////////////////////
//
// calculate_poles_res
//
// Fill D->poles and D->resi for this model driven through rdrive.
// d[0] is temporarily increased by rdrive*ctot for the eigen solve and
// restored afterwards. Eigenvalues are floored at 1e-14 and inverted.
void arnoldi1::calculate_poles_res(delay_work *D,double rdrive)
{
  if (n > D->nmax)
    delay_work_alloc(D,n);
  double *poles = D->poles;
  double **evec = D->v;
  double **proj = D->w;
  double *scale = D->aa;
  double **residues = D->resi;

  const double d0_saved = d[0];
  d[0] += rdrive*ctot;
  const bool solved = tridiagEV(order,d,e,poles,evec);
  if (!solved)
    criticalError(204, "arnoldi delay calc failed.");
  d[0] = d0_saved;

  for (int h = 0; h < order; h++) {
    if (poles[h]<1e-14) // .01ps
      poles[h]=1e-14;
    poles[h] = 1.0/poles[h];
  }

  for (int h = 0; h < order; h++) {
    for (int k = 0; k < n; k++) {
      double acc = 0.0;
      for (int j = 0; j < order; j++)
        acc += evec[h][j]*U[j][k];
      proj[h][k] = acc;
    }
    scale[h] = sqc*evec[h][0];
  }

  for (int term = 0; term < n; term++) {
    for (int h = 0; h < order; h++)
      residues[term][h] = scale[h]*proj[h][term];
  }
}
////////////////////////////////////////////////////////////////
//
// tridiag.cpp
//
//
// tridiagonal eigenvalues and eigenvectors
// assuming all eigenvalues are positive
//
// tridiagEV(int n,double *d,double *e,double *p,double **v)
// d[0]..d[n-1] diagonal elements
// e[0]..e[n-2] off-diagonal elements
// p[0],..p[n-1] the eigenvalues
// v[0],..v[n-1] the eigenvectors
// M*v[j] = p[j]*v[j]
//
// (M*v[j])[0] = d[0]*v[j][0]+e[0]*v[j][1]
// (M*v[j])[k] = d[k]*v[j][k]+e[k-1]*v[j][k-1]+e[k]*v[j][k+1] 0<k<n-1
// (M*v[j])[n-1] = d[n-1]*v[j][n-1]+e[n-2]*v[j][n-2]
//
// Eigenvalues and eigenvectors of a symmetric tridiagonal matrix.
//   n    matrix order; at most 32
//   din  diagonal, din[0..n-1] (not modified)
//   ein  off-diagonal, ein[0..n-2] (not modified)
//   d    out: eigenvalues, sorted largest first
//   v    out: v[j][0..n-1] is the eigenvector for d[j]
// Returns false, without touching any output, when n > 32; returns false
// when a sub-diagonal element has not converged after 20 sweeps.
static bool
tridiagEV(int n,double *din,double *ein,double *d,double **v)
{
  // Reject oversized problems before writing anything: the callers'
  // eigenvector storage is 32x32 and the local e[] holds 32 entries.
  if (n>32)
    return false;
  int j,k;
  for (j=0;j<n;j++) for (k=0;k<n;k++) v[j][k]=0.0;
  for (j=0;j<n;j++) v[j][j] = 1.0;
  int m,h,iter,i;
  double s,r,p,g,f,c,b;
  double e[32];
  for (i=0;i<n;i++) d[i] = din[i];
  // e[] is shifted by one so that e[i] couples rows i-1 and i.
  for (i=0;i<n-1;i++) e[i+1] = ein[i];
  e[0] = 0.0;
  for (h=n-1;h>=1;h--) {
    iter = 0;
    while (std::abs(e[h])>1e-18) { // 1e-6ps
      m=0;
      if (m != h) {
        if (iter++ == 20)
          return false;
        g = (d[h-1]-d[h])/(2.0*e[h]);
        r = std::sqrt(1.0+g*g); // watch overflow
        g = d[m]-d[h]+e[h]/(g + (g<0?-r:r));
        s = c = 1.0;
        p = 0.0;
        for (i=m+1;i<=h;i++) {
          f = s*e[i];
          b = c*e[i];
          e[i-1] = r = std::sqrt(f*f+g*g); // watch
          if (r == 0.0) {
            d[i-1] -= p;
            e[m] = 0.0;
            break;
          }
          s = f/r;
          c = g/r;
          g = d[i-1]-p;
          r = (d[i]-g)*s+2.0*c*b;
          d[i-1] = g + (p=s*r);
          g = c*r-b;
          // Apply the same rotation to the eigenvector rows.
          for (k=0;k<n;k++) {
            f = v[i-1][k];
            v[i-1][k] = s*v[i][k]+c*f;
            v[i][k] = c*v[i][k]-s*f;
          }
        }
        // The inner loop broke out early on r == 0: retry this h.
        if (r == 0.0 && i <= h) continue;
        d[h] -= p;
        e[h] = g;
        e[m] = 0.0;
      }
    }
  }
  // Selection sort: largest eigenvalue first, eigenvectors kept in step.
  for (i=0;i<n-1;i++) {
    k = i;
    p = d[k];
    for (j=i+1;j<n;j++)
      if (d[j] > p) { k=j; p=d[k]; }
    if (k != i) {
      d[k] = d[i];
      d[i] = p;
      for (j=0;j<n;j++) {
        p = v[i][j];
        v[i][j] = v[k][j];
        v[k][j] = p;
      }
    }
  }
  return true;
}
////////////////////////////////////////////////////////////////
// prsolve.cpp
// get a waveform point
// Evaluate the pole/residue waveform at time t for an input ramp of
// duration s: *va = sum over h of rr[h]*f_h(t), where f_h uses the
// in-ramp form for t<s and the decaying form for t>=s.
static void
pr_get_v(double t, double s, int order, double *p, double *rr, double *va)
{
  *va = 0.0;
  for (int h = 0; h < order; h++) {
    const double pt = p[h]*t;
    const double ps = p[h]*s;
    const double f = (t<s)
      ? 1.0-t/s + (1.0-exp(-pt))/ps
      : exp(ps-pt)*(1.0-exp(-ps))/ps;
    *va += rr[h]*f;
  }
}
// Same waveform as pr_get_v, but also returns its time derivative:
// *va is the value at t and *dva is d(va)/dt.
static void
get_dv(double t, double s, int order, double *p, double *rr,
       double *va, double *dva)
{
  *va = 0.0;
  *dva = 0.0;
  for (int h = 0; h < order; h++) {
    const double pole = p[h];
    const double pt = pole*t;
    const double ps = pole*s;
    double f;
    double df;
    if (t<s) {
      const double rise = (1.0-exp(-pt))/ps;
      f = 1.0-t/s + rise;
      df = -pole*rise;
    } else {
      f = exp(ps-pt)*(1.0-exp(-ps))/ps;
      df = -pole*f;
    }
    *va += rr[h]*f;
    *dva += rr[h]*df;
  }
}
// Find the time at which the pole/residue waveform equals val, given a
// bracket [x1,x2] whose waveform values are v1 and v2.
// Mixes Newton steps (using get_dv) with bisection, for at most 9 passes.
// If the bracket does not straddle val, the nearer end point is returned.
static double
solve_t_bracketed(double s,int order,double *p,double *rr,
double val,double x1,double x2,double v1,double v2)
{
int j;
double df,dx,dxold,f,f2,f1;
double temp,xh,xl,rts;
double xacc = .001e-12; // .001ps
f1 = v1-val;
f2 = v2-val;
if (f1==0.0) return x1;
if (f2==0.0) return x2;
// Secant estimate used as the starting point.
rts = (f1*x2-f2*x1)/(f1-f2);
// Orient the bracket so that f(xl) < 0 < f(xh); bail out when both ends
// lie on the same side of val.
if (f1<f2) {
xl = x1;
xh = x2;
if (0.0<f1) return x1;
if (f2<0.0) return x2;
} else {
xl = x2;
xh = x1;
if (0.0<f2) return x2;
if (f1<0.0) return x1;
}
dxold = abs(x2-x1);
dx = dxold;
get_dv(rts,s,order,p,rr,&f,&df);
f -= val;
double flast = 0.0;
for (j=1;j<10;j++) {
// Bisect when Newton would leave the bracket or is converging slowly.
if ((((rts-xh)*df-f)*((rts-xl)*df-f) >= 0.0)
|| (abs(2.0*f) > abs(dxold*df))) {
dxold = dx;
dx = 0.5*(xh-xl);
if (flast*f >0.0) {
// 2 successive bisections in same direction,
// accelerate
if (f<0.0) dx = 0.9348*(xh-xl);
else dx = 0.0625*(xh-xl);
}
flast = f;
rts = xl+dx;
if (xl == rts) {
return rts;
}
} else {
// Newton step.
dxold = dx;
dx = f/df;
flast = 0.0;
temp = rts;
rts -= dx;
if (temp == rts) {
return rts;
}
}
if (abs(dx) < xacc) {
return rts;
}
get_dv(rts,s,order,p,rr,&f,&df); f -= val;
// Shrink the bracket around the new estimate.
if (f<0.0)
xl = rts;
else
xh = rts;
}
// Iteration budget exhausted: accept rts if the residual is tiny,
// otherwise return the bracket midpoint.
if (abs(f)<1e-6) // 1uV
return rts;
return 0.5*(xl+xh);
}
// Solve for the single time *t1 at which the pole/residue waveform
// (input ramp duration s) crosses v1. A bracket [tmin,tmax] is built
// first and then refined by solve_t_bracketed.
void
ArnoldiDelayCalc::pr_solve1(double s,
int order,
double *p,
double *rr,
double v1,
double *t1)
{
double tmin = 0.0,tmax = 0.0,vmin = 0.0,vmax = 0.0;
int h, h0 = 0;
// Ignore trailing residues that are effectively zero.
while (order>1
&& rr[order-1]<1e-8 // 1e-8V
&& rr[order-1]>-1e-8)
order--;
// Choose the pole used for stepping: the first one with a residue above
// 0.3 that also exceeds rr[0], when rr[0] itself is below 0.5.
if (rr[0]<0.5) {
for (h=1;h<order;h++) if (rr[h]>0.3 && rr[h]>rr[0]) { h0 = h; break; }
}
double p0 = p[h0];
double ps,vs,ta,va;
// vs is the waveform value at t = s.
vs = 0.0;
for (h=0;h<order;h++) {
ps = p[h]*s;
vs += rr[h]*(1-exp(-ps))/ps;
}
if (vs<v1) {
// s dominates
ta = 0.5*(1+v1)*s;
pr_get_v(ta,s,order,p,rr,&va);
if (va<v1) {
tmax = ta; vmax = va;
ta = v1*s;
pr_get_v(ta,s,order,p,rr,&va);
if (va<v1) {
// ignoring a typical error at drive node, that comes
// from slight inaccuracies in rr
if (!(rr[order-1]>1.0 && p[order-1]>500.0 && va>v1-0.002))
debugPrint(debug_, "arnoldi", 1, "err, pr_solve1, va<v1");
}
tmin = ta; vmin = va;
} else {
tmin = ta; vmin = va;
ta = s;
pr_get_v(ta,s,order,p,rr,&va);
while (va>v1) {
tmin = ta; vmin = va;
ta *= 2.0;
pr_get_v(ta,s,order,p,rr,&va);
}
if (va>v1)
debugPrint(debug_, "arnoldi", 1, "err, pr_solve1, va>v1");
tmax = ta; vmax = va;
}
} else {
// s is irrelevant
// Step forward by 1/p0 until the waveform drops below v1.
ta = s; va = vs;
while (va >= v1) {
tmin = ta;
vmin = va;
ta += 1.0/p0;
pr_get_v(ta,s,order,p,rr,&va);
}
tmax = ta; vmax = va;
}
*t1 = solve_t_bracketed(s,order,p,rr,v1,tmin,tmax,vmin,vmax);
}
// Solve for the three times at which the pole/residue waveform (input
// ramp duration s) crosses vhi, vmid and vlo. For each level a bracket
// (tminN/tmaxN with waveform values vminN/vmaxN) is built, then refined
// by solve_t_bracketed. Suffixes: 2 -> vhi, 5 -> vmid, 8 -> vlo.
//   s        input ramp duration
//   order    number of poles/residues
//   p, rr    poles and residues
//   thi, tmid, tlo   out: crossing times for vhi, vmid, vlo
void
ArnoldiDelayCalc::pr_solve3(double s,
                            int order,
                            double *p,
                            double *rr,
                            double vhi,
                            double *thi,
                            double vmid,
                            double *tmid,
                            double vlo,
                            double *tlo)
{
  // falling, thi<tmid<tlo
  double tmin2,tmax2,vmin2,vmax2;
  double tmin5,tmax5,vmin5,vmax5;
  double tmin8,tmax8,vmin8,vmax8;
  int h, h0 = 0;
  // Ignore trailing residues that are effectively zero.
  while (order>1
         && rr[order-1]<1e-8 // 1e-8V
         && rr[order-1]>-1e-8)
    order--;
  // Choose the pole used for stepping (see the rr[0]<0.5 test).
  if (rr[0]<0.5) {
    for (h=1;h<order;h++) if (rr[h]>0.3 && rr[h]>rr[0]) { h0 = h; break; }
  }
  double p0 = p[h0];
  if (p0>10e+9) // 1/10ns
    p0=10e+9;
  double ps,vs,ta,va;
  // vs is the waveform value at t = s.
  vs = 0.0;
  for (h=0;h<order;h++) {
    ps = p[h]*s;
    vs += rr[h]*(1-exp(-ps))/ps;
  }
  if (vs<vlo) {
    // s dominates
    tmax8 = s; vmax8 = vs;
    ta = vhi*s;
    pr_get_v(ta,s,order,p,rr,&va);
    if (va < vmid) {
      tmax2 = tmax5 = tmin8 = ta;
      vmax2 = vmax5 = vmin8 = va;
      ta = vmid*s;
      pr_get_v(ta,s,order,p,rr,&va);
      if (va>vhi) {
        tmin2 = tmin5 = ta;
        vmin2 = vmin5 = va;
        tmin8 = ta; vmin8 = va;
        if (va<vmid) {
          tmax5 = ta; vmax5 = va;
        } else {
          tmin5 = ta; vmin5 = va;
        }
      } else {
        tmax2 = tmin5 = ta;
        vmax2 = vmin5 = va;
        ta = vlo*s;
        pr_get_v(ta,s,order,p,rr,&va);
        tmin2 = ta; vmin2 = va;
      }
    } else {
      // rare, s dominates but t=vhi*s is still above vmid
      tmin5 = tmin8 = ta;
      vmin5 = vmin8 = va;
      tmax5 = tmax8;
      vmax5 = vmax8;
      if (va > vhi) {
        tmin2 = tmin5;
        vmin2 = vmin5;
        tmax2 = tmax5;
        // Fixed: the upper bracket value must be the voltage vmax5
        // (previously the time tmax5 was stored here).
        vmax2 = vmax5;
      } else {
        tmax2 = tmin5;
        vmax2 = vmin5;
        ta = vlo*s;
        pr_get_v(ta,s,order,p,rr,&va);
        tmin2 = ta; vmin2 = va;
      }
    }
  } else if (vs<vmid) {
    // not far from s
    tmax2 = tmax5 = tmin8 = s;
    vmax2 = vmax5 = vmin8 = vs;
    ta = s + 1.6/p0;
    pr_get_v(ta,s,order,p,rr,&va);
    while (va>vlo) {
      tmin8 = ta; vmin8 = va;
      ta += 1.0/p0;
      pr_get_v(ta,s,order,p,rr,&va);
    }
    tmax8 = ta; vmax8 = va;
    ta = vmid*s;
    pr_get_v(ta,s,order,p,rr,&va);
    tmin5 = ta; vmin5 = va;
    if (va>vhi) {
      tmin2 = ta; vmin2 = va;
    } else {
      tmax2 = ta; vmax2 = va;
      ta = vlo*s;
      pr_get_v(ta,s,order,p,rr,&va);
      tmin2 = ta; vmin2 = va;
    }
  } else if (vs<vhi) {
    tmax2 = tmin5 = tmin8 = s;
    vmax2 = vmin5 = vmin8 = vs;
    ta = vlo*s;
    pr_get_v(ta,s,order,p,rr,&va);
    tmin2 = ta; vmin2 = va;
    ta = s + 0.7/p0;
    pr_get_v(ta,s,order,p,rr,&va);
    while (va>vmid) {
      // Fixed: keep the time bracket in tmin8 and the voltage in vmin8
      // (previously va was written into tmin8 and vmin8 was left stale).
      tmin5 = tmin8 = ta; vmin5 = vmin8 = va;
      ta += 0.7/p0;
      pr_get_v(ta,s,order,p,rr,&va);
    }
    tmax5 = ta; vmax5 = va;
    if (va < vlo) {
      tmax8 = ta; vmax8 = va;
    } else {
      tmin8 = ta; vmin8 = va;
      ta += 1.0/p0;
      pr_get_v(ta,s,order,p,rr,&va);
      while (va>vlo) {
        tmin8 = ta; vmin8 = va;
        ta += 1.0/p0;
        pr_get_v(ta,s,order,p,rr,&va);
      }
      tmax8 = ta; vmax8 = va;
    }
  } else {
    // s is irrelevant
    ta = s; va = vs;
    tmin2 = tmin5 = tmin8 = ta;
    vmin2 = vmin5 = vmin8 = va;
    while (va > vhi) {
      tmin2 = tmin5 = tmin8 = ta;
      vmin2 = vmin5 = vmin8 = va;
      ta += 1.0/p0;
      pr_get_v(ta,s,order,p,rr,&va);
    }
    tmax2 = ta; vmax2 = va;
    if (va < vmid) {
      tmax5 = ta; vmax5 = va;
    } else while (va > vmid) {
      tmin5 = tmin8 = ta;
      vmin5 = vmin8 = va;
      ta += 1.0/p0;
      pr_get_v(ta,s,order,p,rr,&va);
    }
    tmax5 = ta; vmax5 = va;
    if (va < vlo) {
      tmax8 = ta; vmax8 = va;
    } else while (va > vlo) {
      tmin8 = ta;
      vmin8 = va;
      ta += 1.0/p0;
      pr_get_v(ta,s,order,p,rr,&va);
    }
    tmax8 = ta; vmax8 = va;
  }
  *thi = solve_t_bracketed(s,order,p,rr,vhi,tmin2,tmax2,vmin2,vmax2);
  *tmid= solve_t_bracketed(s,order,p,rr,vmid,tmin5,tmax5,vmin5,vmax5);
  *tlo= solve_t_bracketed(s,order,p,rr,vlo,tmin8,tmax8,vmin8,vmax8);
}
// Integral from 0 to t of (single-pole response - input ramp) for pole p
// and ramp duration s. Exponentials with argument beyond 40 are taken as 0.
static double
calc_integ(double p,double s,double t)
{
  // integral of f(t)-vin(t)
  const double ps = p*s;
  double pt = p*t;
  double y;
  if (t<=s) {
    const double ept = (pt>40.0) ? 0.0 : exp(-pt);
    y = ept-1.0+pt;
  } else {
    pt = pt-ps;
    const double ept = (pt>40.0) ? 0.0 : exp(-pt);
    const double eps = (ps>40.0) ? 0.0 : exp(-ps);
    y = ps - (1.0-eps)*ept;
  }
  y /= ps*p;
  return y;
}
// Effective capacitance seen through rdrive at time ceff_time: the
// residue-weighted sum of calc_integ divided by rdrive, then divided by
// (1 - waveform value at ceff_time).
double
ArnoldiDelayCalc::pr_ceff(double s,
                          double rdrive,
                          int order,
                          double *p,
                          double *rr,
                          double ceff_time)
{
  double charge = 0.0;
  for (int h = 0; h < order; h++)
    charge += rr[h]*calc_integ(p[h],s,ceff_time);
  charge /= rdrive;

  double v0;
  pr_get_v(ceff_time,s,order,p,rr,&v0);
  return charge/(1.0-v0);
}
//////////////////////////////////////////////////////////////////
// Solve x + exp(-x) - 1 = y for x with three Newton steps from a closed
// form starting guess. For y < 1e-4 the starting guess is returned as is.
// A debug message is printed when the final residual exceeds 1e-8.
static double
ra_hinv(double y,
        Debug *debug)
{
  double x;
  if (y<1.0) {
    x = sqrt(2*y)+0.4*y;
    if (y<1e-4)
      return x;
  } else {
    x = y+1.0;
  }

  double ex;
  double f;
  for (int step = 0; step < 3; step++) {
    ex = exp(-x);
    f = x+ex-1.0-y;
    x += f/(ex-1.0);
  }
  // Residual at the final estimate.
  ex = exp(-x);
  f = x+ex-1.0-y;
  if (f<-1e-8 || f>1e-8)
    debugPrint(debug, "arnoldi", 1, "y f %g %g", y, f);
  return x;
}
// Time at which the single-pole response (pole p, input ramp s) reaches
// level v. Uses an asymptotic form when p*s > 30, a log form when the
// crossing is at or after the end of the ramp, and ra_hinv otherwise.
double
ArnoldiDelayCalc::ra_solve_for_t(double p,
                                 double s,
                                 double v)
{
  const double ps = p*s;
  if (ps>30.0)
    return (1.0+ps*(1.0-v)) / p;

  const double eps = exp(ps);
  if ((1-ps*v)*eps >= 1.0)
    return log((eps-1.0)/(ps*v)) / p;
  return ra_hinv((1-v)*ps, debug_)/p;
}
// Normalized form of ra_solve_for_t: for ps = p*s and level v, returns
// *pt = p*t at the crossing and *d, the derivative value the Newton
// iteration in ra_solve_for_s uses.
void
ArnoldiDelayCalc::ra_solve_for_pt(double ps,
                                  double v,
                                  double *pt,
                                  double *d)
{
  if (ps>30.0) {
    *pt = 1.0+ps*(1.0-v);
    *d = 1.0-v;
    return;
  }
  const double eps = exp(ps);
  const bool crosses_after_ramp = ((1-ps*v)*eps >= 1.0);
  if (crosses_after_ramp) {
    *pt = log((eps-1.0)/(ps*v));
    *d = eps/(eps-1.0) - 1.0/ps;
    return;
  }
  *pt = ra_hinv((1-v)*ps, debug_);
  *d = (1.0-v)/(*pt - (1-v)*ps);
}
// Derive the threshold dependent constants smin, x1 and y1 from vlo/vhi.
// They are stored in delay_c by delay_work_set_thresholds.
void
ArnoldiDelayCalc::ra_calc_c(double vlo,
                            double vhi,
                            double *c_smin,
                            double *c_x1,
                            double *c_y1)
{
  const double log_inv_hi = log(1.0/vhi);
  *c_smin = log_inv_hi + ra_hinv((1.0-vhi)/vhi - log_inv_hi, debug_);

  const double log_inv_lo = log(1.0/vlo);
  const double c_s1 = log_inv_lo + ra_hinv((1.0-vlo)/vlo - log_inv_lo, debug_);

  const double a1 = (exp(c_s1)-1.0)/c_s1;
  const double den = log(a1/vlo) - ra_hinv((1.0-vhi)*c_s1, debug_);
  *c_x1 = (vhi-vlo)/den;
  *c_y1 = c_s1*(*c_x1);
}
////////////////////////////////////////////////////////////////
//
// ceff.cpp
//
// Newton iteration on the input ramp duration s so that the single-pole
// response (pole p) takes tlohi to go between the vhi and vlo crossings.
// s is both the starting guess and the result. At most five steps are
// taken; the first four return early once |f| < .001ps.
void
ArnoldiDelayCalc::ra_solve_for_s(delay_work *D,
                                 double p,
                                 double tlohi,
                                 double &s)
{
  delay_c *c = D->c;
  double vhi = c->vhi;
  double vlo = c->vlo;
  // s is 0-100
  // solve f(x,y)=0 with f = x*(ptlo(y/x)-pthi(y/x))-(vhi-vlo)
  // (x=0,y=1)
  // (x=x1,y=y1) c->x1,y1
  // (x=x2,y=y2) x2=(vhi-vlo)/log(vhi/vlo) y2=(c->smin)*x2
  double x1 = c->x1;
  double y1 = c->y1;
  double x2 = (vhi-vlo)/c->vlg;
  double y2 = (c->smin)*x2;
  // NOTE(review): x and y are computed here but never read afterwards;
  // the iteration below starts from the caller's s. Confirm whether a
  // starting estimate derived from (x,y) was intended.
  double x,y;
  x = c->vlg/(p*tlohi);
  if (x <= x1) {
    y = y1 - 0.5*(x-x1);
    if (y>1.0) y=1.0;
  } else {
    y = y1 - (x-x1)*(0.5 + 8*(x-x1));
    if (y<y2) y=y2;
  }

  const int max_steps = 5;
  double ptlo,dlo;
  double pthi,dhi;
  double f = 0.0;
  double df;
  for (int step = 0; step < max_steps; step++) {
    ra_solve_for_pt(p*s,vlo,&ptlo,&dlo);
    ra_solve_for_pt(p*s,vhi,&pthi,&dhi);
    f = (ptlo-pthi)/p - tlohi;
    df = dlo-dhi;
    s = s - f/df;
    // The last step never returns early; it falls through to the check below.
    if (step < max_steps-1 && abs(f)<.001e-12) return; // .001ps
  }
  if (abs(f)>.5e-12) // .5ps
    debugPrint(debug_, "arnoldi", 1, "ra_solve_for_s p %g tlohi %s err %s",
               p,
               units_->timeUnit()->asString(tlohi),
               units_->timeUnit()->asString(f));
}
/////////////////////////////////////////////////////////////////////
// method 0:
// r = a match to slew to (ctot, limited by cmin,cmax)
// if r>rdelay, lower r
// Now at any ceff (limited)
// If slew(r,0,ceff) is too big
// s = s_start(r,ceff), not smaller than Smin
// accept the pessimistic output slew
// Else
// solve for s
// Rough translation of ra_get_r(sy_table) used by ar1_ceff_delay.
// Drive resistance that matches the table slew at ctot:
// r = slew_derate*table_slew / (log(vhi/vlo)*ctot), capped at rdelay
// when rdelay is positive.
double
ArnoldiDelayCalc::ra_get_r(delay_work *D,
                           timing_table *tab,
                           double rdelay,
                           double ctot)
{
  // find the maximum r that allows a solution for s of
  // (s,r,ctot)-> output_slew
  // If this maximum is greater than rdelay, use rdelay.
  const delay_c *con = D->c;
  const double slew_derate = con->slew_derate;
  const double c_log = con->vlg;
  float load_cap = ctot;
  ArcDelay table_delay;
  Slew table_slew;
  tab->table->gateDelay(tab->pvt, tab->in_slew, load_cap, pocv_enabled_,
                        table_delay, table_slew);
  const double tlohi = slew_derate*delayAsFloat(table_slew);
  double r = tlohi/(c_log*load_cap);
  if (rdelay>0.0 && r > rdelay)
    r = rdelay;
  return r;
}
// Input ramp duration s for the linear driver (r, c) that reproduces the
// table slew at load c. When r*c alone already gives at least the table
// slew, the minimum ramp r*c*smin is returned; otherwise s is solved for
// with ra_solve_for_s starting from smin + 0.3*tlohi.
double
ArnoldiDelayCalc::ra_get_s(delay_work *D,
                           timing_table *tab,
                           double r,
                           double c)
{
  const delay_c *con = D->c;
  const double slew_derate = con->slew_derate;
  const double c_log = con->vlg;
  const double c_smin = con->smin;
  ArcDelay table_delay;
  Slew table_slew;
  tab->table->gateDelay(tab->pvt, tab->in_slew, c, pocv_enabled_,
                        table_delay, table_slew);
  const double tlohi = slew_derate*delayAsFloat(table_slew);
  const double smin = r*c*c_smin; // c_smin = ra_hinv((1-vhi)/vhi-log(vhi)) + log(vhi);
  if (c_log*r*c >= tlohi)
    return smin;

  double s = smin+0.3*tlohi;
  ra_solve_for_s(D,1.0/(r*c),tlohi,s);
  return s;
}
/////////////////////////////////////////////////////////////////////
// method 1:
// determine the drive resistance from change in delay versus ctot
// find the maximum r that allows a solution for s of
// (s,r,ctot)-> output_slew
// If this maximum is greater than rdelay, use rdelay.
// calculate s,r,mod -> t50_srmod,
// then t50_srmod+t50_sy-t50_sr
// Drive resistance estimated from how the table delay changes between
// load ctot and load ctot/2. Returns 0.0 when the two loads are equal
// or the delay does not increase with load.
double
ArnoldiDelayCalc::ra_rdelay_1(timing_table *tab,
                              double ctot)
{
  // determine the drive resistance from change in delay versus ctot
  float cap_full = ctot;
  float cap_half = 0.5*cap_full;
  if (cap_full==cap_half)
    return 0.0;

  ArcDelay delay_full, delay_half;
  Slew slew_full, slew_half;
  tab->table->gateDelay(tab->pvt, tab->in_slew, cap_full, pocv_enabled_,
                        delay_full, slew_full);
  tab->table->gateDelay(tab->pvt, tab->in_slew, cap_half, pocv_enabled_,
                        delay_half, slew_half);
  const double dt50 = delayAsFloat(delay_full)-delayAsFloat(delay_half);
  if (dt50 <= 0.0)
    return 0.0;
  return dt50/(cap_full-cap_half);
}
// Compute delay and slew at every terminal of the reduced model mod.
//   D       workspace holding the current threshold constants
//   tab     gate table model plus pvt and input slew
//   mod     reduced model (poles/residues are computed here)
//   delays  out: delays[j] for j in [0, mod->n)
//   slews   out: slews[j] for j in [0, mod->n)
// Steps: pick a linear driver (r, s) that matches the table at ctot,
// iterate the effective capacitance up to three times, then solve each
// terminal's vhi/0.5/vlo crossings with pr_solve3.
void
ArnoldiDelayCalc::ar1_ceff_delay(delay_work *D,
timing_table *tab,
arnoldi1 *mod,
double *delays,
double *slews)
{
delay_c *con = D->c;
double slew_derate = con->slew_derate;
double vhi = con->vhi;
double vlo = con->vlo;
double ctot = mod->ctot;
double ceff,tlohi,t50_sy,r,s,t50_sr,rdelay;
ArcDelay df;
Slew sf;
debugPrint(debug_, "arnoldi", 1, "ctot=%s",
units_->capacitanceUnit()->asString(ctot));
rdelay = ra_rdelay_1(tab,ctot);
if (rdelay == 0.0) {
rdelay = 1e+3; // 1kohm
}
// NOTE(review): this store is overwritten by the next statement.
r = rdelay;
r = ra_get_r(D,tab,rdelay,ctot);
// NOTE(review): an out-of-range r resets rdelay, not r, so r itself is
// used unchanged below - confirm this is intended.
if (! (r>0.0
&& r<100e+3)) // 100kohm
rdelay = 1e+3; // 1kohm
// "bad" skips the ceff iteration further down.
bool bad = (r<rdelay);
s = ra_get_s(D,tab,r,ctot);
if (! (s>0.0
&& s<100e-9)) // 100ns
s = 0.5e-9; // .5ns
// Debug-only cross check of the (r, s) fit against the table slew.
if (debug_->check("arnoldi", 1)) {
double p = 1.0/(r*ctot);
double thix,tlox;
debugPrint(debug_, "arnoldi", 1, "at r=%s s=%s",
units_->resistanceUnit()->asString(r),
units_->timeUnit()->asString(s));
thix = ra_solve_for_t(p,s,vhi);
tlox = ra_solve_for_t(p,s,vlo);
tab->table->gateDelay(tab->pvt,tab->in_slew, ctot, pocv_enabled_, df, sf);
debugPrint(debug_, "arnoldi", 1, "table slew (in_slew %s ctot %s) = %s",
units_->timeUnit()->asString(tab->in_slew),
units_->capacitanceUnit()->asString(ctot),
delayAsString(sf, this));
tlohi = slew_derate*delayAsFloat(sf);
debugPrint(debug_, "arnoldi", 1, "tlohi %s %s",
units_->timeUnit()->asString(tlohi),
units_->timeUnit()->asString(tlox-thix));
}
ceff = ctot;
tab->table->gateDelay(tab->pvt, tab->in_slew, ceff, pocv_enabled_,
df, sf);
t50_sy = delayAsFloat(df);
t50_sr = ra_solve_for_t(1.0/(r*ceff),s,0.5);
// calculate s,r,mod -> t50_srmod,
// then t50_srmod+t50_sy-t50_sr
mod->calculate_poles_res(D,r);
double *p = D->poles;
// Residues of terminal 0 are used for the ceff iteration.
double *rr = delay_work_get_residues(D,0);
double thi,tlo,t50_srmod;
pr_solve1(s,mod->order,p,rr,0.5,&t50_srmod);
int ceff_it,j;
double ceff_time=0.0;
if (!bad) {
for (ceff_it=0;ceff_it<3;ceff_it++) {
// calculate ceff
ceff_time = s;
ceff = pr_ceff(s,r,mod->order,p,rr,ceff_time);
if ((ceff-1e-20) < 0.0) { // 1e-8pf
debugPrint(debug_, "arnoldi", 1,
"Invalid effective capacitance, using total capacitance");
ceff = ctot;
}
// new mvs at ceff
s = ra_get_s(D,tab,r,ceff);
debugPrint(debug_, "arnoldi", 1, "new mvs s = %s",
units_->timeUnit()->asString(s));
}
}
debugPrint(debug_, "arnoldi", 1, "r %s s %s ceff_time %s ceff %s",
units_->resistanceUnit()->asString(r),
units_->timeUnit()->asString(s),
units_->timeUnit()->asString(ceff_time),
units_->capacitanceUnit()->asString(ceff));
// Table delay and linear-driver delay at the final ceff.
tab->table->gateDelay(tab->pvt, tab->in_slew, ceff, pocv_enabled_, df, sf);
t50_sy = delayAsFloat(df);
t50_sr = ra_solve_for_t(1.0/(r*ceff),s,0.5);
for (j=0;j<mod->n;j++) {
rr = delay_work_get_residues(D,j);
pr_solve3(s,mod->order,p,rr,vhi,&thi,0.5,&t50_srmod,vlo,&tlo);
// Model delay corrected by (table delay - linear-driver delay).
delays[j] = t50_srmod + t50_sy - t50_sr;
slews[j] = (tlo-thi)/slew_derate;
}
}
} // namespace