1496 lines
38 KiB
C++
1496 lines
38 KiB
C++
// OpenSTA, Static Timing Analyzer
|
|
// Copyright (c) 2025, Parallax Software, Inc.
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
//
|
|
// The origin of this software must not be misrepresented; you must not
|
|
// claim that you wrote the original software.
|
|
//
|
|
// Altered source versions must be plainly marked as such, and must not be
|
|
// misrepresented as being the original software.
|
|
//
|
|
// This notice may not be removed or altered from any source distribution.
|
|
|
|
// (c) 2018 Nefelus, Inc.
|
|
//
|
|
// Author: W. Scott
|
|
|
|
#include "ArnoldiDelayCalc.hh"
|
|
|
|
#include <cstdio>
|
|
#include <cmath> // abs
|
|
|
|
#include "Report.hh"
|
|
#include "Debug.hh"
|
|
#include "Units.hh"
|
|
#include "Liberty.hh"
|
|
#include "TimingModel.hh"
|
|
#include "TimingArc.hh"
|
|
#include "TableModel.hh"
|
|
#include "PortDirection.hh"
|
|
#include "Network.hh"
|
|
#include "Graph.hh"
|
|
#include "Parasitics.hh"
|
|
#include "Sdc.hh"
|
|
#include "DcalcAnalysisPt.hh"
|
|
#include "DelayCalc.hh"
|
|
#include "ArcDelayCalc.hh"
|
|
#include "LumpedCapDelayCalc.hh"
|
|
#include "GraphDelayCalc.hh"
|
|
#include "Variables.hh"
|
|
#include "Arnoldi.hh"
|
|
#include "ArnoldiReduce.hh"
|
|
|
|
namespace sta {
|
|
|
|
// wireload8 is n^2
|
|
// do not delete arnoldi parasitics
|
|
// handle rspf parasitics?
|
|
|
|
// mv static functions to ArnoldiDelayCalc
|
|
// need slew only lookup for
|
|
// ra_delay
|
|
// ra_get_r
|
|
// ra_get_s
|
|
|
|
using std::string;
|
|
using std::abs;
|
|
using std::vector;
|
|
|
|
struct delay_work;
|
|
struct delay_c;
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
static delay_work *delay_work_create();
|
|
static void
|
|
delay_work_destroy(delay_work *D);
|
|
static double *
|
|
delay_work_get_residues(delay_work *D,
|
|
int term_index);
|
|
|
|
static bool
|
|
tridiagEV(int n,double *d,double *e,double *p,double **v);
|
|
|
|
//////////////////////////////////////////////////////////////
|
|
|
|
// Threshold-derived constants for one transition direction.
// Filled in by ArnoldiDelayCalc::delay_work_set_thresholds().
struct delay_c
{
  double slew_derate; // derate stored together with the thresholds
  double vlo;         // lower slew threshold
  double vhi;         // upper slew threshold
  double vlg;         // log(vhi/vlo)
  double smin;        // first output of ra_calc_c(vlo, vhi)
  double x1;          // second output of ra_calc_c(vlo, vhi)
  double y1;          // third output of ra_calc_c(vlo, vhi)
  double vmid;        // falling convention, should be >= 0.5
};
|
|
|
|
// workspace for pole-residue -> delay calculations
|
|
// delay_work
|
|
// max order is 32
|
|
struct delay_work
|
|
{
|
|
double slew_derate;
|
|
double slew_factor; // (0,1.0] table_slew = slew_factor * full_slew
|
|
delay_c cV[2];
|
|
delay_c *c;
|
|
|
|
double lo_thresh;
|
|
double hi_thresh;
|
|
|
|
int nmax;
|
|
double poles[32]; // 1/tau
|
|
double **resi; // resi[jrec][h] h=0,..order
|
|
double *v[32];
|
|
double *w[32];
|
|
double aa[32];
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
class ArnoldiDelayCalc : public LumpedCapDelayCalc
|
|
{
|
|
public:
|
|
ArnoldiDelayCalc(StaState *sta);
|
|
virtual ~ArnoldiDelayCalc();
|
|
ArcDelayCalc *copy() override;
|
|
const char *name() const override { return "arnoldi"; }
|
|
Parasitic *findParasitic(const Pin *drvr_pin,
|
|
const RiseFall *rf,
|
|
const DcalcAnalysisPt *dcalc_ap) override;
|
|
Parasitic *reduceParasitic(const Parasitic *parasitic_network,
|
|
const Pin *drvr_pin,
|
|
const RiseFall *rf,
|
|
const DcalcAnalysisPt *dcalc_ap) override;
|
|
ArcDcalcResult inputPortDelay(const Pin *port_pin,
|
|
float in_slew,
|
|
const RiseFall *rf,
|
|
const Parasitic *parasitic,
|
|
const LoadPinIndexMap &load_pin_index_map,
|
|
const DcalcAnalysisPt *dcalc_ap) override;
|
|
ArcDcalcResult gateDelay(const Pin *drvr_pin,
|
|
const TimingArc *arc,
|
|
const Slew &in_slew,
|
|
// Pass in load_cap or parasitic.
|
|
float load_cap,
|
|
const Parasitic *parasitic,
|
|
const LoadPinIndexMap &load_pin_index_map,
|
|
const DcalcAnalysisPt *dcalc_ap) override;
|
|
string reportGateDelay(const Pin *drvr_pin,
|
|
const TimingArc *arc,
|
|
const Slew &in_slew,
|
|
float load_cap,
|
|
const Parasitic *parasitic,
|
|
const LoadPinIndexMap &load_pin_index_map,
|
|
const DcalcAnalysisPt *dcalc_ap,
|
|
int digits) override;
|
|
void finishDrvrPin() override;
|
|
void delay_work_set_thresholds(delay_work *D,
|
|
double lo,
|
|
double hi,
|
|
bool rising,
|
|
double derate);
|
|
|
|
private:
|
|
ArcDcalcResult gateDelaySlew(const LibertyCell *drvr_cell,
|
|
const TimingArc *arc,
|
|
const GateTableModel *table_model,
|
|
const Slew &in_slew,
|
|
const LoadPinIndexMap &load_pin_index_map,
|
|
const Pvt *pvt);
|
|
void ar1_ceff_delay(delay_work *D,
|
|
timing_table *tab,
|
|
arnoldi1 *mod,
|
|
double *delays,
|
|
double *slews);
|
|
double ra_rdelay_1(timing_table *tab,
|
|
double ctot);
|
|
double ra_get_r(delay_work *D,
|
|
timing_table *tab,
|
|
double rdelay,
|
|
double ctot);
|
|
double ra_get_s(delay_work *D,
|
|
timing_table *tab,
|
|
double r,
|
|
double c);
|
|
void ra_solve_for_s(delay_work *D,
|
|
double p,
|
|
double tlohi,
|
|
double &s);
|
|
// from poles and residues, solve for t20,t50,t80
|
|
void pr_solve1(double s,
|
|
int order,
|
|
double *p,
|
|
double *rr,
|
|
double v1,
|
|
double *t1);
|
|
void pr_solve3(double s,
|
|
int order,
|
|
double *p,
|
|
double *rr,
|
|
double vhi,
|
|
double *thi,
|
|
double vmid,
|
|
double *tmid,
|
|
double vlo,
|
|
double *tlo);
|
|
|
|
//
|
|
// routines for linear drive model and ceff
|
|
//
|
|
double pr_ceff(double s,
|
|
double rdrive,
|
|
int order,
|
|
double *p,
|
|
double *rr,
|
|
double ceff_time);
|
|
double ra_solve_for_t(double p,
|
|
double s,
|
|
double v);
|
|
void ra_solve_for_pt(double ps,
|
|
double v,
|
|
double *pt,
|
|
double *d);
|
|
void ra_calc_c(double lo,
|
|
double hi,
|
|
double *c_smin,
|
|
double *c_x1,
|
|
double *c_y1);
|
|
|
|
rcmodel *rcmodel_;
|
|
int _pinNmax;
|
|
double *_delayV;
|
|
double *_slewV;
|
|
int pin_n_;
|
|
ArnoldiReduce *reduce_;
|
|
delay_work *delay_work_;
|
|
vector<rcmodel*> unsaved_parasitics_;
|
|
bool pocv_enabled_;
|
|
};
|
|
|
|
// Factory entry point: builds an arnoldi delay calculator on the heap and
// hands it back through the generic ArcDelayCalc interface.
ArcDelayCalc *
makeArnoldiDelayCalc(StaState *sta)
{
  ArcDelayCalc *calc = new ArnoldiDelayCalc(sta);
  return calc;
}
|
|
|
|
// Creates the reducer and the pole/residue workspace, and allocates the
// per-pin delay/slew buffers with room for 1024 pins.
ArnoldiDelayCalc::ArnoldiDelayCalc(StaState *sta) :
  LumpedCapDelayCalc(sta),
  reduce_(new ArnoldiReduce(sta)),
  delay_work_(delay_work_create())
{
  _pinNmax = 1024;
  const size_t buffer_bytes = _pinNmax * sizeof(double);
  _delayV = static_cast<double*>(malloc(buffer_bytes));
  _slewV = static_cast<double*>(malloc(buffer_bytes));
}
|
|
|
|
// Returns a new calculator constructed from this object through the
// StaState* constructor; the copy allocates its own scratch buffers.
ArcDelayCalc *
ArnoldiDelayCalc::copy()
{
  ArcDelayCalc *clone = new ArnoldiDelayCalc(this);
  return clone;
}
|
|
|
|
// Releases everything the constructor acquired.
ArnoldiDelayCalc::~ArnoldiDelayCalc()
{
  // Pole/residue workspace from delay_work_create().
  delay_work_destroy(delay_work_);

  // Per-pin buffers come from malloc/realloc, so they go back through free.
  free(_delayV);
  free(_slewV);

  delete reduce_;
}
|
|
|
|
// Finds (or builds from the wireload model) the parasitic network for
// drvr_pin and reduces it to an arnoldi model.
// Returns nullptr when the pin has an explicit wire cap, is internal, or no
// network is available.  The returned model is owned by this object and is
// deleted by finishDrvrPin().
Parasitic *
ArnoldiDelayCalc::findParasitic(const Pin *drvr_pin,
                                const RiseFall *drvr_rf,
                                const DcalcAnalysisPt *dcalc_ap)
{
  const Corner *corner = dcalc_ap->corner();
  // set_load net has precedence over parasitics.
  if (sdc_->drvrPinHasWireCap(drvr_pin, corner)
      || network_->direction(drvr_pin)->isInternal())
    return nullptr;

  const ParasiticAnalysisPt *parasitic_ap = dcalc_ap->parasiticAnalysisPt();
  Parasitic *network =
    parasitics_->findParasiticNetwork(drvr_pin, parasitic_ap);
  const MinMax *min_max = dcalc_ap->constraintMinMax();

  if (network == nullptr) {
    // No stored network: build one from the wireload model, if there is one.
    Wireload *wireload = sdc_->wireload(min_max);
    if (wireload) {
      float pin_cap, wire_cap, fanout;
      bool has_wire_cap;
      graph_delay_calc_->netCaps(drvr_pin, drvr_rf, dcalc_ap,
                                 pin_cap, wire_cap, fanout, has_wire_cap);
      network = parasitics_->makeWireloadNetwork(drvr_pin, wireload,
                                                 fanout, min_max,
                                                 parasitic_ap);
    }
  }

  if (network == nullptr)
    return nullptr;

  rcmodel *reduced = reduce_->reduceToArnoldi(network, drvr_pin,
                                              parasitic_ap->couplingCapFactor(),
                                              drvr_rf, corner, min_max,
                                              parasitic_ap);
  // Arnoldi parasitics are their own class that are not saved in the parasitic db.
  unsaved_parasitics_.push_back(reduced);
  return reduced;
}
|
|
|
|
// Reduction on request is not supported here; every argument is ignored.
Parasitic *
ArnoldiDelayCalc::reduceParasitic(const Parasitic *,
                                  const Pin *,
                                  const RiseFall *,
                                  const DcalcAnalysisPt *)
{
  // Decline because reduced arnoldi parasitics are not stored in the parasitics db.
  Parasitic *declined = nullptr;
  return declined;
}
|
|
|
|
void
|
|
ArnoldiDelayCalc::finishDrvrPin()
|
|
{
|
|
for (auto parasitic : unsaved_parasitics_)
|
|
delete parasitic;
|
|
unsaved_parasitics_.clear();
|
|
}
|
|
|
|
// Delay/slew seen by the loads of an input port driven with slew in_slew.
//
// Without a parasitic the result comes from makeResult() with zero gate
// delay.  With an arnoldi parasitic each load gets
//   wire delay = ln(2) * elmore(load)
//   load slew  = in_slew + log(vhi/vlo) * elmore(load) / slew_derate
// using the slew thresholds of the default liberty library.
//
// The driver entry (_delayV[0] = 0, _slewV[0] = in_slew) is reported as the
// gate delay and driver slew in both cases, matching the no-parasitic
// branch and gateDelaySlew().
ArcDcalcResult
ArnoldiDelayCalc::inputPortDelay(const Pin *,
                                 float in_slew,
                                 const RiseFall *rf,
                                 const Parasitic *parasitic,
                                 const LoadPinIndexMap &load_pin_index_map,
                                 const DcalcAnalysisPt *)
{
  rcmodel_ = nullptr;
  _delayV[0] = 0.0;
  _slewV[0] = in_slew;

  LibertyLibrary *drvr_library = network_->defaultLibertyLibrary();
  if (parasitic == nullptr)
    return makeResult(drvr_library, rf, 0.0, in_slew, load_pin_index_map);

  ArcDcalcResult dcalc_result(load_pin_index_map.size());
  rcmodel_ = reinterpret_cast<rcmodel*>(const_cast<Parasitic*>(parasitic));
  pin_n_ = rcmodel_->n;
  // Grow the per-pin buffers the same way gateDelaySlew() does; realloc
  // keeps the driver entry written above.
  if (pin_n_ >= _pinNmax) {
    _pinNmax *= 2;
    if (pin_n_ >= _pinNmax) _pinNmax += pin_n_;
    _delayV = (double*)realloc(_delayV,_pinNmax * sizeof(double));
    _slewV = (double*)realloc(_slewV,_pinNmax * sizeof(double));
  }

  double slew_derate = drvr_library->slewDerateFromLibrary();
  double lo_thresh = drvr_library->slewLowerThreshold(rf);
  double hi_thresh = drvr_library->slewUpperThreshold(rf);
  bool rising = (rf == RiseFall::rise());
  delay_work_set_thresholds(delay_work_, lo_thresh, hi_thresh, rising, slew_derate);
  delay_c *c = delay_work_->c;
  double c_log = c->vlg;

  dcalc_result.setGateDelay(_delayV[0]);
  dcalc_result.setDrvrSlew(_slewV[0]);

  // Index 0 is the driver; loads start at 1.
  for (int j=1;j<pin_n_;j++) {
    double elmore = rcmodel_->elmore(j);
    double wire_delay = 0.6931472*elmore;
    double load_slew = in_slew + c_log*elmore/slew_derate;
    _delayV[j] = wire_delay;
    _slewV[j] = load_slew;

    const Pin *load_pin = rcmodel_->pinV[j];
    auto load_idx_itr = load_pin_index_map.find(load_pin);
    if (load_idx_itr != load_pin_index_map.end()) {
      size_t load_idx = load_idx_itr->second;
      dcalc_result.setWireDelay(load_idx, wire_delay);
      dcalc_result.setLoadSlew(load_idx, load_slew);
    }
  }
  return dcalc_result;
}
|
|
|
|
// Gate delay for one timing arc.  Uses the arnoldi solver when the
// parasitic is an rcmodel and the arc has a gate table model; otherwise
// defers to LumpedCapDelayCalc::gateDelay().
ArcDcalcResult
ArnoldiDelayCalc::gateDelay(const Pin *drvr_pin,
                            const TimingArc *arc,
                            const Slew &in_slew,
                            float load_cap,
                            const Parasitic *parasitic,
                            const LoadPinIndexMap &load_pin_index_map,
                            const DcalcAnalysisPt *dcalc_ap)
{
  const LibertyCell *drvr_cell = arc->from()->libertyCell();
  // dynamic_cast yields nullptr when the parasitic is not an arnoldi model.
  ConcreteParasitic *concrete =
    reinterpret_cast<ConcreteParasitic*>(const_cast<Parasitic*>(parasitic));
  rcmodel_ = dynamic_cast<rcmodel*>(concrete);
  pocv_enabled_ = variables_->pocvEnabled();
  GateTableModel *table_model = arc->gateTableModel(dcalc_ap);

  const bool use_arnoldi = table_model && rcmodel_;
  if (!use_arnoldi)
    return LumpedCapDelayCalc::gateDelay(drvr_pin, arc, in_slew, load_cap,
                                         parasitic, load_pin_index_map, dcalc_ap);

  const Pvt *pvt = pinPvt(drvr_pin, dcalc_ap);
  return gateDelaySlew(drvr_cell, arc, table_model, in_slew, load_pin_index_map, pvt);
}
|
|
|
|
// Arnoldi gate delay: runs ar1_ceff_delay() on the current rcmodel_ and
// copies the driver entry and the per-load wire delays/slews into the
// result.  Expects rcmodel_ to have been set by gateDelay().
ArcDcalcResult
ArnoldiDelayCalc::gateDelaySlew(const LibertyCell *drvr_cell,
                                const TimingArc *arc,
                                const GateTableModel *table_model,
                                const Slew &in_slew,
                                const LoadPinIndexMap &load_pin_index_map,
                                const Pvt *pvt)
{
  // Make sure the per-pin buffers can hold every pin of the model.
  pin_n_ = rcmodel_->n;
  if (pin_n_ >= _pinNmax) {
    _pinNmax *= 2;
    if (pin_n_ >= _pinNmax) _pinNmax += pin_n_;
    _delayV = (double*)realloc(_delayV,_pinNmax * sizeof(double));
    _slewV = (double*)realloc(_slewV,_pinNmax * sizeof(double));
  }

  ArcDcalcResult dcalc_result(load_pin_index_map.size());
  pin_n_ = rcmodel_->n;
  const RiseFall *rf = arc->toEdge()->asRiseFall();
  if (!(table_model && rf))
    return dcalc_result;

  const LibertyLibrary *drvr_library = drvr_cell->libertyLibrary();
  const double slew_derate = drvr_library->slewDerateFromLibrary();
  const double lo_thresh = drvr_library->slewLowerThreshold(rf);
  const double hi_thresh = drvr_library->slewUpperThreshold(rf);
  const bool rising = (rf == RiseFall::rise());
  delay_work_set_thresholds(delay_work_, lo_thresh, hi_thresh, rising,
                            slew_derate);

  if (rcmodel_->order > 0) {
    timing_table tab;
    tab.table = table_model;
    tab.cell = drvr_cell;
    tab.pvt = pvt;
    tab.in_slew = delayAsFloat(in_slew);
    ar1_ceff_delay(delay_work_, &tab, rcmodel_,
                   _delayV, _slewV);
  }
  // Entry 0 of the buffers is the driver.
  dcalc_result.setGateDelay(_delayV[0]);
  dcalc_result.setDrvrSlew(_slewV[0]);

  if (rcmodel_) {
    const int pin_count = rcmodel_->n;
    for (int pin_idx = 0; pin_idx < pin_count; pin_idx++) {
      const Pin *load_pin = rcmodel_->pinV[pin_idx];
      auto found = load_pin_index_map.find(load_pin);
      if (found == load_pin_index_map.end())
        continue;
      size_t load_idx = found->second;
      // Wire delay is measured relative to the driver entry.
      ArcDelay wire_delay = _delayV[pin_idx] - _delayV[0];
      Slew load_slew = _slewV[pin_idx];
      thresholdAdjust(load_pin, drvr_library, rf, wire_delay, load_slew);
      dcalc_result.setWireDelay(load_idx, wire_delay);
      dcalc_result.setLoadSlew(load_idx, load_slew);
    }
  }
  return dcalc_result;
}
|
|
|
|
// There is no arnoldi-specific report; the lumped-capacitance report is
// returned unchanged.
string
ArnoldiDelayCalc::reportGateDelay(const Pin *drvr_pin,
                                  const TimingArc *arc,
                                  const Slew &in_slew,
                                  float load_cap,
                                  const Parasitic *parasitic,
                                  const LoadPinIndexMap &load_pin_index_map,
                                  const DcalcAnalysisPt *dcalc_ap,
                                  int digits)
{
  string report = LumpedCapDelayCalc::reportGateDelay(drvr_pin, arc, in_slew,
                                                      load_cap, parasitic,
                                                      load_pin_index_map,
                                                      dcalc_ap, digits);
  return report;
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// arnoldi1.cpp
|
|
//
|
|
|
|
// Frees the two malloc-style allocations held through d and U.
arnoldi1::~arnoldi1()
{
  free(d);
  free(U);
}
|
|
|
|
double
|
|
arnoldi1::elmore(int k)
|
|
{
|
|
if (order==0) return 0.0;
|
|
if (order==1) return d[0];
|
|
double sqctot = 1.0/U[0][0];
|
|
double tau = d[0] + e[0]*U[1][k]*sqctot;
|
|
return tau;
|
|
}
|
|
|
|
// Allocates a workspace sized for 256 terminals and up to 32 poles.
// resi, v and w are each one contiguous allocation with row pointers into
// it.  Thresholds and both delay_c entries start at zero; poles[] and aa[]
// are left unset.
delay_work *
delay_work_create()
{
  delay_work *D = (delay_work*)malloc(sizeof(delay_work));
  D->nmax = 256;

  // resi: nmax rows of 32 doubles.
  D->resi = (double**)malloc(D->nmax*sizeof(double*));
  D->resi[0] = (double*)malloc(D->nmax*32*sizeof(double));
  for (int row = 1; row < D->nmax; row++)
    D->resi[row] = D->resi[0] + row*32;

  // v: 32 rows of 32 doubles.
  D->v[0] = (double*)malloc(32*32*sizeof(double));
  for (int row = 1; row < 32; row++)
    D->v[row] = D->v[0] + row*32;

  // w: 32 rows of nmax doubles.
  D->w[0] = (double*)malloc(32*D->nmax*sizeof(double));
  for (int row = 1; row < 32; row++)
    D->w[row] = D->w[0] + row*D->nmax;

  D->lo_thresh = 0.0;
  D->hi_thresh = 0.0;
  D->slew_derate = 0.0;
  D->slew_factor = 0.0;

  const delay_c zeroed = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  D->cV[0] = zeroed;
  D->cV[1] = zeroed;
  D->c = D->cV;
  return D;
}
|
|
|
|
static void
|
|
delay_work_destroy(delay_work *D)
|
|
{
|
|
free(D->resi[0]);
|
|
free(D->resi);
|
|
free(D->v[0]);
|
|
free(D->w[0]);
|
|
free(D);
|
|
}
|
|
|
|
static void
|
|
delay_work_alloc(delay_work *D,int n)
|
|
{
|
|
if (n<=D->nmax) return;
|
|
free(D->w[0]);
|
|
free(D->resi[0]);
|
|
free(D->resi);
|
|
D->nmax *= 2;
|
|
if (n > D->nmax) D->nmax = n;
|
|
int j;
|
|
D->resi = (double**)malloc(D->nmax*sizeof(double*));
|
|
D->resi[0] = (double*)malloc(D->nmax*32*sizeof(double));
|
|
for (j=1;j<D->nmax;j++) D->resi[j] = D->resi[0] + j*32;
|
|
D->w[0] = (double*)malloc(32*D->nmax*sizeof(double));
|
|
for (j=1;j<32;j++) D->w[j] = D->w[0] + j*D->nmax;
|
|
}
|
|
|
|
void
|
|
ArnoldiDelayCalc::delay_work_set_thresholds(delay_work *D,
|
|
double lo,
|
|
double hi,
|
|
bool rising,
|
|
double derate)
|
|
{
|
|
double mid = 0.5; // 0.0:1.0
|
|
int i = rising?1:0;
|
|
D->c = D->cV+ i;
|
|
// WRONG
|
|
bool changed = (lo != D->c->vlo || hi != D->c->vhi);
|
|
if (changed) {
|
|
if (!(lo>0.01 && hi<0.99)) {
|
|
lo = 0.1;
|
|
hi = 0.9;
|
|
derate = 0.8;
|
|
}
|
|
D->c->slew_derate = derate;
|
|
D->c->vlo = lo;
|
|
D->c->vhi = hi;
|
|
D->c->vmid = mid;
|
|
D->c->vlg = log(hi/lo);
|
|
ra_calc_c(lo,hi,
|
|
&(D->c->smin), &(D->c->x1),&(D->c->y1));
|
|
}
|
|
D->lo_thresh = D->c->vlo;
|
|
D->hi_thresh = D->c->vhi;
|
|
D->slew_derate = derate;
|
|
double measured_swing = D->c->vhi - D->c->vlo;
|
|
double reported_swing = measured_swing/D->slew_derate;
|
|
D->slew_factor = reported_swing;
|
|
}
|
|
|
|
static double *
|
|
delay_work_get_residues(delay_work *D,int term_index)
|
|
{
|
|
return D->resi[term_index];
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////
|
|
//
|
|
// calculate_poles_res
|
|
//
|
|
|
|
// Computes the poles and the per-terminal residues of this model when it
// is driven through resistance rdrive, writing them to D->poles and
// D->resi.  D->v, D->w and D->aa are used as scratch.
void arnoldi1::calculate_poles_res(delay_work *D,double rdrive)
{
  if (n > D->nmax) delay_work_alloc(D,n);
  // Fetch the work arrays only after a possible reallocation.
  double *poles = D->poles;
  double **eigvec = D->v;
  double **proj = D->w;
  double *gain = D->aa;
  double **residues = D->resi;

  // Add the driver's contribution to d[0] for the eigen solve only, then
  // put the original value back.
  const double d0_saved = d[0];
  d[0] += rdrive*ctot;
  if (!tridiagEV(order,d,e,poles,eigvec))
    criticalError(204, "arnoldi delay calc failed.");
  d[0] = d0_saved;

  // Eigenvalues are time constants; clamp and invert them into poles.
  for (int h = 0; h < order; h++) {
    if (poles[h]<1e-14) // .01ps
      poles[h]=1e-14;
    poles[h] = 1.0/poles[h];
  }

  // proj[h][k] = sum_j eigvec[h][j]*U[j][k], gain[h] = sqc*eigvec[h][0]
  for (int h = 0; h < order; h++) {
    for (int k = 0; k < n; k++) {
      double sum = 0.0;
      for (int j = 0; j < order; j++)
        sum += eigvec[h][j]*U[j][k];
      proj[h][k] = sum;
    }
    gain[h] = sqc*eigvec[h][0];
  }

  for (int term = 0; term < n; term++) {
    for (int h = 0; h < order; h++)
      residues[term][h] = gain[h]*proj[h][term];
  }
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// tridiag.cpp
|
|
//
|
|
|
|
//
|
|
// tridiagonal eigenvalues and eigenvectors
|
|
// assuming all eigenvalues are positive
|
|
//
|
|
// tridiagEV(int n,double *d,double *e,double *p,double **v)
|
|
// d[0]..d[n-1] diagonal elements
|
|
// e[0]..e[n-2] off-diagonal elements
|
|
// p[0],..p[n-1] the eigenvalues
|
|
// v[0],..v[n-1] the eigenvectors
|
|
// M*v[j] = p[j]*v[j]
|
|
//
|
|
// (M*v[j])[0] = d[0]*v[j][0]+e[0]*v[j][1]
|
|
// (M*v[j])[k] = d[k]*v[j][k]+e[k-1]*v[j][k-1]+e[k]*v[j][k+1] 0<k<n-1
|
|
// (M*v[j])[n-1] = d[n-1]*v[j][n-1]+e[n-2]*v[j][n-2]
|
|
//
|
|
// Eigenvalues and eigenvectors of a symmetric tridiagonal matrix.
//
//   n    matrix size, at most 32
//   din  diagonal elements din[0..n-1] (not modified)
//   ein  off-diagonal elements ein[0..n-2] (not modified)
//   d    receives the eigenvalues, sorted in descending order
//   v    receives the eigenvectors as rows v[0..n-1][0..n-1]
//
// Returns false, without touching d or v, when n exceeds the 32-entry
// work array; also returns false when an eigenvalue has not converged
// after 20 iterations.
static bool
tridiagEV(int n,double *din,double *ein,double *d,double **v)
{
  // Must be checked before v is initialized: the callers' v/d storage and
  // the local e[] only have room for 32 entries.
  if (n>32)
    return false;

  int j,k;
  for (j=0;j<n;j++) for (k=0;k<n;k++) v[j][k]=0.0;
  for (j=0;j<n;j++) v[j][j] = 1.0;

  int m,h,iter,i;
  double s,r,p,g,f,c,b;
  double e[32];

  for (i=0;i<n;i++) d[i] = din[i];

  // Shift the off-diagonal up by one so e[i] couples rows i-1 and i.
  for (i=0;i<n-1;i++) e[i+1] = ein[i];
  e[0] = 0.0;
  for (h=n-1;h>=1;h--) {
    iter = 0;
    while (std::abs(e[h])>1e-18) { // 1e-6ps
      m=0;
      if (m != h) {
        if (iter++ == 20)
          return false;
        g = (d[h-1]-d[h])/(2.0*e[h]);
        r = std::sqrt(1.0+g*g); // watch overflow
        g = d[m]-d[h]+e[h]/(g + (g<0?-r:r));
        s = c = 1.0;
        p = 0.0;
        for (i=m+1;i<=h;i++) {
          f = s*e[i];
          b = c*e[i];
          e[i-1] = r = std::sqrt(f*f+g*g); // watch
          if (r == 0.0) {
            d[i-1] -= p;
            e[m] = 0.0;
            break;
          }
          s = f/r;
          c = g/r;
          g = d[i-1]-p;
          r = (d[i]-g)*s+2.0*c*b;
          d[i-1] = g + (p=s*r);
          g = c*r-b;
          // Apply the same rotation to the eigenvector rows.
          for (k=0;k<n;k++) {
            f = v[i-1][k];
            v[i-1][k] = s*v[i][k]+c*f;
            v[i][k] = c*v[i][k]-s*f;
          }
        }
        // The sweep was cut short by the break above; retry.
        if (r == 0.0 && i <= h) continue;
        d[h] -= p;
        e[h] = g;
        e[m] = 0.0;
      }
    }
  }

  // Selection sort: largest eigenvalue first, eigenvector rows follow.
  for (i=0;i<n-1;i++) {
    k = i;
    p = d[k];
    for (j=i+1;j<n;j++)
      if (d[j] > p) { k=j; p=d[k]; }
    if (k != i) {
      d[k] = d[i];
      d[i] = p;
      for (j=0;j<n;j++) {
        p = v[i][j];
        v[i][j] = v[k][j];
        v[k][j] = p;
      }
    }
  }

  return true;
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
// prsolve.cpp
|
|
|
|
// get a waveform point
|
|
// Evaluates the pole/residue waveform at time t for an input ramp of
// duration s and stores the value in *va.
//   p[h], rr[h]  pole and residue of term h, h = 0..order-1
static void
pr_get_v(double t, double s, int order, double *p, double *rr, double *va)
{
  const bool in_ramp = t<s;
  double total = 0.0;
  for (int h = 0; h < order; h++) {
    const double pt = p[h]*t;
    const double ps = p[h]*s;
    double term;
    if (in_ramp)
      term = 1.0-t/s + (1.0-std::exp(-pt))/ps;
    else
      term = std::exp(ps-pt)*(1.0-std::exp(-ps))/ps;
    total += rr[h]*term;
  }
  *va = total;
}
|
|
|
|
// Same waveform as pr_get_v, additionally returning a derivative term in
// *dva (used as the Newton slope by solve_t_bracketed).
static void
get_dv(double t, double s, int order, double *p, double *rr,
       double *va, double *dva)
{
  const bool in_ramp = t<s;
  double value = 0.0;
  double slope = 0.0;
  for (int h = 0; h < order; h++) {
    const double pole = p[h];
    const double pt = pole*t;
    const double ps = pole*s;
    double f, df;
    if (in_ramp) {
      const double xtmp = (1.0-std::exp(-pt))/ps;
      f = 1.0-t/s + xtmp;
      df = -pole*xtmp;
    } else {
      f = std::exp(ps-pt)*(1.0-std::exp(-ps))/ps;
      df = -pole*f;
    }
    value += rr[h]*f;
    slope += rr[h]*df;
  }
  *va = value;
  *dva = slope;
}
|
|
|
|
static double
|
|
solve_t_bracketed(double s,int order,double *p,double *rr,
|
|
double val,double x1,double x2,double v1,double v2)
|
|
{
|
|
int j;
|
|
double df,dx,dxold,f,f2,f1;
|
|
double temp,xh,xl,rts;
|
|
double xacc = .001e-12; // .001ps
|
|
f1 = v1-val;
|
|
f2 = v2-val;
|
|
if (f1==0.0) return x1;
|
|
if (f2==0.0) return x2;
|
|
rts = (f1*x2-f2*x1)/(f1-f2);
|
|
if (f1<f2) {
|
|
xl = x1;
|
|
xh = x2;
|
|
if (0.0<f1) return x1;
|
|
if (f2<0.0) return x2;
|
|
} else {
|
|
xl = x2;
|
|
xh = x1;
|
|
if (0.0<f2) return x2;
|
|
if (f1<0.0) return x1;
|
|
}
|
|
dxold = abs(x2-x1);
|
|
dx = dxold;
|
|
get_dv(rts,s,order,p,rr,&f,&df);
|
|
f -= val;
|
|
double flast = 0.0;
|
|
for (j=1;j<10;j++) {
|
|
if ((((rts-xh)*df-f)*((rts-xl)*df-f) >= 0.0)
|
|
|| (abs(2.0*f) > abs(dxold*df))) {
|
|
dxold = dx;
|
|
dx = 0.5*(xh-xl);
|
|
if (flast*f >0.0) {
|
|
// 2 successive bisections in same direction,
|
|
// accelerate
|
|
if (f<0.0) dx = 0.9348*(xh-xl);
|
|
else dx = 0.0625*(xh-xl);
|
|
}
|
|
flast = f;
|
|
rts = xl+dx;
|
|
if (xl == rts) {
|
|
return rts;
|
|
}
|
|
} else {
|
|
dxold = dx;
|
|
dx = f/df;
|
|
flast = 0.0;
|
|
temp = rts;
|
|
rts -= dx;
|
|
if (temp == rts) {
|
|
return rts;
|
|
}
|
|
}
|
|
if (abs(dx) < xacc) {
|
|
return rts;
|
|
}
|
|
get_dv(rts,s,order,p,rr,&f,&df); f -= val;
|
|
if (f<0.0)
|
|
xl = rts;
|
|
else
|
|
xh = rts;
|
|
}
|
|
if (abs(f)<1e-6) // 1uV
|
|
return rts;
|
|
return 0.5*(xl+xh);
|
|
}
|
|
|
|
void
|
|
ArnoldiDelayCalc::pr_solve1(double s,
|
|
int order,
|
|
double *p,
|
|
double *rr,
|
|
double v1,
|
|
double *t1)
|
|
{
|
|
double tmin = 0.0,tmax = 0.0,vmin = 0.0,vmax = 0.0;
|
|
int h, h0 = 0;
|
|
while (order>1
|
|
&& rr[order-1]<1e-8 // 1e-8V
|
|
&& rr[order-1]>-1e-8)
|
|
order--;
|
|
if (rr[0]<0.5) {
|
|
for (h=1;h<order;h++) if (rr[h]>0.3 && rr[h]>rr[0]) { h0 = h; break; }
|
|
}
|
|
double p0 = p[h0];
|
|
double ps,vs,ta,va;
|
|
vs = 0.0;
|
|
for (h=0;h<order;h++) {
|
|
ps = p[h]*s;
|
|
vs += rr[h]*(1-exp(-ps))/ps;
|
|
}
|
|
if (vs<v1) {
|
|
// s dominates
|
|
ta = 0.5*(1+v1)*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
if (va<v1) {
|
|
tmax = ta; vmax = va;
|
|
ta = v1*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
if (va<v1) {
|
|
// ignoring a typical error at drive node, that comes
|
|
// from slight inaccuracies in rr
|
|
if (!(rr[order-1]>1.0 && p[order-1]>500.0 && va>v1-0.002))
|
|
debugPrint(debug_, "arnoldi", 1, "err, pr_solve1, va<v1");
|
|
}
|
|
tmin = ta; vmin = va;
|
|
} else {
|
|
tmin = ta; vmin = va;
|
|
ta = s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
while (va>v1) {
|
|
tmin = ta; vmin = va;
|
|
ta *= 2.0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
}
|
|
if (va>v1)
|
|
debugPrint(debug_, "arnoldi", 1, "err, pr_solve1, va>v1");
|
|
tmax = ta; vmax = va;
|
|
}
|
|
} else {
|
|
// s is irrelevant
|
|
ta = s; va = vs;
|
|
while (va >= v1) {
|
|
tmin = ta;
|
|
vmin = va;
|
|
ta += 1.0/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
}
|
|
tmax = ta; vmax = va;
|
|
}
|
|
*t1 = solve_t_bracketed(s,order,p,rr,v1,tmin,tmax,vmin,vmax);
|
|
}
|
|
|
|
void
|
|
ArnoldiDelayCalc::pr_solve3(double s,
|
|
int order,
|
|
double *p,
|
|
double *rr,
|
|
double vhi,
|
|
double *thi,
|
|
double vmid,
|
|
double *tmid,
|
|
double vlo,
|
|
double *tlo)
|
|
{
|
|
// falling, thi<tmin<tlo
|
|
double tmin2,tmax2,vmin2,vmax2;
|
|
double tmin5,tmax5,vmin5,vmax5;
|
|
double tmin8,tmax8,vmin8,vmax8;
|
|
int h, h0 = 0;
|
|
while (order>1
|
|
&& rr[order-1]<1e-8 // 1e-8V
|
|
&& rr[order-1]>-1e-8)
|
|
order--;
|
|
if (rr[0]<0.5) {
|
|
for (h=1;h<order;h++) if (rr[h]>0.3 && rr[h]>rr[0]) { h0 = h; break; }
|
|
}
|
|
double p0 = p[h0];
|
|
if (p0>10e+9) // 1/10ns
|
|
p0=10e+9;
|
|
double ps,vs,ta,va;
|
|
vs = 0.0;
|
|
for (h=0;h<order;h++) {
|
|
ps = p[h]*s;
|
|
vs += rr[h]*(1-exp(-ps))/ps;
|
|
}
|
|
if (vs<vlo) {
|
|
// s dominates
|
|
tmax8 = s; vmax8 = vs;
|
|
ta = vhi*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
if (va < vmid) {
|
|
tmax2 = tmax5 = tmin8 = ta;
|
|
vmax2 = vmax5 = vmin8 = va;
|
|
ta = vmid*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
if (va>vhi) {
|
|
tmin2 = tmin5 = ta;
|
|
vmin2 = vmin5 = va;
|
|
tmin8 = ta; vmin8 = va;
|
|
if (va<vmid) {
|
|
tmax5 = ta; vmax5 = va;
|
|
} else {
|
|
tmin5 = ta; vmin5 = va;
|
|
}
|
|
} else {
|
|
tmax2 = tmin5 = ta;
|
|
vmax2 = vmin5 = va;
|
|
ta = vlo*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
tmin2 = ta; vmin2 = va;
|
|
}
|
|
} else {
|
|
// rare, s dominates but t=vhi*s is still above vmid
|
|
tmin5 = tmin8 = ta;
|
|
vmin5 = vmin8 = va;
|
|
tmax5 = tmax8;
|
|
vmax5 = vmax8;
|
|
if (va > vhi) {
|
|
tmin2 = tmin5;
|
|
vmin2 = vmin5;
|
|
tmax2 = tmax5;
|
|
vmax2 = tmax5;
|
|
} else {
|
|
tmax2 = tmin5;
|
|
vmax2 = vmin5;
|
|
ta = vlo*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
tmin2 = ta; vmin2 = va;
|
|
}
|
|
}
|
|
} else if (vs<vmid) {
|
|
// not far from s
|
|
tmax2 = tmax5 = tmin8 = s;
|
|
vmax2 = vmax5 = vmin8 = vs;
|
|
ta = s + 1.6/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
while (va>vlo) {
|
|
tmin8 = ta; vmin8 = va;
|
|
ta += 1.0/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
}
|
|
tmax8 = ta; vmax8 = va;
|
|
ta = vmid*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
tmin5 = ta; vmin5 = va;
|
|
if (va>vhi) {
|
|
tmin2 = ta; vmin2 = va;
|
|
} else {
|
|
tmax2 = ta; vmax2 = va;
|
|
ta = vlo*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
tmin2 = ta; vmin2 = va;
|
|
}
|
|
} else if (vs<vhi) {
|
|
tmax2 = tmin5 = tmin8 = s;
|
|
vmax2 = vmin5 = vmin8 = vs;
|
|
ta = vlo*s;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
tmin2 = ta; vmin2 = va;
|
|
ta = s + 0.7/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
while (va>vmid) {
|
|
tmin5 = tmin8 = ta; vmin5 = tmin8 = va;
|
|
ta += 0.7/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
}
|
|
tmax5 = ta; vmax5 = va;
|
|
if (va < vlo) {
|
|
tmax8 = ta; vmax8 = va;
|
|
} else {
|
|
tmin8 = ta; vmin8 = va;
|
|
ta += 1.0/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
while (va>vlo) {
|
|
tmin8 = ta; vmin8 = va;
|
|
ta += 1.0/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
}
|
|
tmax8 = ta; vmax8 = va;
|
|
}
|
|
} else {
|
|
// s is irrelevant
|
|
ta = s; va = vs;
|
|
tmin2 = tmin5 = tmin8 = ta;
|
|
vmin2 = vmin5 = vmin8 = va;
|
|
while (va > vhi) {
|
|
tmin2 = tmin5 = tmin8 = ta;
|
|
vmin2 = vmin5 = vmin8 = va;
|
|
ta += 1.0/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
}
|
|
tmax2 = ta; vmax2 = va;
|
|
if (va < vmid) {
|
|
tmax5 = ta; vmax5 = va;
|
|
} else while (va > vmid) {
|
|
tmin5 = tmin8 = ta;
|
|
vmin5 = vmin8 = va;
|
|
ta += 1.0/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
}
|
|
tmax5 = ta; vmax5 = va;
|
|
if (va < vlo) {
|
|
tmax8 = ta; vmax8 = va;
|
|
} else while (va > vlo) {
|
|
tmin8 = ta;
|
|
vmin8 = va;
|
|
ta += 1.0/p0;
|
|
pr_get_v(ta,s,order,p,rr,&va);
|
|
}
|
|
tmax8 = ta; vmax8 = va;
|
|
}
|
|
|
|
*thi = solve_t_bracketed(s,order,p,rr,vhi,tmin2,tmax2,vmin2,vmax2);
|
|
*tmid= solve_t_bracketed(s,order,p,rr,vmid,tmin5,tmax5,vmin5,vmax5);
|
|
*tlo= solve_t_bracketed(s,order,p,rr,vlo,tmin8,tmax8,vmin8,vmax8);
|
|
}
|
|
|
|
// Integral of f(t)-vin(t) for a single pole p and ramp duration s,
// evaluated up to time t.  Exponentials with an argument beyond 40 are
// treated as zero.
static double
calc_integ(double p,double s,double t)
{
  const auto decay = [](double x) { return (x>40.0) ? 0.0 : std::exp(-x); };
  const double ps = p*s;
  const double pt = p*t;
  double y;
  if (t<=s) {
    y = decay(pt)-1.0+pt;
  } else {
    const double ept = decay(pt-ps);
    const double eps = decay(ps);
    y = ps - (1.0-eps)*ept;
  }
  return y / (ps*p);
}
|
|
|
|
|
|
double
|
|
ArnoldiDelayCalc::pr_ceff(double s,
|
|
double rdrive,
|
|
int order,
|
|
double *p,
|
|
double *rr,
|
|
double ceff_time)
|
|
{
|
|
double integi = 0.0;
|
|
double ceff, v0;
|
|
int j;
|
|
for (j=0;j<order;j++) {
|
|
integi += rr[j]*calc_integ(p[j],s,ceff_time);
|
|
}
|
|
integi /= rdrive;
|
|
pr_get_v(ceff_time,s,order,p,rr,&v0);
|
|
ceff = integi/(1.0-v0);
|
|
return ceff;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////
|
|
|
|
static double
|
|
ra_hinv(double y,
|
|
Debug *debug)
|
|
{
|
|
double x;
|
|
if (y<1.0) {
|
|
x = sqrt(2*y)+0.4*y;
|
|
if (y<1e-4) return x;
|
|
} else {
|
|
x = y+1.0;
|
|
}
|
|
double ex = exp(-x);
|
|
double f = x+ex-1.0-y;
|
|
x += f/(ex-1.0);
|
|
ex = exp(-x);
|
|
f = x+ex-1.0-y;
|
|
x += f/(ex-1.0);
|
|
ex = exp(-x);
|
|
f = x+ex-1.0-y;
|
|
x += f/(ex-1.0);
|
|
ex = exp(-x);
|
|
f = x+ex-1.0-y;
|
|
if (f<-1e-8 || f>1e-8)
|
|
debugPrint(debug, "arnoldi", 1, "y f %g %g", y, f);
|
|
return x;
|
|
}
|
|
|
|
double
|
|
ArnoldiDelayCalc::ra_solve_for_t(double p,
|
|
double s,
|
|
double v)
|
|
{
|
|
double t;
|
|
double ps = p*s;
|
|
if (ps>30.0) {
|
|
t = (1.0+ps*(1.0-v)) / p;
|
|
return t;
|
|
}
|
|
double eps = exp(ps);
|
|
if ((1-ps*v)*eps >= 1.0) {
|
|
t = log((eps-1.0)/(ps*v)) / p;
|
|
} else {
|
|
t = ra_hinv((1-v)*ps, debug_)/p;
|
|
}
|
|
return t;
|
|
}
|
|
|
|
void
|
|
ArnoldiDelayCalc::ra_solve_for_pt(double ps,
|
|
double v,
|
|
double *pt,
|
|
double *d)
|
|
{
|
|
if (ps>30.0) {
|
|
*pt = 1.0+ps*(1.0-v);
|
|
*d = 1.0-v;
|
|
return;
|
|
}
|
|
double eps = exp(ps);
|
|
if ((1-ps*v)*eps >= 1.0) {
|
|
*pt = log((eps-1.0)/(ps*v));
|
|
*d = eps/(eps-1.0) - 1.0/ps;
|
|
} else {
|
|
*pt = ra_hinv((1-v)*ps, debug_);
|
|
*d = (1.0-v)/(*pt - (1-v)*ps);
|
|
}
|
|
}
|
|
|
|
void
|
|
ArnoldiDelayCalc::ra_calc_c(double vlo,
|
|
double vhi,
|
|
double *c_smin,
|
|
double *c_x1,
|
|
double *c_y1)
|
|
{
|
|
double a = log(1.0/vhi);
|
|
*c_smin = a + ra_hinv((1.0-vhi)/vhi - a, debug_);
|
|
double b = log(1.0/vlo);
|
|
double c_s1 = b + ra_hinv((1.0-vlo)/vlo - b, debug_);
|
|
double a1 = (exp(c_s1)-1.0)/c_s1;
|
|
double den = log(a1/vlo) - ra_hinv((1.0-vhi)*c_s1, debug_);
|
|
*c_x1 = (vhi-vlo)/den;
|
|
*c_y1 = c_s1*(*c_x1);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// ceff.cpp
|
|
//
|
|
|
|
void
|
|
ArnoldiDelayCalc::ra_solve_for_s(delay_work *D,
|
|
double p,
|
|
double tlohi,
|
|
double &s)
|
|
{
|
|
delay_c *c = D->c;
|
|
double vhi = c->vhi;
|
|
double vlo = c->vlo;
|
|
// s is 0-100
|
|
// solve f(x,y)=0 with f = x*(ptlo(y/x)-pthi(y/x))-(vhi-vlo)
|
|
// (x=0,y=1)
|
|
// (x=x1,y=y1) c->x1,y1
|
|
// (x=x2,y=y2) x2=(vhi-vlo)/log(vhi/vlo) y2=(c->smin)*x2
|
|
double x1 = c->x1;
|
|
double y1 = c->y1;
|
|
double x2 = (vhi-vlo)/c->vlg;
|
|
double y2 = (c->smin)*x2;
|
|
double ptlo,dlo;
|
|
double pthi,dhi;
|
|
double f,df,x,y;
|
|
x = c->vlg/(p*tlohi);
|
|
|
|
if (x <= x1) {
|
|
y = y1 - 0.5*(x-x1);
|
|
if (y>1.0) y=1.0;
|
|
} else {
|
|
y = y1 - (x-x1)*(0.5 + 8*(x-x1));
|
|
if (y<y2) y=y2;
|
|
}
|
|
|
|
ra_solve_for_pt(p*s,vlo,&ptlo,&dlo);
|
|
ra_solve_for_pt(p*s,vhi,&pthi,&dhi);
|
|
f = (ptlo-pthi)/p - tlohi;
|
|
df = dlo-dhi;
|
|
s = s - f/df;
|
|
if (abs(f)<.001e-12) return; // .001ps
|
|
|
|
ra_solve_for_pt(p*s,vlo,&ptlo,&dlo);
|
|
ra_solve_for_pt(p*s,vhi,&pthi,&dhi);
|
|
f = (ptlo-pthi)/p - tlohi;
|
|
df = dlo-dhi;
|
|
s = s - f/df;
|
|
if (abs(f)<.001e-12) return; // .001ps
|
|
|
|
ra_solve_for_pt(p*s,vlo,&ptlo,&dlo);
|
|
ra_solve_for_pt(p*s,vhi,&pthi,&dhi);
|
|
f = (ptlo-pthi)/p - tlohi;
|
|
df = dlo-dhi;
|
|
s = s - f/df;
|
|
if (abs(f)<.001e-12) return; // .001ps
|
|
|
|
ra_solve_for_pt(p*s,vlo,&ptlo,&dlo);
|
|
ra_solve_for_pt(p*s,vhi,&pthi,&dhi);
|
|
f = (ptlo-pthi)/p - tlohi;
|
|
df = dlo-dhi;
|
|
s = s - f/df;
|
|
if (abs(f)<.001e-12) return; // .001ps
|
|
|
|
ra_solve_for_pt(p*s,vlo,&ptlo,&dlo);
|
|
ra_solve_for_pt(p*s,vhi,&pthi,&dhi);
|
|
f = (ptlo-pthi)/p - tlohi;
|
|
df = dlo-dhi;
|
|
s = s - f/df;
|
|
|
|
if (abs(f)>.5e-12) // .5ps
|
|
debugPrint(debug_, "arnoldi", 1, "ra_solve_for_s p %g tlohi %s err %s",
|
|
p,
|
|
units_->timeUnit()->asString(tlohi),
|
|
units_->timeUnit()->asString(f));
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////
// method 0:
// r = a match to slew to (ctot, limited by cmin,cmax)
// if r>rdelay, lower r
// Now at any ceff (limited)
//   If slew(r,0,ceff) is too big
//     s = s_start(r,ceff), not smaller than Smin
//     accept the pessimistic output slew
//   Else
//     solve for s
|
|
|
|
// Rough translation of ra_get_r(sy_table) used by ar1_ceff_delay.
|
|
double
|
|
ArnoldiDelayCalc::ra_get_r(delay_work *D,
|
|
timing_table *tab,
|
|
double rdelay,
|
|
double ctot)
|
|
{
|
|
// find the maximum r that allows a solution for s of
|
|
// (s,r,ctot)-> output_slew
|
|
// If this maximum is greater than rdelay, use rdelay.
|
|
delay_c *c = D->c;
|
|
double slew_derate = c->slew_derate;
|
|
double c_log = c->vlg;
|
|
float c1;
|
|
double tlohi,r;
|
|
c1 = ctot;
|
|
ArcDelay d1;
|
|
Slew s1;
|
|
tab->table->gateDelay(tab->pvt, tab->in_slew, c1, pocv_enabled_, d1, s1);
|
|
tlohi = slew_derate*delayAsFloat(s1);
|
|
r = tlohi/(c_log*c1);
|
|
if (rdelay>0.0 && r > rdelay)
|
|
r = rdelay;
|
|
return r;
|
|
}
|
|
|
|
double
|
|
ArnoldiDelayCalc::ra_get_s(delay_work *D,
|
|
timing_table *tab,
|
|
double r,
|
|
double c)
|
|
{
|
|
delay_c *con = D->c;
|
|
double slew_derate = con->slew_derate;
|
|
double c_log = con->vlg;
|
|
double c_smin = con->smin;
|
|
double tlohi,smin,s;
|
|
ArcDelay d1;
|
|
Slew s1;
|
|
tab->table->gateDelay(tab->pvt, tab->in_slew, c, pocv_enabled_, d1, s1);
|
|
tlohi = slew_derate*delayAsFloat(s1);
|
|
smin = r*c*c_smin; // c_smin = ra_hinv((1-vhi)/vhi-log(vhi)) + log(vhi);
|
|
if (c_log*r*c >= tlohi) {
|
|
s = smin;
|
|
} else {
|
|
s = smin+0.3*tlohi;
|
|
ra_solve_for_s(D,1.0/(r*c),tlohi,s);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////
// method 1:
// determine the drive resistance from change in delay versus ctot
// find the maximum r that allows a solution for s of
//   (s,r,ctot)-> output_slew
// If this maximum is greater than rdelay, use rdelay.
// calculate s,r,mod -> t50_srmod,
// then t50_srmod+t50_sy-t50_sr
|
|
|
|
double
|
|
ArnoldiDelayCalc::ra_rdelay_1(timing_table *tab,
|
|
double ctot)
|
|
{
|
|
// determine the drive resistance from change in delay versus ctot
|
|
float c1 = ctot;
|
|
float c2 = 0.5*c1;
|
|
if (c1==c2)
|
|
return 0.0;
|
|
ArcDelay d1, d2;
|
|
Slew s1, s2;
|
|
tab->table->gateDelay(tab->pvt, tab->in_slew, c1, pocv_enabled_, d1, s1);
|
|
tab->table->gateDelay(tab->pvt, tab->in_slew, c2, pocv_enabled_, d2, s2);
|
|
double dt50 = delayAsFloat(d1)-delayAsFloat(d2);
|
|
if (dt50 <= 0.0)
|
|
return 0.0;
|
|
double rdelay = dt50/(c1-c2);
|
|
return rdelay;
|
|
}
|
|
|
|
void
|
|
ArnoldiDelayCalc::ar1_ceff_delay(delay_work *D,
|
|
timing_table *tab,
|
|
arnoldi1 *mod,
|
|
double *delays,
|
|
double *slews)
|
|
{
|
|
delay_c *con = D->c;
|
|
double slew_derate = con->slew_derate;
|
|
double vhi = con->vhi;
|
|
double vlo = con->vlo;
|
|
double ctot = mod->ctot;
|
|
double ceff,tlohi,t50_sy,r,s,t50_sr,rdelay;
|
|
ArcDelay df;
|
|
Slew sf;
|
|
|
|
debugPrint(debug_, "arnoldi", 1, "ctot=%s",
|
|
units_->capacitanceUnit()->asString(ctot));
|
|
|
|
rdelay = ra_rdelay_1(tab,ctot);
|
|
if (rdelay == 0.0) {
|
|
rdelay = 1e+3; // 1kohm
|
|
}
|
|
r = rdelay;
|
|
r = ra_get_r(D,tab,rdelay,ctot);
|
|
if (! (r>0.0
|
|
&& r<100e+3)) // 100khom
|
|
rdelay = 1e+3; // 1kohm
|
|
|
|
bool bad = (r<rdelay);
|
|
s = ra_get_s(D,tab,r,ctot);
|
|
if (! (s>0.0
|
|
&& s<100e-9)) // 100ns
|
|
s = 0.5e-9; // .5ns
|
|
|
|
if (debug_->check("arnoldi", 1)) {
|
|
double p = 1.0/(r*ctot);
|
|
double thix,tlox;
|
|
debugPrint(debug_, "arnoldi", 1, "at r=%s s=%s",
|
|
units_->resistanceUnit()->asString(r),
|
|
units_->timeUnit()->asString(s));
|
|
thix = ra_solve_for_t(p,s,vhi);
|
|
tlox = ra_solve_for_t(p,s,vlo);
|
|
tab->table->gateDelay(tab->pvt,tab->in_slew, ctot, pocv_enabled_, df, sf);
|
|
debugPrint(debug_, "arnoldi", 1, "table slew (in_slew %s ctot %s) = %s",
|
|
units_->timeUnit()->asString(tab->in_slew),
|
|
units_->capacitanceUnit()->asString(ctot),
|
|
delayAsString(sf, this));
|
|
tlohi = slew_derate*delayAsFloat(sf);
|
|
debugPrint(debug_, "arnoldi", 1, "tlohi %s %s",
|
|
units_->timeUnit()->asString(tlohi),
|
|
units_->timeUnit()->asString(tlox-thix));
|
|
}
|
|
ceff = ctot;
|
|
tab->table->gateDelay(tab->pvt, tab->in_slew, ceff, pocv_enabled_,
|
|
df, sf);
|
|
t50_sy = delayAsFloat(df);
|
|
t50_sr = ra_solve_for_t(1.0/(r*ceff),s,0.5);
|
|
|
|
// calculate s,r,mod -> t50_srmod,
|
|
// then t50_srmod+t50_sy-t50_sr
|
|
|
|
mod->calculate_poles_res(D,r);
|
|
double *p = D->poles;
|
|
double *rr = delay_work_get_residues(D,0);
|
|
double thi,tlo,t50_srmod;
|
|
pr_solve1(s,mod->order,p,rr,0.5,&t50_srmod);
|
|
|
|
int ceff_it,j;
|
|
double ceff_time=0.0;
|
|
|
|
if (!bad) {
|
|
for (ceff_it=0;ceff_it<3;ceff_it++) {
|
|
|
|
// calculate ceff
|
|
ceff_time = s;
|
|
ceff = pr_ceff(s,r,mod->order,p,rr,ceff_time);
|
|
|
|
if ((ceff-1e-20) < 0.0) { // 1e-8pf
|
|
debugPrint(debug_, "arnoldi", 1,
|
|
"Invalid effective capacitance, using total capacitance");
|
|
ceff = ctot;
|
|
}
|
|
|
|
// new mvs at ceff
|
|
s = ra_get_s(D,tab,r,ceff);
|
|
debugPrint(debug_, "arnoldi", 1, "new mvs s = %s",
|
|
units_->timeUnit()->asString(s));
|
|
}
|
|
}
|
|
debugPrint(debug_, "arnoldi", 1, "r %s s %s ceff_time %s ceff %s",
|
|
units_->resistanceUnit()->asString(r),
|
|
units_->timeUnit()->asString(s),
|
|
units_->timeUnit()->asString(ceff_time),
|
|
units_->capacitanceUnit()->asString(ceff));
|
|
|
|
tab->table->gateDelay(tab->pvt, tab->in_slew, ceff, pocv_enabled_, df, sf);
|
|
t50_sy = delayAsFloat(df);
|
|
t50_sr = ra_solve_for_t(1.0/(r*ceff),s,0.5);
|
|
for (j=0;j<mod->n;j++) {
|
|
rr = delay_work_get_residues(D,j);
|
|
pr_solve3(s,mod->order,p,rr,vhi,&thi,0.5,&t50_srmod,vlo,&tlo);
|
|
delays[j] = t50_srmod + t50_sy - t50_sr;
|
|
slews[j] = (tlo-thi)/slew_derate;
|
|
}
|
|
}
|
|
|
|
} // namespace
|