1 // OpenSTA, Static Timing Analyzer
2 // Copyright (c) 2021, Parallax Software, Inc.
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17 // "Performance Computation for Precharacterized CMOS Gates with RC Loads",
18 // Florentin Dartu, Noel Menezes and Lawrence Pileggi, IEEE Transactions
19 // on Computer-Aided Design of Integrated Circuits and Systems, Vol 15, No 5,
20 // May 1996, pg 544-553.
21 //
22 // The only real change from the paper is that Vl, the measured low
23 // slew voltage is matched instead of y20 in eqn 12.
24
25 #include "DmpCeff.hh"
26
27 #include <algorithm> // abs, min
28 #include <cmath> // sqrt, log
29
30 #include "Report.hh"
31 #include "Debug.hh"
32 #include "Units.hh"
33 #include "TimingArc.hh"
34 #include "TableModel.hh"
35 #include "Liberty.hh"
36 #include "Network.hh"
37 #include "Sdc.hh"
38 #include "Parasitics.hh"
39 #include "DcalcAnalysisPt.hh"
40 #include "ArcDelayCalc.hh"
41
42 namespace sta {
43
44 using std::abs;
45 using std::min;
46 using std::max;
47 using std::sqrt;
48 using std::log;
49 using std::exp;
50
51 // Tolerance (as a scale of value) for driver parameters (Ceff, delta t, t0).
52 static const double driver_param_tol = .01;
53 // Waveform threshold crossing time tolerance (1.0 = 100%).
54 static const double vth_time_tol = .01;
55 // A small number used by luDecomp.
56 static const double tiny_double = 1.0e-20;
57 // Max iterations for findRoot.
58 static const int find_root_max_iter = 20;
59
60 // Indices of Newton-Raphson parameter vector.
61 enum DmpParam { t0, dt, ceff };
62
63 static const char *dmp_param_index_strings[] = {"t0", "dt", "Ceff"};
64
65 // Indices of Newton-Raphson function value vector.
66 enum DmpFunc { y20, y50, ipi };
67
68 static const char *dmp_func_index_strings[] = {"y20", "y50", "Ipi"};
69
70 class DmpError : public Exception
71 {
72 public:
73 DmpError(const char *what);
~DmpError()74 virtual ~DmpError() {}
what() const75 virtual const char *what() const noexcept { return what_; }
76
77 private:
78 const char *what_;
79 };
80
81 static double
82 gateModelRd(const LibertyCell *cell,
83 GateTableModel *gate_model,
84 const RiseFall *rf,
85 double in_slew,
86 double c2,
87 double c1,
88 float related_out_cap,
89 const Pvt *pvt,
90 bool pocv_enabled);
91 static void
92 evalDmpEqnsState(void *state);
93 static void
94 evalVoEqns(void *state,
95 double x,
96 double &y,
97 double &dy);
98 static void
99 evalVlEqns(void *state,
100 double x,
101 double &y,
102 double &dy);
103
104 static double
105 findRoot(void (*func)(void *state, double x, double &y, double &dy),
106 void *state,
107 double x1,
108 double x2,
109 double x_tol,
110 int max_iter);
111 static void
112 newtonRaphson(const int max_iter,
113 double x[],
114 const int n,
115 const double x_tol,
116 // eval(state) is called to fill fvec and fjac.
117 // Returns false if fails.
118 void (*eval)(void *state),
119 void *state,
120 // Temporaries supplied by caller.
121 double *fvec,
122 double **fjac,
123 int *index,
124 double *p,
125 double *scale);
126 static void
127 luSolve(double **a,
128 const int size,
129 const int *index,
130 double b[]);
131 static void
132 luDecomp(double **a,
133 const int size,
134 int *index,
135 double *scale);
136
137 ////////////////////////////////////////////////////////////////
138
139 // Base class for Dartu/Menezes/Pileggi algorithm.
140 // Derived classes handle different cases of zero values in the Pi model.
141 class DmpAlg : public StaState
142 {
143 public:
144 DmpAlg(int nr_order, StaState *sta);
145 virtual ~DmpAlg();
146 virtual const char *name() = 0;
147 // Set driver model and pi model parameters for delay calculation.
148 virtual void init(const LibertyLibrary *library,
149 const LibertyCell *drvr_cell,
150 const Pvt *pvt,
151 const GateTableModel *gate_model,
152 const RiseFall *rf,
153 double rd,
154 double in_slew,
155 float related_out_cap,
156 double c2,
157 double rpi,
158 double c1);
159 virtual void gateDelaySlew(double &delay,
160 double &slew) = 0;
161 virtual void loadDelaySlew(const Pin *load_pin,
162 double elmore,
163 ArcDelay &delay,
164 Slew &slew);
ceff()165 double ceff() { return ceff_; }
166
167 // Given x_ as a vector of input parameters, fill fvec_ with the
168 // equations evaluated at x_ and fjac_ with the jabobian evaluated at x_.
169 virtual void evalDmpEqns() = 0;
170 // Output response to vs(t) ramp driving pi model load.
171 double vo(double t);
172 double dVoDt(double t);
173 // Load responce to driver waveform.
174 double vl(double t);
175 double dVlDt(double t);
vCross()176 double vCross() { return v_cross_; }
177
178 protected:
179 // Find driver parameters t0, delta_t, Ceff.
180 void findDriverParams(double ceff);
181 void gateCapDelaySlew(double cl,
182 double &delay,
183 double &slew);
184 void gateDelays(double ceff,
185 double &t_vth,
186 double &t_vl,
187 double &slew);
188 virtual double dv0dt(double t) = 0;
189 // Partial derivatives of y(t) (jacobian).
190 void dy(double t,
191 double t0,
192 double dt,
193 double cl,
194 double &dydt0,
195 double &dyddt,
196 double &dydcl);
197 double y0dt(double t,
198 double cl);
199 double y0dcl(double t,
200 double cl);
201 void showX();
202 void showFvec();
203 void showJacobian();
204 void findDriverDelaySlew(double &delay,
205 double &slew);
206 double findVoCrossing(double vth);
207 void showVo();
208 double findVlCrossing(double vth);
209 void showVl();
210 void fail(const char *reason);
211
212 // Output response to vs(t) ramp driving capacitive load.
213 double y(double t,
214 double t0,
215 double dt,
216 double cl);
217 // Output response to unit ramp driving capacitive load.
218 double y0(double t,
219 double cl);
220 // Output response to unit ramp driving pi model load.
221 virtual double v0(double t) = 0;
222 // Upper bound on time that vo crosses vh.
223 virtual double voCrossingUpperBound() = 0;
224 // Load responce to driver unit ramp.
225 virtual double vl0(double t) = 0;
226 // Upper bound on time that vl crosses vh.
227 double vlCrossingUpperBound();
228
229 // Inputs to the delay calculator.
230 const LibertyCell *drvr_cell_;
231 const LibertyLibrary *drvr_library_;
232 const Pvt *pvt_;
233 const GateTableModel *gate_model_;
234 double in_slew_;
235 float related_out_cap_;
236 double c2_;
237 double rpi_;
238 double c1_;
239
240 double rd_;
241 // Logic threshold (percentage of supply voltage).
242 double vth_;
243 // Slew lower limit (percentage of supply voltage).
244 double vl_;
245 // Slew upper limit (percentage of supply voltage).
246 double vh_;
247 // Table slews are scaled by slew_derate to get
248 // measured slews from vl to vh.
249 double slew_derate_;
250
251 // Driver parameters calculated by this algorithm.
252 double t0_;
253 double dt_;
254 double ceff_;
255
256 // Driver parameter Newton-Raphson state.
257 int nr_order_;
258 double *x_;
259 double *fvec_;
260 double **fjac_;
261 double *scale_;
262 double *p_;
263 int *index_;
264
265 // Gate slew used to check load delay.
266 double gate_slew_;
267 double vo_delay_;
268 // True if the driver parameters are valid for finding the load delays.
269 bool driver_valid_;
270 // Load rspf elmore delay.
271 double elmore_;
272 double p3_;
273
274 private:
275 virtual double dvl0dt(double t) = 0;
276
277 // Implicit argument passed to evalVoEqns, evalVlEqns.
278 double v_cross_;
279 };
280
DmpAlg(int nr_order,StaState * sta)281 DmpAlg::DmpAlg(int nr_order,
282 StaState *sta):
283 StaState(sta),
284 c2_(0.0),
285 rpi_(0.0),
286 c1_(0.0),
287 nr_order_(nr_order)
288 {
289 x_ = new double[nr_order_];
290 fvec_ = new double[nr_order_];
291 scale_ = new double[nr_order_];
292 p_ = new double[nr_order_];
293 fjac_ = new double*[nr_order_];
294 for (int i = 0; i < nr_order_; i++)
295 fjac_[i] = new double[nr_order_];
296 index_ = new int[nr_order_];
297 }
298
~DmpAlg()299 DmpAlg::~DmpAlg()
300 {
301 delete [] x_;
302 delete [] fvec_;
303 delete [] scale_;
304 delete [] p_;
305 for (int i = 0; i < nr_order_; i++)
306 delete [] fjac_[i];
307 delete [] fjac_;
308 delete [] index_;
309 }
310
311 void
init(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,const GateTableModel * gate_model,const RiseFall * rf,double rd,double in_slew,float related_out_cap,double c2,double rpi,double c1)312 DmpAlg::init(const LibertyLibrary *drvr_library,
313 const LibertyCell *drvr_cell,
314 const Pvt *pvt,
315 const GateTableModel *gate_model,
316 const RiseFall *rf,
317 double rd,
318 double in_slew,
319 float related_out_cap,
320 // Pi model.
321 double c2,
322 double rpi,
323 double c1)
324 {
325 drvr_library_ = drvr_library;
326 drvr_cell_ = drvr_cell;
327 pvt_ = pvt;
328 gate_model_ = gate_model;
329 rd_ = rd;
330 in_slew_ = in_slew;
331 related_out_cap_ = related_out_cap;
332 c2_ = c2;
333 rpi_ = rpi;
334 c1_ = c1;
335 driver_valid_ = false;
336 vth_ = drvr_library->outputThreshold(rf);
337 vl_ = drvr_library->slewLowerThreshold(rf);
338 vh_ = drvr_library->slewUpperThreshold(rf);
339 slew_derate_ = drvr_library->slewDerateFromLibrary();
340 }
341
342 // Find Ceff, delta_t and t0 for the driver.
343 void
findDriverParams(double ceff)344 DmpAlg::findDriverParams(double ceff)
345 {
346 if (nr_order_ == 3)
347 x_[DmpParam::ceff] = ceff;
348 double t_vth, t_vl, slew;
349 gateDelays(ceff, t_vth, t_vl, slew);
350 // Scale slew to 0-100%
351 double dt = slew / (vh_ - vl_);
352 double t0 = t_vth + log(1.0 - vth_) * rd_ * ceff - vth_ * dt;
353 x_[DmpParam::dt] = dt;
354 x_[DmpParam::t0] = t0;
355 newtonRaphson(100, x_, nr_order_, driver_param_tol, evalDmpEqnsState,
356 this, fvec_, fjac_, index_, p_, scale_);
357 t0_ = x_[DmpParam::t0];
358 dt_ = x_[DmpParam::dt];
359 debugPrint(debug_, "dmp_ceff", 3, " t0 = %s dt = %s ceff = %s",
360 units_->timeUnit()->asString(t0_),
361 units_->timeUnit()->asString(dt_),
362 units_->capacitanceUnit()->asString(x_[DmpParam::ceff]));
363 if (debug_->check("dmp_ceff", 4))
364 showVo();
365 }
366
367 static void
evalDmpEqnsState(void * state)368 evalDmpEqnsState(void *state)
369 {
370 DmpAlg *alg = reinterpret_cast<DmpAlg *>(state);
371 alg->evalDmpEqns();
372 }
373
374 void
gateCapDelaySlew(double ceff,double & delay,double & slew)375 DmpAlg::gateCapDelaySlew(double ceff,
376 double &delay,
377 double &slew)
378 {
379 ArcDelay model_delay;
380 Slew model_slew;
381 gate_model_->gateDelay(drvr_cell_, pvt_, in_slew_, ceff, related_out_cap_,
382 pocv_enabled_, model_delay, model_slew);
383 delay = delayAsFloat(model_delay);
384 slew = delayAsFloat(model_slew);
385 }
386
387 void
gateDelays(double ceff,double & t_vth,double & t_vl,double & slew)388 DmpAlg::gateDelays(double ceff,
389 double &t_vth,
390 double &t_vl,
391 double &slew)
392 {
393 double table_slew;
394 gateCapDelaySlew(ceff, t_vth, table_slew);
395 // Convert reported/table slew to measured slew.
396 slew = table_slew * slew_derate_;
397 t_vl = t_vth - slew * (vth_ - vl_) / (vh_ - vl_);
398 }
399
400 double
y(double t,double t0,double dt,double cl)401 DmpAlg::y(double t,
402 double t0,
403 double dt,
404 double cl)
405 {
406 double t1 = t - t0;
407 if (t1 <= 0.0)
408 return 0.0;
409 else if (t1 <= dt)
410 return y0(t1, cl) / dt;
411 else
412 return (y0(t1, cl) - y0(t1 - dt, cl)) / dt;
413 }
414
415 double
y0(double t,double cl)416 DmpAlg::y0(double t,
417 double cl)
418 {
419 return t - rd_ * cl * (1.0 - exp(-t / (rd_ * cl)));
420 }
421
422 void
dy(double t,double t0,double dt,double cl,double & dydt0,double & dyddt,double & dydcl)423 DmpAlg::dy(double t,
424 double t0,
425 double dt,
426 double cl,
427 // Return values.
428 double &dydt0,
429 double &dyddt,
430 double &dydcl)
431 {
432 double t1 = t - t0;
433 if (t1 <= 0.0)
434 dydt0 = dyddt = dydcl = 0.0;
435 else if (t1 <= dt) {
436 dydt0 = -y0dt(t1, cl) / dt;
437 dyddt = -y0(t1, cl) / (dt * dt);
438 dydcl = y0dcl(t1, cl) / dt;
439 }
440 else {
441 dydt0 = -(y0dt(t1, cl) - y0dt(t1 - dt, cl)) / dt;
442 dyddt = -(y0(t1, cl) + y0(t1 - dt, cl)) / (dt * dt)
443 + y0dt(t1 - dt, cl) / dt;
444 dydcl = (y0dcl(t1, cl) - y0dcl(t1 - dt, cl)) / dt;
445 }
446 }
447
448 double
y0dt(double t,double cl)449 DmpAlg::y0dt(double t,
450 double cl)
451 {
452 return 1.0 - exp(-t / (rd_ * cl));
453 }
454
455 double
y0dcl(double t,double cl)456 DmpAlg::y0dcl(double t,
457 double cl)
458 {
459 return rd_ * ((1.0 + t / (rd_ * cl)) * exp(-t / (rd_ * cl)) - 1);
460 }
461
462 void
showX()463 DmpAlg::showX()
464 {
465 for (int i = 0; i < nr_order_; i++)
466 report_->reportLine("%4s %12.3e", dmp_param_index_strings[i], x_[i]);
467 }
468
469 void
showFvec()470 DmpAlg::showFvec()
471 {
472 for (int i = 0; i < nr_order_; i++)
473 report_->reportLine("%4s %12.3e", dmp_func_index_strings[i], fvec_[i]);
474 }
475
476 void
showJacobian()477 DmpAlg::showJacobian()
478 {
479 string line = " ";
480 for (int j = 0; j < nr_order_; j++)
481 line += stdstrPrint("%12s", dmp_param_index_strings[j]);
482 report_->reportLineString(line);
483 line.clear();
484 for (int i = 0; i < nr_order_; i++) {
485 line += stdstrPrint("%4s ", dmp_func_index_strings[i]);
486 for (int j = 0; j < nr_order_; j++)
487 line += stdstrPrint("%12.3e ", fjac_[i][j]);
488 report_->reportLineString(line);
489 }
490 }
491
492 void
findDriverDelaySlew(double & delay,double & slew)493 DmpAlg::findDriverDelaySlew(double &delay,
494 double &slew)
495 {
496 delay = findVoCrossing(vth_);
497 double tl = findVoCrossing(vl_);
498 double th = findVoCrossing(vh_);
499 // Convert measured slew to table slew.
500 slew = (th - tl) / slew_derate_;
501 }
502
503 // Find t such that vo(t)=v.
504 double
findVoCrossing(double vth)505 DmpAlg::findVoCrossing(double vth)
506 {
507 v_cross_ = vth;
508 double ub = voCrossingUpperBound();
509 return findRoot(evalVoEqns, this, t0_, ub, vth_time_tol, find_root_max_iter);
510 }
511
512 static void
evalVoEqns(void * state,double x,double & y,double & dy)513 evalVoEqns(void *state,
514 double x,
515 double &y,
516 double &dy)
517 {
518 DmpAlg *pi_ceff = reinterpret_cast<DmpAlg *>(state);
519 y = pi_ceff->vo(x) - pi_ceff->vCross();
520 dy = pi_ceff->dVoDt(x);
521 }
522
523 double
vo(double t)524 DmpAlg::vo(double t)
525 {
526 double t1 = t - t0_;
527 if (t1 <= 0.0)
528 return 0.0;
529 else if (t1 <= dt_)
530 return v0(t1) / dt_;
531 else
532 return (v0(t1) - v0(t1 - dt_)) / dt_;
533 }
534
535 double
dVoDt(double t)536 DmpAlg::dVoDt(double t)
537 {
538 double t1 = t - t0_;
539 if (t1 <= 0)
540 return 0.0;
541 else if (t1 <= dt_)
542 return dv0dt(t1) / dt_;
543 else
544 return (dv0dt(t1) - dv0dt(t1 - dt_)) / dt_;
545 }
546
547 void
showVo()548 DmpAlg::showVo()
549 {
550 report_->reportLine(" t vo(t)");
551 double ub = voCrossingUpperBound();
552 for (double t = t0_; t < t0_ + ub; t += dt_ / 10.0)
553 report_->reportLine(" %g %g", t, vo(t));
554 }
555
556 void
loadDelaySlew(const Pin *,double elmore,ArcDelay & delay,Slew & slew)557 DmpAlg::loadDelaySlew(const Pin *,
558 double elmore,
559 ArcDelay &delay,
560 Slew &slew)
561 {
562 if (!driver_valid_
563 || elmore == 0.0
564 // Elmore delay is small compared to driver slew.
565 || elmore < gate_slew_ * 1e-3) {
566 delay = elmore;
567 slew = gate_slew_;
568 }
569 else {
570 // Use the driver thresholds and rely on thresholdAdjust to
571 // convert the delay and slew to the load's thresholds.
572 try {
573 if (debug_->check("dmp_ceff", 4))
574 showVl();
575 elmore_ = elmore;
576 p3_ = 1.0 / elmore;
577 double load_delay = findVlCrossing(vth_);
578 double tl = findVlCrossing(vl_);
579 double th = findVlCrossing(vh_);
580 // Measure delay from Vo, the load dependent source excitation.
581 double delay1 = load_delay - vo_delay_;
582 // Convert measured slew to reported/table slew.
583 double slew1 = (th - tl) / slew_derate_;
584 if (delay1 < 0.0) {
585 // Only report a problem if the difference is significant.
586 if (-delay1 > vth_time_tol * vo_delay_)
587 fail("load delay less than zero");
588 // Use elmore delay.
589 delay1 = elmore;
590 }
591 if (slew1 < gate_slew_) {
592 // Only report a problem if the difference is significant.
593 if ((gate_slew_ - slew1) > vth_time_tol * gate_slew_)
594 fail("load slew less than driver slew");
595 slew1 = gate_slew_;
596 }
597 delay = delay1;
598 slew = slew1;
599 }
600 catch (DmpError &error) {
601 delay = elmore_;
602 slew = gate_slew_;
603 }
604 }
605 }
606
607 // Find t such that vl(t)=v.
608 // Return true if successful.
609 double
findVlCrossing(double vth)610 DmpAlg::findVlCrossing(double vth)
611 {
612 v_cross_ = vth;
613 double ub = vlCrossingUpperBound();
614 return findRoot(evalVlEqns, this, t0_, ub, vth_time_tol, find_root_max_iter);
615 }
616
617 double
vlCrossingUpperBound()618 DmpAlg::vlCrossingUpperBound()
619 {
620 return voCrossingUpperBound() + elmore_;
621 }
622
623 static void
evalVlEqns(void * state,double x,double & y,double & dy)624 evalVlEqns(void *state,
625 double x,
626 double &y,
627 double &dy)
628 {
629 DmpAlg *pi_ceff = reinterpret_cast<DmpAlg *>(state);
630 y = pi_ceff->vl(x) - pi_ceff->vCross();
631 dy = pi_ceff->dVlDt(x);
632 }
633
634 double
vl(double t)635 DmpAlg::vl(double t)
636 {
637 double t1 = t - t0_;
638 if (t1 <= 0)
639 return 0.0;
640 else if (t1 <= dt_)
641 return vl0(t1) / dt_;
642 else
643 return (vl0(t1) - vl0(t1 - dt_)) / dt_;
644 }
645
646 double
dVlDt(double t)647 DmpAlg::dVlDt(double t)
648 {
649 double t1 = t - t0_;
650 if (t1 <= 0)
651 return 0.0;
652 else if (t1 <= dt_)
653 return dvl0dt(t1) / dt_;
654 else
655 return (dvl0dt(t1) - dvl0dt(t1 - dt_)) / dt_;
656 }
657
658 void
showVl()659 DmpAlg::showVl()
660 {
661 report_->reportLine(" t vl(t)");
662 double ub = vlCrossingUpperBound();
663 for (double t = t0_; t < t0_ + ub * 2.0; t += ub / 10.0)
664 report_->reportLine(" %g %g", t, vl(t));
665 }
666
667 void
fail(const char * reason)668 DmpAlg::fail(const char *reason)
669 {
670 // Allow only failures to be reported with a unique debug flag.
671 if (debug_->check("dmp_ceff", 1) || debug_->check("dmp_ceff_fail", 1))
672 report_->reportLine("delay_calc: DMP failed - %s c2=%s rpi=%s c1=%s rd=%s",
673 reason,
674 units_->capacitanceUnit()->asString(c2_),
675 units_->resistanceUnit()->asString(rpi_),
676 units_->capacitanceUnit()->asString(c1_),
677 units_->resistanceUnit()->asString(rd_));
678 }
679
680 ////////////////////////////////////////////////////////////////
681
682 // Capacitive load.
683 class DmpCap : public DmpAlg
684 {
685 public:
686 DmpCap(StaState *sta);
name()687 virtual const char *name() { return "cap"; }
688 virtual void init(const LibertyLibrary *library,
689 const LibertyCell *drvr_cell,
690 const Pvt *pvt,
691 const GateTableModel *gate_model,
692 const RiseFall *rf,
693 double rd,
694 double in_slew,
695 float related_out_cap,
696 double c2,
697 double rpi,
698 double c1);
699 virtual void gateDelaySlew(double &delay,
700 double &slew);
701 virtual void loadDelaySlew(const Pin *,
702 double elmore,
703 ArcDelay &delay,
704 Slew &slew);
705 virtual void evalDmpEqns();
706 virtual double voCrossingUpperBound();
707
708 private:
709 virtual double v0(double t);
710 virtual double dv0dt(double t);
711 virtual double vl0(double t);
712 virtual double dvl0dt(double t);
713 };
714
DmpCap(StaState * sta)715 DmpCap::DmpCap(StaState *sta):
716 DmpAlg(1, sta)
717 {
718 }
719
720 void
init(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,const GateTableModel * gate_model,const RiseFall * rf,double rd,double in_slew,float related_out_cap,double c2,double rpi,double c1)721 DmpCap::init(const LibertyLibrary *drvr_library,
722 const LibertyCell *drvr_cell,
723 const Pvt *pvt,
724 const GateTableModel *gate_model,
725 const RiseFall *rf,
726 double rd,
727 double in_slew,
728 float related_out_cap,
729 double c2,
730 double rpi,
731 double c1)
732 {
733 debugPrint(debug_, "dmp_ceff", 3, "Using DMP cap");
734 DmpAlg::init(drvr_library, drvr_cell, pvt, gate_model, rf,
735 rd, in_slew, related_out_cap, c2, rpi, c1);
736 ceff_ = c1 + c2;
737 }
738
739 void
gateDelaySlew(double & delay,double & slew)740 DmpCap::gateDelaySlew(double &delay,
741 double &slew)
742 {
743 debugPrint(debug_, "dmp_ceff", 3, " ceff = %s",
744 units_->capacitanceUnit()->asString(ceff_));
745 gateCapDelaySlew(ceff_, delay, slew);
746 gate_slew_ = slew;
747 }
748
749 void
loadDelaySlew(const Pin *,double elmore,ArcDelay & delay,Slew & slew)750 DmpCap::loadDelaySlew(const Pin *,
751 double elmore,
752 ArcDelay &delay,
753 Slew &slew)
754 {
755 delay = elmore;
756 slew = gate_slew_;
757 }
758
759 void
evalDmpEqns()760 DmpCap::evalDmpEqns()
761 {
762 }
763
764 double
v0(double)765 DmpCap::v0(double)
766 {
767 return 0.0;
768 }
769
770 double
dv0dt(double)771 DmpCap::dv0dt(double)
772 {
773 return 0.0;
774 }
775
776 double
voCrossingUpperBound()777 DmpCap::voCrossingUpperBound()
778 {
779 return 0.0;
780 }
781
782 double
vl0(double)783 DmpCap::vl0(double)
784 {
785 return 0.0;
786 }
787
788 double
dvl0dt(double)789 DmpCap::dvl0dt(double)
790 {
791 return 0.0;
792 }
793
794 ////////////////////////////////////////////////////////////////
795
796 // No non-zero pi model parameters, two poles, one zero
797 class DmpPi : public DmpAlg
798 {
799 public:
800 DmpPi(StaState *sta);
name()801 virtual const char *name() { return "Pi"; }
802 virtual void init(const LibertyLibrary *library,
803 const LibertyCell *drvr_cell,
804 const Pvt *pvt,
805 const GateTableModel *gate_model,
806 const RiseFall *rf,
807 double rd,
808 double in_slew,
809 float related_out_cap,
810 double c2,
811 double rpi,
812 double c1);
813 virtual void gateDelaySlew(double &delay,
814 double &slew);
815 virtual void evalDmpEqns();
816 virtual double voCrossingUpperBound();
817
818 private:
819 void findDriverParamsPi();
820 virtual double v0(double t);
821 virtual double dv0dt(double t);
822 double ipiIceff(double t0,
823 double dt,
824 double ceff_time,
825 double ceff);
826 virtual double vl0(double t);
827 virtual double dvl0dt(double t);
828
829 // Poles/zero.
830 double p1_;
831 double p2_;
832 double z1_;
833 // Residues.
834 double k0_;
835 double k1_;
836 double k2_;
837 double k3_;
838 double k4_;
839 // Ipi coefficients.
840 double A_;
841 double B_;
842 double D_;
843 };
844
DmpPi(StaState * sta)845 DmpPi::DmpPi(StaState *sta) :
846 DmpAlg(3, sta),
847 p1_(0.0),
848 p2_(0.0),
849 z1_(0.0),
850 k0_(0.0),
851 k1_(0.0),
852 k2_(0.0),
853 k3_(0.0),
854 k4_(0.0),
855 A_(0.0),
856 B_(0.0),
857 D_(0.0)
858 {
859 }
860
861 void
init(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,const GateTableModel * gate_model,const RiseFall * rf,double rd,double in_slew,float related_out_cap,double c2,double rpi,double c1)862 DmpPi::init(const LibertyLibrary *drvr_library,
863 const LibertyCell *drvr_cell,
864 const Pvt *pvt,
865 const GateTableModel *gate_model,
866 const RiseFall *rf,
867 double rd,
868 double in_slew,
869 float related_out_cap,
870 double c2,
871 double rpi,
872 double c1)
873 {
874 debugPrint(debug_, "dmp_ceff", 3, "Using DMP Pi");
875 DmpAlg::init(drvr_library, drvr_cell, pvt, gate_model, rf, rd,
876 in_slew, related_out_cap, c2, rpi, c1);
877
878 // Find poles/zeros.
879 z1_ = 1.0 / (rpi_ * c1_);
880 k0_ = 1.0 / (rd_ * c2_);
881 double a = rpi_ * rd_ * c1_ * c2_;
882 double b = rd_ * (c1_ + c2_) + rpi_ * c1_;
883 double sqrt_ = sqrt(b * b - 4 * a);
884 p1_ = (b + sqrt_) / (2 * a);
885 p2_ = (b - sqrt_) / (2 * a);
886
887 double p1p2 = (p1_ * p2_);
888 k2_ = z1_ / p1p2;
889 k1_ = (1.0 - k2_ * (p1_ + p2_)) / p1p2;
890 k4_ = (k1_ * p1_ + k2_) / (p2_ - p1_);
891 k3_ = -k1_ - k4_;
892
893 double z_ = (c1_ + c2_) / (rpi_ * c1_ * c2_);
894 A_ = z_ / p1p2;
895 B_ = (z_ - p1_) / (p1_ * (p1_ - p2_));
896 D_ = (z_ - p2_) / (p2_ * (p2_ - p1_));
897 }
898
899 void
gateDelaySlew(double & delay,double & slew)900 DmpPi::gateDelaySlew(double &delay,
901 double &slew)
902 {
903 driver_valid_ = false;
904 try {
905 findDriverParamsPi();
906 ceff_ = x_[DmpParam::ceff];
907 double table_delay, table_slew;
908 gateCapDelaySlew(ceff_, table_delay, table_slew);
909 delay = table_delay;
910 //slew = table_slew;
911 try {
912 double vo_delay, vo_slew;
913 findDriverDelaySlew(vo_delay, vo_slew);
914 driver_valid_ = true;
915 // Save Vo delay to measure load wire delay waveform.
916 vo_delay_ = vo_delay;
917 //delay = vo_delay;
918 slew = vo_slew;
919 }
920 catch (DmpError &error) {
921 fail(error.what());
922 // Fall back to table slew.
923 slew = table_slew;
924 }
925 }
926 catch (DmpError &error) {
927 fail(error.what());
928 // Driver calculation failed - use Ceff=c1+c2.
929 ceff_ = c1_ + c2_;
930 gateCapDelaySlew(ceff_, delay, slew);
931 }
932 gate_slew_ = slew;
933 }
934
935 void
findDriverParamsPi()936 DmpPi::findDriverParamsPi()
937 {
938 try {
939 findDriverParams(c2_ + c1_);
940 }
941 catch (DmpError &) {
942 findDriverParams(c2_);
943 }
944 }
945
946 // Given x_ as a vector of input parameters, fill fvec_ with the
947 // equations evaluated at x_ and fjac_ with the jacobian evaluated at x_.
948 void
evalDmpEqns()949 DmpPi::evalDmpEqns()
950 {
951 double t0 = x_[DmpParam::t0];
952 double dt = x_[DmpParam::dt];
953 double ceff = x_[DmpParam::ceff];
954
955 if (ceff < 0.0)
956 throw DmpError("eqn eval failed: ceff < 0");
957 if (ceff > (c1_ + c2_))
958 throw DmpError("eqn eval failed: ceff > c2 + c1");
959
960 double t_vth, t_vl, slew;
961 gateDelays(ceff, t_vth, t_vl, slew);
962 double ceff_time = slew / (vh_ - vl_);
963 if (ceff_time > 1.4 * dt)
964 ceff_time = 1.4 * dt;
965
966 if (dt <= 0.0)
967 throw DmpError("eqn eval failed: dt < 0");
968
969 double exp_p1_dt = exp(-p1_ * dt);
970 double exp_p2_dt = exp(-p2_ * dt);
971 double exp_dt_rd_ceff = exp(-dt / (rd_ * ceff));
972
973 double y50 = y(t_vth, t0, dt, ceff);
974 // Match Vl.
975 double y20 = y(t_vl, t0, dt, ceff);
976 fvec_[DmpFunc::ipi] = ipiIceff(t0, dt, ceff_time, ceff);
977 fvec_[DmpFunc::y50] = y50 - vth_;
978 fvec_[DmpFunc::y20] = y20 - vl_;
979 fjac_[DmpFunc::ipi][DmpParam::t0] = 0.0;
980 fjac_[DmpFunc::ipi][DmpParam::dt] =
981 (-A_ * dt + B_ * dt * exp_p1_dt - (2 * B_ / p1_) * (1.0 - exp_p1_dt)
982 + D_ * dt * exp_p2_dt - (2 * D_ / p2_) * (1.0 - exp_p2_dt)
983 + rd_ * ceff * (dt + dt * exp_dt_rd_ceff
984 - 2 * rd_ * ceff * (1.0 - exp_dt_rd_ceff)))
985 / (rd_ * dt * dt * dt);
986 fjac_[DmpFunc::ipi][DmpParam::ceff] =
987 (2 * rd_ * ceff - dt - (2 * rd_ * ceff + dt) * exp(-dt / (rd_ * ceff)))
988 / (dt * dt);
989
990 dy(t_vl, t0, dt, ceff,
991 fjac_[DmpFunc::y20][DmpParam::t0],
992 fjac_[DmpFunc::y20][DmpParam::dt],
993 fjac_[DmpFunc::y20][DmpParam::ceff]);
994
995 dy(t_vth, t0, dt, ceff,
996 fjac_[DmpFunc::y50][DmpParam::t0],
997 fjac_[DmpFunc::y50][DmpParam::dt],
998 fjac_[DmpFunc::y50][DmpParam::ceff]);
999
1000 if (debug_->check("dmp_ceff", 4)) {
1001 showX();
1002 showFvec();
1003 showJacobian();
1004 report_->reportLine(".................");
1005 }
1006 }
1007
1008 // Eqn 13, Eqn 14.
1009 double
ipiIceff(double,double dt,double ceff_time,double ceff)1010 DmpPi::ipiIceff(double, double dt,
1011 double ceff_time,
1012 double ceff)
1013 {
1014 double exp_p1_dt = exp(-p1_ * ceff_time);
1015 double exp_p2_dt = exp(-p2_ * ceff_time);
1016 double exp_dt_rd_ceff = exp(-ceff_time / (rd_ * ceff));
1017 double ipi = (A_ * ceff_time + (B_ / p1_) * (1.0 - exp_p1_dt)
1018 + (D_ / p2_) * (1.0 - exp_p2_dt))
1019 / (rd_ * ceff_time * dt);
1020 double iceff = (rd_ * ceff * ceff_time - (rd_ * ceff) * (rd_ * ceff)
1021 * (1.0 - exp_dt_rd_ceff))
1022 / (rd_ * ceff_time * dt);
1023 return ipi - iceff;
1024 }
1025
1026 double
v0(double t)1027 DmpPi::v0(double t)
1028 {
1029 return k0_ * (k1_ + k2_ * t + k3_ * exp(-p1_ * t) + k4_ * exp(-p2_ * t));
1030 }
1031
1032 double
dv0dt(double t)1033 DmpPi::dv0dt(double t)
1034 {
1035 return k0_ * (k2_ - k3_ * p1_ * exp(-p1_ * t) - k4_ * p2_ * exp(-p2_ * t));
1036 }
1037
1038 double
vl0(double t)1039 DmpPi::vl0(double t)
1040 {
1041 double D1 = k0_ * (k1_ - k2_ / p3_);
1042 double D3 = -p3_ * k0_ * k3_ / (p1_ - p3_);
1043 double D4 = -p3_ * k0_ * k4_ / (p2_ - p3_);
1044 double D5 = k0_ * (k2_ / p3_ - k1_ + p3_ * k3_ / (p1_ - p3_)
1045 + p3_ * k4_ / (p2_ - p3_));
1046 return D1 + t + D3 * exp(-p1_ * t) + D4 * exp(-p2_ * t) + D5 * exp(-p3_ * t);
1047 }
1048
1049 double
voCrossingUpperBound()1050 DmpPi::voCrossingUpperBound()
1051 {
1052 return t0_ + dt_ + (c1_ + c2_) * (rd_ + rpi_) * 2.0;
1053 }
1054
1055 double
dvl0dt(double t)1056 DmpPi::dvl0dt(double t)
1057 {
1058 double D3 = -p3_ * k0_ * k3_ / (p1_ - p3_);
1059 double D4 = -p3_ * k0_ * k4_ / (p2_ - p3_);
1060 double D5 = k0_ * (k2_ / p3_ - k1_ + p3_ * k3_ / (p1_ - p3_)
1061 + p3_ * k4_ / (p2_ - p3_));
1062 return 1.0 - D3 * p1_ * exp(-p1_ * t) - D4 * p2_ * exp(-p2_ * t)
1063 - D5 * p3_ * exp(-p3_ * t);
1064 }
1065
1066 ////////////////////////////////////////////////////////////////
1067
1068 // Capacitive load, so Ceff is known.
1069 // Solve for t0, delta t.
1070 class DmpOnePole : public DmpAlg
1071 {
1072 public:
1073 DmpOnePole(StaState *sta);
1074 virtual void evalDmpEqns();
1075 virtual double voCrossingUpperBound();
1076 };
1077
DmpOnePole(StaState * sta)1078 DmpOnePole::DmpOnePole(StaState *sta) :
1079 DmpAlg(2, sta)
1080 {
1081 }
1082
1083 void
evalDmpEqns()1084 DmpOnePole::evalDmpEqns()
1085 {
1086 double t0 = x_[DmpParam::t0];
1087 double dt = x_[DmpParam::dt];
1088
1089 double t_vth, t_vl, ignore1, ignore2;
1090 gateDelays(ceff_, t_vth, t_vl, ignore1);
1091
1092 if (dt <= 0.0)
1093 dt = x_[DmpParam::dt] = (t_vl - t_vth) / 100;
1094
1095 fvec_[DmpFunc::y50] = y(t_vth, t0, dt, ceff_) - vth_;
1096 fvec_[DmpFunc::y20] = y(t_vl, t0, dt, ceff_) - vl_;
1097
1098 if (debug_->check("dmp_ceff", 4)) {
1099 showX();
1100 showFvec();
1101 }
1102
1103 dy(t_vl, t0, dt, ceff_,
1104 fjac_[DmpFunc::y20][DmpParam::t0],
1105 fjac_[DmpFunc::y20][DmpParam::dt],
1106 ignore2);
1107
1108 dy(t_vth, t0, dt, ceff_,
1109 fjac_[DmpFunc::y50][DmpParam::t0],
1110 fjac_[DmpFunc::y50][DmpParam::dt],
1111 ignore2);
1112
1113 if (debug_->check("dmp_ceff", 4)) {
1114 showJacobian();
1115 report_->reportLine(".................");
1116 }
1117 }
1118
1119 double
voCrossingUpperBound()1120 DmpOnePole::voCrossingUpperBound()
1121 {
1122 return t0_ + dt_ + ceff_ * rd_ * 2.0;
1123 }
1124
1125 ////////////////////////////////////////////////////////////////
1126
1127 // C2 = 0, one pole, one zero.
1128 class DmpZeroC2 : public DmpOnePole
1129 {
1130 public:
1131 DmpZeroC2(StaState *sta);
name()1132 virtual const char *name() { return "c2=0"; }
1133 virtual void init(const LibertyLibrary *drvr_library,
1134 const LibertyCell *drvr_cell,
1135 const Pvt *pvt,
1136 const GateTableModel *gate_model,
1137 const RiseFall *rf,
1138 double rd,
1139 double in_slew,
1140 float related_out_cap,
1141 double c2,
1142 double rpi,
1143 double c1);
1144 virtual void gateDelaySlew(double &delay,
1145 double &slew);
1146
1147 private:
1148 virtual double v0(double t);
1149 virtual double dv0dt(double t);
1150 virtual double vl0(double t);
1151 virtual double dvl0dt(double t);
1152 virtual double voCrossingUpperBound();
1153
1154 // Pole/zero.
1155 double p1_;
1156 double z1_;
1157 // Residues.
1158 double k0_;
1159 double k1_;
1160 double k2_;
1161 double k3_;
1162 };
1163
DmpZeroC2(StaState * sta)1164 DmpZeroC2::DmpZeroC2(StaState *sta) :
1165 DmpOnePole(sta),
1166 p1_(0.0),
1167 z1_(0.0),
1168 k0_(0.0),
1169 k1_(0.0),
1170 k2_(0.0),
1171 k3_(0.0)
1172 {
1173 }
1174
1175 void
init(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,const GateTableModel * gate_model,const RiseFall * rf,double rd,double in_slew,float related_out_cap,double c2,double rpi,double c1)1176 DmpZeroC2::init(const LibertyLibrary *drvr_library,
1177 const LibertyCell *drvr_cell,
1178 const Pvt *pvt,
1179 const GateTableModel *gate_model,
1180 const RiseFall *rf,
1181 double rd,
1182 double in_slew,
1183 float related_out_cap,
1184 double c2,
1185 double rpi,
1186 double c1)
1187 {
1188 debugPrint(debug_, "dmp_ceff", 3, "Using DMP C2=0");
1189 DmpAlg::init(drvr_library, drvr_cell, pvt, gate_model, rf, rd,
1190 in_slew, related_out_cap, c2, rpi, c1);
1191 ceff_ = c1;
1192
1193 z1_ = 1.0 / (rpi_ * c1_);
1194 p1_ = 1.0 / (c1_ * (rd_ + rpi_));
1195
1196 k0_ = p1_ / z1_;
1197 k2_ = 1.0 / k0_;
1198 k1_ = (p1_ - z1_) / (p1_ * p1_);
1199 k3_ = -k1_;
1200 }
1201
1202 void
gateDelaySlew(double & delay,double & slew)1203 DmpZeroC2::gateDelaySlew(double &delay,
1204 double &slew)
1205 {
1206 try {
1207 findDriverParams(c1_);
1208 ceff_ = c1_;
1209 findDriverDelaySlew(delay, slew);
1210 driver_valid_ = true;
1211 vo_delay_ = delay;
1212 }
1213 catch (DmpError &error) {
1214 fail(error.what());
1215 // Fall back to table slew.
1216 driver_valid_ = false;
1217 ceff_ = c1_;
1218 gateCapDelaySlew(ceff_, delay, slew);
1219 }
1220 gate_slew_ = slew;
1221 }
1222
1223 double
v0(double t)1224 DmpZeroC2::v0(double t)
1225 {
1226 return k0_ * (k1_ + k2_ * t + k3_ * exp(-p1_ * t));
1227 }
1228
1229 double
dv0dt(double t)1230 DmpZeroC2::dv0dt(double t)
1231 {
1232 return k0_ * (k2_ - k3_ * p1_ * exp(-p1_ * t));
1233 }
1234
1235 double
vl0(double t)1236 DmpZeroC2::vl0(double t)
1237 {
1238 double D1 = k0_ * (k1_ - k2_ / p3_);
1239 double D3 = -p3_ * k0_ * k3_ / (p1_ - p3_);
1240 double D5 = k0_ * (k2_ / p3_ - k1_ + p3_ * k3_ / (p1_ - p3_));
1241 return D1 + t + D3 * exp(-p1_ * t) + D5 * exp(-p3_ * t);
1242 }
1243
1244 double
dvl0dt(double t)1245 DmpZeroC2::dvl0dt(double t)
1246 {
1247 double D3 = -p3_ * k0_ * k3_ / (p1_ - p3_);
1248 double D5 = k0_ * (k2_ / p3_ - k1_ + p3_ * k3_ / (p1_ - p3_));
1249 return 1.0 - D3 * p1_ * exp(-p1_ * t) - D5 * p3_ * exp(-p3_ * t);
1250 }
1251
1252 double
voCrossingUpperBound()1253 DmpZeroC2::voCrossingUpperBound()
1254 {
1255 return t0_ + dt_ + c1_ * (rd_ + rpi_) * 2.0;
1256 }
1257
1258 ////////////////////////////////////////////////////////////////
1259
1260 // Find the root of a function between x1 and x2 using a combination
1261 // of Newton-Raphson and bisection search.
1262 // x_tol is a percentage that change in x must be less than (1.0 = 100%).
1263 // error is non-null if a problem occurs.
1264 static double
findRoot(void (* func)(void * state,double x,double & y,double & dy),void * state,double x1,double x2,double x_tol,int max_iter)1265 findRoot(void (*func)(void *state, double x, double &y, double &dy),
1266 void *state,
1267 double x1,
1268 double x2,
1269 double x_tol,
1270 int max_iter)
1271 {
1272 double y1, y2, dy;
1273 func(state, x1, y1, dy);
1274 func(state, x2, y2, dy);
1275
1276 if ((y1 > 0.0 && y2 > 0.0) || (y1 < 0.0 && y2 < 0.0))
1277 throw DmpError("findRoot: initial bounds do not surround a root");
1278
1279 if (y1 == 0.0)
1280 return x1;
1281
1282 if (y2 == 0.0)
1283 return x2;
1284
1285 if (y1 > 0.0) {
1286 // Swap x1/x2 so func(x1) < 0.
1287 double tmp = x1;
1288 x1 = x2;
1289 x2 = tmp;
1290 }
1291 double root = (x1 + x2) * 0.5;
1292 double dx_prev = abs(x2 - x1);
1293 double dx = dx_prev;
1294 double y;
1295 func(state, root, y, dy);
1296 for (int iter = 0; iter < max_iter; iter++) {
1297 // Newton/raphson out of range.
1298 if ((((root - x2) * dy - y) * ((root - x1) * dy - y) > 0.0)
1299 // Not decreasing fast enough.
1300 || (abs(2.0 * y) > abs(dx_prev * dy))) {
1301 // Bisect x1/x2 interval.
1302 dx_prev = dx;
1303 dx = (x2 - x1) * 0.5;
1304 root = x1 + dx;
1305 }
1306 else {
1307 dx_prev = dx;
1308 dx = y / dy;
1309 root -= dx;
1310 }
1311 if (abs(dx) <= x_tol * abs(root))
1312 // Converged.
1313 return root;
1314
1315 func(state, root, y, dy);
1316 if (y < 0.0)
1317 x1 = root;
1318 else
1319 x2 = root;
1320 }
1321 throw DmpError("findRoot: max iterations exceeded");
1322 }
1323
1324 // Newton-Raphson iteration to find zeros of a function.
1325 // x_tol is percentage that all changes in x must be less than (1.0 = 100%).
1326 // Eval(state) is called to fill fvec and fjac (returns false if fails).
1327 // Return error msg on failure.
1328 static void
newtonRaphson(const int max_iter,double x[],const int size,const double x_tol,void (* eval)(void * state),void * state,double * fvec,double ** fjac,int * index,double * p,double * scale)1329 newtonRaphson(const int max_iter,
1330 double x[],
1331 const int size,
1332 const double x_tol,
1333 void (*eval)(void *state),
1334 void *state,
1335 // Temporaries supplied by caller.
1336 double *fvec,
1337 double **fjac,
1338 int *index,
1339 double *p,
1340 double *scale)
1341 {
1342 for (int k = 0; k < max_iter; k++) {
1343 eval(state);
1344 for (int i = 0; i < size; i++)
1345 // Right-hand side of linear equations.
1346 p[i] = -fvec[i];
1347 luDecomp(fjac, size, index, scale);
1348 luSolve(fjac, size, index, p);
1349
1350 bool all_under_x_tol = true;
1351 for (int i = 0; i < size; i++) {
1352 if (abs(p[i]) > abs(x[i]) * x_tol)
1353 all_under_x_tol = false;
1354 x[i] += p[i];
1355 }
1356 if (all_under_x_tol)
1357 return;
1358 }
1359 throw DmpError("Newton-Raphson max iterations exceeded");
1360 }
1361
1362 // luDecomp, luSolve based on MatClass from C. R. Birchenhall,
1363 // University of Manchester
1364 // ftp://ftp.mcc.ac.uk/pub/matclass/libmat.tar.Z
1365
1366 // Crout's Method of LU decomposition of square matrix, with implicit
1367 // partial pivoting. A is overwritten. U is explicit in the upper
1368 // triangle and L is in multiplier form in the subdiagionals i.e. subdiag
1369 // a[i,j] is the multiplier used to eliminate the [i,j] term.
1370 //
1371 // Replaces a[0..size-1][0..size-1] by the LU decomposition.
1372 // index[0..size-1] is an output vector of the row permutations.
1373 // Return error msg on failure.
1374 void
luDecomp(double ** a,const int size,int * index,double * scale)1375 luDecomp(double **a,
1376 const int size,
1377 int *index,
1378 // Temporary supplied by caller.
1379 // scale stores the implicit scaling of each row.
1380 double *scale)
1381 {
1382 // Find implicit scaling factors.
1383 for (int i = 0; i < size; i++) {
1384 double big = 0.0;
1385 for (int j = 0; j < size; j++) {
1386 double temp = abs(a[i][j]);
1387 if (temp > big)
1388 big = temp;
1389 }
1390 if (big == 0.0)
1391 throw DmpError("LU decomposition: no non-zero row element");
1392 scale[i] = 1.0 / big;
1393 }
1394 int size_1 = size - 1;
1395 for (int j = 0; j < size; j++) {
1396 // Run down jth column from top to diag, to form the elements of U.
1397 for (int i = 0; i < j; i++) {
1398 double sum = a[i][j];
1399 for (int k = 0; k < i; k++)
1400 sum -= a[i][k] * a[k][j];
1401 a[i][j] = sum;
1402 }
1403 // Run down jth subdiag to form the residuals after the elimination
1404 // of the first j-1 subdiags. These residuals divided by the
1405 // appropriate diagonal term will become the multipliers in the
1406 // elimination of the jth. subdiag. Find index of largest scaled
1407 // term in imax.
1408 double big = 0.0;
1409 int imax = 0;
1410 for (int i = j; i < size; i++) {
1411 double sum = a[i][j];
1412 for (int k = 0; k < j; k++)
1413 sum -= a[i][k] * a[k][j];
1414 a[i][j] = sum;
1415 double dum = scale[i] * abs(sum);
1416 if (dum >= big) {
1417 big = dum;
1418 imax = i;
1419 }
1420 }
1421 // Permute current row with imax.
1422 if (j != imax) {
1423 // Yes, do so...
1424 for (int k = 0; k < size; k++) {
1425 double dum = a[imax][k];
1426 a[imax][k] = a[j][k];
1427 a[j][k] = dum;
1428 }
1429 scale[imax] = scale[j];
1430 }
1431 index[j] = imax;
1432 // If diag term is not zero divide subdiag to form multipliers.
1433 if (a[j][j] == 0.0)
1434 a[j][j] = tiny_double;
1435 if (j != size_1) {
1436 double pivot = 1.0 / a[j][j];
1437 for (int i = j + 1; i < size; i++)
1438 a[i][j] *= pivot;
1439 }
1440 }
1441 }
1442
1443 // Solves the set of size linear equations a*x=b, assuming A is LU form
1444 // but assume b has not been transformed.
1445 // a[0..size-1] is LU decomposition
1446 // Returns the solution vector x in b.
1447 // a and index are not modified.
1448 void
luSolve(double ** a,const int size,const int * index,double b[])1449 luSolve(double **a,
1450 const int size,
1451 const int *index,
1452 double b[])
1453 {
1454 // Transform b allowing for leading zeros.
1455 int non_zero = -1;
1456 for (int i = 0; i < size; i++) {
1457 int iperm = index[i];
1458 double sum = b[iperm];
1459 b[iperm] = b[i];
1460 if (non_zero != -1) {
1461 for (int j = non_zero; j <= i - 1; j++)
1462 sum -= a[i][j] * b[j];
1463 }
1464 else {
1465 if (sum != 0.0)
1466 non_zero = i;
1467 }
1468 b[i] = sum;
1469 }
1470 // Backsubstitution.
1471 for (int i = size - 1; i >= 0; i--) {
1472 double sum = b[i];
1473 for (int j = i + 1; j < size; j++)
1474 sum -= a[i][j] * b[j];
1475 b[i] = sum / a[i][i];
1476 }
1477 }
1478
1479 #if 0
1480 // Solve:
1481 // x + y = 5
1482 // x - y = 1
1483 // x = 3
1484 // y = 2
1485 void
1486 testLuDecomp1()
1487 {
1488 double a0[2] = {1, 1};
1489 double a1[2] = {1, -1};
1490 double *a[2] = {a0, a1};
1491 int index[2];
1492 double b[2] = {5, 1};
1493 double scale[2];
1494 luDecomp(a, 2, index, scale);
1495 luSolve(a, 2, index, b);
1496 printf("x = %f y= %f\n", b[0], b[1]);
1497 }
1498
1499 // Solve
1500 // x + 2y = 3
1501 // 3x - 4y = 19
1502 // x = 5
1503 // y = -1
1504 void
1505 testLuDecomp2()
1506 {
1507 double a0[2] = {1, 2};
1508 double a1[2] = {3, -4};
1509 double *a[2] = {a0, a1};
1510 int index[2];
1511 double b[2] = {3, 19};
1512 double scale[2];
1513 luDecomp(a, 2, index, scale);
1514 luSolve(a, 2, index, b);
1515 printf("x = %f y= %f\n", b[0], b[1]);
1516 }
1517 #endif
1518
1519 ////////////////////////////////////////////////////////////////
1520
1521 bool DmpCeffDelayCalc::unsuppored_model_warned_ = false;
1522
DmpCeffDelayCalc(StaState * sta)1523 DmpCeffDelayCalc::DmpCeffDelayCalc(StaState *sta) :
1524 RCDelayCalc(sta),
1525 dmp_cap_(new DmpCap(sta)),
1526 dmp_pi_(new DmpPi(sta)),
1527 dmp_zero_c2_(new DmpZeroC2(sta)),
1528 dmp_alg_(nullptr)
1529 {
1530 }
1531
~DmpCeffDelayCalc()1532 DmpCeffDelayCalc::~DmpCeffDelayCalc()
1533 {
1534 delete dmp_cap_;
1535 delete dmp_pi_;
1536 delete dmp_zero_c2_;
1537 }
1538
1539 void
inputPortDelay(const Pin * port_pin,float in_slew,const RiseFall * rf,Parasitic * parasitic,const DcalcAnalysisPt * dcalc_ap)1540 DmpCeffDelayCalc::inputPortDelay(const Pin *port_pin,
1541 float in_slew,
1542 const RiseFall *rf,
1543 Parasitic *parasitic,
1544 const DcalcAnalysisPt *dcalc_ap)
1545 {
1546 dmp_alg_ = nullptr;
1547 input_port_ = true;
1548 RCDelayCalc::inputPortDelay(port_pin, in_slew, rf, parasitic, dcalc_ap);
1549 }
1550
1551 void
gateDelay(const LibertyCell * drvr_cell,TimingArc * arc,const Slew & in_slew,float load_cap,Parasitic * drvr_parasitic,float related_out_cap,const Pvt * pvt,const DcalcAnalysisPt * dcalc_ap,ArcDelay & gate_delay,Slew & drvr_slew)1552 DmpCeffDelayCalc::gateDelay(const LibertyCell *drvr_cell,
1553 TimingArc *arc,
1554 const Slew &in_slew,
1555 float load_cap,
1556 Parasitic *drvr_parasitic,
1557 float related_out_cap,
1558 const Pvt *pvt,
1559 const DcalcAnalysisPt *dcalc_ap,
1560 // Return values.
1561 ArcDelay &gate_delay,
1562 Slew &drvr_slew)
1563 {
1564 input_port_ = false;
1565 drvr_rf_ = arc->toTrans()->asRiseFall();
1566 drvr_library_ = drvr_cell->libertyLibrary();
1567 drvr_parasitic_ = drvr_parasitic;
1568 GateTimingModel *model = gateModel(arc, dcalc_ap);
1569 GateTableModel *table_model = dynamic_cast<GateTableModel*>(model);
1570 if (table_model && drvr_parasitic) {
1571 float in_slew1 = delayAsFloat(in_slew);
1572 float c2, rpi, c1;
1573 parasitics_->piModel(drvr_parasitic, c2, rpi, c1);
1574 setCeffAlgorithm(drvr_library_, drvr_cell, pvt, table_model,
1575 drvr_rf_, in_slew1, related_out_cap,
1576 c2, rpi, c1);
1577 double dmp_gate_delay, dmp_drvr_slew;
1578 gateDelaySlew(dmp_gate_delay, dmp_drvr_slew);
1579 gate_delay = dmp_gate_delay;
1580 drvr_slew = dmp_drvr_slew;
1581 }
1582 else {
1583 LumpedCapDelayCalc::gateDelay(drvr_cell, arc, in_slew, load_cap,
1584 drvr_parasitic, related_out_cap, pvt,
1585 dcalc_ap, gate_delay, drvr_slew);
1586 if (drvr_parasitic
1587 && !unsuppored_model_warned_) {
1588 unsuppored_model_warned_ = true;
1589 report_->warn(1, "cell %s delay model not supported on SPF parasitics by DMP delay calculator",
1590 drvr_cell->name());
1591 }
1592 }
1593 drvr_slew_ = drvr_slew;
1594 multi_drvr_slew_factor_ = 1.0F;
1595 }
1596
1597 void
setCeffAlgorithm(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,GateTableModel * gate_model,const RiseFall * rf,double in_slew,float related_out_cap,double c2,double rpi,double c1)1598 DmpCeffDelayCalc::setCeffAlgorithm(const LibertyLibrary *drvr_library,
1599 const LibertyCell *drvr_cell,
1600 const Pvt *pvt,
1601 GateTableModel *gate_model,
1602 const RiseFall *rf,
1603 double in_slew,
1604 float related_out_cap,
1605 double c2,
1606 double rpi,
1607 double c1)
1608 {
1609 double rd = 0.0;
1610 if (gate_model) {
1611 rd = gateModelRd(drvr_cell, gate_model, rf, in_slew, c2, c1,
1612 related_out_cap, pvt, pocv_enabled_);
1613 // Zero Rd means the table is constant and thus independent of load cap.
1614 if (rd < 1e-2
1615 // Rpi is small compared to Rd, which makes the load capacitive.
1616 || rpi < rd * 1e-3
1617 // c1/Rpi can be ignored.
1618 || (c1 == 0.0 || c1 < c2 * 1e-3 || rpi == 0.0))
1619 dmp_alg_ = dmp_cap_;
1620 else if (c2 < c1 * 1e-3)
1621 dmp_alg_ = dmp_zero_c2_;
1622 else
1623 // The full monty.
1624 dmp_alg_ = dmp_pi_;
1625 }
1626 else
1627 dmp_alg_ = dmp_cap_;
1628 dmp_alg_->init(drvr_library, drvr_cell, pvt, gate_model,
1629 drvr_rf_, rd, in_slew, related_out_cap, c2, rpi, c1);
1630 debugPrint(debug_, "dmp_ceff", 3,
1631 " DMP in_slew = %s c2 = %s rpi = %s c1 = %s Rd = %s (%s alg)",
1632 units_->timeUnit()->asString(in_slew),
1633 units_->capacitanceUnit()->asString(c2),
1634 units_->resistanceUnit()->asString(rpi),
1635 units_->capacitanceUnit()->asString(c1),
1636 units_->resistanceUnit()->asString(rd),
1637 dmp_alg_->name());
1638 }
1639
1640 float
ceff(const LibertyCell * drvr_cell,TimingArc * arc,const Slew & in_slew,float load_cap,Parasitic * drvr_parasitic,float related_out_cap,const Pvt * pvt,const DcalcAnalysisPt * dcalc_ap)1641 DmpCeffDelayCalc::ceff(const LibertyCell *drvr_cell,
1642 TimingArc *arc,
1643 const Slew &in_slew,
1644 float load_cap,
1645 Parasitic *drvr_parasitic,
1646 float related_out_cap,
1647 const Pvt *pvt,
1648 const DcalcAnalysisPt *dcalc_ap)
1649 {
1650 ArcDelay gate_delay;
1651 Slew drvr_slew;
1652 gateDelay(drvr_cell, arc, in_slew, load_cap,
1653 drvr_parasitic, related_out_cap, pvt, dcalc_ap,
1654 gate_delay, drvr_slew);
1655 if (dmp_alg_)
1656 return dmp_alg_->ceff();
1657 else
1658 return load_cap;
1659 }
1660
1661 void
reportGateDelay(const LibertyCell * drvr_cell,TimingArc * arc,const Slew & in_slew,float load_cap,Parasitic * drvr_parasitic,float related_out_cap,const Pvt * pvt,const DcalcAnalysisPt * dcalc_ap,int digits,string * result)1662 DmpCeffDelayCalc::reportGateDelay(const LibertyCell *drvr_cell,
1663 TimingArc *arc,
1664 const Slew &in_slew,
1665 float load_cap,
1666 Parasitic *drvr_parasitic,
1667 float related_out_cap,
1668 const Pvt *pvt,
1669 const DcalcAnalysisPt *dcalc_ap,
1670 int digits,
1671 string *result)
1672 {
1673 ArcDelay gate_delay;
1674 Slew drvr_slew;
1675 gateDelay(drvr_cell, arc, in_slew, load_cap,
1676 drvr_parasitic, related_out_cap, pvt, dcalc_ap,
1677 gate_delay, drvr_slew);
1678 GateTimingModel *model = gateModel(arc, dcalc_ap);
1679 float c_eff = 0.0;
1680 if (drvr_parasitic_ && dmp_alg_) {
1681 c_eff = dmp_alg_->ceff();
1682 const LibertyLibrary *drvr_library = drvr_cell->libertyLibrary();
1683 const Units *units = drvr_library->units();
1684 const Unit *cap_unit = units->capacitanceUnit();
1685 const Unit *res_unit = units->resistanceUnit();
1686 float c2, rpi, c1;
1687 parasitics_->piModel(drvr_parasitic_, c2, rpi, c1);
1688 *result += "Pi model C2=";
1689 *result += cap_unit->asString(c2, digits);
1690 *result += " Rpi=";
1691 *result += res_unit->asString(rpi, digits);
1692 *result += " C1=";
1693 *result += cap_unit->asString(c1, digits);
1694 *result += ", Ceff=";
1695 *result += cap_unit->asString(c_eff, digits);
1696 *result += '\n';
1697 }
1698 else
1699 c_eff = load_cap;
1700 if (model) {
1701 float in_slew1 = delayAsFloat(in_slew);
1702 model->reportGateDelay(drvr_cell, pvt, in_slew1, c_eff,
1703 related_out_cap, pocv_enabled_,
1704 digits, result);
1705 }
1706 }
1707
1708 static double
gateModelRd(const LibertyCell * cell,GateTableModel * gate_model,const RiseFall * rf,double in_slew,double c2,double c1,float related_out_cap,const Pvt * pvt,bool pocv_enabled)1709 gateModelRd(const LibertyCell *cell,
1710 GateTableModel *gate_model,
1711 const RiseFall *rf,
1712 double in_slew,
1713 double c2,
1714 double c1,
1715 float related_out_cap,
1716 const Pvt *pvt,
1717 bool pocv_enabled)
1718 {
1719 float cap1 = c1 + c2;
1720 float cap2 = cap1 + 1e-15;
1721 ArcDelay d1, d2;
1722 Slew s1, s2;
1723 gate_model->gateDelay(cell, pvt, in_slew, cap1, related_out_cap, pocv_enabled,
1724 d1, s1);
1725 gate_model->gateDelay(cell, pvt, in_slew, cap2, related_out_cap, pocv_enabled,
1726 d2, s2);
1727 double vth = cell->libertyLibrary()->outputThreshold(rf);
1728 float rd = -log(vth) * abs(delayAsFloat(d1) - delayAsFloat(d2)) / (cap2 - cap1);
1729 return rd;
1730 }
1731
1732 void
gateDelaySlew(double & delay,double & slew)1733 DmpCeffDelayCalc::gateDelaySlew(double &delay,
1734 double &slew)
1735 {
1736 dmp_alg_->gateDelaySlew(delay, slew);
1737 }
1738
1739 void
loadDelaySlew(const Pin * load_pin,double elmore,ArcDelay & delay,Slew & slew)1740 DmpCeffDelayCalc::loadDelaySlew(const Pin *load_pin,
1741 double elmore,
1742 ArcDelay &delay,
1743 Slew &slew)
1744 {
1745 if (dmp_alg_)
1746 dmp_alg_->loadDelaySlew(load_pin, elmore, delay, slew);
1747 }
1748
1749 // Notify algorithm components.
1750 void
copyState(const StaState * sta)1751 DmpCeffDelayCalc::copyState(const StaState *sta)
1752 {
1753 StaState::copyState(sta);
1754 dmp_cap_->copyState(sta);
1755 dmp_pi_->copyState(sta);
1756 dmp_zero_c2_->copyState(sta);
1757 }
1758
DmpError(const char * what)1759 DmpError::DmpError(const char *what) :
1760 what_(what)
1761 {
1762 //printf("DmpError %s\n", what);
1763 }
1764
1765 } // namespace
1766