1 // OpenSTA, Static Timing Analyzer
2 // Copyright (c) 2021, Parallax Software, Inc.
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
16 
17 // "Performance Computation for Precharacterized CMOS Gates with RC Loads",
18 // Florentin Dartu, Noel Menezes and Lawrence Pileggi, IEEE Transactions
19 // on Computer-Aided Design of Integrated Circuits and Systems, Vol 15, No 5,
20 // May 1996, pg 544-553.
21 //
22 // The only real change from the paper is that Vl, the measured low
23 // slew voltage is matched instead of y20 in eqn 12.
24 
25 #include "DmpCeff.hh"
26 
27 #include <algorithm> // abs, min
28 #include <cmath>    // sqrt, log
29 
30 #include "Report.hh"
31 #include "Debug.hh"
32 #include "Units.hh"
33 #include "TimingArc.hh"
34 #include "TableModel.hh"
35 #include "Liberty.hh"
36 #include "Network.hh"
37 #include "Sdc.hh"
38 #include "Parasitics.hh"
39 #include "DcalcAnalysisPt.hh"
40 #include "ArcDelayCalc.hh"
41 
42 namespace sta {
43 
44 using std::abs;
45 using std::min;
46 using std::max;
47 using std::sqrt;
48 using std::log;
49 using std::exp;
50 
51 // Tolerance (as a scale of value) for driver parameters (Ceff, delta t, t0).
52 static const double driver_param_tol = .01;
53 // Waveform threshold crossing time tolerance (1.0 = 100%).
54 static const double vth_time_tol = .01;
55 // A small number used by luDecomp.
56 static const double tiny_double = 1.0e-20;
57 // Max iterations for findRoot.
58 static const int find_root_max_iter = 20;
59 
60 // Indices of Newton-Raphson parameter vector.
61 enum DmpParam { t0, dt, ceff };
62 
63 static const char *dmp_param_index_strings[] = {"t0", "dt", "Ceff"};
64 
65 // Indices of Newton-Raphson function value vector.
66 enum DmpFunc { y20, y50, ipi };
67 
68 static const char *dmp_func_index_strings[] = {"y20", "y50", "Ipi"};
69 
70 class DmpError : public Exception
71 {
72 public:
73   DmpError(const char *what);
~DmpError()74   virtual ~DmpError() {}
what() const75   virtual const char *what() const noexcept { return what_; }
76 
77 private:
78   const char *what_;
79 };
80 
81 static double
82 gateModelRd(const LibertyCell *cell,
83 	    GateTableModel *gate_model,
84 	    const RiseFall *rf,
85 	    double in_slew,
86 	    double c2,
87 	    double c1,
88 	    float related_out_cap,
89 	    const Pvt *pvt,
90 	    bool pocv_enabled);
91 static void
92 evalDmpEqnsState(void *state);
93 static void
94 evalVoEqns(void *state,
95 	   double x,
96 	   double &y,
97 	   double &dy);
98 static void
99 evalVlEqns(void *state,
100 	   double x,
101 	   double &y,
102 	   double &dy);
103 
104 static double
105 findRoot(void (*func)(void *state, double x, double &y, double &dy),
106 	 void *state,
107 	 double x1,
108 	 double x2,
109 	 double x_tol,
110 	 int max_iter);
111 static void
112 newtonRaphson(const int max_iter,
113 	      double x[],
114 	      const int n,
115 	      const double x_tol,
116 	      // eval(state) is called to fill fvec and fjac.
117 	      // Returns false if fails.
118 	      void (*eval)(void *state),
119 	      void *state,
120 	      // Temporaries supplied by caller.
121 	      double *fvec,
122 	      double **fjac,
123 	      int *index,
124 	      double *p,
125 	      double *scale);
126 static void
127 luSolve(double **a,
128 	const int size,
129 	const int *index,
130 	double b[]);
131 static void
132 luDecomp(double **a,
133 	 const int size,
134 	 int *index,
135 	 double *scale);
136 
137 ////////////////////////////////////////////////////////////////
138 
139 // Base class for Dartu/Menezes/Pileggi algorithm.
140 // Derived classes handle different cases of zero values in the Pi model.
141 class DmpAlg : public StaState
142 {
143 public:
144   DmpAlg(int nr_order, StaState *sta);
145   virtual ~DmpAlg();
146   virtual const char *name() = 0;
147   // Set driver model and pi model parameters for delay calculation.
148   virtual void init(const LibertyLibrary *library,
149 		    const LibertyCell *drvr_cell,
150 		    const Pvt *pvt,
151 		    const GateTableModel *gate_model,
152 		    const RiseFall *rf,
153 		    double rd,
154 		    double in_slew,
155 		    float related_out_cap,
156 		    double c2,
157 		    double rpi,
158 		    double c1);
159   virtual void gateDelaySlew(double &delay,
160 			     double &slew) = 0;
161   virtual void loadDelaySlew(const Pin *load_pin,
162 			     double elmore,
163 			     ArcDelay &delay,
164 			     Slew &slew);
ceff()165   double ceff() { return ceff_; }
166 
167   // Given x_ as a vector of input parameters, fill fvec_ with the
168   // equations evaluated at x_ and fjac_ with the jabobian evaluated at x_.
169   virtual void evalDmpEqns() = 0;
170   // Output response to vs(t) ramp driving pi model load.
171   double vo(double t);
172   double dVoDt(double t);
173   // Load responce to driver waveform.
174   double vl(double t);
175   double dVlDt(double t);
vCross()176   double vCross() { return v_cross_; }
177 
178 protected:
179   // Find driver parameters t0, delta_t, Ceff.
180   void findDriverParams(double ceff);
181   void gateCapDelaySlew(double cl,
182 			double &delay,
183 			double &slew);
184   void gateDelays(double ceff,
185 		  double &t_vth,
186 		  double &t_vl,
187 		  double &slew);
188   virtual double dv0dt(double t) = 0;
189   // Partial derivatives of y(t) (jacobian).
190   void dy(double t,
191 	  double t0,
192 	  double dt,
193 	  double cl,
194 	  double &dydt0,
195 	  double &dyddt,
196 	  double &dydcl);
197   double y0dt(double t,
198 	      double cl);
199   double y0dcl(double t,
200 	       double cl);
201   void showX();
202   void showFvec();
203   void showJacobian();
204   void findDriverDelaySlew(double &delay,
205 			   double &slew);
206   double findVoCrossing(double vth);
207   void showVo();
208   double findVlCrossing(double vth);
209   void showVl();
210   void fail(const char *reason);
211 
212   // Output response to vs(t) ramp driving capacitive load.
213   double y(double t,
214 	   double t0,
215 	   double dt,
216 	   double cl);
217   // Output response to unit ramp driving capacitive load.
218   double y0(double t,
219 	    double cl);
220   // Output response to unit ramp driving pi model load.
221   virtual double v0(double t) = 0;
222   // Upper bound on time that vo crosses vh.
223   virtual double voCrossingUpperBound() = 0;
224   // Load responce to driver unit ramp.
225   virtual double vl0(double t) = 0;
226   // Upper bound on time that vl crosses vh.
227   double vlCrossingUpperBound();
228 
229   // Inputs to the delay calculator.
230   const LibertyCell *drvr_cell_;
231   const LibertyLibrary *drvr_library_;
232   const Pvt *pvt_;
233   const GateTableModel *gate_model_;
234   double in_slew_;
235   float related_out_cap_;
236   double c2_;
237   double rpi_;
238   double c1_;
239 
240   double rd_;
241   // Logic threshold (percentage of supply voltage).
242   double vth_;
243   // Slew lower limit (percentage of supply voltage).
244   double vl_;
245   // Slew upper limit (percentage of supply voltage).
246   double vh_;
247   // Table slews are scaled by slew_derate to get
248   // measured slews from vl to vh.
249   double slew_derate_;
250 
251   // Driver parameters calculated by this algorithm.
252   double t0_;
253   double dt_;
254   double ceff_;
255 
256   // Driver parameter Newton-Raphson state.
257   int nr_order_;
258   double *x_;
259   double *fvec_;
260   double **fjac_;
261   double *scale_;
262   double *p_;
263   int *index_;
264 
265   // Gate slew used to check load delay.
266   double gate_slew_;
267   double vo_delay_;
268   // True if the driver parameters are valid for finding the load delays.
269   bool driver_valid_;
270   // Load rspf elmore delay.
271   double elmore_;
272   double p3_;
273 
274 private:
275   virtual double dvl0dt(double t) = 0;
276 
277   // Implicit argument passed to evalVoEqns, evalVlEqns.
278   double v_cross_;
279 };
280 
DmpAlg(int nr_order,StaState * sta)281 DmpAlg::DmpAlg(int nr_order,
282 	       StaState *sta):
283   StaState(sta),
284   c2_(0.0),
285   rpi_(0.0),
286   c1_(0.0),
287   nr_order_(nr_order)
288 {
289   x_ = new double[nr_order_];
290   fvec_ = new double[nr_order_];
291   scale_ = new double[nr_order_];
292   p_ = new double[nr_order_];
293   fjac_ = new double*[nr_order_];
294   for (int i = 0; i < nr_order_; i++)
295     fjac_[i] = new double[nr_order_];
296   index_ = new int[nr_order_];
297 }
298 
~DmpAlg()299 DmpAlg::~DmpAlg()
300 {
301   delete [] x_;
302   delete [] fvec_;
303   delete [] scale_;
304   delete [] p_;
305   for (int i = 0; i < nr_order_; i++)
306     delete [] fjac_[i];
307   delete [] fjac_;
308   delete [] index_;
309 }
310 
311 void
init(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,const GateTableModel * gate_model,const RiseFall * rf,double rd,double in_slew,float related_out_cap,double c2,double rpi,double c1)312 DmpAlg::init(const LibertyLibrary *drvr_library,
313 	     const LibertyCell *drvr_cell,
314 	     const Pvt *pvt,
315 	     const GateTableModel *gate_model,
316 	     const RiseFall *rf,
317 	     double rd,
318 	     double in_slew,
319 	     float related_out_cap,
320 	     // Pi model.
321 	     double c2,
322 	     double rpi,
323 	     double c1)
324 {
325   drvr_library_ = drvr_library;
326   drvr_cell_ = drvr_cell;
327   pvt_ = pvt;
328   gate_model_ = gate_model;
329   rd_ = rd;
330   in_slew_ = in_slew;
331   related_out_cap_ = related_out_cap;
332   c2_ = c2;
333   rpi_ = rpi;
334   c1_ = c1;
335   driver_valid_ = false;
336   vth_ = drvr_library->outputThreshold(rf);
337   vl_ = drvr_library->slewLowerThreshold(rf);
338   vh_ = drvr_library->slewUpperThreshold(rf);
339   slew_derate_ = drvr_library->slewDerateFromLibrary();
340 }
341 
342 // Find Ceff, delta_t and t0 for the driver.
343 void
findDriverParams(double ceff)344 DmpAlg::findDriverParams(double ceff)
345 {
346   if (nr_order_ == 3)
347     x_[DmpParam::ceff] = ceff;
348   double t_vth, t_vl, slew;
349   gateDelays(ceff, t_vth, t_vl, slew);
350   // Scale slew to 0-100%
351   double dt = slew / (vh_ - vl_);
352   double t0 = t_vth + log(1.0 - vth_) * rd_ * ceff - vth_ * dt;
353   x_[DmpParam::dt] = dt;
354   x_[DmpParam::t0] = t0;
355   newtonRaphson(100, x_, nr_order_, driver_param_tol, evalDmpEqnsState,
356 		this, fvec_, fjac_, index_, p_, scale_);
357   t0_ = x_[DmpParam::t0];
358   dt_ = x_[DmpParam::dt];
359   debugPrint(debug_, "dmp_ceff", 3, "    t0 = %s dt = %s ceff = %s",
360              units_->timeUnit()->asString(t0_),
361              units_->timeUnit()->asString(dt_),
362              units_->capacitanceUnit()->asString(x_[DmpParam::ceff]));
363   if (debug_->check("dmp_ceff", 4))
364     showVo();
365 }
366 
367 static void
evalDmpEqnsState(void * state)368 evalDmpEqnsState(void *state)
369 {
370   DmpAlg *alg = reinterpret_cast<DmpAlg *>(state);
371   alg->evalDmpEqns();
372 }
373 
374 void
gateCapDelaySlew(double ceff,double & delay,double & slew)375 DmpAlg::gateCapDelaySlew(double ceff,
376 			 double &delay,
377 			 double &slew)
378 {
379   ArcDelay model_delay;
380   Slew model_slew;
381   gate_model_->gateDelay(drvr_cell_, pvt_, in_slew_, ceff, related_out_cap_,
382 			 pocv_enabled_, model_delay, model_slew);
383   delay = delayAsFloat(model_delay);
384   slew = delayAsFloat(model_slew);
385 }
386 
387 void
gateDelays(double ceff,double & t_vth,double & t_vl,double & slew)388 DmpAlg::gateDelays(double ceff,
389 		   double &t_vth,
390 		   double &t_vl,
391 		   double &slew)
392 {
393   double table_slew;
394   gateCapDelaySlew(ceff, t_vth, table_slew);
395   // Convert reported/table slew to measured slew.
396   slew = table_slew * slew_derate_;
397   t_vl = t_vth - slew * (vth_ - vl_) / (vh_ - vl_);
398 }
399 
400 double
y(double t,double t0,double dt,double cl)401 DmpAlg::y(double t,
402 	  double t0,
403 	  double dt,
404 	  double cl)
405 {
406   double t1 = t - t0;
407   if (t1 <= 0.0)
408     return 0.0;
409   else if (t1 <= dt)
410     return y0(t1, cl) / dt;
411   else
412     return (y0(t1, cl) - y0(t1 - dt, cl)) / dt;
413 }
414 
415 double
y0(double t,double cl)416 DmpAlg::y0(double t,
417 	   double cl)
418 {
419   return t - rd_ * cl * (1.0 - exp(-t / (rd_ * cl)));
420 }
421 
422 void
dy(double t,double t0,double dt,double cl,double & dydt0,double & dyddt,double & dydcl)423 DmpAlg::dy(double t,
424 	   double t0,
425 	   double dt,
426 	   double cl,
427 	   // Return values.
428 	   double &dydt0,
429 	   double &dyddt,
430 	   double &dydcl)
431 {
432   double t1 = t - t0;
433   if (t1 <= 0.0)
434     dydt0 = dyddt = dydcl = 0.0;
435   else if (t1 <= dt) {
436     dydt0 = -y0dt(t1, cl) / dt;
437     dyddt = -y0(t1, cl) / (dt * dt);
438     dydcl = y0dcl(t1, cl) / dt;
439   }
440   else {
441     dydt0 = -(y0dt(t1, cl) - y0dt(t1 - dt, cl)) / dt;
442     dyddt = -(y0(t1, cl) + y0(t1 - dt, cl)) / (dt * dt)
443       + y0dt(t1 - dt, cl) / dt;
444     dydcl = (y0dcl(t1, cl) - y0dcl(t1 - dt, cl)) / dt;
445   }
446 }
447 
448 double
y0dt(double t,double cl)449 DmpAlg::y0dt(double t,
450 	     double cl)
451 {
452   return 1.0 - exp(-t / (rd_ * cl));
453 }
454 
455 double
y0dcl(double t,double cl)456 DmpAlg::y0dcl(double t,
457 	      double cl)
458 {
459   return rd_ * ((1.0 + t / (rd_ * cl)) * exp(-t / (rd_ * cl)) - 1);
460 }
461 
462 void
showX()463 DmpAlg::showX()
464 {
465   for (int i = 0; i < nr_order_; i++)
466     report_->reportLine("%4s %12.3e", dmp_param_index_strings[i], x_[i]);
467 }
468 
469 void
showFvec()470 DmpAlg::showFvec()
471 {
472   for (int i = 0; i < nr_order_; i++)
473     report_->reportLine("%4s %12.3e", dmp_func_index_strings[i], fvec_[i]);
474 }
475 
476 void
showJacobian()477 DmpAlg::showJacobian()
478 {
479   string line = "    ";
480   for (int j = 0; j < nr_order_; j++)
481     line += stdstrPrint("%12s", dmp_param_index_strings[j]);
482   report_->reportLineString(line);
483   line.clear();
484   for (int i = 0; i < nr_order_; i++) {
485     line += stdstrPrint("%4s ", dmp_func_index_strings[i]);
486     for (int j = 0; j < nr_order_; j++)
487       line += stdstrPrint("%12.3e ", fjac_[i][j]);
488     report_->reportLineString(line);
489   }
490 }
491 
492 void
findDriverDelaySlew(double & delay,double & slew)493 DmpAlg::findDriverDelaySlew(double &delay,
494 			    double &slew)
495 {
496   delay = findVoCrossing(vth_);
497   double tl = findVoCrossing(vl_);
498   double th = findVoCrossing(vh_);
499   // Convert measured slew to table slew.
500   slew = (th - tl) / slew_derate_;
501 }
502 
503 // Find t such that vo(t)=v.
504 double
findVoCrossing(double vth)505 DmpAlg::findVoCrossing(double vth)
506 {
507   v_cross_ = vth;
508   double ub = voCrossingUpperBound();
509   return findRoot(evalVoEqns, this, t0_, ub, vth_time_tol, find_root_max_iter);
510 }
511 
512 static void
evalVoEqns(void * state,double x,double & y,double & dy)513 evalVoEqns(void *state,
514 	   double x,
515 	   double &y,
516 	   double &dy)
517 {
518   DmpAlg *pi_ceff = reinterpret_cast<DmpAlg *>(state);
519   y = pi_ceff->vo(x) - pi_ceff->vCross();
520   dy = pi_ceff->dVoDt(x);
521 }
522 
523 double
vo(double t)524 DmpAlg::vo(double t)
525 {
526   double t1 = t - t0_;
527   if (t1 <= 0.0)
528     return 0.0;
529   else if (t1 <= dt_)
530     return v0(t1) / dt_;
531   else
532     return (v0(t1) - v0(t1 - dt_)) / dt_;
533 }
534 
535 double
dVoDt(double t)536 DmpAlg::dVoDt(double t)
537 {
538   double t1 = t - t0_;
539   if (t1 <= 0)
540     return 0.0;
541   else if (t1 <= dt_)
542     return dv0dt(t1) / dt_;
543   else
544     return (dv0dt(t1) - dv0dt(t1 - dt_)) / dt_;
545 }
546 
547 void
showVo()548 DmpAlg::showVo()
549 {
550   report_->reportLine("  t    vo(t)");
551   double ub = voCrossingUpperBound();
552   for (double t = t0_; t < t0_ + ub; t += dt_ / 10.0)
553     report_->reportLine(" %g %g", t, vo(t));
554 }
555 
556 void
loadDelaySlew(const Pin *,double elmore,ArcDelay & delay,Slew & slew)557 DmpAlg::loadDelaySlew(const Pin *,
558 		      double elmore,
559 		      ArcDelay &delay,
560 		      Slew &slew)
561 {
562   if (!driver_valid_
563       || elmore == 0.0
564       // Elmore delay is small compared to driver slew.
565       || elmore < gate_slew_ * 1e-3) {
566     delay = elmore;
567     slew = gate_slew_;
568   }
569   else {
570     // Use the driver thresholds and rely on thresholdAdjust to
571     // convert the delay and slew to the load's thresholds.
572     try {
573       if (debug_->check("dmp_ceff", 4))
574 	showVl();
575       elmore_ = elmore;
576       p3_ = 1.0 / elmore;
577       double load_delay = findVlCrossing(vth_);
578       double tl = findVlCrossing(vl_);
579       double th = findVlCrossing(vh_);
580       // Measure delay from Vo, the load dependent source excitation.
581       double delay1 = load_delay - vo_delay_;
582       // Convert measured slew to reported/table slew.
583       double slew1 = (th - tl) / slew_derate_;
584       if (delay1 < 0.0) {
585 	// Only report a problem if the difference is significant.
586 	if (-delay1 > vth_time_tol * vo_delay_)
587 	  fail("load delay less than zero");
588 	// Use elmore delay.
589 	delay1 = elmore;
590       }
591       if (slew1 < gate_slew_) {
592 	// Only report a problem if the difference is significant.
593 	if ((gate_slew_ - slew1) > vth_time_tol * gate_slew_)
594 	  fail("load slew less than driver slew");
595 	slew1 = gate_slew_;
596       }
597       delay = delay1;
598       slew = slew1;
599     }
600     catch (DmpError &error) {
601       delay = elmore_;
602       slew = gate_slew_;
603     }
604   }
605 }
606 
607 // Find t such that vl(t)=v.
608 // Return true if successful.
609 double
findVlCrossing(double vth)610 DmpAlg::findVlCrossing(double vth)
611 {
612   v_cross_ = vth;
613   double ub = vlCrossingUpperBound();
614   return findRoot(evalVlEqns, this, t0_, ub, vth_time_tol, find_root_max_iter);
615 }
616 
617 double
vlCrossingUpperBound()618 DmpAlg::vlCrossingUpperBound()
619 {
620   return voCrossingUpperBound() + elmore_;
621 }
622 
623 static void
evalVlEqns(void * state,double x,double & y,double & dy)624 evalVlEqns(void *state,
625 	   double x,
626 	   double &y,
627 	   double &dy)
628 {
629   DmpAlg *pi_ceff = reinterpret_cast<DmpAlg *>(state);
630   y = pi_ceff->vl(x) - pi_ceff->vCross();
631   dy = pi_ceff->dVlDt(x);
632 }
633 
634 double
vl(double t)635 DmpAlg::vl(double t)
636 {
637   double t1 = t - t0_;
638   if (t1 <= 0)
639     return 0.0;
640   else if (t1 <= dt_)
641     return vl0(t1) / dt_;
642   else
643     return (vl0(t1) - vl0(t1 - dt_)) / dt_;
644 }
645 
646 double
dVlDt(double t)647 DmpAlg::dVlDt(double t)
648 {
649   double t1 = t - t0_;
650   if (t1 <= 0)
651     return 0.0;
652   else if (t1 <= dt_)
653     return dvl0dt(t1) / dt_;
654   else
655     return (dvl0dt(t1) - dvl0dt(t1 - dt_)) / dt_;
656 }
657 
658 void
showVl()659 DmpAlg::showVl()
660 {
661   report_->reportLine("  t    vl(t)");
662   double ub = vlCrossingUpperBound();
663   for (double t = t0_; t < t0_ + ub * 2.0; t += ub / 10.0)
664     report_->reportLine(" %g %g", t, vl(t));
665 }
666 
667 void
fail(const char * reason)668 DmpAlg::fail(const char *reason)
669 {
670   // Allow only failures to be reported with a unique debug flag.
671   if (debug_->check("dmp_ceff", 1) || debug_->check("dmp_ceff_fail", 1))
672     report_->reportLine("delay_calc: DMP failed - %s c2=%s rpi=%s c1=%s rd=%s",
673                         reason,
674                         units_->capacitanceUnit()->asString(c2_),
675                         units_->resistanceUnit()->asString(rpi_),
676                         units_->capacitanceUnit()->asString(c1_),
677                         units_->resistanceUnit()->asString(rd_));
678 }
679 
680 ////////////////////////////////////////////////////////////////
681 
682 // Capacitive load.
683 class DmpCap : public DmpAlg
684 {
685 public:
686   DmpCap(StaState *sta);
name()687   virtual const char *name() { return "cap"; }
688   virtual void init(const LibertyLibrary *library,
689 		    const LibertyCell *drvr_cell,
690 		    const Pvt *pvt,
691 		    const GateTableModel *gate_model,
692 		    const RiseFall *rf,
693 		    double rd,
694 		    double in_slew,
695 		    float related_out_cap,
696 		    double c2,
697 		    double rpi,
698 		    double c1);
699   virtual void gateDelaySlew(double &delay,
700 			     double &slew);
701   virtual void loadDelaySlew(const Pin *,
702 			     double elmore,
703 			     ArcDelay &delay,
704 			     Slew &slew);
705   virtual void evalDmpEqns();
706   virtual double voCrossingUpperBound();
707 
708 private:
709   virtual double v0(double t);
710   virtual double dv0dt(double t);
711   virtual double vl0(double t);
712   virtual double dvl0dt(double t);
713 };
714 
DmpCap(StaState * sta)715 DmpCap::DmpCap(StaState *sta):
716   DmpAlg(1, sta)
717 {
718 }
719 
720 void
init(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,const GateTableModel * gate_model,const RiseFall * rf,double rd,double in_slew,float related_out_cap,double c2,double rpi,double c1)721 DmpCap::init(const LibertyLibrary *drvr_library,
722 	     const LibertyCell *drvr_cell,
723 	     const Pvt *pvt,
724 	     const GateTableModel *gate_model,
725 	     const RiseFall *rf,
726 	     double rd,
727 	     double in_slew,
728 	     float related_out_cap,
729 	     double c2,
730 	     double rpi,
731 	     double c1)
732 {
733   debugPrint(debug_, "dmp_ceff", 3, "Using DMP cap");
734   DmpAlg::init(drvr_library, drvr_cell, pvt, gate_model, rf,
735 	       rd, in_slew, related_out_cap, c2, rpi, c1);
736   ceff_ = c1 + c2;
737 }
738 
739 void
gateDelaySlew(double & delay,double & slew)740 DmpCap::gateDelaySlew(double &delay,
741 		      double &slew)
742 {
743   debugPrint(debug_, "dmp_ceff", 3, "    ceff = %s",
744              units_->capacitanceUnit()->asString(ceff_));
745   gateCapDelaySlew(ceff_, delay, slew);
746   gate_slew_ = slew;
747 }
748 
749 void
loadDelaySlew(const Pin *,double elmore,ArcDelay & delay,Slew & slew)750 DmpCap::loadDelaySlew(const Pin *,
751 		      double elmore,
752 		      ArcDelay &delay,
753 		      Slew &slew)
754 {
755   delay = elmore;
756   slew = gate_slew_;
757 }
758 
759 void
evalDmpEqns()760 DmpCap::evalDmpEqns()
761 {
762 }
763 
764 double
v0(double)765 DmpCap::v0(double)
766 {
767   return 0.0;
768 }
769 
770 double
dv0dt(double)771 DmpCap::dv0dt(double)
772 {
773   return 0.0;
774 }
775 
776 double
voCrossingUpperBound()777 DmpCap::voCrossingUpperBound()
778 {
779   return 0.0;
780 }
781 
782 double
vl0(double)783 DmpCap::vl0(double)
784 {
785   return 0.0;
786 }
787 
788 double
dvl0dt(double)789 DmpCap::dvl0dt(double)
790 {
791   return 0.0;
792 }
793 
794 ////////////////////////////////////////////////////////////////
795 
796 // No non-zero pi model parameters, two poles, one zero
797 class DmpPi : public DmpAlg
798 {
799 public:
800   DmpPi(StaState *sta);
name()801   virtual const char *name() { return "Pi"; }
802   virtual void init(const LibertyLibrary *library,
803 		    const LibertyCell *drvr_cell,
804 		    const Pvt *pvt,
805 		    const GateTableModel *gate_model,
806 		    const RiseFall *rf,
807 		    double rd,
808 		    double in_slew,
809 		    float related_out_cap,
810 		    double c2,
811 		    double rpi,
812 		    double c1);
813   virtual void gateDelaySlew(double &delay,
814 			     double &slew);
815   virtual void evalDmpEqns();
816   virtual double voCrossingUpperBound();
817 
818 private:
819   void findDriverParamsPi();
820   virtual double v0(double t);
821   virtual double dv0dt(double t);
822   double ipiIceff(double t0,
823 		  double dt,
824 		  double ceff_time,
825 		  double ceff);
826   virtual double vl0(double t);
827   virtual double dvl0dt(double t);
828 
829   // Poles/zero.
830   double p1_;
831   double p2_;
832   double z1_;
833   // Residues.
834   double k0_;
835   double k1_;
836   double k2_;
837   double k3_;
838   double k4_;
839   // Ipi coefficients.
840   double A_;
841   double B_;
842   double D_;
843 };
844 
DmpPi(StaState * sta)845 DmpPi::DmpPi(StaState *sta) :
846   DmpAlg(3, sta),
847   p1_(0.0),
848   p2_(0.0),
849   z1_(0.0),
850   k0_(0.0),
851   k1_(0.0),
852   k2_(0.0),
853   k3_(0.0),
854   k4_(0.0),
855   A_(0.0),
856   B_(0.0),
857   D_(0.0)
858 {
859 }
860 
861 void
init(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,const GateTableModel * gate_model,const RiseFall * rf,double rd,double in_slew,float related_out_cap,double c2,double rpi,double c1)862 DmpPi::init(const LibertyLibrary *drvr_library,
863 	    const LibertyCell *drvr_cell,
864 	    const Pvt *pvt,
865 	    const GateTableModel *gate_model,
866 	    const RiseFall *rf,
867 	    double rd,
868 	    double in_slew,
869 	    float related_out_cap,
870 	    double c2,
871 	    double rpi,
872 	    double c1)
873 {
874   debugPrint(debug_, "dmp_ceff", 3, "Using DMP Pi");
875   DmpAlg::init(drvr_library, drvr_cell, pvt, gate_model, rf, rd,
876 	       in_slew, related_out_cap, c2, rpi, c1);
877 
878   // Find poles/zeros.
879   z1_ = 1.0 / (rpi_ * c1_);
880   k0_ = 1.0 / (rd_ * c2_);
881   double a = rpi_ * rd_ * c1_ * c2_;
882   double b = rd_ * (c1_ + c2_) + rpi_ * c1_;
883   double sqrt_ = sqrt(b * b - 4 * a);
884   p1_ = (b + sqrt_) / (2 * a);
885   p2_ = (b - sqrt_) / (2 * a);
886 
887   double p1p2 = (p1_ * p2_);
888   k2_ = z1_ / p1p2;
889   k1_ = (1.0 - k2_ * (p1_ + p2_)) / p1p2;
890   k4_ = (k1_ * p1_ + k2_) / (p2_ - p1_);
891   k3_ = -k1_ - k4_;
892 
893   double z_ = (c1_ + c2_) / (rpi_ * c1_ * c2_);
894   A_ = z_ / p1p2;
895   B_ = (z_ - p1_) / (p1_ * (p1_ - p2_));
896   D_ = (z_ - p2_) / (p2_ * (p2_ - p1_));
897 }
898 
899 void
gateDelaySlew(double & delay,double & slew)900 DmpPi::gateDelaySlew(double &delay,
901 		     double &slew)
902 {
903   driver_valid_ = false;
904   try {
905     findDriverParamsPi();
906     ceff_ = x_[DmpParam::ceff];
907     double table_delay, table_slew;
908     gateCapDelaySlew(ceff_, table_delay, table_slew);
909     delay = table_delay;
910     //slew = table_slew;
911     try {
912       double vo_delay, vo_slew;
913       findDriverDelaySlew(vo_delay, vo_slew);
914       driver_valid_ = true;
915       // Save Vo delay to measure load wire delay waveform.
916       vo_delay_ = vo_delay;
917       //delay = vo_delay;
918       slew = vo_slew;
919     }
920     catch (DmpError &error) {
921       fail(error.what());
922       // Fall back to table slew.
923       slew = table_slew;
924     }
925   }
926   catch (DmpError &error) {
927     fail(error.what());
928     // Driver calculation failed - use Ceff=c1+c2.
929     ceff_ = c1_ + c2_;
930     gateCapDelaySlew(ceff_, delay, slew);
931   }
932   gate_slew_ = slew;
933 }
934 
935 void
findDriverParamsPi()936 DmpPi::findDriverParamsPi()
937 {
938   try {
939     findDriverParams(c2_ + c1_);
940   }
941   catch (DmpError &) {
942     findDriverParams(c2_);
943   }
944 }
945 
946 // Given x_ as a vector of input parameters, fill fvec_ with the
947 // equations evaluated at x_ and fjac_ with the jacobian evaluated at x_.
948 void
evalDmpEqns()949 DmpPi::evalDmpEqns()
950 {
951   double t0 = x_[DmpParam::t0];
952   double dt = x_[DmpParam::dt];
953   double ceff = x_[DmpParam::ceff];
954 
955   if (ceff < 0.0)
956     throw DmpError("eqn eval failed: ceff < 0");
957   if (ceff > (c1_ + c2_))
958     throw DmpError("eqn eval failed: ceff > c2 + c1");
959 
960   double t_vth, t_vl, slew;
961   gateDelays(ceff, t_vth, t_vl, slew);
962   double ceff_time = slew / (vh_ - vl_);
963   if (ceff_time > 1.4 * dt)
964     ceff_time = 1.4 * dt;
965 
966   if (dt <= 0.0)
967     throw DmpError("eqn eval failed: dt < 0");
968 
969   double exp_p1_dt = exp(-p1_ * dt);
970   double exp_p2_dt = exp(-p2_ * dt);
971   double exp_dt_rd_ceff = exp(-dt / (rd_ * ceff));
972 
973   double y50 = y(t_vth, t0, dt, ceff);
974   // Match Vl.
975   double y20 = y(t_vl, t0, dt, ceff);
976   fvec_[DmpFunc::ipi] = ipiIceff(t0, dt, ceff_time, ceff);
977   fvec_[DmpFunc::y50] = y50 - vth_;
978   fvec_[DmpFunc::y20] = y20 - vl_;
979   fjac_[DmpFunc::ipi][DmpParam::t0] = 0.0;
980   fjac_[DmpFunc::ipi][DmpParam::dt] =
981     (-A_ * dt + B_ * dt * exp_p1_dt - (2 * B_ / p1_) * (1.0 - exp_p1_dt)
982      + D_ * dt * exp_p2_dt - (2 * D_ / p2_) * (1.0 - exp_p2_dt)
983      + rd_ * ceff * (dt + dt * exp_dt_rd_ceff
984 		     - 2 * rd_ * ceff * (1.0 - exp_dt_rd_ceff)))
985     / (rd_ * dt * dt * dt);
986   fjac_[DmpFunc::ipi][DmpParam::ceff] =
987     (2 * rd_ * ceff - dt - (2 * rd_ * ceff + dt) * exp(-dt / (rd_ * ceff)))
988     / (dt * dt);
989 
990   dy(t_vl, t0, dt, ceff,
991      fjac_[DmpFunc::y20][DmpParam::t0],
992      fjac_[DmpFunc::y20][DmpParam::dt],
993      fjac_[DmpFunc::y20][DmpParam::ceff]);
994 
995   dy(t_vth, t0, dt, ceff,
996      fjac_[DmpFunc::y50][DmpParam::t0],
997      fjac_[DmpFunc::y50][DmpParam::dt],
998      fjac_[DmpFunc::y50][DmpParam::ceff]);
999 
1000   if (debug_->check("dmp_ceff", 4)) {
1001     showX();
1002     showFvec();
1003     showJacobian();
1004     report_->reportLine(".................");
1005   }
1006 }
1007 
1008 // Eqn 13, Eqn 14.
1009 double
ipiIceff(double,double dt,double ceff_time,double ceff)1010 DmpPi::ipiIceff(double, double dt,
1011 		double ceff_time,
1012 		double ceff)
1013 {
1014   double exp_p1_dt = exp(-p1_ * ceff_time);
1015   double exp_p2_dt = exp(-p2_ * ceff_time);
1016   double exp_dt_rd_ceff = exp(-ceff_time / (rd_ * ceff));
1017   double ipi = (A_ * ceff_time + (B_ / p1_) * (1.0 - exp_p1_dt)
1018 	       + (D_ / p2_) * (1.0 - exp_p2_dt))
1019     / (rd_ * ceff_time * dt);
1020   double iceff = (rd_ * ceff * ceff_time - (rd_ * ceff) * (rd_ * ceff)
1021 		  * (1.0 - exp_dt_rd_ceff))
1022     / (rd_ * ceff_time * dt);
1023   return ipi - iceff;
1024 }
1025 
1026 double
v0(double t)1027 DmpPi::v0(double t)
1028 {
1029   return k0_ * (k1_ + k2_ * t + k3_ * exp(-p1_ * t) + k4_ * exp(-p2_ * t));
1030 }
1031 
1032 double
dv0dt(double t)1033 DmpPi::dv0dt(double t)
1034 {
1035   return k0_ * (k2_ - k3_ * p1_ * exp(-p1_ * t) - k4_ * p2_ * exp(-p2_ * t));
1036 }
1037 
1038 double
vl0(double t)1039 DmpPi::vl0(double t)
1040 {
1041   double D1 = k0_ * (k1_ - k2_ / p3_);
1042   double D3 = -p3_ * k0_ * k3_ / (p1_ - p3_);
1043   double D4 = -p3_ * k0_ * k4_ / (p2_ - p3_);
1044   double D5 = k0_ * (k2_ / p3_ - k1_ + p3_ * k3_ / (p1_ - p3_)
1045 		    + p3_ * k4_ / (p2_ - p3_));
1046   return D1 + t + D3 * exp(-p1_ * t) + D4 * exp(-p2_ * t) + D5 * exp(-p3_ * t);
1047 }
1048 
1049 double
voCrossingUpperBound()1050 DmpPi::voCrossingUpperBound()
1051 {
1052   return t0_ + dt_ + (c1_ + c2_) * (rd_ + rpi_) * 2.0;
1053 }
1054 
1055 double
dvl0dt(double t)1056 DmpPi::dvl0dt(double t)
1057 {
1058   double D3 = -p3_ * k0_ * k3_ / (p1_ - p3_);
1059   double D4 = -p3_ * k0_ * k4_ / (p2_ - p3_);
1060   double D5 = k0_ * (k2_ / p3_ - k1_ + p3_ * k3_ / (p1_ - p3_)
1061 		    + p3_ * k4_ / (p2_ - p3_));
1062   return 1.0 - D3 * p1_ * exp(-p1_ * t) - D4 * p2_ * exp(-p2_ * t)
1063     - D5 * p3_ * exp(-p3_ * t);
1064 }
1065 
1066 ////////////////////////////////////////////////////////////////
1067 
1068 // Capacitive load, so Ceff is known.
1069 // Solve for t0, delta t.
1070 class DmpOnePole : public DmpAlg
1071 {
1072 public:
1073   DmpOnePole(StaState *sta);
1074   virtual void evalDmpEqns();
1075   virtual double voCrossingUpperBound();
1076 };
1077 
DmpOnePole(StaState * sta)1078 DmpOnePole::DmpOnePole(StaState *sta) :
1079   DmpAlg(2, sta)
1080 {
1081 }
1082 
1083 void
evalDmpEqns()1084 DmpOnePole::evalDmpEqns()
1085 {
1086   double t0 = x_[DmpParam::t0];
1087   double dt = x_[DmpParam::dt];
1088 
1089   double t_vth, t_vl, ignore1, ignore2;
1090   gateDelays(ceff_, t_vth, t_vl, ignore1);
1091 
1092   if (dt <= 0.0)
1093     dt = x_[DmpParam::dt] = (t_vl - t_vth) / 100;
1094 
1095   fvec_[DmpFunc::y50] = y(t_vth, t0, dt, ceff_) - vth_;
1096   fvec_[DmpFunc::y20] = y(t_vl, t0, dt, ceff_) - vl_;
1097 
1098   if (debug_->check("dmp_ceff", 4)) {
1099     showX();
1100     showFvec();
1101   }
1102 
1103   dy(t_vl, t0, dt, ceff_,
1104      fjac_[DmpFunc::y20][DmpParam::t0],
1105      fjac_[DmpFunc::y20][DmpParam::dt],
1106      ignore2);
1107 
1108   dy(t_vth, t0, dt, ceff_,
1109      fjac_[DmpFunc::y50][DmpParam::t0],
1110      fjac_[DmpFunc::y50][DmpParam::dt],
1111      ignore2);
1112 
1113   if (debug_->check("dmp_ceff", 4)) {
1114     showJacobian();
1115     report_->reportLine(".................");
1116   }
1117 }
1118 
1119 double
voCrossingUpperBound()1120 DmpOnePole::voCrossingUpperBound()
1121 {
1122   return t0_ + dt_ + ceff_ * rd_ * 2.0;
1123 }
1124 
1125 ////////////////////////////////////////////////////////////////
1126 
1127 // C2 = 0, one pole, one zero.
1128 class DmpZeroC2 : public DmpOnePole
1129 {
1130 public:
1131   DmpZeroC2(StaState *sta);
name()1132   virtual const char *name() { return "c2=0"; }
1133   virtual void init(const LibertyLibrary *drvr_library,
1134 		    const LibertyCell *drvr_cell,
1135 		    const Pvt *pvt,
1136 		    const GateTableModel *gate_model,
1137 		    const RiseFall *rf,
1138 		    double rd,
1139 		    double in_slew,
1140 		    float related_out_cap,
1141 		    double c2,
1142 		    double rpi,
1143 		    double c1);
1144   virtual void gateDelaySlew(double &delay,
1145 			     double &slew);
1146 
1147 private:
1148   virtual double v0(double t);
1149   virtual double dv0dt(double t);
1150   virtual double vl0(double t);
1151   virtual double dvl0dt(double t);
1152   virtual double voCrossingUpperBound();
1153 
1154   // Pole/zero.
1155   double p1_;
1156   double z1_;
1157   // Residues.
1158   double k0_;
1159   double k1_;
1160   double k2_;
1161   double k3_;
1162 };
1163 
DmpZeroC2(StaState * sta)1164 DmpZeroC2::DmpZeroC2(StaState *sta) :
1165   DmpOnePole(sta),
1166   p1_(0.0),
1167   z1_(0.0),
1168   k0_(0.0),
1169   k1_(0.0),
1170   k2_(0.0),
1171   k3_(0.0)
1172 {
1173 }
1174 
1175 void
init(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,const GateTableModel * gate_model,const RiseFall * rf,double rd,double in_slew,float related_out_cap,double c2,double rpi,double c1)1176 DmpZeroC2::init(const LibertyLibrary *drvr_library,
1177 		const LibertyCell *drvr_cell,
1178 		const Pvt *pvt,
1179 		const GateTableModel *gate_model,
1180 		const RiseFall *rf,
1181 		double rd,
1182 		double in_slew,
1183 		float related_out_cap,
1184 		double c2,
1185 		double rpi,
1186 		double c1)
1187 {
1188   debugPrint(debug_, "dmp_ceff", 3, "Using DMP C2=0");
1189   DmpAlg::init(drvr_library, drvr_cell, pvt, gate_model, rf, rd,
1190 	       in_slew, related_out_cap, c2, rpi, c1);
1191   ceff_ = c1;
1192 
1193   z1_ = 1.0 / (rpi_ * c1_);
1194   p1_ = 1.0 / (c1_ * (rd_ + rpi_));
1195 
1196   k0_ = p1_ / z1_;
1197   k2_ = 1.0 / k0_;
1198   k1_ = (p1_ - z1_) / (p1_ * p1_);
1199   k3_ = -k1_;
1200 }
1201 
1202 void
gateDelaySlew(double & delay,double & slew)1203 DmpZeroC2::gateDelaySlew(double &delay,
1204 			 double &slew)
1205 {
1206   try {
1207     findDriverParams(c1_);
1208     ceff_ = c1_;
1209     findDriverDelaySlew(delay, slew);
1210     driver_valid_ = true;
1211     vo_delay_ = delay;
1212   }
1213   catch (DmpError &error) {
1214     fail(error.what());
1215     // Fall back to table slew.
1216     driver_valid_ = false;
1217     ceff_ = c1_;
1218     gateCapDelaySlew(ceff_, delay, slew);
1219   }
1220   gate_slew_ = slew;
1221 }
1222 
1223 double
v0(double t)1224 DmpZeroC2::v0(double t)
1225 {
1226   return k0_ * (k1_ + k2_ * t + k3_ * exp(-p1_ * t));
1227 }
1228 
1229 double
dv0dt(double t)1230 DmpZeroC2::dv0dt(double t)
1231 {
1232   return k0_ * (k2_ - k3_ * p1_ * exp(-p1_ * t));
1233 }
1234 
1235 double
vl0(double t)1236 DmpZeroC2::vl0(double t)
1237 {
1238   double D1 = k0_ * (k1_ - k2_ / p3_);
1239   double D3 = -p3_ * k0_ * k3_ / (p1_ - p3_);
1240   double D5 = k0_ * (k2_ / p3_ - k1_ + p3_ * k3_ / (p1_ - p3_));
1241   return D1 + t + D3 * exp(-p1_ * t) + D5 * exp(-p3_ * t);
1242 }
1243 
1244 double
dvl0dt(double t)1245 DmpZeroC2::dvl0dt(double t)
1246 {
1247   double D3 = -p3_ * k0_ * k3_ / (p1_ - p3_);
1248   double D5 = k0_ * (k2_ / p3_ - k1_ + p3_ * k3_ / (p1_ - p3_));
1249   return 1.0 - D3 * p1_ * exp(-p1_ * t) - D5 * p3_ * exp(-p3_ * t);
1250 }
1251 
1252 double
voCrossingUpperBound()1253 DmpZeroC2::voCrossingUpperBound()
1254 {
1255   return t0_ + dt_ + c1_ * (rd_ + rpi_) * 2.0;
1256 }
1257 
1258 ////////////////////////////////////////////////////////////////
1259 
1260 // Find the root of a function between x1 and x2 using a combination
1261 // of Newton-Raphson and bisection search.
1262 // x_tol is a percentage that change in x must be less than (1.0 = 100%).
1263 // error is non-null if a problem occurs.
1264 static double
findRoot(void (* func)(void * state,double x,double & y,double & dy),void * state,double x1,double x2,double x_tol,int max_iter)1265 findRoot(void (*func)(void *state, double x, double &y, double &dy),
1266 	 void *state,
1267 	 double x1,
1268 	 double x2,
1269 	 double x_tol,
1270 	 int max_iter)
1271 {
1272   double y1, y2, dy;
1273   func(state, x1, y1, dy);
1274   func(state, x2, y2, dy);
1275 
1276   if ((y1 > 0.0 && y2 > 0.0) || (y1 < 0.0 && y2 < 0.0))
1277     throw DmpError("findRoot: initial bounds do not surround a root");
1278 
1279   if (y1 == 0.0)
1280     return x1;
1281 
1282   if (y2 == 0.0)
1283     return x2;
1284 
1285   if (y1 > 0.0) {
1286     // Swap x1/x2 so func(x1) < 0.
1287     double tmp = x1;
1288     x1 = x2;
1289     x2 = tmp;
1290   }
1291   double root = (x1 + x2) * 0.5;
1292   double dx_prev = abs(x2 - x1);
1293   double dx = dx_prev;
1294   double y;
1295   func(state, root, y, dy);
1296   for (int iter = 0; iter < max_iter; iter++) {
1297     // Newton/raphson out of range.
1298     if ((((root - x2) * dy - y) * ((root - x1) * dy - y) > 0.0)
1299 	// Not decreasing fast enough.
1300 	|| (abs(2.0 * y) > abs(dx_prev * dy))) {
1301       // Bisect x1/x2 interval.
1302       dx_prev = dx;
1303       dx = (x2 - x1) * 0.5;
1304       root = x1 + dx;
1305     }
1306     else {
1307       dx_prev = dx;
1308       dx = y / dy;
1309       root -= dx;
1310     }
1311     if (abs(dx) <= x_tol * abs(root))
1312       // Converged.
1313       return root;
1314 
1315     func(state, root, y, dy);
1316     if (y < 0.0)
1317       x1 = root;
1318     else
1319       x2 = root;
1320   }
1321   throw DmpError("findRoot: max iterations exceeded");
1322 }
1323 
1324 // Newton-Raphson iteration to find zeros of a function.
1325 // x_tol is percentage that all changes in x must be less than (1.0 = 100%).
1326 // Eval(state) is called to fill fvec and fjac (returns false if fails).
1327 // Return error msg on failure.
1328 static void
newtonRaphson(const int max_iter,double x[],const int size,const double x_tol,void (* eval)(void * state),void * state,double * fvec,double ** fjac,int * index,double * p,double * scale)1329 newtonRaphson(const int max_iter,
1330 	      double x[],
1331 	      const int size,
1332 	      const double x_tol,
1333 	      void (*eval)(void *state),
1334 	      void *state,
1335 	      // Temporaries supplied by caller.
1336 	      double *fvec,
1337 	      double **fjac,
1338 	      int *index,
1339 	      double *p,
1340 	      double *scale)
1341 {
1342   for (int k = 0; k < max_iter; k++) {
1343     eval(state);
1344     for (int i = 0; i < size; i++)
1345       // Right-hand side of linear equations.
1346       p[i] = -fvec[i];
1347     luDecomp(fjac, size, index, scale);
1348     luSolve(fjac, size, index, p);
1349 
1350     bool all_under_x_tol = true;
1351     for (int i = 0; i < size; i++) {
1352       if (abs(p[i]) > abs(x[i]) * x_tol)
1353 	all_under_x_tol = false;
1354       x[i] += p[i];
1355     }
1356     if (all_under_x_tol)
1357       return;
1358   }
1359   throw DmpError("Newton-Raphson max iterations exceeded");
1360 }
1361 
1362 // luDecomp, luSolve based on MatClass from C. R. Birchenhall,
1363 // University of Manchester
1364 // ftp://ftp.mcc.ac.uk/pub/matclass/libmat.tar.Z
1365 
1366 // Crout's Method of LU decomposition of square matrix, with implicit
1367 // partial pivoting.  A is overwritten. U is explicit in the upper
1368 // triangle and L is in multiplier form in the subdiagionals i.e. subdiag
1369 // a[i,j] is the multiplier used to eliminate the [i,j] term.
1370 //
1371 // Replaces a[0..size-1][0..size-1] by the LU decomposition.
1372 // index[0..size-1] is an output vector of the row permutations.
1373 // Return error msg on failure.
1374 void
luDecomp(double ** a,const int size,int * index,double * scale)1375 luDecomp(double **a,
1376 	 const int size,
1377 	 int *index,
1378 	 // Temporary supplied by caller.
1379 	 // scale stores the implicit scaling of each row.
1380 	 double *scale)
1381 {
1382   // Find implicit scaling factors.
1383   for (int i = 0; i < size; i++) {
1384     double big = 0.0;
1385     for (int j = 0; j < size; j++) {
1386       double temp = abs(a[i][j]);
1387       if (temp > big)
1388 	big = temp;
1389     }
1390     if (big == 0.0)
1391       throw DmpError("LU decomposition: no non-zero row element");
1392     scale[i] = 1.0 / big;
1393   }
1394   int size_1 = size - 1;
1395   for (int j = 0; j < size; j++) {
1396     // Run down jth column from top to diag, to form the elements of U.
1397     for (int i = 0; i < j; i++) {
1398       double sum = a[i][j];
1399       for (int k = 0; k < i; k++)
1400 	sum -= a[i][k] * a[k][j];
1401       a[i][j] = sum;
1402     }
1403     // Run down jth subdiag to form the residuals after the elimination
1404     // of the first j-1 subdiags.  These residuals divided by the
1405     // appropriate diagonal term will become the multipliers in the
1406     // elimination of the jth. subdiag. Find index of largest scaled
1407     // term in imax.
1408     double big = 0.0;
1409     int imax = 0;
1410     for (int i = j; i < size; i++) {
1411       double sum = a[i][j];
1412       for (int k = 0; k < j; k++)
1413 	sum -= a[i][k] * a[k][j];
1414       a[i][j] = sum;
1415       double dum = scale[i] * abs(sum);
1416       if (dum >= big) {
1417 	big = dum;
1418 	imax = i;
1419       }
1420     }
1421     // Permute current row with imax.
1422     if (j != imax) {
1423       // Yes, do so...
1424       for (int k = 0; k < size; k++) {
1425 	double dum = a[imax][k];
1426 	a[imax][k] = a[j][k];
1427 	a[j][k] = dum;
1428       }
1429       scale[imax] = scale[j];
1430     }
1431     index[j] = imax;
1432     // If diag term is not zero divide subdiag to form multipliers.
1433     if (a[j][j] == 0.0)
1434       a[j][j] = tiny_double;
1435     if (j != size_1) {
1436       double pivot = 1.0 / a[j][j];
1437       for (int i = j + 1; i < size; i++)
1438 	a[i][j] *= pivot;
1439     }
1440   }
1441 }
1442 
1443 // Solves the set of size linear equations a*x=b, assuming A is LU form
1444 // but assume b has not been transformed.
1445 //  a[0..size-1] is LU decomposition
1446 // Returns the solution vector x in b.
1447 // a and index are not modified.
1448 void
luSolve(double ** a,const int size,const int * index,double b[])1449 luSolve(double **a,
1450 	const int size,
1451 	const int *index,
1452 	double b[])
1453 {
1454   // Transform b allowing for leading zeros.
1455   int non_zero = -1;
1456   for (int i = 0; i < size; i++) {
1457     int iperm = index[i];
1458     double sum = b[iperm];
1459     b[iperm] = b[i];
1460     if (non_zero != -1) {
1461       for (int j = non_zero; j <= i - 1; j++)
1462 	sum -= a[i][j] * b[j];
1463     }
1464     else {
1465       if (sum != 0.0)
1466 	non_zero = i;
1467     }
1468     b[i] = sum;
1469   }
1470   // Backsubstitution.
1471   for (int i = size - 1; i >= 0; i--) {
1472     double sum = b[i];
1473     for (int j = i + 1; j < size; j++)
1474       sum -= a[i][j] * b[j];
1475     b[i] = sum / a[i][i];
1476   }
1477 }
1478 
1479 #if 0
1480 // Solve:
1481 //  x + y = 5
1482 //  x - y = 1
1483 // x = 3
1484 // y = 2
1485 void
1486 testLuDecomp1()
1487 {
1488   double a0[2] = {1,  1};
1489   double a1[2] = {1, -1};
1490   double *a[2] = {a0, a1};
1491   int index[2];
1492   double b[2] = {5, 1};
1493   double scale[2];
1494   luDecomp(a, 2, index, scale);
1495   luSolve(a, 2, index, b);
1496   printf("x = %f y= %f\n", b[0], b[1]);
1497 }
1498 
1499 // Solve
1500 //   x + 2y =  3
1501 //  3x - 4y = 19
1502 // x = 5
1503 // y = -1
1504 void
1505 testLuDecomp2()
1506 {
1507   double a0[2] = {1,  2};
1508   double a1[2] = {3, -4};
1509   double *a[2] = {a0, a1};
1510   int index[2];
1511   double b[2] = {3, 19};
1512   double scale[2];
1513   luDecomp(a, 2, index, scale);
1514   luSolve(a, 2, index, b);
1515   printf("x = %f y= %f\n", b[0], b[1]);
1516 }
1517 #endif
1518 
1519 ////////////////////////////////////////////////////////////////
1520 
1521 bool DmpCeffDelayCalc::unsuppored_model_warned_ = false;
1522 
DmpCeffDelayCalc(StaState * sta)1523 DmpCeffDelayCalc::DmpCeffDelayCalc(StaState *sta) :
1524   RCDelayCalc(sta),
1525   dmp_cap_(new DmpCap(sta)),
1526   dmp_pi_(new DmpPi(sta)),
1527   dmp_zero_c2_(new DmpZeroC2(sta)),
1528   dmp_alg_(nullptr)
1529 {
1530 }
1531 
~DmpCeffDelayCalc()1532 DmpCeffDelayCalc::~DmpCeffDelayCalc()
1533 {
1534   delete dmp_cap_;
1535   delete dmp_pi_;
1536   delete dmp_zero_c2_;
1537 }
1538 
1539 void
inputPortDelay(const Pin * port_pin,float in_slew,const RiseFall * rf,Parasitic * parasitic,const DcalcAnalysisPt * dcalc_ap)1540 DmpCeffDelayCalc::inputPortDelay(const Pin *port_pin,
1541 				 float in_slew,
1542 				 const RiseFall *rf,
1543 				 Parasitic *parasitic,
1544 				 const DcalcAnalysisPt *dcalc_ap)
1545 {
1546   dmp_alg_ = nullptr;
1547   input_port_ = true;
1548   RCDelayCalc::inputPortDelay(port_pin, in_slew, rf, parasitic, dcalc_ap);
1549 }
1550 
1551 void
gateDelay(const LibertyCell * drvr_cell,TimingArc * arc,const Slew & in_slew,float load_cap,Parasitic * drvr_parasitic,float related_out_cap,const Pvt * pvt,const DcalcAnalysisPt * dcalc_ap,ArcDelay & gate_delay,Slew & drvr_slew)1552 DmpCeffDelayCalc::gateDelay(const LibertyCell *drvr_cell,
1553 			    TimingArc *arc,
1554 			    const Slew &in_slew,
1555 			    float load_cap,
1556 			    Parasitic *drvr_parasitic,
1557 			    float related_out_cap,
1558 			    const Pvt *pvt,
1559 			    const DcalcAnalysisPt *dcalc_ap,
1560 			    // Return values.
1561 			    ArcDelay &gate_delay,
1562 			    Slew &drvr_slew)
1563 {
1564   input_port_ = false;
1565   drvr_rf_ = arc->toTrans()->asRiseFall();
1566   drvr_library_ = drvr_cell->libertyLibrary();
1567   drvr_parasitic_ = drvr_parasitic;
1568   GateTimingModel *model = gateModel(arc, dcalc_ap);
1569   GateTableModel *table_model = dynamic_cast<GateTableModel*>(model);
1570   if (table_model && drvr_parasitic) {
1571     float in_slew1 = delayAsFloat(in_slew);
1572     float c2, rpi, c1;
1573     parasitics_->piModel(drvr_parasitic, c2, rpi, c1);
1574     setCeffAlgorithm(drvr_library_, drvr_cell, pvt, table_model,
1575 		     drvr_rf_, in_slew1, related_out_cap,
1576 		     c2, rpi, c1);
1577     double dmp_gate_delay, dmp_drvr_slew;
1578     gateDelaySlew(dmp_gate_delay, dmp_drvr_slew);
1579     gate_delay = dmp_gate_delay;
1580     drvr_slew = dmp_drvr_slew;
1581   }
1582   else {
1583     LumpedCapDelayCalc::gateDelay(drvr_cell, arc, in_slew, load_cap,
1584 				  drvr_parasitic, related_out_cap, pvt,
1585 				  dcalc_ap, gate_delay, drvr_slew);
1586     if (drvr_parasitic
1587 	&& !unsuppored_model_warned_) {
1588       unsuppored_model_warned_ = true;
1589       report_->warn(1, "cell %s delay model not supported on SPF parasitics by DMP delay calculator",
1590 		    drvr_cell->name());
1591     }
1592   }
1593   drvr_slew_ = drvr_slew;
1594   multi_drvr_slew_factor_ = 1.0F;
1595 }
1596 
1597 void
setCeffAlgorithm(const LibertyLibrary * drvr_library,const LibertyCell * drvr_cell,const Pvt * pvt,GateTableModel * gate_model,const RiseFall * rf,double in_slew,float related_out_cap,double c2,double rpi,double c1)1598 DmpCeffDelayCalc::setCeffAlgorithm(const LibertyLibrary *drvr_library,
1599 				   const LibertyCell *drvr_cell,
1600 				   const Pvt *pvt,
1601 				   GateTableModel *gate_model,
1602 				   const RiseFall *rf,
1603 				   double in_slew,
1604 				   float related_out_cap,
1605 				   double c2,
1606 				   double rpi,
1607 				   double c1)
1608 {
1609   double rd = 0.0;
1610   if (gate_model) {
1611     rd = gateModelRd(drvr_cell, gate_model, rf, in_slew, c2, c1,
1612 		     related_out_cap, pvt, pocv_enabled_);
1613     // Zero Rd means the table is constant and thus independent of load cap.
1614     if (rd < 1e-2
1615 	// Rpi is small compared to Rd, which makes the load capacitive.
1616 	|| rpi < rd * 1e-3
1617 	// c1/Rpi can be ignored.
1618 	|| (c1 == 0.0 || c1 < c2 * 1e-3 || rpi == 0.0))
1619       dmp_alg_ = dmp_cap_;
1620     else if (c2 < c1 * 1e-3)
1621       dmp_alg_ = dmp_zero_c2_;
1622     else
1623       // The full monty.
1624       dmp_alg_ = dmp_pi_;
1625   }
1626   else
1627     dmp_alg_ = dmp_cap_;
1628   dmp_alg_->init(drvr_library, drvr_cell, pvt, gate_model,
1629 		 drvr_rf_, rd, in_slew, related_out_cap, c2, rpi, c1);
1630   debugPrint(debug_, "dmp_ceff", 3,
1631              "    DMP in_slew = %s c2 = %s rpi = %s c1 = %s Rd = %s (%s alg)",
1632              units_->timeUnit()->asString(in_slew),
1633              units_->capacitanceUnit()->asString(c2),
1634              units_->resistanceUnit()->asString(rpi),
1635              units_->capacitanceUnit()->asString(c1),
1636              units_->resistanceUnit()->asString(rd),
1637              dmp_alg_->name());
1638 }
1639 
1640 float
ceff(const LibertyCell * drvr_cell,TimingArc * arc,const Slew & in_slew,float load_cap,Parasitic * drvr_parasitic,float related_out_cap,const Pvt * pvt,const DcalcAnalysisPt * dcalc_ap)1641 DmpCeffDelayCalc::ceff(const LibertyCell *drvr_cell,
1642 		       TimingArc *arc,
1643 		       const Slew &in_slew,
1644 		       float load_cap,
1645 		       Parasitic *drvr_parasitic,
1646 		       float related_out_cap,
1647 		       const Pvt *pvt,
1648 		       const DcalcAnalysisPt *dcalc_ap)
1649 {
1650   ArcDelay gate_delay;
1651   Slew drvr_slew;
1652   gateDelay(drvr_cell, arc, in_slew, load_cap,
1653 	    drvr_parasitic, related_out_cap, pvt, dcalc_ap,
1654 	    gate_delay, drvr_slew);
1655   if (dmp_alg_)
1656     return dmp_alg_->ceff();
1657   else
1658     return load_cap;
1659 }
1660 
1661 void
reportGateDelay(const LibertyCell * drvr_cell,TimingArc * arc,const Slew & in_slew,float load_cap,Parasitic * drvr_parasitic,float related_out_cap,const Pvt * pvt,const DcalcAnalysisPt * dcalc_ap,int digits,string * result)1662 DmpCeffDelayCalc::reportGateDelay(const LibertyCell *drvr_cell,
1663 				  TimingArc *arc,
1664 				  const Slew &in_slew,
1665 				  float load_cap,
1666 				  Parasitic *drvr_parasitic,
1667 				  float related_out_cap,
1668 				  const Pvt *pvt,
1669 				  const DcalcAnalysisPt *dcalc_ap,
1670 				  int digits,
1671 				  string *result)
1672 {
1673   ArcDelay gate_delay;
1674   Slew drvr_slew;
1675   gateDelay(drvr_cell, arc, in_slew, load_cap,
1676 	    drvr_parasitic, related_out_cap, pvt, dcalc_ap,
1677 	    gate_delay, drvr_slew);
1678   GateTimingModel *model = gateModel(arc, dcalc_ap);
1679   float c_eff = 0.0;
1680   if (drvr_parasitic_ && dmp_alg_) {
1681     c_eff = dmp_alg_->ceff();
1682     const LibertyLibrary *drvr_library = drvr_cell->libertyLibrary();
1683     const Units *units = drvr_library->units();
1684     const Unit *cap_unit = units->capacitanceUnit();
1685     const Unit *res_unit = units->resistanceUnit();
1686     float c2, rpi, c1;
1687     parasitics_->piModel(drvr_parasitic_, c2, rpi, c1);
1688     *result += "Pi model C2=";
1689     *result += cap_unit->asString(c2, digits);
1690     *result += " Rpi=";
1691     *result += res_unit->asString(rpi, digits);
1692     *result += " C1=";
1693     *result += cap_unit->asString(c1, digits);
1694     *result += ", Ceff=";
1695     *result += cap_unit->asString(c_eff, digits);
1696     *result += '\n';
1697   }
1698   else
1699     c_eff = load_cap;
1700   if (model) {
1701     float in_slew1 = delayAsFloat(in_slew);
1702     model->reportGateDelay(drvr_cell, pvt, in_slew1, c_eff,
1703 			   related_out_cap, pocv_enabled_,
1704 			   digits, result);
1705   }
1706 }
1707 
1708 static double
gateModelRd(const LibertyCell * cell,GateTableModel * gate_model,const RiseFall * rf,double in_slew,double c2,double c1,float related_out_cap,const Pvt * pvt,bool pocv_enabled)1709 gateModelRd(const LibertyCell *cell,
1710 	    GateTableModel *gate_model,
1711 	    const RiseFall *rf,
1712 	    double in_slew,
1713 	    double c2,
1714 	    double c1,
1715 	    float related_out_cap,
1716 	    const Pvt *pvt,
1717 	    bool pocv_enabled)
1718 {
1719   float cap1 = c1 + c2;
1720   float cap2 = cap1 + 1e-15;
1721   ArcDelay d1, d2;
1722   Slew s1, s2;
1723   gate_model->gateDelay(cell, pvt, in_slew, cap1, related_out_cap, pocv_enabled,
1724 			d1, s1);
1725   gate_model->gateDelay(cell, pvt, in_slew, cap2, related_out_cap, pocv_enabled,
1726 			d2, s2);
1727   double vth = cell->libertyLibrary()->outputThreshold(rf);
1728   float rd = -log(vth) * abs(delayAsFloat(d1) - delayAsFloat(d2)) / (cap2 - cap1);
1729   return rd;
1730 }
1731 
1732 void
gateDelaySlew(double & delay,double & slew)1733 DmpCeffDelayCalc::gateDelaySlew(double &delay,
1734 				double &slew)
1735 {
1736   dmp_alg_->gateDelaySlew(delay, slew);
1737 }
1738 
1739 void
loadDelaySlew(const Pin * load_pin,double elmore,ArcDelay & delay,Slew & slew)1740 DmpCeffDelayCalc::loadDelaySlew(const Pin *load_pin,
1741 				double elmore,
1742 				ArcDelay &delay,
1743 				Slew &slew)
1744 {
1745   if (dmp_alg_)
1746     dmp_alg_->loadDelaySlew(load_pin, elmore, delay, slew);
1747 }
1748 
1749 // Notify algorithm components.
1750 void
copyState(const StaState * sta)1751 DmpCeffDelayCalc::copyState(const StaState *sta)
1752 {
1753   StaState::copyState(sta);
1754   dmp_cap_->copyState(sta);
1755   dmp_pi_->copyState(sta);
1756   dmp_zero_c2_->copyState(sta);
1757 }
1758 
DmpError(const char * what)1759 DmpError::DmpError(const char *what) :
1760   what_(what)
1761 {
1762   //printf("DmpError %s\n", what);
1763 }
1764 
1765 } // namespace
1766